mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 17:22:26 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			4151 lines
		
	
	
	
		
			82 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			4151 lines
		
	
	
	
		
			82 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| // Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT.
 | |
| 
 | |
| //go:build !appengine && !noasm && gc && !noasm
 | |
| 
 | |
| // func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 | |
| // Requires: CMOV
 | |
| TEXT ·sequenceDecs_decode_amd64(SB), $8-32
 | |
| 	MOVQ    br+8(FP), CX
 | |
| 	MOVQ    24(CX), DX
 | |
| 	MOVBQZX 40(CX), BX
 | |
| 	MOVQ    (CX), AX
 | |
| 	MOVQ    32(CX), SI
 | |
| 	ADDQ    SI, AX
 | |
| 	MOVQ    AX, (SP)
 | |
| 	MOVQ    ctx+16(FP), AX
 | |
| 	MOVQ    72(AX), DI
 | |
| 	MOVQ    80(AX), R8
 | |
| 	MOVQ    88(AX), R9
 | |
| 	MOVQ    104(AX), R10
 | |
| 	MOVQ    s+0(FP), AX
 | |
| 	MOVQ    144(AX), R11
 | |
| 	MOVQ    152(AX), R12
 | |
| 	MOVQ    160(AX), R13
 | |
| 
 | |
| sequenceDecs_decode_amd64_main_loop:
 | |
| 	MOVQ (SP), R14
 | |
| 
 | |
| 	// Fill bitreader to have enough for the offset and match length.
 | |
| 	CMPQ SI, $0x08
 | |
| 	JL   sequenceDecs_decode_amd64_fill_byte_by_byte
 | |
| 	MOVQ BX, AX
 | |
| 	SHRQ $0x03, AX
 | |
| 	SUBQ AX, R14
 | |
| 	MOVQ (R14), DX
 | |
| 	SUBQ AX, SI
 | |
| 	ANDQ $0x07, BX
 | |
| 	JMP  sequenceDecs_decode_amd64_fill_end
 | |
| 
 | |
| sequenceDecs_decode_amd64_fill_byte_by_byte:
 | |
| 	CMPQ    SI, $0x00
 | |
| 	JLE     sequenceDecs_decode_amd64_fill_check_overread
 | |
| 	CMPQ    BX, $0x07
 | |
| 	JLE     sequenceDecs_decode_amd64_fill_end
 | |
| 	SHLQ    $0x08, DX
 | |
| 	SUBQ    $0x01, R14
 | |
| 	SUBQ    $0x01, SI
 | |
| 	SUBQ    $0x08, BX
 | |
| 	MOVBQZX (R14), AX
 | |
| 	ORQ     AX, DX
 | |
| 	JMP     sequenceDecs_decode_amd64_fill_byte_by_byte
 | |
| 
 | |
| sequenceDecs_decode_amd64_fill_check_overread:
 | |
| 	CMPQ BX, $0x40
 | |
| 	JA   error_overread
 | |
| 
 | |
| sequenceDecs_decode_amd64_fill_end:
 | |
| 	// Update offset
 | |
| 	MOVQ  R9, AX
 | |
| 	MOVQ  BX, CX
 | |
| 	MOVQ  DX, R15
 | |
| 	SHLQ  CL, R15
 | |
| 	MOVB  AH, CL
 | |
| 	SHRQ  $0x20, AX
 | |
| 	TESTQ CX, CX
 | |
| 	JZ    sequenceDecs_decode_amd64_of_update_zero
 | |
| 	ADDQ  CX, BX
 | |
| 	CMPQ  BX, $0x40
 | |
| 	JA    sequenceDecs_decode_amd64_of_update_zero
 | |
| 	CMPQ  CX, $0x40
 | |
| 	JAE   sequenceDecs_decode_amd64_of_update_zero
 | |
| 	NEGQ  CX
 | |
| 	SHRQ  CL, R15
 | |
| 	ADDQ  R15, AX
 | |
| 
 | |
| sequenceDecs_decode_amd64_of_update_zero:
 | |
| 	MOVQ AX, 16(R10)
 | |
| 
 | |
| 	// Update match length
 | |
| 	MOVQ  R8, AX
 | |
| 	MOVQ  BX, CX
 | |
| 	MOVQ  DX, R15
 | |
| 	SHLQ  CL, R15
 | |
| 	MOVB  AH, CL
 | |
| 	SHRQ  $0x20, AX
 | |
| 	TESTQ CX, CX
 | |
| 	JZ    sequenceDecs_decode_amd64_ml_update_zero
 | |
| 	ADDQ  CX, BX
 | |
| 	CMPQ  BX, $0x40
 | |
| 	JA    sequenceDecs_decode_amd64_ml_update_zero
 | |
| 	CMPQ  CX, $0x40
 | |
| 	JAE   sequenceDecs_decode_amd64_ml_update_zero
 | |
| 	NEGQ  CX
 | |
| 	SHRQ  CL, R15
 | |
| 	ADDQ  R15, AX
 | |
| 
 | |
| sequenceDecs_decode_amd64_ml_update_zero:
 | |
| 	MOVQ AX, 8(R10)
 | |
| 
 | |
| 	// Fill bitreader to have enough for the remaining
 | |
| 	CMPQ SI, $0x08
 | |
| 	JL   sequenceDecs_decode_amd64_fill_2_byte_by_byte
 | |
| 	MOVQ BX, AX
 | |
| 	SHRQ $0x03, AX
 | |
| 	SUBQ AX, R14
 | |
| 	MOVQ (R14), DX
 | |
| 	SUBQ AX, SI
 | |
| 	ANDQ $0x07, BX
 | |
| 	JMP  sequenceDecs_decode_amd64_fill_2_end
 | |
| 
 | |
| sequenceDecs_decode_amd64_fill_2_byte_by_byte:
 | |
| 	CMPQ    SI, $0x00
 | |
| 	JLE     sequenceDecs_decode_amd64_fill_2_check_overread
 | |
| 	CMPQ    BX, $0x07
 | |
| 	JLE     sequenceDecs_decode_amd64_fill_2_end
 | |
| 	SHLQ    $0x08, DX
 | |
| 	SUBQ    $0x01, R14
 | |
| 	SUBQ    $0x01, SI
 | |
| 	SUBQ    $0x08, BX
 | |
| 	MOVBQZX (R14), AX
 | |
| 	ORQ     AX, DX
 | |
| 	JMP     sequenceDecs_decode_amd64_fill_2_byte_by_byte
 | |
| 
 | |
| sequenceDecs_decode_amd64_fill_2_check_overread:
 | |
| 	CMPQ BX, $0x40
 | |
| 	JA   error_overread
 | |
| 
 | |
| sequenceDecs_decode_amd64_fill_2_end:
 | |
| 	// Update literal length
 | |
| 	MOVQ  DI, AX
 | |
| 	MOVQ  BX, CX
 | |
| 	MOVQ  DX, R15
 | |
| 	SHLQ  CL, R15
 | |
| 	MOVB  AH, CL
 | |
| 	SHRQ  $0x20, AX
 | |
| 	TESTQ CX, CX
 | |
| 	JZ    sequenceDecs_decode_amd64_ll_update_zero
 | |
| 	ADDQ  CX, BX
 | |
| 	CMPQ  BX, $0x40
 | |
| 	JA    sequenceDecs_decode_amd64_ll_update_zero
 | |
| 	CMPQ  CX, $0x40
 | |
| 	JAE   sequenceDecs_decode_amd64_ll_update_zero
 | |
| 	NEGQ  CX
 | |
| 	SHRQ  CL, R15
 | |
| 	ADDQ  R15, AX
 | |
| 
 | |
| sequenceDecs_decode_amd64_ll_update_zero:
 | |
| 	MOVQ AX, (R10)
 | |
| 
 | |
| 	// Fill bitreader for state updates
 | |
| 	MOVQ    R14, (SP)
 | |
| 	MOVQ    R9, AX
 | |
| 	SHRQ    $0x08, AX
 | |
| 	MOVBQZX AL, AX
 | |
| 	MOVQ    ctx+16(FP), CX
 | |
| 	CMPQ    96(CX), $0x00
 | |
| 	JZ      sequenceDecs_decode_amd64_skip_update
 | |
| 
 | |
| 	// Update Literal Length State
 | |
| 	MOVBQZX DI, R14
 | |
| 	SHRL    $0x10, DI
 | |
| 	LEAQ    (BX)(R14*1), CX
 | |
| 	MOVQ    DX, R15
 | |
| 	MOVQ    CX, BX
 | |
| 	ROLQ    CL, R15
 | |
| 	MOVL    $0x00000001, BP
 | |
| 	MOVB    R14, CL
 | |
| 	SHLL    CL, BP
 | |
| 	DECL    BP
 | |
| 	ANDQ    BP, R15
 | |
| 	ADDQ    R15, DI
 | |
| 
 | |
| 	// Load ctx.llTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ (CX), CX
 | |
| 	MOVQ (CX)(DI*8), DI
 | |
| 
 | |
| 	// Update Match Length State
 | |
| 	MOVBQZX R8, R14
 | |
| 	SHRL    $0x10, R8
 | |
| 	LEAQ    (BX)(R14*1), CX
 | |
| 	MOVQ    DX, R15
 | |
| 	MOVQ    CX, BX
 | |
| 	ROLQ    CL, R15
 | |
| 	MOVL    $0x00000001, BP
 | |
| 	MOVB    R14, CL
 | |
| 	SHLL    CL, BP
 | |
| 	DECL    BP
 | |
| 	ANDQ    BP, R15
 | |
| 	ADDQ    R15, R8
 | |
| 
 | |
| 	// Load ctx.mlTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ 24(CX), CX
 | |
| 	MOVQ (CX)(R8*8), R8
 | |
| 
 | |
| 	// Update Offset State
 | |
| 	MOVBQZX R9, R14
 | |
| 	SHRL    $0x10, R9
 | |
| 	LEAQ    (BX)(R14*1), CX
 | |
| 	MOVQ    DX, R15
 | |
| 	MOVQ    CX, BX
 | |
| 	ROLQ    CL, R15
 | |
| 	MOVL    $0x00000001, BP
 | |
| 	MOVB    R14, CL
 | |
| 	SHLL    CL, BP
 | |
| 	DECL    BP
 | |
| 	ANDQ    BP, R15
 | |
| 	ADDQ    R15, R9
 | |
| 
 | |
| 	// Load ctx.ofTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ 48(CX), CX
 | |
| 	MOVQ (CX)(R9*8), R9
 | |
| 
 | |
| sequenceDecs_decode_amd64_skip_update:
 | |
| 	// Adjust offset
 | |
| 	MOVQ 16(R10), CX
 | |
| 	CMPQ AX, $0x01
 | |
| 	JBE  sequenceDecs_decode_amd64_adjust_offsetB_1_or_0
 | |
| 	MOVQ R12, R13
 | |
| 	MOVQ R11, R12
 | |
| 	MOVQ CX, R11
 | |
| 	JMP  sequenceDecs_decode_amd64_after_adjust
 | |
| 
 | |
| sequenceDecs_decode_amd64_adjust_offsetB_1_or_0:
 | |
| 	CMPQ (R10), $0x00000000
 | |
| 	JNE  sequenceDecs_decode_amd64_adjust_offset_maybezero
 | |
| 	INCQ CX
 | |
| 	JMP  sequenceDecs_decode_amd64_adjust_offset_nonzero
 | |
| 
 | |
| sequenceDecs_decode_amd64_adjust_offset_maybezero:
 | |
| 	TESTQ CX, CX
 | |
| 	JNZ   sequenceDecs_decode_amd64_adjust_offset_nonzero
 | |
| 	MOVQ  R11, CX
 | |
| 	JMP   sequenceDecs_decode_amd64_after_adjust
 | |
| 
 | |
| sequenceDecs_decode_amd64_adjust_offset_nonzero:
 | |
| 	CMPQ CX, $0x01
 | |
| 	JB   sequenceDecs_decode_amd64_adjust_zero
 | |
| 	JEQ  sequenceDecs_decode_amd64_adjust_one
 | |
| 	CMPQ CX, $0x02
 | |
| 	JA   sequenceDecs_decode_amd64_adjust_three
 | |
| 	JMP  sequenceDecs_decode_amd64_adjust_two
 | |
| 
 | |
| sequenceDecs_decode_amd64_adjust_zero:
 | |
| 	MOVQ R11, AX
 | |
| 	JMP  sequenceDecs_decode_amd64_adjust_test_temp_valid
 | |
| 
 | |
| sequenceDecs_decode_amd64_adjust_one:
 | |
| 	MOVQ R12, AX
 | |
| 	JMP  sequenceDecs_decode_amd64_adjust_test_temp_valid
 | |
| 
 | |
| sequenceDecs_decode_amd64_adjust_two:
 | |
| 	MOVQ R13, AX
 | |
| 	JMP  sequenceDecs_decode_amd64_adjust_test_temp_valid
 | |
| 
 | |
| sequenceDecs_decode_amd64_adjust_three:
 | |
| 	LEAQ -1(R11), AX
 | |
| 
 | |
| sequenceDecs_decode_amd64_adjust_test_temp_valid:
 | |
| 	TESTQ AX, AX
 | |
| 	JNZ   sequenceDecs_decode_amd64_adjust_temp_valid
 | |
| 	MOVQ  $0x00000001, AX
 | |
| 
 | |
| sequenceDecs_decode_amd64_adjust_temp_valid:
 | |
| 	CMPQ    CX, $0x01
 | |
| 	CMOVQNE R12, R13
 | |
| 	MOVQ    R11, R12
 | |
| 	MOVQ    AX, R11
 | |
| 	MOVQ    AX, CX
 | |
| 
 | |
| sequenceDecs_decode_amd64_after_adjust:
 | |
| 	MOVQ CX, 16(R10)
 | |
| 
 | |
| 	// Check values
 | |
| 	MOVQ  8(R10), AX
 | |
| 	MOVQ  (R10), R14
 | |
| 	LEAQ  (AX)(R14*1), R15
 | |
| 	MOVQ  s+0(FP), BP
 | |
| 	ADDQ  R15, 256(BP)
 | |
| 	MOVQ  ctx+16(FP), R15
 | |
| 	SUBQ  R14, 128(R15)
 | |
| 	JS    error_not_enough_literals
 | |
| 	CMPQ  AX, $0x00020002
 | |
| 	JA    sequenceDecs_decode_amd64_error_match_len_too_big
 | |
| 	TESTQ CX, CX
 | |
| 	JNZ   sequenceDecs_decode_amd64_match_len_ofs_ok
 | |
| 	TESTQ AX, AX
 | |
| 	JNZ   sequenceDecs_decode_amd64_error_match_len_ofs_mismatch
 | |
| 
 | |
| sequenceDecs_decode_amd64_match_len_ofs_ok:
 | |
| 	ADDQ $0x18, R10
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	DECQ 96(AX)
 | |
| 	JNS  sequenceDecs_decode_amd64_main_loop
 | |
| 	MOVQ s+0(FP), AX
 | |
| 	MOVQ R11, 144(AX)
 | |
| 	MOVQ R12, 152(AX)
 | |
| 	MOVQ R13, 160(AX)
 | |
| 	MOVQ br+8(FP), AX
 | |
| 	MOVQ DX, 24(AX)
 | |
| 	MOVB BL, 40(AX)
 | |
| 	MOVQ SI, 32(AX)
 | |
| 
 | |
| 	// Return success
 | |
| 	MOVQ $0x00000000, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match length error
 | |
| sequenceDecs_decode_amd64_error_match_len_ofs_mismatch:
 | |
| 	MOVQ $0x00000001, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match too long error
 | |
| sequenceDecs_decode_amd64_error_match_len_too_big:
 | |
| 	MOVQ $0x00000002, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match offset too long error
 | |
| 	MOVQ $0x00000003, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with not enough literals error
 | |
| error_not_enough_literals:
 | |
| 	MOVQ $0x00000004, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with overread error
 | |
| error_overread:
 | |
| 	MOVQ $0x00000006, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
//
// Same algorithm and register layout as sequenceDecs_decode_amd64, except the
// second bitreader refill between the match-length and literal-length reads is
// omitted — NOTE(review): presumably valid only when all three code lengths
// fit in 56 bits per iteration (hence "_56"); confirm against the Go caller's
// selection logic.
//   DX = bit buffer, BX = bit position, SI = bytes remaining, (SP) = read ptr
//   DI/R8/R9 = ll/ml/of FSE states, R10 = output ptr, R11-R13 = offset history
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
	MOVQ    br+8(FP), CX
	MOVQ    24(CX), DX
	MOVBQZX 40(CX), BX
	MOVQ    (CX), AX
	MOVQ    32(CX), SI
	ADDQ    SI, AX
	MOVQ    AX, (SP)
	MOVQ    ctx+16(FP), AX
	MOVQ    72(AX), DI
	MOVQ    80(AX), R8
	MOVQ    88(AX), R9
	MOVQ    104(AX), R10
	MOVQ    s+0(FP), AX
	MOVQ    144(AX), R11
	MOVQ    152(AX), R12
	MOVQ    160(AX), R13

sequenceDecs_decode_56_amd64_main_loop:
	MOVQ (SP), R14

	// Fill bitreader to have enough for the offset and match length.
	CMPQ SI, $0x08
	JL   sequenceDecs_decode_56_amd64_fill_byte_by_byte
	MOVQ BX, AX
	SHRQ $0x03, AX                  // whole bytes consumed so far
	SUBQ AX, R14
	MOVQ (R14), DX                  // reload a full 8-byte window
	SUBQ AX, SI
	ANDQ $0x07, BX
	JMP  sequenceDecs_decode_56_amd64_fill_end

sequenceDecs_decode_56_amd64_fill_byte_by_byte:
	CMPQ    SI, $0x00
	JLE     sequenceDecs_decode_56_amd64_fill_check_overread
	CMPQ    BX, $0x07
	JLE     sequenceDecs_decode_56_amd64_fill_end
	SHLQ    $0x08, DX
	SUBQ    $0x01, R14
	SUBQ    $0x01, SI
	SUBQ    $0x08, BX
	MOVBQZX (R14), AX
	ORQ     AX, DX
	JMP     sequenceDecs_decode_56_amd64_fill_byte_by_byte

sequenceDecs_decode_56_amd64_fill_check_overread:
	CMPQ BX, $0x40                  // consumed more than 64 bits -> input overrun
	JA   error_overread

sequenceDecs_decode_56_amd64_fill_end:
	// Update offset
	MOVQ  R9, AX
	MOVQ  BX, CX
	MOVQ  DX, R15
	SHLQ  CL, R15                   // drop already-consumed bits
	MOVB  AH, CL                    // CL = bit count byte of the offset state
	SHRQ  $0x20, AX                 // AX = state's 32-bit baseline value
	TESTQ CX, CX
	JZ    sequenceDecs_decode_56_amd64_of_update_zero
	ADDQ  CX, BX
	CMPQ  BX, $0x40
	JA    sequenceDecs_decode_56_amd64_of_update_zero
	CMPQ  CX, $0x40
	JAE   sequenceDecs_decode_56_amd64_of_update_zero
	NEGQ  CX
	SHRQ  CL, R15
	ADDQ  R15, AX

sequenceDecs_decode_56_amd64_of_update_zero:
	MOVQ AX, 16(R10)

	// Update match length
	MOVQ  R8, AX
	MOVQ  BX, CX
	MOVQ  DX, R15
	SHLQ  CL, R15
	MOVB  AH, CL
	SHRQ  $0x20, AX
	TESTQ CX, CX
	JZ    sequenceDecs_decode_56_amd64_ml_update_zero
	ADDQ  CX, BX
	CMPQ  BX, $0x40
	JA    sequenceDecs_decode_56_amd64_ml_update_zero
	CMPQ  CX, $0x40
	JAE   sequenceDecs_decode_56_amd64_ml_update_zero
	NEGQ  CX
	SHRQ  CL, R15
	ADDQ  R15, AX

sequenceDecs_decode_56_amd64_ml_update_zero:
	MOVQ AX, 8(R10)

	// Update literal length (no second refill in the 56-bit variant)
	MOVQ  DI, AX
	MOVQ  BX, CX
	MOVQ  DX, R15
	SHLQ  CL, R15
	MOVB  AH, CL
	SHRQ  $0x20, AX
	TESTQ CX, CX
	JZ    sequenceDecs_decode_56_amd64_ll_update_zero
	ADDQ  CX, BX
	CMPQ  BX, $0x40
	JA    sequenceDecs_decode_56_amd64_ll_update_zero
	CMPQ  CX, $0x40
	JAE   sequenceDecs_decode_56_amd64_ll_update_zero
	NEGQ  CX
	SHRQ  CL, R15
	ADDQ  R15, AX

sequenceDecs_decode_56_amd64_ll_update_zero:
	MOVQ AX, (R10)

	// Fill bitreader for state updates
	MOVQ    R14, (SP)
	MOVQ    R9, AX
	SHRQ    $0x08, AX
	MOVBQZX AL, AX                  // AX = offset code's bit count (offsetB), reused below
	MOVQ    ctx+16(FP), CX
	CMPQ    96(CX), $0x00
	JZ      sequenceDecs_decode_56_amd64_skip_update

	// Update Literal Length State
	MOVBQZX DI, R14                 // R14 = nbBits for the LL state transition
	SHRL    $0x10, DI
	LEAQ    (BX)(R14*1), CX
	MOVQ    DX, R15
	MOVQ    CX, BX
	ROLQ    CL, R15
	MOVL    $0x00000001, BP
	MOVB    R14, CL
	SHLL    CL, BP
	DECL    BP                      // BP = (1<<nbBits)-1 mask
	ANDQ    BP, R15
	ADDQ    R15, DI

	// Load ctx.llTable
	MOVQ ctx+16(FP), CX
	MOVQ (CX), CX
	MOVQ (CX)(DI*8), DI

	// Update Match Length State
	MOVBQZX R8, R14
	SHRL    $0x10, R8
	LEAQ    (BX)(R14*1), CX
	MOVQ    DX, R15
	MOVQ    CX, BX
	ROLQ    CL, R15
	MOVL    $0x00000001, BP
	MOVB    R14, CL
	SHLL    CL, BP
	DECL    BP
	ANDQ    BP, R15
	ADDQ    R15, R8

	// Load ctx.mlTable
	MOVQ ctx+16(FP), CX
	MOVQ 24(CX), CX
	MOVQ (CX)(R8*8), R8

	// Update Offset State
	MOVBQZX R9, R14
	SHRL    $0x10, R9
	LEAQ    (BX)(R14*1), CX
	MOVQ    DX, R15
	MOVQ    CX, BX
	ROLQ    CL, R15
	MOVL    $0x00000001, BP
	MOVB    R14, CL
	SHLL    CL, BP
	DECL    BP
	ANDQ    BP, R15
	ADDQ    R15, R9

	// Load ctx.ofTable
	MOVQ ctx+16(FP), CX
	MOVQ 48(CX), CX
	MOVQ (CX)(R9*8), R9

sequenceDecs_decode_56_amd64_skip_update:
	// Adjust offset (repeat-offset handling; AX = offsetB from above)
	MOVQ 16(R10), CX
	CMPQ AX, $0x01
	JBE  sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0
	MOVQ R12, R13                   // offsetB > 1: new literal offset, shift history
	MOVQ R11, R12
	MOVQ CX, R11
	JMP  sequenceDecs_decode_56_amd64_after_adjust

sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0:
	CMPQ (R10), $0x00000000         // literal length == 0 shifts the repeat index by one
	JNE  sequenceDecs_decode_56_amd64_adjust_offset_maybezero
	INCQ CX
	JMP  sequenceDecs_decode_56_amd64_adjust_offset_nonzero

sequenceDecs_decode_56_amd64_adjust_offset_maybezero:
	TESTQ CX, CX
	JNZ   sequenceDecs_decode_56_amd64_adjust_offset_nonzero
	MOVQ  R11, CX                   // repeat index 0 -> most recent offset, history unchanged
	JMP   sequenceDecs_decode_56_amd64_after_adjust

sequenceDecs_decode_56_amd64_adjust_offset_nonzero:
	CMPQ CX, $0x01
	JB   sequenceDecs_decode_56_amd64_adjust_zero
	JEQ  sequenceDecs_decode_56_amd64_adjust_one
	CMPQ CX, $0x02
	JA   sequenceDecs_decode_56_amd64_adjust_three
	JMP  sequenceDecs_decode_56_amd64_adjust_two

sequenceDecs_decode_56_amd64_adjust_zero:
	MOVQ R11, AX
	JMP  sequenceDecs_decode_56_amd64_adjust_test_temp_valid

sequenceDecs_decode_56_amd64_adjust_one:
	MOVQ R12, AX
	JMP  sequenceDecs_decode_56_amd64_adjust_test_temp_valid

sequenceDecs_decode_56_amd64_adjust_two:
	MOVQ R13, AX
	JMP  sequenceDecs_decode_56_amd64_adjust_test_temp_valid

sequenceDecs_decode_56_amd64_adjust_three:
	LEAQ -1(R11), AX                // repeat index 3 -> most recent offset minus one

sequenceDecs_decode_56_amd64_adjust_test_temp_valid:
	TESTQ AX, AX
	JNZ   sequenceDecs_decode_56_amd64_adjust_temp_valid
	MOVQ  $0x00000001, AX           // clamp selected offset to at least 1

sequenceDecs_decode_56_amd64_adjust_temp_valid:
	CMPQ    CX, $0x01
	CMOVQNE R12, R13                // slot 3 only shifts when index != 1
	MOVQ    R11, R12
	MOVQ    AX, R11
	MOVQ    AX, CX

sequenceDecs_decode_56_amd64_after_adjust:
	MOVQ CX, 16(R10)

	// Check values
	MOVQ  8(R10), AX
	MOVQ  (R10), R14
	LEAQ  (AX)(R14*1), R15
	MOVQ  s+0(FP), BP
	ADDQ  R15, 256(BP)              // accumulate ll+ml into s+256
	MOVQ  ctx+16(FP), R15
	SUBQ  R14, 128(R15)             // deduct literals; sign flip means we ran out
	JS    error_not_enough_literals
	CMPQ  AX, $0x00020002
	JA    sequenceDecs_decode_56_amd64_error_match_len_too_big
	TESTQ CX, CX
	JNZ   sequenceDecs_decode_56_amd64_match_len_ofs_ok
	TESTQ AX, AX
	JNZ   sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch

sequenceDecs_decode_56_amd64_match_len_ofs_ok:
	ADDQ $0x18, R10                 // next 24-byte output slot
	MOVQ ctx+16(FP), AX
	DECQ 96(AX)
	JNS  sequenceDecs_decode_56_amd64_main_loop
	MOVQ s+0(FP), AX
	MOVQ R11, 144(AX)
	MOVQ R12, 152(AX)
	MOVQ R13, 160(AX)
	MOVQ br+8(FP), AX
	MOVQ DX, 24(AX)
	MOVB BL, 40(AX)
	MOVQ SI, 32(AX)

	// Return success
	MOVQ $0x00000000, ret+24(FP)
	RET

	// Return with match length error
sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch:
	MOVQ $0x00000001, ret+24(FP)
	RET

	// Return with match too long error
sequenceDecs_decode_56_amd64_error_match_len_too_big:
	MOVQ $0x00000002, ret+24(FP)
	RET

	// Return with match offset too long error
	// NOTE(review): no label and preceded by RET — unreachable in this
	// function; emitted unconditionally by the generator.
	MOVQ $0x00000003, ret+24(FP)
	RET

	// Return with not enough literals error
error_not_enough_literals:
	MOVQ $0x00000004, ret+24(FP)
	RET

	// Return with overread error
error_overread:
	MOVQ $0x00000006, ret+24(FP)
	RET
 | |
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
//
// BMI2 variant of sequenceDecs_decode_amd64: bit extraction uses
// BEXTR/BZHI/SHRX instead of shift-and-mask sequences. Register layout differs
// from the plain variant (inferred from the load/store offsets below —
// NOTE(review): confirm against the Go-side struct layouts):
//   AX          64-bit bit buffer (written back to br+24 on exit)
//   DX          bit position consumed within the buffer (br+40)
//   BX          input bytes remaining (br+32)
//   (SP)        current input read pointer
//   SI, DI, R8  literal-length / match-length / offset FSE states (ctx+72/80/88)
//   R9          output sequence pointer (ctx+104); advanced $0x18 per sequence
//   R10-R12     offset history slots (s+144/152/160)
//   R13         scratch: offsetB, then ml for the value checks
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
	MOVQ    br+8(FP), BX
	MOVQ    24(BX), AX
	MOVBQZX 40(BX), DX
	MOVQ    (BX), CX
	MOVQ    32(BX), BX
	ADDQ    BX, CX
	MOVQ    CX, (SP)
	MOVQ    ctx+16(FP), CX
	MOVQ    72(CX), SI
	MOVQ    80(CX), DI
	MOVQ    88(CX), R8
	MOVQ    104(CX), R9
	MOVQ    s+0(FP), CX
	MOVQ    144(CX), R10
	MOVQ    152(CX), R11
	MOVQ    160(CX), R12

sequenceDecs_decode_bmi2_main_loop:
	MOVQ (SP), R13

	// Fill bitreader to have enough for the offset and match length.
	CMPQ BX, $0x08
	JL   sequenceDecs_decode_bmi2_fill_byte_by_byte
	MOVQ DX, CX
	SHRQ $0x03, CX                  // whole bytes consumed so far
	SUBQ CX, R13
	MOVQ (R13), AX                  // reload a full 8-byte window
	SUBQ CX, BX
	ANDQ $0x07, DX
	JMP  sequenceDecs_decode_bmi2_fill_end

sequenceDecs_decode_bmi2_fill_byte_by_byte:
	CMPQ    BX, $0x00
	JLE     sequenceDecs_decode_bmi2_fill_check_overread
	CMPQ    DX, $0x07
	JLE     sequenceDecs_decode_bmi2_fill_end
	SHLQ    $0x08, AX
	SUBQ    $0x01, R13
	SUBQ    $0x01, BX
	SUBQ    $0x08, DX
	MOVBQZX (R13), CX
	ORQ     CX, AX
	JMP     sequenceDecs_decode_bmi2_fill_byte_by_byte

sequenceDecs_decode_bmi2_fill_check_overread:
	CMPQ DX, $0x40                  // consumed more than 64 bits -> input overrun
	JA   error_overread

sequenceDecs_decode_bmi2_fill_end:
	// Update offset
	MOVQ   $0x00000808, CX          // BEXTR control: extract 8 bits starting at bit 8
	BEXTRQ CX, R8, R14              // R14 = offset state's bit count
	MOVQ   AX, R15
	LEAQ   (DX)(R14*1), CX
	ROLQ   CL, R15
	BZHIQ  R14, R15, R15            // keep low R14 bits of the rotated buffer
	MOVQ   CX, DX
	MOVQ   R8, CX
	SHRQ   $0x20, CX                // CX = state's 32-bit baseline value
	ADDQ   R15, CX
	MOVQ   CX, 16(R9)

	// Update match length
	MOVQ   $0x00000808, CX
	BEXTRQ CX, DI, R14
	MOVQ   AX, R15
	LEAQ   (DX)(R14*1), CX
	ROLQ   CL, R15
	BZHIQ  R14, R15, R15
	MOVQ   CX, DX
	MOVQ   DI, CX
	SHRQ   $0x20, CX
	ADDQ   R15, CX
	MOVQ   CX, 8(R9)

	// Fill bitreader to have enough for the remaining
	CMPQ BX, $0x08
	JL   sequenceDecs_decode_bmi2_fill_2_byte_by_byte
	MOVQ DX, CX
	SHRQ $0x03, CX
	SUBQ CX, R13
	MOVQ (R13), AX
	SUBQ CX, BX
	ANDQ $0x07, DX
	JMP  sequenceDecs_decode_bmi2_fill_2_end

sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
	CMPQ    BX, $0x00
	JLE     sequenceDecs_decode_bmi2_fill_2_check_overread
	CMPQ    DX, $0x07
	JLE     sequenceDecs_decode_bmi2_fill_2_end
	SHLQ    $0x08, AX
	SUBQ    $0x01, R13
	SUBQ    $0x01, BX
	SUBQ    $0x08, DX
	MOVBQZX (R13), CX
	ORQ     CX, AX
	JMP     sequenceDecs_decode_bmi2_fill_2_byte_by_byte

sequenceDecs_decode_bmi2_fill_2_check_overread:
	CMPQ DX, $0x40
	JA   error_overread

sequenceDecs_decode_bmi2_fill_2_end:
	// Update literal length
	MOVQ   $0x00000808, CX
	BEXTRQ CX, SI, R14
	MOVQ   AX, R15
	LEAQ   (DX)(R14*1), CX
	ROLQ   CL, R15
	BZHIQ  R14, R15, R15
	MOVQ   CX, DX
	MOVQ   SI, CX
	SHRQ   $0x20, CX
	ADDQ   R15, CX
	MOVQ   CX, (R9)

	// Fill bitreader for state updates
	MOVQ    R13, (SP)
	MOVQ    $0x00000808, CX
	BEXTRQ  CX, R8, R13             // R13 = offsetB, consumed by the adjust step
	MOVQ    ctx+16(FP), CX
	CMPQ    96(CX), $0x00
	JZ      sequenceDecs_decode_bmi2_skip_update
	LEAQ    (SI)(DI*1), R14
	ADDQ    R8, R14
	MOVBQZX R14, R14                // total nbBits of all three state transitions
	LEAQ    (DX)(R14*1), CX
	MOVQ    AX, R15
	MOVQ    CX, DX
	ROLQ    CL, R15
	BZHIQ   R14, R15, R15           // R15 = all three states' bits, packed low-to-high

	// Update Offset State
	BZHIQ R8, R15, CX               // CX = low R8 bits (offset state's share)
	SHRXQ R8, R15, R15              // shift them out for the next state
	SHRL  $0x10, R8
	ADDQ  CX, R8

	// Load ctx.ofTable
	MOVQ ctx+16(FP), CX
	MOVQ 48(CX), CX
	MOVQ (CX)(R8*8), R8

	// Update Match Length State
	BZHIQ DI, R15, CX
	SHRXQ DI, R15, R15
	SHRL  $0x10, DI
	ADDQ  CX, DI

	// Load ctx.mlTable
	MOVQ ctx+16(FP), CX
	MOVQ 24(CX), CX
	MOVQ (CX)(DI*8), DI

	// Update Literal Length State
	BZHIQ SI, R15, CX
	SHRL  $0x10, SI
	ADDQ  CX, SI

	// Load ctx.llTable
	MOVQ ctx+16(FP), CX
	MOVQ (CX), CX
	MOVQ (CX)(SI*8), SI

sequenceDecs_decode_bmi2_skip_update:
	// Adjust offset (repeat-offset handling; R13 = offsetB from above)
	MOVQ 16(R9), CX
	CMPQ R13, $0x01
	JBE  sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0
	MOVQ R11, R12                   // offsetB > 1: new literal offset, shift history
	MOVQ R10, R11
	MOVQ CX, R10
	JMP  sequenceDecs_decode_bmi2_after_adjust

sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0:
	CMPQ (R9), $0x00000000          // literal length == 0 shifts the repeat index by one
	JNE  sequenceDecs_decode_bmi2_adjust_offset_maybezero
	INCQ CX
	JMP  sequenceDecs_decode_bmi2_adjust_offset_nonzero

sequenceDecs_decode_bmi2_adjust_offset_maybezero:
	TESTQ CX, CX
	JNZ   sequenceDecs_decode_bmi2_adjust_offset_nonzero
	MOVQ  R10, CX                   // repeat index 0 -> most recent offset, history unchanged
	JMP   sequenceDecs_decode_bmi2_after_adjust

sequenceDecs_decode_bmi2_adjust_offset_nonzero:
	CMPQ CX, $0x01
	JB   sequenceDecs_decode_bmi2_adjust_zero
	JEQ  sequenceDecs_decode_bmi2_adjust_one
	CMPQ CX, $0x02
	JA   sequenceDecs_decode_bmi2_adjust_three
	JMP  sequenceDecs_decode_bmi2_adjust_two

sequenceDecs_decode_bmi2_adjust_zero:
	MOVQ R10, R13
	JMP  sequenceDecs_decode_bmi2_adjust_test_temp_valid

sequenceDecs_decode_bmi2_adjust_one:
	MOVQ R11, R13
	JMP  sequenceDecs_decode_bmi2_adjust_test_temp_valid

sequenceDecs_decode_bmi2_adjust_two:
	MOVQ R12, R13
	JMP  sequenceDecs_decode_bmi2_adjust_test_temp_valid

sequenceDecs_decode_bmi2_adjust_three:
	LEAQ -1(R10), R13               // repeat index 3 -> most recent offset minus one

sequenceDecs_decode_bmi2_adjust_test_temp_valid:
	TESTQ R13, R13
	JNZ   sequenceDecs_decode_bmi2_adjust_temp_valid
	MOVQ  $0x00000001, R13          // clamp selected offset to at least 1

sequenceDecs_decode_bmi2_adjust_temp_valid:
	CMPQ    CX, $0x01
	CMOVQNE R11, R12                // slot 3 only shifts when index != 1
	MOVQ    R10, R11
	MOVQ    R13, R10
	MOVQ    R13, CX

sequenceDecs_decode_bmi2_after_adjust:
	MOVQ CX, 16(R9)

	// Check values
	MOVQ  8(R9), R13
	MOVQ  (R9), R14
	LEAQ  (R13)(R14*1), R15
	MOVQ  s+0(FP), BP
	ADDQ  R15, 256(BP)              // accumulate ll+ml into s+256
	MOVQ  ctx+16(FP), R15
	SUBQ  R14, 128(R15)             // deduct literals; sign flip means we ran out
	JS    error_not_enough_literals
	CMPQ  R13, $0x00020002
	JA    sequenceDecs_decode_bmi2_error_match_len_too_big
	TESTQ CX, CX
	JNZ   sequenceDecs_decode_bmi2_match_len_ofs_ok
	TESTQ R13, R13
	JNZ   sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch

sequenceDecs_decode_bmi2_match_len_ofs_ok:
	ADDQ $0x18, R9                  // next 24-byte output slot
	MOVQ ctx+16(FP), CX
	DECQ 96(CX)
	JNS  sequenceDecs_decode_bmi2_main_loop
	MOVQ s+0(FP), CX
	MOVQ R10, 144(CX)
	MOVQ R11, 152(CX)
	MOVQ R12, 160(CX)
	MOVQ br+8(FP), CX
	MOVQ AX, 24(CX)
	MOVB DL, 40(CX)
	MOVQ BX, 32(CX)

	// Return success
	MOVQ $0x00000000, ret+24(FP)
	RET

	// Return with match length error
sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch:
	MOVQ $0x00000001, ret+24(FP)
	RET

	// Return with match too long error
sequenceDecs_decode_bmi2_error_match_len_too_big:
	MOVQ $0x00000002, ret+24(FP)
	RET

	// Return with match offset too long error
	// NOTE(review): no label and preceded by RET — unreachable in this
	// function; emitted unconditionally by the generator.
	MOVQ $0x00000003, ret+24(FP)
	RET

	// Return with not enough literals error
error_not_enough_literals:
	MOVQ $0x00000004, ret+24(FP)
	RET

	// Return with overread error
error_overread:
	MOVQ $0x00000006, ret+24(FP)
	RET
 | |
| // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 | |
| // Requires: BMI, BMI2, CMOV
 | |
| TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
 | |
| 	MOVQ    br+8(FP), BX
 | |
| 	MOVQ    24(BX), AX
 | |
| 	MOVBQZX 40(BX), DX
 | |
| 	MOVQ    (BX), CX
 | |
| 	MOVQ    32(BX), BX
 | |
| 	ADDQ    BX, CX
 | |
| 	MOVQ    CX, (SP)
 | |
| 	MOVQ    ctx+16(FP), CX
 | |
| 	MOVQ    72(CX), SI
 | |
| 	MOVQ    80(CX), DI
 | |
| 	MOVQ    88(CX), R8
 | |
| 	MOVQ    104(CX), R9
 | |
| 	MOVQ    s+0(FP), CX
 | |
| 	MOVQ    144(CX), R10
 | |
| 	MOVQ    152(CX), R11
 | |
| 	MOVQ    160(CX), R12
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_main_loop:
 | |
| 	MOVQ (SP), R13
 | |
| 
 | |
| 	// Fill bitreader to have enough for the offset and match length.
 | |
| 	CMPQ BX, $0x08
 | |
| 	JL   sequenceDecs_decode_56_bmi2_fill_byte_by_byte
 | |
| 	MOVQ DX, CX
 | |
| 	SHRQ $0x03, CX
 | |
| 	SUBQ CX, R13
 | |
| 	MOVQ (R13), AX
 | |
| 	SUBQ CX, BX
 | |
| 	ANDQ $0x07, DX
 | |
| 	JMP  sequenceDecs_decode_56_bmi2_fill_end
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
 | |
| 	CMPQ    BX, $0x00
 | |
| 	JLE     sequenceDecs_decode_56_bmi2_fill_check_overread
 | |
| 	CMPQ    DX, $0x07
 | |
| 	JLE     sequenceDecs_decode_56_bmi2_fill_end
 | |
| 	SHLQ    $0x08, AX
 | |
| 	SUBQ    $0x01, R13
 | |
| 	SUBQ    $0x01, BX
 | |
| 	SUBQ    $0x08, DX
 | |
| 	MOVBQZX (R13), CX
 | |
| 	ORQ     CX, AX
 | |
| 	JMP     sequenceDecs_decode_56_bmi2_fill_byte_by_byte
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_fill_check_overread:
 | |
| 	CMPQ DX, $0x40
 | |
| 	JA   error_overread
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_fill_end:
 | |
| 	// Update offset
 | |
| 	MOVQ   $0x00000808, CX
 | |
| 	BEXTRQ CX, R8, R14
 | |
| 	MOVQ   AX, R15
 | |
| 	LEAQ   (DX)(R14*1), CX
 | |
| 	ROLQ   CL, R15
 | |
| 	BZHIQ  R14, R15, R15
 | |
| 	MOVQ   CX, DX
 | |
| 	MOVQ   R8, CX
 | |
| 	SHRQ   $0x20, CX
 | |
| 	ADDQ   R15, CX
 | |
| 	MOVQ   CX, 16(R9)
 | |
| 
 | |
| 	// Update match length
 | |
| 	MOVQ   $0x00000808, CX
 | |
| 	BEXTRQ CX, DI, R14
 | |
| 	MOVQ   AX, R15
 | |
| 	LEAQ   (DX)(R14*1), CX
 | |
| 	ROLQ   CL, R15
 | |
| 	BZHIQ  R14, R15, R15
 | |
| 	MOVQ   CX, DX
 | |
| 	MOVQ   DI, CX
 | |
| 	SHRQ   $0x20, CX
 | |
| 	ADDQ   R15, CX
 | |
| 	MOVQ   CX, 8(R9)
 | |
| 
 | |
| 	// Update literal length
 | |
| 	MOVQ   $0x00000808, CX
 | |
| 	BEXTRQ CX, SI, R14
 | |
| 	MOVQ   AX, R15
 | |
| 	LEAQ   (DX)(R14*1), CX
 | |
| 	ROLQ   CL, R15
 | |
| 	BZHIQ  R14, R15, R15
 | |
| 	MOVQ   CX, DX
 | |
| 	MOVQ   SI, CX
 | |
| 	SHRQ   $0x20, CX
 | |
| 	ADDQ   R15, CX
 | |
| 	MOVQ   CX, (R9)
 | |
| 
 | |
| 	// Fill bitreader for state updates
 | |
| 	MOVQ    R13, (SP)
 | |
| 	MOVQ    $0x00000808, CX
 | |
| 	BEXTRQ  CX, R8, R13
 | |
| 	MOVQ    ctx+16(FP), CX
 | |
| 	CMPQ    96(CX), $0x00
 | |
| 	JZ      sequenceDecs_decode_56_bmi2_skip_update
 | |
| 	LEAQ    (SI)(DI*1), R14
 | |
| 	ADDQ    R8, R14
 | |
| 	MOVBQZX R14, R14
 | |
| 	LEAQ    (DX)(R14*1), CX
 | |
| 	MOVQ    AX, R15
 | |
| 	MOVQ    CX, DX
 | |
| 	ROLQ    CL, R15
 | |
| 	BZHIQ   R14, R15, R15
 | |
| 
 | |
| 	// Update Offset State
 | |
| 	BZHIQ R8, R15, CX
 | |
| 	SHRXQ R8, R15, R15
 | |
| 	SHRL  $0x10, R8
 | |
| 	ADDQ  CX, R8
 | |
| 
 | |
| 	// Load ctx.ofTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ 48(CX), CX
 | |
| 	MOVQ (CX)(R8*8), R8
 | |
| 
 | |
| 	// Update Match Length State
 | |
| 	BZHIQ DI, R15, CX
 | |
| 	SHRXQ DI, R15, R15
 | |
| 	SHRL  $0x10, DI
 | |
| 	ADDQ  CX, DI
 | |
| 
 | |
| 	// Load ctx.mlTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ 24(CX), CX
 | |
| 	MOVQ (CX)(DI*8), DI
 | |
| 
 | |
| 	// Update Literal Length State
 | |
| 	BZHIQ SI, R15, CX
 | |
| 	SHRL  $0x10, SI
 | |
| 	ADDQ  CX, SI
 | |
| 
 | |
| 	// Load ctx.llTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ (CX), CX
 | |
| 	MOVQ (CX)(SI*8), SI
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_skip_update:
 | |
| 	// Adjust offset
 | |
| 	MOVQ 16(R9), CX
 | |
| 	CMPQ R13, $0x01
 | |
| 	JBE  sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0
 | |
| 	MOVQ R11, R12
 | |
| 	MOVQ R10, R11
 | |
| 	MOVQ CX, R10
 | |
| 	JMP  sequenceDecs_decode_56_bmi2_after_adjust
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0:
 | |
| 	CMPQ (R9), $0x00000000
 | |
| 	JNE  sequenceDecs_decode_56_bmi2_adjust_offset_maybezero
 | |
| 	INCQ CX
 | |
| 	JMP  sequenceDecs_decode_56_bmi2_adjust_offset_nonzero
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_adjust_offset_maybezero:
 | |
| 	TESTQ CX, CX
 | |
| 	JNZ   sequenceDecs_decode_56_bmi2_adjust_offset_nonzero
 | |
| 	MOVQ  R10, CX
 | |
| 	JMP   sequenceDecs_decode_56_bmi2_after_adjust
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_adjust_offset_nonzero:
 | |
| 	CMPQ CX, $0x01
 | |
| 	JB   sequenceDecs_decode_56_bmi2_adjust_zero
 | |
| 	JEQ  sequenceDecs_decode_56_bmi2_adjust_one
 | |
| 	CMPQ CX, $0x02
 | |
| 	JA   sequenceDecs_decode_56_bmi2_adjust_three
 | |
| 	JMP  sequenceDecs_decode_56_bmi2_adjust_two
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_adjust_zero:
 | |
| 	MOVQ R10, R13
 | |
| 	JMP  sequenceDecs_decode_56_bmi2_adjust_test_temp_valid
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_adjust_one:
 | |
| 	MOVQ R11, R13
 | |
| 	JMP  sequenceDecs_decode_56_bmi2_adjust_test_temp_valid
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_adjust_two:
 | |
| 	MOVQ R12, R13
 | |
| 	JMP  sequenceDecs_decode_56_bmi2_adjust_test_temp_valid
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_adjust_three:
 | |
| 	LEAQ -1(R10), R13
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_adjust_test_temp_valid:
 | |
| 	TESTQ R13, R13
 | |
| 	JNZ   sequenceDecs_decode_56_bmi2_adjust_temp_valid
 | |
| 	MOVQ  $0x00000001, R13
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_adjust_temp_valid:
 | |
| 	CMPQ    CX, $0x01
 | |
| 	CMOVQNE R11, R12
 | |
| 	MOVQ    R10, R11
 | |
| 	MOVQ    R13, R10
 | |
| 	MOVQ    R13, CX
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_after_adjust:
 | |
| 	MOVQ CX, 16(R9)
 | |
| 
 | |
| 	// Check values
 | |
| 	MOVQ  8(R9), R13
 | |
| 	MOVQ  (R9), R14
 | |
| 	LEAQ  (R13)(R14*1), R15
 | |
| 	MOVQ  s+0(FP), BP
 | |
| 	ADDQ  R15, 256(BP)
 | |
| 	MOVQ  ctx+16(FP), R15
 | |
| 	SUBQ  R14, 128(R15)
 | |
| 	JS    error_not_enough_literals
 | |
| 	CMPQ  R13, $0x00020002
 | |
| 	JA    sequenceDecs_decode_56_bmi2_error_match_len_too_big
 | |
| 	TESTQ CX, CX
 | |
| 	JNZ   sequenceDecs_decode_56_bmi2_match_len_ofs_ok
 | |
| 	TESTQ R13, R13
 | |
| 	JNZ   sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch
 | |
| 
 | |
| sequenceDecs_decode_56_bmi2_match_len_ofs_ok:
 | |
| 	ADDQ $0x18, R9
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	DECQ 96(CX)
 | |
| 	JNS  sequenceDecs_decode_56_bmi2_main_loop
 | |
| 	MOVQ s+0(FP), CX
 | |
| 	MOVQ R10, 144(CX)
 | |
| 	MOVQ R11, 152(CX)
 | |
| 	MOVQ R12, 160(CX)
 | |
| 	MOVQ br+8(FP), CX
 | |
| 	MOVQ AX, 24(CX)
 | |
| 	MOVB DL, 40(CX)
 | |
| 	MOVQ BX, 32(CX)
 | |
| 
 | |
| 	// Return success
 | |
| 	MOVQ $0x00000000, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match length error
 | |
| sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch:
 | |
| 	MOVQ $0x00000001, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match too long error
 | |
| sequenceDecs_decode_56_bmi2_error_match_len_too_big:
 | |
| 	MOVQ $0x00000002, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match offset too long error
 | |
| 	MOVQ $0x00000003, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with not enough literals error
 | |
| error_not_enough_literals:
 | |
| 	MOVQ $0x00000004, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with overread error
 | |
| error_overread:
 | |
| 	MOVQ $0x00000006, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Requires: SSE
//
// Executes already-decoded zstd sequences: for each 24-byte sequence record
// it copies the literal run and then the match (from history or from the
// output produced so far) into the output buffer. Returns true on success
// (including the zero-sequences case) and false when a match offset exceeds
// the produced output + history or the window limit. Either way, the
// sequence index, output position and consumed-literals count are written
// back into ctx before returning.
//
// Register roles inside main_loop (derived from the loads below; the ctx
// field names are assumptions from the offsets — confirm against
// executeAsmContext in the Go source):
//   AX  = pointer to current sequence (seqs base + 24*seqIndex)
//   CX  = number of sequences, DX = current sequence index
//   BX  = output write pointer, DI = output position counter
//   SI  = literals read pointer
//   R8  = window-size limit for offsets
//   R9  = past-end pointer of the history buffer, R10 = history length
//   R11/R12/R13 = per-sequence litLen / offset / matchLen
//
// NOTE(review): the copy loops use 16-byte MOVUPS and may write up to 15
// bytes past the exact length; callers presumably guarantee output slack —
// see sequenceDecs_executeSimple_safe_amd64 for the bounds-exact variant.
TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
	MOVQ  ctx+0(FP), R10
	MOVQ  8(R10), CX
	TESTQ CX, CX
	JZ    empty_seqs
	MOVQ  (R10), AX
	MOVQ  24(R10), DX
	MOVQ  32(R10), BX
	MOVQ  80(R10), SI
	MOVQ  104(R10), DI
	MOVQ  120(R10), R8
	MOVQ  56(R10), R9
	MOVQ  64(R10), R10
	ADDQ  R10, R9

	// seqsBase += 24 * seqIndex
	LEAQ (DX)(DX*2), R11
	SHLQ $0x03, R11
	ADDQ R11, AX

	// outBase += outPosition
	ADDQ DI, BX

main_loop:
	MOVQ (AX), R11
	MOVQ 16(AX), R12
	MOVQ 8(AX), R13

	// Copy literals
	TESTQ R11, R11
	JZ    check_offset
	XORQ  R14, R14

	// Bulk 16-byte literal copy; intentionally rounds up past litLen.
copy_1:
	MOVUPS (SI)(R14*1), X0
	MOVUPS X0, (BX)(R14*1)
	ADDQ   $0x10, R14
	CMPQ   R14, R11
	JB     copy_1
	ADDQ   R11, SI
	ADDQ   R11, BX
	ADDQ   R11, DI

	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
check_offset:
	LEAQ (DI)(R10*1), R11
	CMPQ R12, R11
	JG   error_match_off_too_big
	CMPQ R12, R8
	JG   error_match_off_too_big

	// Copy match from history
	// R11 = offset - outPosition; <= 0 means the match starts in the
	// current output, so jump straight to copy_match.
	MOVQ R12, R11
	SUBQ DI, R11
	JLS  copy_match
	MOVQ R9, R14
	SUBQ R11, R14
	CMPQ R13, R11
	JG   copy_all_from_history
	MOVQ R13, R11
	SUBQ $0x10, R11
	JB   copy_4_small

copy_4_loop:
	MOVUPS (R14), X0
	MOVUPS X0, (BX)
	ADDQ   $0x10, R14
	ADDQ   $0x10, BX
	SUBQ   $0x10, R11
	JAE    copy_4_loop
	LEAQ   16(R14)(R11*1), R14
	LEAQ   16(BX)(R11*1), BX
	MOVUPS -16(R14), X0
	MOVUPS X0, -16(BX)
	JMP    copy_4_end

copy_4_small:
	CMPQ R13, $0x03
	JE   copy_4_move_3
	CMPQ R13, $0x08
	JB   copy_4_move_4through7
	JMP  copy_4_move_8through16

copy_4_move_3:
	MOVW (R14), R11
	MOVB 2(R14), R12
	MOVW R11, (BX)
	MOVB R12, 2(BX)
	ADDQ R13, R14
	ADDQ R13, BX
	JMP  copy_4_end

copy_4_move_4through7:
	// Overlapping head+tail loads/stores cover 4..7 bytes exactly.
	MOVL (R14), R11
	MOVL -4(R14)(R13*1), R12
	MOVL R11, (BX)
	MOVL R12, -4(BX)(R13*1)
	ADDQ R13, R14
	ADDQ R13, BX
	JMP  copy_4_end

copy_4_move_8through16:
	MOVQ (R14), R11
	MOVQ -8(R14)(R13*1), R12
	MOVQ R11, (BX)
	MOVQ R12, -8(BX)(R13*1)
	ADDQ R13, R14
	ADDQ R13, BX

copy_4_end:
	ADDQ R13, DI
	ADDQ $0x18, AX
	INCQ DX
	CMPQ DX, CX
	JB   main_loop
	JMP  loop_finished

	// Match spans the history/output boundary: copy the history part
	// (length in R11) first, then fall through to copy_match for the rest.
copy_all_from_history:
	MOVQ R11, R15
	SUBQ $0x10, R15
	JB   copy_5_small

copy_5_loop:
	MOVUPS (R14), X0
	MOVUPS X0, (BX)
	ADDQ   $0x10, R14
	ADDQ   $0x10, BX
	SUBQ   $0x10, R15
	JAE    copy_5_loop
	LEAQ   16(R14)(R15*1), R14
	LEAQ   16(BX)(R15*1), BX
	MOVUPS -16(R14), X0
	MOVUPS X0, -16(BX)
	JMP    copy_5_end

copy_5_small:
	CMPQ R11, $0x03
	JE   copy_5_move_3
	JB   copy_5_move_1or2
	CMPQ R11, $0x08
	JB   copy_5_move_4through7
	JMP  copy_5_move_8through16

copy_5_move_1or2:
	MOVB (R14), R15
	MOVB -1(R14)(R11*1), BP
	MOVB R15, (BX)
	MOVB BP, -1(BX)(R11*1)
	ADDQ R11, R14
	ADDQ R11, BX
	JMP  copy_5_end

copy_5_move_3:
	MOVW (R14), R15
	MOVB 2(R14), BP
	MOVW R15, (BX)
	MOVB BP, 2(BX)
	ADDQ R11, R14
	ADDQ R11, BX
	JMP  copy_5_end

copy_5_move_4through7:
	MOVL (R14), R15
	MOVL -4(R14)(R11*1), BP
	MOVL R15, (BX)
	MOVL BP, -4(BX)(R11*1)
	ADDQ R11, R14
	ADDQ R11, BX
	JMP  copy_5_end

copy_5_move_8through16:
	MOVQ (R14), R15
	MOVQ -8(R14)(R11*1), BP
	MOVQ R15, (BX)
	MOVQ BP, -8(BX)(R11*1)
	ADDQ R11, R14
	ADDQ R11, BX

copy_5_end:
	ADDQ R11, DI
	SUBQ R11, R13

	// Copy match from the current buffer
copy_match:
	MOVQ BX, R11
	SUBQ R12, R11

	// ml <= mo
	CMPQ R13, R12
	JA   copy_overlapping_match

	// Copy non-overlapping match
	ADDQ R13, DI
	MOVQ BX, R12
	ADDQ R13, BX

copy_2:
	MOVUPS (R11), X0
	MOVUPS X0, (R12)
	ADDQ   $0x10, R11
	ADDQ   $0x10, R12
	SUBQ   $0x10, R13
	JHI    copy_2
	JMP    handle_loop

	// Copy overlapping match
	// Overlap (ml > mo) requires byte-at-a-time copy so earlier output
	// bytes are visible to later reads (zstd run-extension semantics).
copy_overlapping_match:
	ADDQ R13, DI

copy_slow_3:
	MOVB (R11), R12
	MOVB R12, (BX)
	INCQ R11
	INCQ BX
	DECQ R13
	JNZ  copy_slow_3

handle_loop:
	ADDQ $0x18, AX
	INCQ DX
	CMPQ DX, CX
	JB   main_loop

loop_finished:
	// Return value
	MOVB $0x01, ret+8(FP)

	// Update the context
	MOVQ ctx+0(FP), AX
	MOVQ DX, 24(AX)
	MOVQ DI, 104(AX)
	SUBQ 80(AX), SI
	MOVQ SI, 112(AX)
	RET

error_match_off_too_big:
	// Return value
	MOVB $0x00, ret+8(FP)

	// Update the context
	MOVQ ctx+0(FP), AX
	MOVQ DX, 24(AX)
	MOVQ DI, 104(AX)
	SUBQ 80(AX), SI
	MOVQ SI, 112(AX)
	RET

empty_seqs:
	// Return value
	MOVB $0x01, ret+8(FP)
	RET
 | |
| 
 | |
// func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
// Requires: SSE
//
// Bounds-exact variant of sequenceDecs_executeSimple_amd64: identical
// structure and register roles, but every copy (literals and
// non-overlapping matches) dispatches by length to exact-width moves or an
// overlapping head/tail scheme, so it never stores past the requested
// length. Used when the output buffer has no over-allocation slack
// (presumably near the end of the destination — confirm against the Go
// caller). Returns true on success, false when a match offset exceeds the
// produced output + history or the window limit; sequence index, output
// position and consumed-literals count are written back into ctx either way.
//
// Register roles in main_loop (same as the fast variant; ctx field names
// inferred from offsets — confirm against executeAsmContext):
//   AX  = current sequence ptr, CX = sequence count, DX = sequence index
//   BX  = output write ptr, DI = output position, SI = literals ptr
//   R8  = window limit, R9 = history past-end ptr, R10 = history length
//   R11/R12/R13 = per-sequence litLen / offset / matchLen
TEXT ·sequenceDecs_executeSimple_safe_amd64(SB), $8-9
	MOVQ  ctx+0(FP), R10
	MOVQ  8(R10), CX
	TESTQ CX, CX
	JZ    empty_seqs
	MOVQ  (R10), AX
	MOVQ  24(R10), DX
	MOVQ  32(R10), BX
	MOVQ  80(R10), SI
	MOVQ  104(R10), DI
	MOVQ  120(R10), R8
	MOVQ  56(R10), R9
	MOVQ  64(R10), R10
	ADDQ  R10, R9

	// seqsBase += 24 * seqIndex
	LEAQ (DX)(DX*2), R11
	SHLQ $0x03, R11
	ADDQ R11, AX

	// outBase += outPosition
	ADDQ DI, BX

main_loop:
	MOVQ (AX), R11
	MOVQ 16(AX), R12
	MOVQ 8(AX), R13

	// Copy literals
	TESTQ R11, R11
	JZ    check_offset
	MOVQ  R11, R14
	SUBQ  $0x10, R14
	JB    copy_1_small

	// >= 16 bytes: 16-byte chunks, then one overlapping tail store that
	// ends exactly at litLen (no overwrite).
copy_1_loop:
	MOVUPS (SI), X0
	MOVUPS X0, (BX)
	ADDQ   $0x10, SI
	ADDQ   $0x10, BX
	SUBQ   $0x10, R14
	JAE    copy_1_loop
	LEAQ   16(SI)(R14*1), SI
	LEAQ   16(BX)(R14*1), BX
	MOVUPS -16(SI), X0
	MOVUPS X0, -16(BX)
	JMP    copy_1_end

copy_1_small:
	CMPQ R11, $0x03
	JE   copy_1_move_3
	JB   copy_1_move_1or2
	CMPQ R11, $0x08
	JB   copy_1_move_4through7
	JMP  copy_1_move_8through16

copy_1_move_1or2:
	MOVB (SI), R14
	MOVB -1(SI)(R11*1), R15
	MOVB R14, (BX)
	MOVB R15, -1(BX)(R11*1)
	ADDQ R11, SI
	ADDQ R11, BX
	JMP  copy_1_end

copy_1_move_3:
	MOVW (SI), R14
	MOVB 2(SI), R15
	MOVW R14, (BX)
	MOVB R15, 2(BX)
	ADDQ R11, SI
	ADDQ R11, BX
	JMP  copy_1_end

copy_1_move_4through7:
	MOVL (SI), R14
	MOVL -4(SI)(R11*1), R15
	MOVL R14, (BX)
	MOVL R15, -4(BX)(R11*1)
	ADDQ R11, SI
	ADDQ R11, BX
	JMP  copy_1_end

copy_1_move_8through16:
	MOVQ (SI), R14
	MOVQ -8(SI)(R11*1), R15
	MOVQ R14, (BX)
	MOVQ R15, -8(BX)(R11*1)
	ADDQ R11, SI
	ADDQ R11, BX

copy_1_end:
	ADDQ R11, DI

	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
check_offset:
	LEAQ (DI)(R10*1), R11
	CMPQ R12, R11
	JG   error_match_off_too_big
	CMPQ R12, R8
	JG   error_match_off_too_big

	// Copy match from history
	// R11 = offset - outPosition; <= 0 means the match lies entirely in
	// the current output buffer.
	MOVQ R12, R11
	SUBQ DI, R11
	JLS  copy_match
	MOVQ R9, R14
	SUBQ R11, R14
	CMPQ R13, R11
	JG   copy_all_from_history
	MOVQ R13, R11
	SUBQ $0x10, R11
	JB   copy_4_small

copy_4_loop:
	MOVUPS (R14), X0
	MOVUPS X0, (BX)
	ADDQ   $0x10, R14
	ADDQ   $0x10, BX
	SUBQ   $0x10, R11
	JAE    copy_4_loop
	LEAQ   16(R14)(R11*1), R14
	LEAQ   16(BX)(R11*1), BX
	MOVUPS -16(R14), X0
	MOVUPS X0, -16(BX)
	JMP    copy_4_end

copy_4_small:
	CMPQ R13, $0x03
	JE   copy_4_move_3
	CMPQ R13, $0x08
	JB   copy_4_move_4through7
	JMP  copy_4_move_8through16

copy_4_move_3:
	MOVW (R14), R11
	MOVB 2(R14), R12
	MOVW R11, (BX)
	MOVB R12, 2(BX)
	ADDQ R13, R14
	ADDQ R13, BX
	JMP  copy_4_end

copy_4_move_4through7:
	MOVL (R14), R11
	MOVL -4(R14)(R13*1), R12
	MOVL R11, (BX)
	MOVL R12, -4(BX)(R13*1)
	ADDQ R13, R14
	ADDQ R13, BX
	JMP  copy_4_end

copy_4_move_8through16:
	MOVQ (R14), R11
	MOVQ -8(R14)(R13*1), R12
	MOVQ R11, (BX)
	MOVQ R12, -8(BX)(R13*1)
	ADDQ R13, R14
	ADDQ R13, BX

copy_4_end:
	ADDQ R13, DI
	ADDQ $0x18, AX
	INCQ DX
	CMPQ DX, CX
	JB   main_loop
	JMP  loop_finished

	// Match spans the history/output boundary: copy the history part
	// (length in R11) first, then fall through to copy_match.
copy_all_from_history:
	MOVQ R11, R15
	SUBQ $0x10, R15
	JB   copy_5_small

copy_5_loop:
	MOVUPS (R14), X0
	MOVUPS X0, (BX)
	ADDQ   $0x10, R14
	ADDQ   $0x10, BX
	SUBQ   $0x10, R15
	JAE    copy_5_loop
	LEAQ   16(R14)(R15*1), R14
	LEAQ   16(BX)(R15*1), BX
	MOVUPS -16(R14), X0
	MOVUPS X0, -16(BX)
	JMP    copy_5_end

copy_5_small:
	CMPQ R11, $0x03
	JE   copy_5_move_3
	JB   copy_5_move_1or2
	CMPQ R11, $0x08
	JB   copy_5_move_4through7
	JMP  copy_5_move_8through16

copy_5_move_1or2:
	MOVB (R14), R15
	MOVB -1(R14)(R11*1), BP
	MOVB R15, (BX)
	MOVB BP, -1(BX)(R11*1)
	ADDQ R11, R14
	ADDQ R11, BX
	JMP  copy_5_end

copy_5_move_3:
	MOVW (R14), R15
	MOVB 2(R14), BP
	MOVW R15, (BX)
	MOVB BP, 2(BX)
	ADDQ R11, R14
	ADDQ R11, BX
	JMP  copy_5_end

copy_5_move_4through7:
	MOVL (R14), R15
	MOVL -4(R14)(R11*1), BP
	MOVL R15, (BX)
	MOVL BP, -4(BX)(R11*1)
	ADDQ R11, R14
	ADDQ R11, BX
	JMP  copy_5_end

copy_5_move_8through16:
	MOVQ (R14), R15
	MOVQ -8(R14)(R11*1), BP
	MOVQ R15, (BX)
	MOVQ BP, -8(BX)(R11*1)
	ADDQ R11, R14
	ADDQ R11, BX

copy_5_end:
	ADDQ R11, DI
	SUBQ R11, R13

	// Copy match from the current buffer
copy_match:
	MOVQ BX, R11
	SUBQ R12, R11

	// ml <= mo
	CMPQ R13, R12
	JA   copy_overlapping_match

	// Copy non-overlapping match
	ADDQ R13, DI
	MOVQ R13, R12
	SUBQ $0x10, R12
	JB   copy_2_small

copy_2_loop:
	MOVUPS (R11), X0
	MOVUPS X0, (BX)
	ADDQ   $0x10, R11
	ADDQ   $0x10, BX
	SUBQ   $0x10, R12
	JAE    copy_2_loop
	LEAQ   16(R11)(R12*1), R11
	LEAQ   16(BX)(R12*1), BX
	MOVUPS -16(R11), X0
	MOVUPS X0, -16(BX)
	JMP    copy_2_end

copy_2_small:
	CMPQ R13, $0x03
	JE   copy_2_move_3
	JB   copy_2_move_1or2
	CMPQ R13, $0x08
	JB   copy_2_move_4through7
	JMP  copy_2_move_8through16

copy_2_move_1or2:
	MOVB (R11), R12
	MOVB -1(R11)(R13*1), R14
	MOVB R12, (BX)
	MOVB R14, -1(BX)(R13*1)
	ADDQ R13, R11
	ADDQ R13, BX
	JMP  copy_2_end

copy_2_move_3:
	MOVW (R11), R12
	MOVB 2(R11), R14
	MOVW R12, (BX)
	MOVB R14, 2(BX)
	ADDQ R13, R11
	ADDQ R13, BX
	JMP  copy_2_end

copy_2_move_4through7:
	MOVL (R11), R12
	MOVL -4(R11)(R13*1), R14
	MOVL R12, (BX)
	MOVL R14, -4(BX)(R13*1)
	ADDQ R13, R11
	ADDQ R13, BX
	JMP  copy_2_end

copy_2_move_8through16:
	MOVQ (R11), R12
	MOVQ -8(R11)(R13*1), R14
	MOVQ R12, (BX)
	MOVQ R14, -8(BX)(R13*1)
	ADDQ R13, R11
	ADDQ R13, BX

copy_2_end:
	JMP handle_loop

	// Copy overlapping match
	// ml > mo: byte-at-a-time so freshly written bytes feed later reads.
copy_overlapping_match:
	ADDQ R13, DI

copy_slow_3:
	MOVB (R11), R12
	MOVB R12, (BX)
	INCQ R11
	INCQ BX
	DECQ R13
	JNZ  copy_slow_3

handle_loop:
	ADDQ $0x18, AX
	INCQ DX
	CMPQ DX, CX
	JB   main_loop

loop_finished:
	// Return value
	MOVB $0x01, ret+8(FP)

	// Update the context
	MOVQ ctx+0(FP), AX
	MOVQ DX, 24(AX)
	MOVQ DI, 104(AX)
	SUBQ 80(AX), SI
	MOVQ SI, 112(AX)
	RET

error_match_off_too_big:
	// Return value
	MOVB $0x00, ret+8(FP)

	// Update the context
	MOVQ ctx+0(FP), AX
	MOVQ DX, 24(AX)
	MOVQ DI, 104(AX)
	SUBQ 80(AX), SI
	MOVQ SI, 112(AX)
	RET

empty_seqs:
	// Return value
	MOVB $0x01, ret+8(FP)
	RET
 | |
| 
 | |
// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE
//
// Fused decode+execute loop: decodes each sequence (offset, match length,
// literal length) from the backwards bitstream, maintains the three FSE
// states and the three repeat offsets in s, and immediately copies literals
// and the match into s.out. Returns 0 on success or a nonzero error code
// (1 = matchLen/offset mismatch, 2 = match too long, 3 = offset too big,
// 4 = not enough literals, 5 = not enough output space, 6 = bitstream
// overread), writing diagnostic values into ctx on the error paths.
//
// Register roles (derived from the loads below; ctx/s field names inferred
// from offsets — confirm against the Go structs):
//   DX = bit buffer, BX = bits consumed, SI = input bytes remaining,
//   (SP) = input read pointer; DI/R8/R9 = llState/mlState/ofState;
//   R10 = output write ptr, R11 = literals ptr, R12 = output position.
// Stack slots: 8(SP)=offset, 16(SP)=matchLen, 24(SP)=litLen,
//   32(SP)=past-end of s.out, 40(SP)=history length, 48(SP)=history end,
//   56(SP)=window size.
TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
	MOVQ    br+8(FP), CX
	MOVQ    24(CX), DX
	MOVBQZX 40(CX), BX
	MOVQ    (CX), AX
	MOVQ    32(CX), SI
	ADDQ    SI, AX
	MOVQ    AX, (SP)
	MOVQ    ctx+16(FP), AX
	MOVQ    72(AX), DI
	MOVQ    80(AX), R8
	MOVQ    88(AX), R9
	XORQ    CX, CX
	MOVQ    CX, 8(SP)
	MOVQ    CX, 16(SP)
	MOVQ    CX, 24(SP)
	MOVQ    112(AX), R10
	MOVQ    128(AX), CX
	MOVQ    CX, 32(SP)
	MOVQ    144(AX), R11
	MOVQ    136(AX), R12
	MOVQ    200(AX), CX
	MOVQ    CX, 56(SP)
	MOVQ    176(AX), CX
	MOVQ    CX, 48(SP)
	MOVQ    184(AX), AX
	MOVQ    AX, 40(SP)
	MOVQ    40(SP), AX
	ADDQ    AX, 48(SP)

	// Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
	ADDQ R10, 32(SP)

	// outBase += outPosition
	ADDQ R12, R10

sequenceDecs_decodeSync_amd64_main_loop:
	MOVQ (SP), R13

	// Fill bitreader to have enough for the offset and match length.
	// Fast path refills 8 bytes at once when at least 8 remain.
	CMPQ SI, $0x08
	JL   sequenceDecs_decodeSync_amd64_fill_byte_by_byte
	MOVQ BX, AX
	SHRQ $0x03, AX
	SUBQ AX, R13
	MOVQ (R13), DX
	SUBQ AX, SI
	ANDQ $0x07, BX
	JMP  sequenceDecs_decodeSync_amd64_fill_end

sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
	CMPQ    SI, $0x00
	JLE     sequenceDecs_decodeSync_amd64_fill_check_overread
	CMPQ    BX, $0x07
	JLE     sequenceDecs_decodeSync_amd64_fill_end
	SHLQ    $0x08, DX
	SUBQ    $0x01, R13
	SUBQ    $0x01, SI
	SUBQ    $0x08, BX
	MOVBQZX (R13), AX
	ORQ     AX, DX
	JMP     sequenceDecs_decodeSync_amd64_fill_byte_by_byte

sequenceDecs_decodeSync_amd64_fill_check_overread:
	CMPQ BX, $0x40
	JA   error_overread

sequenceDecs_decodeSync_amd64_fill_end:
	// Update offset
	// Extract the extra bits for the offset: baseline in the high dword of
	// the state word, bit count in byte 1 (via MOVB AH, CL).
	MOVQ  R9, AX
	MOVQ  BX, CX
	MOVQ  DX, R14
	SHLQ  CL, R14
	MOVB  AH, CL
	SHRQ  $0x20, AX
	TESTQ CX, CX
	JZ    sequenceDecs_decodeSync_amd64_of_update_zero
	ADDQ  CX, BX
	CMPQ  BX, $0x40
	JA    sequenceDecs_decodeSync_amd64_of_update_zero
	CMPQ  CX, $0x40
	JAE   sequenceDecs_decodeSync_amd64_of_update_zero
	NEGQ  CX
	SHRQ  CL, R14
	ADDQ  R14, AX

sequenceDecs_decodeSync_amd64_of_update_zero:
	MOVQ AX, 8(SP)

	// Update match length
	MOVQ  R8, AX
	MOVQ  BX, CX
	MOVQ  DX, R14
	SHLQ  CL, R14
	MOVB  AH, CL
	SHRQ  $0x20, AX
	TESTQ CX, CX
	JZ    sequenceDecs_decodeSync_amd64_ml_update_zero
	ADDQ  CX, BX
	CMPQ  BX, $0x40
	JA    sequenceDecs_decodeSync_amd64_ml_update_zero
	CMPQ  CX, $0x40
	JAE   sequenceDecs_decodeSync_amd64_ml_update_zero
	NEGQ  CX
	SHRQ  CL, R14
	ADDQ  R14, AX

sequenceDecs_decodeSync_amd64_ml_update_zero:
	MOVQ AX, 16(SP)

	// Fill bitreader to have enough for the remaining
	CMPQ SI, $0x08
	JL   sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte
	MOVQ BX, AX
	SHRQ $0x03, AX
	SUBQ AX, R13
	MOVQ (R13), DX
	SUBQ AX, SI
	ANDQ $0x07, BX
	JMP  sequenceDecs_decodeSync_amd64_fill_2_end

sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
	CMPQ    SI, $0x00
	JLE     sequenceDecs_decodeSync_amd64_fill_2_check_overread
	CMPQ    BX, $0x07
	JLE     sequenceDecs_decodeSync_amd64_fill_2_end
	SHLQ    $0x08, DX
	SUBQ    $0x01, R13
	SUBQ    $0x01, SI
	SUBQ    $0x08, BX
	MOVBQZX (R13), AX
	ORQ     AX, DX
	JMP     sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte

sequenceDecs_decodeSync_amd64_fill_2_check_overread:
	CMPQ BX, $0x40
	JA   error_overread

sequenceDecs_decodeSync_amd64_fill_2_end:
	// Update literal length
	MOVQ  DI, AX
	MOVQ  BX, CX
	MOVQ  DX, R14
	SHLQ  CL, R14
	MOVB  AH, CL
	SHRQ  $0x20, AX
	TESTQ CX, CX
	JZ    sequenceDecs_decodeSync_amd64_ll_update_zero
	ADDQ  CX, BX
	CMPQ  BX, $0x40
	JA    sequenceDecs_decodeSync_amd64_ll_update_zero
	CMPQ  CX, $0x40
	JAE   sequenceDecs_decodeSync_amd64_ll_update_zero
	NEGQ  CX
	SHRQ  CL, R14
	ADDQ  R14, AX

sequenceDecs_decodeSync_amd64_ll_update_zero:
	MOVQ AX, 24(SP)

	// Fill bitreader for state updates
	// AX keeps the offset code (byte 1 of the of-state) for the repeat
	// offset adjustment below; skip FSE updates on the final sequence.
	MOVQ    R13, (SP)
	MOVQ    R9, AX
	SHRQ    $0x08, AX
	MOVBQZX AL, AX
	MOVQ    ctx+16(FP), CX
	CMPQ    96(CX), $0x00
	JZ      sequenceDecs_decodeSync_amd64_skip_update

	// Update Literal Length State
	MOVBQZX DI, R13
	SHRL    $0x10, DI
	LEAQ    (BX)(R13*1), CX
	MOVQ    DX, R14
	MOVQ    CX, BX
	ROLQ    CL, R14
	MOVL    $0x00000001, R15
	MOVB    R13, CL
	SHLL    CL, R15
	DECL    R15
	ANDQ    R15, R14
	ADDQ    R14, DI

	// Load ctx.llTable
	MOVQ ctx+16(FP), CX
	MOVQ (CX), CX
	MOVQ (CX)(DI*8), DI

	// Update Match Length State
	MOVBQZX R8, R13
	SHRL    $0x10, R8
	LEAQ    (BX)(R13*1), CX
	MOVQ    DX, R14
	MOVQ    CX, BX
	ROLQ    CL, R14
	MOVL    $0x00000001, R15
	MOVB    R13, CL
	SHLL    CL, R15
	DECL    R15
	ANDQ    R15, R14
	ADDQ    R14, R8

	// Load ctx.mlTable
	MOVQ ctx+16(FP), CX
	MOVQ 24(CX), CX
	MOVQ (CX)(R8*8), R8

	// Update Offset State
	MOVBQZX R9, R13
	SHRL    $0x10, R9
	LEAQ    (BX)(R13*1), CX
	MOVQ    DX, R14
	MOVQ    CX, BX
	ROLQ    CL, R14
	MOVL    $0x00000001, R15
	MOVB    R13, CL
	SHLL    CL, R15
	DECL    R15
	ANDQ    R15, R14
	ADDQ    R14, R9

	// Load ctx.ofTable
	MOVQ ctx+16(FP), CX
	MOVQ 48(CX), CX
	MOVQ (CX)(R9*8), R9

sequenceDecs_decodeSync_amd64_skip_update:
	// Adjust offset
	// Repeat-offset handling (RFC 8878): offsetB > 1 means a literal
	// offset — shift the three history slots at s+144 and store the new
	// one; otherwise resolve offset codes 1..3 against the history.
	MOVQ   s+0(FP), CX
	MOVQ   8(SP), R13
	CMPQ   AX, $0x01
	JBE    sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0
	MOVUPS 144(CX), X0
	MOVQ   R13, 144(CX)
	MOVUPS X0, 152(CX)
	JMP    sequenceDecs_decodeSync_amd64_after_adjust

sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0:
	CMPQ 24(SP), $0x00000000
	JNE  sequenceDecs_decodeSync_amd64_adjust_offset_maybezero
	INCQ R13
	JMP  sequenceDecs_decodeSync_amd64_adjust_offset_nonzero

sequenceDecs_decodeSync_amd64_adjust_offset_maybezero:
	TESTQ R13, R13
	JNZ   sequenceDecs_decodeSync_amd64_adjust_offset_nonzero
	MOVQ  144(CX), R13
	JMP   sequenceDecs_decodeSync_amd64_after_adjust

sequenceDecs_decodeSync_amd64_adjust_offset_nonzero:
	// Branchless select: code 3 means prevOffset[0]-1, codes 1..2 index
	// the history directly; a zero result is forced to 1.
	MOVQ    R13, AX
	XORQ    R14, R14
	MOVQ    $-1, R15
	CMPQ    R13, $0x03
	CMOVQEQ R14, AX
	CMOVQEQ R15, R14
	ADDQ    144(CX)(AX*8), R14
	JNZ     sequenceDecs_decodeSync_amd64_adjust_temp_valid
	MOVQ    $0x00000001, R14

sequenceDecs_decodeSync_amd64_adjust_temp_valid:
	CMPQ R13, $0x01
	JZ   sequenceDecs_decodeSync_amd64_adjust_skip
	MOVQ 152(CX), AX
	MOVQ AX, 160(CX)

sequenceDecs_decodeSync_amd64_adjust_skip:
	MOVQ 144(CX), AX
	MOVQ AX, 152(CX)
	MOVQ R14, 144(CX)
	MOVQ R14, R13

sequenceDecs_decodeSync_amd64_after_adjust:
	MOVQ R13, 8(SP)

	// Check values
	MOVQ  16(SP), AX
	MOVQ  24(SP), CX
	LEAQ  (AX)(CX*1), R14
	MOVQ  s+0(FP), R15
	ADDQ  R14, 256(R15)
	MOVQ  ctx+16(FP), R14
	SUBQ  CX, 104(R14)
	JS    error_not_enough_literals
	CMPQ  AX, $0x00020002
	JA    sequenceDecs_decodeSync_amd64_error_match_len_too_big
	TESTQ R13, R13
	JNZ   sequenceDecs_decodeSync_amd64_match_len_ofs_ok
	TESTQ AX, AX
	JNZ   sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch

sequenceDecs_decodeSync_amd64_match_len_ofs_ok:
	MOVQ 24(SP), AX
	MOVQ 8(SP), CX
	MOVQ 16(SP), R13

	// Check if we have enough space in s.out
	LEAQ (AX)(R13*1), R14
	ADDQ R10, R14
	CMPQ R14, 32(SP)
	JA   error_not_enough_space

	// Copy literals
	TESTQ AX, AX
	JZ    check_offset
	XORQ  R14, R14

	// 16-byte chunks; may overshoot litLen (slack guaranteed by the
	// space check above — presumably with over-allocation; confirm).
copy_1:
	MOVUPS (R11)(R14*1), X0
	MOVUPS X0, (R10)(R14*1)
	ADDQ   $0x10, R14
	CMPQ   R14, AX
	JB     copy_1
	ADDQ   AX, R11
	ADDQ   AX, R10
	ADDQ   AX, R12

	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
check_offset:
	MOVQ R12, AX
	ADDQ 40(SP), AX
	CMPQ CX, AX
	JG   error_match_off_too_big
	CMPQ CX, 56(SP)
	JG   error_match_off_too_big

	// Copy match from history
	MOVQ CX, AX
	SUBQ R12, AX
	JLS  copy_match
	MOVQ 48(SP), R14
	SUBQ AX, R14
	CMPQ R13, AX
	JG   copy_all_from_history
	MOVQ R13, AX
	SUBQ $0x10, AX
	JB   copy_4_small

copy_4_loop:
	MOVUPS (R14), X0
	MOVUPS X0, (R10)
	ADDQ   $0x10, R14
	ADDQ   $0x10, R10
	SUBQ   $0x10, AX
	JAE    copy_4_loop
	LEAQ   16(R14)(AX*1), R14
	LEAQ   16(R10)(AX*1), R10
	MOVUPS -16(R14), X0
	MOVUPS X0, -16(R10)
	JMP    copy_4_end

copy_4_small:
	CMPQ R13, $0x03
	JE   copy_4_move_3
	CMPQ R13, $0x08
	JB   copy_4_move_4through7
	JMP  copy_4_move_8through16

copy_4_move_3:
	MOVW (R14), AX
	MOVB 2(R14), CL
	MOVW AX, (R10)
	MOVB CL, 2(R10)
	ADDQ R13, R14
	ADDQ R13, R10
	JMP  copy_4_end

copy_4_move_4through7:
	MOVL (R14), AX
	MOVL -4(R14)(R13*1), CX
	MOVL AX, (R10)
	MOVL CX, -4(R10)(R13*1)
	ADDQ R13, R14
	ADDQ R13, R10
	JMP  copy_4_end

copy_4_move_8through16:
	MOVQ (R14), AX
	MOVQ -8(R14)(R13*1), CX
	MOVQ AX, (R10)
	MOVQ CX, -8(R10)(R13*1)
	ADDQ R13, R14
	ADDQ R13, R10

copy_4_end:
	ADDQ R13, R12
	JMP  handle_loop
	JMP loop_finished

	// Match spans history/output boundary: copy the history part (AX
	// bytes) first, then fall through to copy_match for the remainder.
copy_all_from_history:
	MOVQ AX, R15
	SUBQ $0x10, R15
	JB   copy_5_small

copy_5_loop:
	MOVUPS (R14), X0
	MOVUPS X0, (R10)
	ADDQ   $0x10, R14
	ADDQ   $0x10, R10
	SUBQ   $0x10, R15
	JAE    copy_5_loop
	LEAQ   16(R14)(R15*1), R14
	LEAQ   16(R10)(R15*1), R10
	MOVUPS -16(R14), X0
	MOVUPS X0, -16(R10)
	JMP    copy_5_end

copy_5_small:
	CMPQ AX, $0x03
	JE   copy_5_move_3
	JB   copy_5_move_1or2
	CMPQ AX, $0x08
	JB   copy_5_move_4through7
	JMP  copy_5_move_8through16

copy_5_move_1or2:
	MOVB (R14), R15
	MOVB -1(R14)(AX*1), BP
	MOVB R15, (R10)
	MOVB BP, -1(R10)(AX*1)
	ADDQ AX, R14
	ADDQ AX, R10
	JMP  copy_5_end

copy_5_move_3:
	MOVW (R14), R15
	MOVB 2(R14), BP
	MOVW R15, (R10)
	MOVB BP, 2(R10)
	ADDQ AX, R14
	ADDQ AX, R10
	JMP  copy_5_end

copy_5_move_4through7:
	MOVL (R14), R15
	MOVL -4(R14)(AX*1), BP
	MOVL R15, (R10)
	MOVL BP, -4(R10)(AX*1)
	ADDQ AX, R14
	ADDQ AX, R10
	JMP  copy_5_end

copy_5_move_8through16:
	MOVQ (R14), R15
	MOVQ -8(R14)(AX*1), BP
	MOVQ R15, (R10)
	MOVQ BP, -8(R10)(AX*1)
	ADDQ AX, R14
	ADDQ AX, R10

copy_5_end:
	ADDQ AX, R12
	SUBQ AX, R13

	// Copy match from the current buffer
copy_match:
	MOVQ R10, AX
	SUBQ CX, AX

	// ml <= mo
	CMPQ R13, CX
	JA   copy_overlapping_match

	// Copy non-overlapping match
	ADDQ R13, R12
	MOVQ R10, CX
	ADDQ R13, R10

copy_2:
	MOVUPS (AX), X0
	MOVUPS X0, (CX)
	ADDQ   $0x10, AX
	ADDQ   $0x10, CX
	SUBQ   $0x10, R13
	JHI    copy_2
	JMP    handle_loop

	// Copy overlapping match
	// ml > mo: byte-at-a-time so freshly written bytes feed later reads.
copy_overlapping_match:
	ADDQ R13, R12

copy_slow_3:
	MOVB (AX), CL
	MOVB CL, (R10)
	INCQ AX
	INCQ R10
	DECQ R13
	JNZ  copy_slow_3

handle_loop:
	MOVQ ctx+16(FP), AX
	DECQ 96(AX)
	JNS  sequenceDecs_decodeSync_amd64_main_loop

loop_finished:
	MOVQ br+8(FP), AX
	MOVQ DX, 24(AX)
	MOVB BL, 40(AX)
	MOVQ SI, 32(AX)

	// Update the context
	MOVQ ctx+16(FP), AX
	MOVQ R12, 136(AX)
	MOVQ 144(AX), CX
	SUBQ CX, R11
	MOVQ R11, 168(AX)

	// Return success
	MOVQ $0x00000000, ret+24(FP)
	RET

	// Return with match length error
sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch:
	MOVQ 16(SP), AX
	MOVQ ctx+16(FP), CX
	MOVQ AX, 216(CX)
	MOVQ $0x00000001, ret+24(FP)
	RET

	// Return with match too long error
sequenceDecs_decodeSync_amd64_error_match_len_too_big:
	MOVQ ctx+16(FP), AX
	MOVQ 16(SP), CX
	MOVQ CX, 216(AX)
	MOVQ $0x00000002, ret+24(FP)
	RET

	// Return with match offset too long error
error_match_off_too_big:
	MOVQ ctx+16(FP), AX
	MOVQ 8(SP), CX
	MOVQ CX, 224(AX)
	MOVQ R12, 136(AX)
	MOVQ $0x00000003, ret+24(FP)
	RET

	// Return with not enough literals error
error_not_enough_literals:
	MOVQ ctx+16(FP), AX
	MOVQ 24(SP), CX
	MOVQ CX, 208(AX)
	MOVQ $0x00000004, ret+24(FP)
	RET

	// Return with overread error
error_overread:
	MOVQ $0x00000006, ret+24(FP)
	RET

	// Return with not enough output space error
error_not_enough_space:
	MOVQ ctx+16(FP), AX
	MOVQ 24(SP), CX
	MOVQ CX, 208(AX)
	MOVQ 16(SP), CX
	MOVQ CX, 216(AX)
	MOVQ R12, 136(AX)
	MOVQ $0x00000005, ret+24(FP)
	RET
 | |
| 
 | |
| // func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 | |
| // Requires: BMI, BMI2, CMOV, SSE
 | |
| TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
 | |
| 	MOVQ    br+8(FP), BX
 | |
| 	MOVQ    24(BX), AX
 | |
| 	MOVBQZX 40(BX), DX
 | |
| 	MOVQ    (BX), CX
 | |
| 	MOVQ    32(BX), BX
 | |
| 	ADDQ    BX, CX
 | |
| 	MOVQ    CX, (SP)
 | |
| 	MOVQ    ctx+16(FP), CX
 | |
| 	MOVQ    72(CX), SI
 | |
| 	MOVQ    80(CX), DI
 | |
| 	MOVQ    88(CX), R8
 | |
| 	XORQ    R9, R9
 | |
| 	MOVQ    R9, 8(SP)
 | |
| 	MOVQ    R9, 16(SP)
 | |
| 	MOVQ    R9, 24(SP)
 | |
| 	MOVQ    112(CX), R9
 | |
| 	MOVQ    128(CX), R10
 | |
| 	MOVQ    R10, 32(SP)
 | |
| 	MOVQ    144(CX), R10
 | |
| 	MOVQ    136(CX), R11
 | |
| 	MOVQ    200(CX), R12
 | |
| 	MOVQ    R12, 56(SP)
 | |
| 	MOVQ    176(CX), R12
 | |
| 	MOVQ    R12, 48(SP)
 | |
| 	MOVQ    184(CX), CX
 | |
| 	MOVQ    CX, 40(SP)
 | |
| 	MOVQ    40(SP), CX
 | |
| 	ADDQ    CX, 48(SP)
 | |
| 
 | |
| 	// Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
 | |
| 	ADDQ R9, 32(SP)
 | |
| 
 | |
| 	// outBase += outPosition
 | |
| 	ADDQ R11, R9
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_main_loop:
 | |
| 	MOVQ (SP), R12
 | |
| 
 | |
| 	// Fill bitreader to have enough for the offset and match length.
 | |
| 	CMPQ BX, $0x08
 | |
| 	JL   sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
 | |
| 	MOVQ DX, CX
 | |
| 	SHRQ $0x03, CX
 | |
| 	SUBQ CX, R12
 | |
| 	MOVQ (R12), AX
 | |
| 	SUBQ CX, BX
 | |
| 	ANDQ $0x07, DX
 | |
| 	JMP  sequenceDecs_decodeSync_bmi2_fill_end
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
 | |
| 	CMPQ    BX, $0x00
 | |
| 	JLE     sequenceDecs_decodeSync_bmi2_fill_check_overread
 | |
| 	CMPQ    DX, $0x07
 | |
| 	JLE     sequenceDecs_decodeSync_bmi2_fill_end
 | |
| 	SHLQ    $0x08, AX
 | |
| 	SUBQ    $0x01, R12
 | |
| 	SUBQ    $0x01, BX
 | |
| 	SUBQ    $0x08, DX
 | |
| 	MOVBQZX (R12), CX
 | |
| 	ORQ     CX, AX
 | |
| 	JMP     sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_fill_check_overread:
 | |
| 	CMPQ DX, $0x40
 | |
| 	JA   error_overread
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_fill_end:
 | |
| 	// Update offset
 | |
| 	MOVQ   $0x00000808, CX
 | |
| 	BEXTRQ CX, R8, R13
 | |
| 	MOVQ   AX, R14
 | |
| 	LEAQ   (DX)(R13*1), CX
 | |
| 	ROLQ   CL, R14
 | |
| 	BZHIQ  R13, R14, R14
 | |
| 	MOVQ   CX, DX
 | |
| 	MOVQ   R8, CX
 | |
| 	SHRQ   $0x20, CX
 | |
| 	ADDQ   R14, CX
 | |
| 	MOVQ   CX, 8(SP)
 | |
| 
 | |
| 	// Update match length
 | |
| 	MOVQ   $0x00000808, CX
 | |
| 	BEXTRQ CX, DI, R13
 | |
| 	MOVQ   AX, R14
 | |
| 	LEAQ   (DX)(R13*1), CX
 | |
| 	ROLQ   CL, R14
 | |
| 	BZHIQ  R13, R14, R14
 | |
| 	MOVQ   CX, DX
 | |
| 	MOVQ   DI, CX
 | |
| 	SHRQ   $0x20, CX
 | |
| 	ADDQ   R14, CX
 | |
| 	MOVQ   CX, 16(SP)
 | |
| 
 | |
| 	// Fill bitreader to have enough for the remaining
 | |
| 	CMPQ BX, $0x08
 | |
| 	JL   sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
 | |
| 	MOVQ DX, CX
 | |
| 	SHRQ $0x03, CX
 | |
| 	SUBQ CX, R12
 | |
| 	MOVQ (R12), AX
 | |
| 	SUBQ CX, BX
 | |
| 	ANDQ $0x07, DX
 | |
| 	JMP  sequenceDecs_decodeSync_bmi2_fill_2_end
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
 | |
| 	CMPQ    BX, $0x00
 | |
| 	JLE     sequenceDecs_decodeSync_bmi2_fill_2_check_overread
 | |
| 	CMPQ    DX, $0x07
 | |
| 	JLE     sequenceDecs_decodeSync_bmi2_fill_2_end
 | |
| 	SHLQ    $0x08, AX
 | |
| 	SUBQ    $0x01, R12
 | |
| 	SUBQ    $0x01, BX
 | |
| 	SUBQ    $0x08, DX
 | |
| 	MOVBQZX (R12), CX
 | |
| 	ORQ     CX, AX
 | |
| 	JMP     sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_fill_2_check_overread:
 | |
| 	CMPQ DX, $0x40
 | |
| 	JA   error_overread
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_fill_2_end:
 | |
| 	// Update literal length
 | |
| 	MOVQ   $0x00000808, CX
 | |
| 	BEXTRQ CX, SI, R13
 | |
| 	MOVQ   AX, R14
 | |
| 	LEAQ   (DX)(R13*1), CX
 | |
| 	ROLQ   CL, R14
 | |
| 	BZHIQ  R13, R14, R14
 | |
| 	MOVQ   CX, DX
 | |
| 	MOVQ   SI, CX
 | |
| 	SHRQ   $0x20, CX
 | |
| 	ADDQ   R14, CX
 | |
| 	MOVQ   CX, 24(SP)
 | |
| 
 | |
| 	// Fill bitreader for state updates
 | |
| 	MOVQ    R12, (SP)
 | |
| 	MOVQ    $0x00000808, CX
 | |
| 	BEXTRQ  CX, R8, R12
 | |
| 	MOVQ    ctx+16(FP), CX
 | |
| 	CMPQ    96(CX), $0x00
 | |
| 	JZ      sequenceDecs_decodeSync_bmi2_skip_update
 | |
| 	LEAQ    (SI)(DI*1), R13
 | |
| 	ADDQ    R8, R13
 | |
| 	MOVBQZX R13, R13
 | |
| 	LEAQ    (DX)(R13*1), CX
 | |
| 	MOVQ    AX, R14
 | |
| 	MOVQ    CX, DX
 | |
| 	ROLQ    CL, R14
 | |
| 	BZHIQ   R13, R14, R14
 | |
| 
 | |
| 	// Update Offset State
 | |
| 	BZHIQ R8, R14, CX
 | |
| 	SHRXQ R8, R14, R14
 | |
| 	SHRL  $0x10, R8
 | |
| 	ADDQ  CX, R8
 | |
| 
 | |
| 	// Load ctx.ofTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ 48(CX), CX
 | |
| 	MOVQ (CX)(R8*8), R8
 | |
| 
 | |
| 	// Update Match Length State
 | |
| 	BZHIQ DI, R14, CX
 | |
| 	SHRXQ DI, R14, R14
 | |
| 	SHRL  $0x10, DI
 | |
| 	ADDQ  CX, DI
 | |
| 
 | |
| 	// Load ctx.mlTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ 24(CX), CX
 | |
| 	MOVQ (CX)(DI*8), DI
 | |
| 
 | |
| 	// Update Literal Length State
 | |
| 	BZHIQ SI, R14, CX
 | |
| 	SHRL  $0x10, SI
 | |
| 	ADDQ  CX, SI
 | |
| 
 | |
| 	// Load ctx.llTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ (CX), CX
 | |
| 	MOVQ (CX)(SI*8), SI
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_skip_update:
 | |
| 	// Adjust offset
 | |
| 	MOVQ   s+0(FP), CX
 | |
| 	MOVQ   8(SP), R13
 | |
| 	CMPQ   R12, $0x01
 | |
| 	JBE    sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0
 | |
| 	MOVUPS 144(CX), X0
 | |
| 	MOVQ   R13, 144(CX)
 | |
| 	MOVUPS X0, 152(CX)
 | |
| 	JMP    sequenceDecs_decodeSync_bmi2_after_adjust
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0:
 | |
| 	CMPQ 24(SP), $0x00000000
 | |
| 	JNE  sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero
 | |
| 	INCQ R13
 | |
| 	JMP  sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero:
 | |
| 	TESTQ R13, R13
 | |
| 	JNZ   sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero
 | |
| 	MOVQ  144(CX), R13
 | |
| 	JMP   sequenceDecs_decodeSync_bmi2_after_adjust
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero:
 | |
| 	MOVQ    R13, R12
 | |
| 	XORQ    R14, R14
 | |
| 	MOVQ    $-1, R15
 | |
| 	CMPQ    R13, $0x03
 | |
| 	CMOVQEQ R14, R12
 | |
| 	CMOVQEQ R15, R14
 | |
| 	ADDQ    144(CX)(R12*8), R14
 | |
| 	JNZ     sequenceDecs_decodeSync_bmi2_adjust_temp_valid
 | |
| 	MOVQ    $0x00000001, R14
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_adjust_temp_valid:
 | |
| 	CMPQ R13, $0x01
 | |
| 	JZ   sequenceDecs_decodeSync_bmi2_adjust_skip
 | |
| 	MOVQ 152(CX), R12
 | |
| 	MOVQ R12, 160(CX)
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_adjust_skip:
 | |
| 	MOVQ 144(CX), R12
 | |
| 	MOVQ R12, 152(CX)
 | |
| 	MOVQ R14, 144(CX)
 | |
| 	MOVQ R14, R13
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_after_adjust:
 | |
| 	MOVQ R13, 8(SP)
 | |
| 
 | |
| 	// Check values
 | |
| 	MOVQ  16(SP), CX
 | |
| 	MOVQ  24(SP), R12
 | |
| 	LEAQ  (CX)(R12*1), R14
 | |
| 	MOVQ  s+0(FP), R15
 | |
| 	ADDQ  R14, 256(R15)
 | |
| 	MOVQ  ctx+16(FP), R14
 | |
| 	SUBQ  R12, 104(R14)
 | |
| 	JS    error_not_enough_literals
 | |
| 	CMPQ  CX, $0x00020002
 | |
| 	JA    sequenceDecs_decodeSync_bmi2_error_match_len_too_big
 | |
| 	TESTQ R13, R13
 | |
| 	JNZ   sequenceDecs_decodeSync_bmi2_match_len_ofs_ok
 | |
| 	TESTQ CX, CX
 | |
| 	JNZ   sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch
 | |
| 
 | |
| sequenceDecs_decodeSync_bmi2_match_len_ofs_ok:
 | |
| 	MOVQ 24(SP), CX
 | |
| 	MOVQ 8(SP), R12
 | |
| 	MOVQ 16(SP), R13
 | |
| 
 | |
| 	// Check if we have enough space in s.out
 | |
| 	LEAQ (CX)(R13*1), R14
 | |
| 	ADDQ R9, R14
 | |
| 	CMPQ R14, 32(SP)
 | |
| 	JA   error_not_enough_space
 | |
| 
 | |
| 	// Copy literals
 | |
| 	TESTQ CX, CX
 | |
| 	JZ    check_offset
 | |
| 	XORQ  R14, R14
 | |
| 
 | |
| copy_1:
 | |
| 	MOVUPS (R10)(R14*1), X0
 | |
| 	MOVUPS X0, (R9)(R14*1)
 | |
| 	ADDQ   $0x10, R14
 | |
| 	CMPQ   R14, CX
 | |
| 	JB     copy_1
 | |
| 	ADDQ   CX, R10
 | |
| 	ADDQ   CX, R9
 | |
| 	ADDQ   CX, R11
 | |
| 
 | |
| 	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
 | |
| check_offset:
 | |
| 	MOVQ R11, CX
 | |
| 	ADDQ 40(SP), CX
 | |
| 	CMPQ R12, CX
 | |
| 	JG   error_match_off_too_big
 | |
| 	CMPQ R12, 56(SP)
 | |
| 	JG   error_match_off_too_big
 | |
| 
 | |
| 	// Copy match from history
 | |
| 	MOVQ R12, CX
 | |
| 	SUBQ R11, CX
 | |
| 	JLS  copy_match
 | |
| 	MOVQ 48(SP), R14
 | |
| 	SUBQ CX, R14
 | |
| 	CMPQ R13, CX
 | |
| 	JG   copy_all_from_history
 | |
| 	MOVQ R13, CX
 | |
| 	SUBQ $0x10, CX
 | |
| 	JB   copy_4_small
 | |
| 
 | |
| copy_4_loop:
 | |
| 	MOVUPS (R14), X0
 | |
| 	MOVUPS X0, (R9)
 | |
| 	ADDQ   $0x10, R14
 | |
| 	ADDQ   $0x10, R9
 | |
| 	SUBQ   $0x10, CX
 | |
| 	JAE    copy_4_loop
 | |
| 	LEAQ   16(R14)(CX*1), R14
 | |
| 	LEAQ   16(R9)(CX*1), R9
 | |
| 	MOVUPS -16(R14), X0
 | |
| 	MOVUPS X0, -16(R9)
 | |
| 	JMP    copy_4_end
 | |
| 
 | |
| copy_4_small:
 | |
| 	CMPQ R13, $0x03
 | |
| 	JE   copy_4_move_3
 | |
| 	CMPQ R13, $0x08
 | |
| 	JB   copy_4_move_4through7
 | |
| 	JMP  copy_4_move_8through16
 | |
| 
 | |
| copy_4_move_3:
 | |
| 	MOVW (R14), CX
 | |
| 	MOVB 2(R14), R12
 | |
| 	MOVW CX, (R9)
 | |
| 	MOVB R12, 2(R9)
 | |
| 	ADDQ R13, R14
 | |
| 	ADDQ R13, R9
 | |
| 	JMP  copy_4_end
 | |
| 
 | |
| copy_4_move_4through7:
 | |
| 	MOVL (R14), CX
 | |
| 	MOVL -4(R14)(R13*1), R12
 | |
| 	MOVL CX, (R9)
 | |
| 	MOVL R12, -4(R9)(R13*1)
 | |
| 	ADDQ R13, R14
 | |
| 	ADDQ R13, R9
 | |
| 	JMP  copy_4_end
 | |
| 
 | |
| copy_4_move_8through16:
 | |
| 	MOVQ (R14), CX
 | |
| 	MOVQ -8(R14)(R13*1), R12
 | |
| 	MOVQ CX, (R9)
 | |
| 	MOVQ R12, -8(R9)(R13*1)
 | |
| 	ADDQ R13, R14
 | |
| 	ADDQ R13, R9
 | |
| 
 | |
| copy_4_end:
 | |
| 	ADDQ R13, R11
 | |
| 	JMP  handle_loop
 | |
| 	JMP loop_finished
 | |
| 
 | |
| copy_all_from_history:
 | |
| 	MOVQ CX, R15
 | |
| 	SUBQ $0x10, R15
 | |
| 	JB   copy_5_small
 | |
| 
 | |
| copy_5_loop:
 | |
| 	MOVUPS (R14), X0
 | |
| 	MOVUPS X0, (R9)
 | |
| 	ADDQ   $0x10, R14
 | |
| 	ADDQ   $0x10, R9
 | |
| 	SUBQ   $0x10, R15
 | |
| 	JAE    copy_5_loop
 | |
| 	LEAQ   16(R14)(R15*1), R14
 | |
| 	LEAQ   16(R9)(R15*1), R9
 | |
| 	MOVUPS -16(R14), X0
 | |
| 	MOVUPS X0, -16(R9)
 | |
| 	JMP    copy_5_end
 | |
| 
 | |
| copy_5_small:
 | |
| 	CMPQ CX, $0x03
 | |
| 	JE   copy_5_move_3
 | |
| 	JB   copy_5_move_1or2
 | |
| 	CMPQ CX, $0x08
 | |
| 	JB   copy_5_move_4through7
 | |
| 	JMP  copy_5_move_8through16
 | |
| 
 | |
| copy_5_move_1or2:
 | |
| 	MOVB (R14), R15
 | |
| 	MOVB -1(R14)(CX*1), BP
 | |
| 	MOVB R15, (R9)
 | |
| 	MOVB BP, -1(R9)(CX*1)
 | |
| 	ADDQ CX, R14
 | |
| 	ADDQ CX, R9
 | |
| 	JMP  copy_5_end
 | |
| 
 | |
| copy_5_move_3:
 | |
| 	MOVW (R14), R15
 | |
| 	MOVB 2(R14), BP
 | |
| 	MOVW R15, (R9)
 | |
| 	MOVB BP, 2(R9)
 | |
| 	ADDQ CX, R14
 | |
| 	ADDQ CX, R9
 | |
| 	JMP  copy_5_end
 | |
| 
 | |
| copy_5_move_4through7:
 | |
| 	MOVL (R14), R15
 | |
| 	MOVL -4(R14)(CX*1), BP
 | |
| 	MOVL R15, (R9)
 | |
| 	MOVL BP, -4(R9)(CX*1)
 | |
| 	ADDQ CX, R14
 | |
| 	ADDQ CX, R9
 | |
| 	JMP  copy_5_end
 | |
| 
 | |
| copy_5_move_8through16:
 | |
| 	MOVQ (R14), R15
 | |
| 	MOVQ -8(R14)(CX*1), BP
 | |
| 	MOVQ R15, (R9)
 | |
| 	MOVQ BP, -8(R9)(CX*1)
 | |
| 	ADDQ CX, R14
 | |
| 	ADDQ CX, R9
 | |
| 
 | |
| copy_5_end:
 | |
| 	ADDQ CX, R11
 | |
| 	SUBQ CX, R13
 | |
| 
 | |
| 	// Copy match from the current buffer
 | |
| copy_match:
 | |
| 	MOVQ R9, CX
 | |
| 	SUBQ R12, CX
 | |
| 
 | |
| 	// ml <= mo
 | |
| 	CMPQ R13, R12
 | |
| 	JA   copy_overlapping_match
 | |
| 
 | |
| 	// Copy non-overlapping match
 | |
| 	ADDQ R13, R11
 | |
| 	MOVQ R9, R12
 | |
| 	ADDQ R13, R9
 | |
| 
 | |
| copy_2:
 | |
| 	MOVUPS (CX), X0
 | |
| 	MOVUPS X0, (R12)
 | |
| 	ADDQ   $0x10, CX
 | |
| 	ADDQ   $0x10, R12
 | |
| 	SUBQ   $0x10, R13
 | |
| 	JHI    copy_2
 | |
| 	JMP    handle_loop
 | |
| 
 | |
| 	// Copy overlapping match
 | |
| copy_overlapping_match:
 | |
| 	ADDQ R13, R11
 | |
| 
 | |
| copy_slow_3:
 | |
| 	MOVB (CX), R12
 | |
| 	MOVB R12, (R9)
 | |
| 	INCQ CX
 | |
| 	INCQ R9
 | |
| 	DECQ R13
 | |
| 	JNZ  copy_slow_3
 | |
| 
 | |
| handle_loop:
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	DECQ 96(CX)
 | |
| 	JNS  sequenceDecs_decodeSync_bmi2_main_loop
 | |
| 
 | |
| loop_finished:
 | |
| 	MOVQ br+8(FP), CX
 | |
| 	MOVQ AX, 24(CX)
 | |
| 	MOVB DL, 40(CX)
 | |
| 	MOVQ BX, 32(CX)
 | |
| 
 | |
| 	// Update the context
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ R11, 136(AX)
 | |
| 	MOVQ 144(AX), CX
 | |
| 	SUBQ CX, R10
 | |
| 	MOVQ R10, 168(AX)
 | |
| 
 | |
| 	// Return success
 | |
| 	MOVQ $0x00000000, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match length error
 | |
| sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch:
 | |
| 	MOVQ 16(SP), AX
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ AX, 216(CX)
 | |
| 	MOVQ $0x00000001, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match too long error
 | |
| sequenceDecs_decodeSync_bmi2_error_match_len_too_big:
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ 16(SP), CX
 | |
| 	MOVQ CX, 216(AX)
 | |
| 	MOVQ $0x00000002, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match offset too long error
 | |
| error_match_off_too_big:
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ 8(SP), CX
 | |
| 	MOVQ CX, 224(AX)
 | |
| 	MOVQ R11, 136(AX)
 | |
| 	MOVQ $0x00000003, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with not enough literals error
 | |
| error_not_enough_literals:
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ 24(SP), CX
 | |
| 	MOVQ CX, 208(AX)
 | |
| 	MOVQ $0x00000004, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with overread error
 | |
| error_overread:
 | |
| 	MOVQ $0x00000006, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with not enough output space error
 | |
| error_not_enough_space:
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ 24(SP), CX
 | |
| 	MOVQ CX, 208(AX)
 | |
| 	MOVQ 16(SP), CX
 | |
| 	MOVQ CX, 216(AX)
 | |
| 	MOVQ R11, 136(AX)
 | |
| 	MOVQ $0x00000005, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| // func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 | |
| // Requires: CMOV, SSE
 | |
| TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
 | |
| 	MOVQ    br+8(FP), CX
 | |
| 	MOVQ    24(CX), DX
 | |
| 	MOVBQZX 40(CX), BX
 | |
| 	MOVQ    (CX), AX
 | |
| 	MOVQ    32(CX), SI
 | |
| 	ADDQ    SI, AX
 | |
| 	MOVQ    AX, (SP)
 | |
| 	MOVQ    ctx+16(FP), AX
 | |
| 	MOVQ    72(AX), DI
 | |
| 	MOVQ    80(AX), R8
 | |
| 	MOVQ    88(AX), R9
 | |
| 	XORQ    CX, CX
 | |
| 	MOVQ    CX, 8(SP)
 | |
| 	MOVQ    CX, 16(SP)
 | |
| 	MOVQ    CX, 24(SP)
 | |
| 	MOVQ    112(AX), R10
 | |
| 	MOVQ    128(AX), CX
 | |
| 	MOVQ    CX, 32(SP)
 | |
| 	MOVQ    144(AX), R11
 | |
| 	MOVQ    136(AX), R12
 | |
| 	MOVQ    200(AX), CX
 | |
| 	MOVQ    CX, 56(SP)
 | |
| 	MOVQ    176(AX), CX
 | |
| 	MOVQ    CX, 48(SP)
 | |
| 	MOVQ    184(AX), AX
 | |
| 	MOVQ    AX, 40(SP)
 | |
| 	MOVQ    40(SP), AX
 | |
| 	ADDQ    AX, 48(SP)
 | |
| 
 | |
| 	// Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
 | |
| 	ADDQ R10, 32(SP)
 | |
| 
 | |
| 	// outBase += outPosition
 | |
| 	ADDQ R12, R10
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_main_loop:
 | |
| 	MOVQ (SP), R13
 | |
| 
 | |
| 	// Fill bitreader to have enough for the offset and match length.
 | |
| 	CMPQ SI, $0x08
 | |
| 	JL   sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
 | |
| 	MOVQ BX, AX
 | |
| 	SHRQ $0x03, AX
 | |
| 	SUBQ AX, R13
 | |
| 	MOVQ (R13), DX
 | |
| 	SUBQ AX, SI
 | |
| 	ANDQ $0x07, BX
 | |
| 	JMP  sequenceDecs_decodeSync_safe_amd64_fill_end
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
 | |
| 	CMPQ    SI, $0x00
 | |
| 	JLE     sequenceDecs_decodeSync_safe_amd64_fill_check_overread
 | |
| 	CMPQ    BX, $0x07
 | |
| 	JLE     sequenceDecs_decodeSync_safe_amd64_fill_end
 | |
| 	SHLQ    $0x08, DX
 | |
| 	SUBQ    $0x01, R13
 | |
| 	SUBQ    $0x01, SI
 | |
| 	SUBQ    $0x08, BX
 | |
| 	MOVBQZX (R13), AX
 | |
| 	ORQ     AX, DX
 | |
| 	JMP     sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_fill_check_overread:
 | |
| 	CMPQ BX, $0x40
 | |
| 	JA   error_overread
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_fill_end:
 | |
| 	// Update offset
 | |
| 	MOVQ  R9, AX
 | |
| 	MOVQ  BX, CX
 | |
| 	MOVQ  DX, R14
 | |
| 	SHLQ  CL, R14
 | |
| 	MOVB  AH, CL
 | |
| 	SHRQ  $0x20, AX
 | |
| 	TESTQ CX, CX
 | |
| 	JZ    sequenceDecs_decodeSync_safe_amd64_of_update_zero
 | |
| 	ADDQ  CX, BX
 | |
| 	CMPQ  BX, $0x40
 | |
| 	JA    sequenceDecs_decodeSync_safe_amd64_of_update_zero
 | |
| 	CMPQ  CX, $0x40
 | |
| 	JAE   sequenceDecs_decodeSync_safe_amd64_of_update_zero
 | |
| 	NEGQ  CX
 | |
| 	SHRQ  CL, R14
 | |
| 	ADDQ  R14, AX
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_of_update_zero:
 | |
| 	MOVQ AX, 8(SP)
 | |
| 
 | |
| 	// Update match length
 | |
| 	MOVQ  R8, AX
 | |
| 	MOVQ  BX, CX
 | |
| 	MOVQ  DX, R14
 | |
| 	SHLQ  CL, R14
 | |
| 	MOVB  AH, CL
 | |
| 	SHRQ  $0x20, AX
 | |
| 	TESTQ CX, CX
 | |
| 	JZ    sequenceDecs_decodeSync_safe_amd64_ml_update_zero
 | |
| 	ADDQ  CX, BX
 | |
| 	CMPQ  BX, $0x40
 | |
| 	JA    sequenceDecs_decodeSync_safe_amd64_ml_update_zero
 | |
| 	CMPQ  CX, $0x40
 | |
| 	JAE   sequenceDecs_decodeSync_safe_amd64_ml_update_zero
 | |
| 	NEGQ  CX
 | |
| 	SHRQ  CL, R14
 | |
| 	ADDQ  R14, AX
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_ml_update_zero:
 | |
| 	MOVQ AX, 16(SP)
 | |
| 
 | |
| 	// Fill bitreader to have enough for the remaining
 | |
| 	CMPQ SI, $0x08
 | |
| 	JL   sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
 | |
| 	MOVQ BX, AX
 | |
| 	SHRQ $0x03, AX
 | |
| 	SUBQ AX, R13
 | |
| 	MOVQ (R13), DX
 | |
| 	SUBQ AX, SI
 | |
| 	ANDQ $0x07, BX
 | |
| 	JMP  sequenceDecs_decodeSync_safe_amd64_fill_2_end
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
 | |
| 	CMPQ    SI, $0x00
 | |
| 	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread
 | |
| 	CMPQ    BX, $0x07
 | |
| 	JLE     sequenceDecs_decodeSync_safe_amd64_fill_2_end
 | |
| 	SHLQ    $0x08, DX
 | |
| 	SUBQ    $0x01, R13
 | |
| 	SUBQ    $0x01, SI
 | |
| 	SUBQ    $0x08, BX
 | |
| 	MOVBQZX (R13), AX
 | |
| 	ORQ     AX, DX
 | |
| 	JMP     sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread:
 | |
| 	CMPQ BX, $0x40
 | |
| 	JA   error_overread
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_fill_2_end:
 | |
| 	// Update literal length
 | |
| 	MOVQ  DI, AX
 | |
| 	MOVQ  BX, CX
 | |
| 	MOVQ  DX, R14
 | |
| 	SHLQ  CL, R14
 | |
| 	MOVB  AH, CL
 | |
| 	SHRQ  $0x20, AX
 | |
| 	TESTQ CX, CX
 | |
| 	JZ    sequenceDecs_decodeSync_safe_amd64_ll_update_zero
 | |
| 	ADDQ  CX, BX
 | |
| 	CMPQ  BX, $0x40
 | |
| 	JA    sequenceDecs_decodeSync_safe_amd64_ll_update_zero
 | |
| 	CMPQ  CX, $0x40
 | |
| 	JAE   sequenceDecs_decodeSync_safe_amd64_ll_update_zero
 | |
| 	NEGQ  CX
 | |
| 	SHRQ  CL, R14
 | |
| 	ADDQ  R14, AX
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
 | |
| 	MOVQ AX, 24(SP)
 | |
| 
 | |
| 	// Fill bitreader for state updates
 | |
| 	MOVQ    R13, (SP)
 | |
| 	MOVQ    R9, AX
 | |
| 	SHRQ    $0x08, AX
 | |
| 	MOVBQZX AL, AX
 | |
| 	MOVQ    ctx+16(FP), CX
 | |
| 	CMPQ    96(CX), $0x00
 | |
| 	JZ      sequenceDecs_decodeSync_safe_amd64_skip_update
 | |
| 
 | |
| 	// Update Literal Length State
 | |
| 	MOVBQZX DI, R13
 | |
| 	SHRL    $0x10, DI
 | |
| 	LEAQ    (BX)(R13*1), CX
 | |
| 	MOVQ    DX, R14
 | |
| 	MOVQ    CX, BX
 | |
| 	ROLQ    CL, R14
 | |
| 	MOVL    $0x00000001, R15
 | |
| 	MOVB    R13, CL
 | |
| 	SHLL    CL, R15
 | |
| 	DECL    R15
 | |
| 	ANDQ    R15, R14
 | |
| 	ADDQ    R14, DI
 | |
| 
 | |
| 	// Load ctx.llTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ (CX), CX
 | |
| 	MOVQ (CX)(DI*8), DI
 | |
| 
 | |
| 	// Update Match Length State
 | |
| 	MOVBQZX R8, R13
 | |
| 	SHRL    $0x10, R8
 | |
| 	LEAQ    (BX)(R13*1), CX
 | |
| 	MOVQ    DX, R14
 | |
| 	MOVQ    CX, BX
 | |
| 	ROLQ    CL, R14
 | |
| 	MOVL    $0x00000001, R15
 | |
| 	MOVB    R13, CL
 | |
| 	SHLL    CL, R15
 | |
| 	DECL    R15
 | |
| 	ANDQ    R15, R14
 | |
| 	ADDQ    R14, R8
 | |
| 
 | |
| 	// Load ctx.mlTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ 24(CX), CX
 | |
| 	MOVQ (CX)(R8*8), R8
 | |
| 
 | |
| 	// Update Offset State
 | |
| 	MOVBQZX R9, R13
 | |
| 	SHRL    $0x10, R9
 | |
| 	LEAQ    (BX)(R13*1), CX
 | |
| 	MOVQ    DX, R14
 | |
| 	MOVQ    CX, BX
 | |
| 	ROLQ    CL, R14
 | |
| 	MOVL    $0x00000001, R15
 | |
| 	MOVB    R13, CL
 | |
| 	SHLL    CL, R15
 | |
| 	DECL    R15
 | |
| 	ANDQ    R15, R14
 | |
| 	ADDQ    R14, R9
 | |
| 
 | |
| 	// Load ctx.ofTable
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ 48(CX), CX
 | |
| 	MOVQ (CX)(R9*8), R9
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_skip_update:
 | |
| 	// Adjust offset
 | |
| 	MOVQ   s+0(FP), CX
 | |
| 	MOVQ   8(SP), R13
 | |
| 	CMPQ   AX, $0x01
 | |
| 	JBE    sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0
 | |
| 	MOVUPS 144(CX), X0
 | |
| 	MOVQ   R13, 144(CX)
 | |
| 	MOVUPS X0, 152(CX)
 | |
| 	JMP    sequenceDecs_decodeSync_safe_amd64_after_adjust
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0:
 | |
| 	CMPQ 24(SP), $0x00000000
 | |
| 	JNE  sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero
 | |
| 	INCQ R13
 | |
| 	JMP  sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero:
 | |
| 	TESTQ R13, R13
 | |
| 	JNZ   sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero
 | |
| 	MOVQ  144(CX), R13
 | |
| 	JMP   sequenceDecs_decodeSync_safe_amd64_after_adjust
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero:
 | |
| 	MOVQ    R13, AX
 | |
| 	XORQ    R14, R14
 | |
| 	MOVQ    $-1, R15
 | |
| 	CMPQ    R13, $0x03
 | |
| 	CMOVQEQ R14, AX
 | |
| 	CMOVQEQ R15, R14
 | |
| 	ADDQ    144(CX)(AX*8), R14
 | |
| 	JNZ     sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid
 | |
| 	MOVQ    $0x00000001, R14
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid:
 | |
| 	CMPQ R13, $0x01
 | |
| 	JZ   sequenceDecs_decodeSync_safe_amd64_adjust_skip
 | |
| 	MOVQ 152(CX), AX
 | |
| 	MOVQ AX, 160(CX)
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_adjust_skip:
 | |
| 	MOVQ 144(CX), AX
 | |
| 	MOVQ AX, 152(CX)
 | |
| 	MOVQ R14, 144(CX)
 | |
| 	MOVQ R14, R13
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_after_adjust:
 | |
| 	MOVQ R13, 8(SP)
 | |
| 
 | |
| 	// Check values
 | |
| 	MOVQ  16(SP), AX
 | |
| 	MOVQ  24(SP), CX
 | |
| 	LEAQ  (AX)(CX*1), R14
 | |
| 	MOVQ  s+0(FP), R15
 | |
| 	ADDQ  R14, 256(R15)
 | |
| 	MOVQ  ctx+16(FP), R14
 | |
| 	SUBQ  CX, 104(R14)
 | |
| 	JS    error_not_enough_literals
 | |
| 	CMPQ  AX, $0x00020002
 | |
| 	JA    sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big
 | |
| 	TESTQ R13, R13
 | |
| 	JNZ   sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok
 | |
| 	TESTQ AX, AX
 | |
| 	JNZ   sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch
 | |
| 
 | |
| sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok:
 | |
| 	MOVQ 24(SP), AX
 | |
| 	MOVQ 8(SP), CX
 | |
| 	MOVQ 16(SP), R13
 | |
| 
 | |
| 	// Check if we have enough space in s.out
 | |
| 	LEAQ (AX)(R13*1), R14
 | |
| 	ADDQ R10, R14
 | |
| 	CMPQ R14, 32(SP)
 | |
| 	JA   error_not_enough_space
 | |
| 
 | |
| 	// Copy literals
 | |
| 	TESTQ AX, AX
 | |
| 	JZ    check_offset
 | |
| 	MOVQ  AX, R14
 | |
| 	SUBQ  $0x10, R14
 | |
| 	JB    copy_1_small
 | |
| 
 | |
| copy_1_loop:
 | |
| 	MOVUPS (R11), X0
 | |
| 	MOVUPS X0, (R10)
 | |
| 	ADDQ   $0x10, R11
 | |
| 	ADDQ   $0x10, R10
 | |
| 	SUBQ   $0x10, R14
 | |
| 	JAE    copy_1_loop
 | |
| 	LEAQ   16(R11)(R14*1), R11
 | |
| 	LEAQ   16(R10)(R14*1), R10
 | |
| 	MOVUPS -16(R11), X0
 | |
| 	MOVUPS X0, -16(R10)
 | |
| 	JMP    copy_1_end
 | |
| 
 | |
| copy_1_small:
 | |
| 	CMPQ AX, $0x03
 | |
| 	JE   copy_1_move_3
 | |
| 	JB   copy_1_move_1or2
 | |
| 	CMPQ AX, $0x08
 | |
| 	JB   copy_1_move_4through7
 | |
| 	JMP  copy_1_move_8through16
 | |
| 
 | |
| copy_1_move_1or2:
 | |
| 	MOVB (R11), R14
 | |
| 	MOVB -1(R11)(AX*1), R15
 | |
| 	MOVB R14, (R10)
 | |
| 	MOVB R15, -1(R10)(AX*1)
 | |
| 	ADDQ AX, R11
 | |
| 	ADDQ AX, R10
 | |
| 	JMP  copy_1_end
 | |
| 
 | |
| copy_1_move_3:
 | |
| 	MOVW (R11), R14
 | |
| 	MOVB 2(R11), R15
 | |
| 	MOVW R14, (R10)
 | |
| 	MOVB R15, 2(R10)
 | |
| 	ADDQ AX, R11
 | |
| 	ADDQ AX, R10
 | |
| 	JMP  copy_1_end
 | |
| 
 | |
| copy_1_move_4through7:
 | |
| 	MOVL (R11), R14
 | |
| 	MOVL -4(R11)(AX*1), R15
 | |
| 	MOVL R14, (R10)
 | |
| 	MOVL R15, -4(R10)(AX*1)
 | |
| 	ADDQ AX, R11
 | |
| 	ADDQ AX, R10
 | |
| 	JMP  copy_1_end
 | |
| 
 | |
| copy_1_move_8through16:
 | |
| 	MOVQ (R11), R14
 | |
| 	MOVQ -8(R11)(AX*1), R15
 | |
| 	MOVQ R14, (R10)
 | |
| 	MOVQ R15, -8(R10)(AX*1)
 | |
| 	ADDQ AX, R11
 | |
| 	ADDQ AX, R10
 | |
| 
 | |
| copy_1_end:
 | |
| 	ADDQ AX, R12
 | |
| 
 | |
| 	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
 | |
| check_offset:
 | |
| 	MOVQ R12, AX
 | |
| 	ADDQ 40(SP), AX
 | |
| 	CMPQ CX, AX
 | |
| 	JG   error_match_off_too_big
 | |
| 	CMPQ CX, 56(SP)
 | |
| 	JG   error_match_off_too_big
 | |
| 
 | |
| 	// Copy match from history
 | |
| 	MOVQ CX, AX
 | |
| 	SUBQ R12, AX
 | |
| 	JLS  copy_match
 | |
| 	MOVQ 48(SP), R14
 | |
| 	SUBQ AX, R14
 | |
| 	CMPQ R13, AX
 | |
| 	JG   copy_all_from_history
 | |
| 	MOVQ R13, AX
 | |
| 	SUBQ $0x10, AX
 | |
| 	JB   copy_4_small
 | |
| 
 | |
| copy_4_loop:
 | |
| 	MOVUPS (R14), X0
 | |
| 	MOVUPS X0, (R10)
 | |
| 	ADDQ   $0x10, R14
 | |
| 	ADDQ   $0x10, R10
 | |
| 	SUBQ   $0x10, AX
 | |
| 	JAE    copy_4_loop
 | |
| 	LEAQ   16(R14)(AX*1), R14
 | |
| 	LEAQ   16(R10)(AX*1), R10
 | |
| 	MOVUPS -16(R14), X0
 | |
| 	MOVUPS X0, -16(R10)
 | |
| 	JMP    copy_4_end
 | |
| 
 | |
| copy_4_small:
 | |
| 	CMPQ R13, $0x03
 | |
| 	JE   copy_4_move_3
 | |
| 	CMPQ R13, $0x08
 | |
| 	JB   copy_4_move_4through7
 | |
| 	JMP  copy_4_move_8through16
 | |
| 
 | |
| copy_4_move_3:
 | |
| 	MOVW (R14), AX
 | |
| 	MOVB 2(R14), CL
 | |
| 	MOVW AX, (R10)
 | |
| 	MOVB CL, 2(R10)
 | |
| 	ADDQ R13, R14
 | |
| 	ADDQ R13, R10
 | |
| 	JMP  copy_4_end
 | |
| 
 | |
| copy_4_move_4through7:
 | |
| 	MOVL (R14), AX
 | |
| 	MOVL -4(R14)(R13*1), CX
 | |
| 	MOVL AX, (R10)
 | |
| 	MOVL CX, -4(R10)(R13*1)
 | |
| 	ADDQ R13, R14
 | |
| 	ADDQ R13, R10
 | |
| 	JMP  copy_4_end
 | |
| 
 | |
| copy_4_move_8through16:
 | |
| 	MOVQ (R14), AX
 | |
| 	MOVQ -8(R14)(R13*1), CX
 | |
| 	MOVQ AX, (R10)
 | |
| 	MOVQ CX, -8(R10)(R13*1)
 | |
| 	ADDQ R13, R14
 | |
| 	ADDQ R13, R10
 | |
| 
 | |
| copy_4_end:
 | |
| 	ADDQ R13, R12
 | |
| 	JMP  handle_loop
 | |
| 	JMP loop_finished
 | |
| 
 | |
| copy_all_from_history:
 | |
| 	MOVQ AX, R15
 | |
| 	SUBQ $0x10, R15
 | |
| 	JB   copy_5_small
 | |
| 
 | |
| copy_5_loop:
 | |
| 	MOVUPS (R14), X0
 | |
| 	MOVUPS X0, (R10)
 | |
| 	ADDQ   $0x10, R14
 | |
| 	ADDQ   $0x10, R10
 | |
| 	SUBQ   $0x10, R15
 | |
| 	JAE    copy_5_loop
 | |
| 	LEAQ   16(R14)(R15*1), R14
 | |
| 	LEAQ   16(R10)(R15*1), R10
 | |
| 	MOVUPS -16(R14), X0
 | |
| 	MOVUPS X0, -16(R10)
 | |
| 	JMP    copy_5_end
 | |
| 
 | |
| copy_5_small:
 | |
| 	CMPQ AX, $0x03
 | |
| 	JE   copy_5_move_3
 | |
| 	JB   copy_5_move_1or2
 | |
| 	CMPQ AX, $0x08
 | |
| 	JB   copy_5_move_4through7
 | |
| 	JMP  copy_5_move_8through16
 | |
| 
 | |
| copy_5_move_1or2:
 | |
| 	MOVB (R14), R15
 | |
| 	MOVB -1(R14)(AX*1), BP
 | |
| 	MOVB R15, (R10)
 | |
| 	MOVB BP, -1(R10)(AX*1)
 | |
| 	ADDQ AX, R14
 | |
| 	ADDQ AX, R10
 | |
| 	JMP  copy_5_end
 | |
| 
 | |
| copy_5_move_3:
 | |
| 	MOVW (R14), R15
 | |
| 	MOVB 2(R14), BP
 | |
| 	MOVW R15, (R10)
 | |
| 	MOVB BP, 2(R10)
 | |
| 	ADDQ AX, R14
 | |
| 	ADDQ AX, R10
 | |
| 	JMP  copy_5_end
 | |
| 
 | |
| copy_5_move_4through7:
 | |
| 	MOVL (R14), R15
 | |
| 	MOVL -4(R14)(AX*1), BP
 | |
| 	MOVL R15, (R10)
 | |
| 	MOVL BP, -4(R10)(AX*1)
 | |
| 	ADDQ AX, R14
 | |
| 	ADDQ AX, R10
 | |
| 	JMP  copy_5_end
 | |
| 
 | |
| copy_5_move_8through16:
 | |
| 	MOVQ (R14), R15
 | |
| 	MOVQ -8(R14)(AX*1), BP
 | |
| 	MOVQ R15, (R10)
 | |
| 	MOVQ BP, -8(R10)(AX*1)
 | |
| 	ADDQ AX, R14
 | |
| 	ADDQ AX, R10
 | |
| 
 | |
| copy_5_end:
 | |
| 	ADDQ AX, R12
 | |
| 	SUBQ AX, R13
 | |
| 
 | |
| 	// Copy match from the current buffer
 | |
| copy_match:
 | |
| 	MOVQ R10, AX
 | |
| 	SUBQ CX, AX
 | |
| 
 | |
| 	// ml <= mo
 | |
| 	CMPQ R13, CX
 | |
| 	JA   copy_overlapping_match
 | |
| 
 | |
| 	// Copy non-overlapping match
 | |
| 	ADDQ R13, R12
 | |
| 	MOVQ R13, CX
 | |
| 	SUBQ $0x10, CX
 | |
| 	JB   copy_2_small
 | |
| 
 | |
| copy_2_loop:
 | |
| 	MOVUPS (AX), X0
 | |
| 	MOVUPS X0, (R10)
 | |
| 	ADDQ   $0x10, AX
 | |
| 	ADDQ   $0x10, R10
 | |
| 	SUBQ   $0x10, CX
 | |
| 	JAE    copy_2_loop
 | |
| 	LEAQ   16(AX)(CX*1), AX
 | |
| 	LEAQ   16(R10)(CX*1), R10
 | |
| 	MOVUPS -16(AX), X0
 | |
| 	MOVUPS X0, -16(R10)
 | |
| 	JMP    copy_2_end
 | |
| 
 | |
| copy_2_small:
 | |
| 	CMPQ R13, $0x03
 | |
| 	JE   copy_2_move_3
 | |
| 	JB   copy_2_move_1or2
 | |
| 	CMPQ R13, $0x08
 | |
| 	JB   copy_2_move_4through7
 | |
| 	JMP  copy_2_move_8through16
 | |
| 
 | |
| copy_2_move_1or2:
 | |
| 	MOVB (AX), CL
 | |
| 	MOVB -1(AX)(R13*1), R14
 | |
| 	MOVB CL, (R10)
 | |
| 	MOVB R14, -1(R10)(R13*1)
 | |
| 	ADDQ R13, AX
 | |
| 	ADDQ R13, R10
 | |
| 	JMP  copy_2_end
 | |
| 
 | |
| copy_2_move_3:
 | |
| 	MOVW (AX), CX
 | |
| 	MOVB 2(AX), R14
 | |
| 	MOVW CX, (R10)
 | |
| 	MOVB R14, 2(R10)
 | |
| 	ADDQ R13, AX
 | |
| 	ADDQ R13, R10
 | |
| 	JMP  copy_2_end
 | |
| 
 | |
| copy_2_move_4through7:
 | |
| 	MOVL (AX), CX
 | |
| 	MOVL -4(AX)(R13*1), R14
 | |
| 	MOVL CX, (R10)
 | |
| 	MOVL R14, -4(R10)(R13*1)
 | |
| 	ADDQ R13, AX
 | |
| 	ADDQ R13, R10
 | |
| 	JMP  copy_2_end
 | |
| 
 | |
| copy_2_move_8through16:
 | |
| 	MOVQ (AX), CX
 | |
| 	MOVQ -8(AX)(R13*1), R14
 | |
| 	MOVQ CX, (R10)
 | |
| 	MOVQ R14, -8(R10)(R13*1)
 | |
| 	ADDQ R13, AX
 | |
| 	ADDQ R13, R10
 | |
| 
 | |
| copy_2_end:
 | |
| 	JMP handle_loop
 | |
| 
 | |
| 	// Copy overlapping match
 | |
| copy_overlapping_match:
 | |
| 	ADDQ R13, R12
 | |
| 
 | |
| copy_slow_3:
 | |
| 	MOVB (AX), CL
 | |
| 	MOVB CL, (R10)
 | |
| 	INCQ AX
 | |
| 	INCQ R10
 | |
| 	DECQ R13
 | |
| 	JNZ  copy_slow_3
 | |
| 
 | |
| handle_loop:
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	DECQ 96(AX)
 | |
| 	JNS  sequenceDecs_decodeSync_safe_amd64_main_loop
 | |
| 
 | |
| loop_finished:
 | |
| 	MOVQ br+8(FP), AX
 | |
| 	MOVQ DX, 24(AX)
 | |
| 	MOVB BL, 40(AX)
 | |
| 	MOVQ SI, 32(AX)
 | |
| 
 | |
| 	// Update the context
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ R12, 136(AX)
 | |
| 	MOVQ 144(AX), CX
 | |
| 	SUBQ CX, R11
 | |
| 	MOVQ R11, 168(AX)
 | |
| 
 | |
| 	// Return success
 | |
| 	MOVQ $0x00000000, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match length error
 | |
| sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch:
 | |
| 	MOVQ 16(SP), AX
 | |
| 	MOVQ ctx+16(FP), CX
 | |
| 	MOVQ AX, 216(CX)
 | |
| 	MOVQ $0x00000001, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match too long error
 | |
| sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big:
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ 16(SP), CX
 | |
| 	MOVQ CX, 216(AX)
 | |
| 	MOVQ $0x00000002, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with match offset too long error
 | |
| error_match_off_too_big:
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ 8(SP), CX
 | |
| 	MOVQ CX, 224(AX)
 | |
| 	MOVQ R12, 136(AX)
 | |
| 	MOVQ $0x00000003, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with not enough literals error
 | |
| error_not_enough_literals:
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ 24(SP), CX
 | |
| 	MOVQ CX, 208(AX)
 | |
| 	MOVQ $0x00000004, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with overread error
 | |
| error_overread:
 | |
| 	MOVQ $0x00000006, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
| 	// Return with not enough output space error
 | |
| error_not_enough_space:
 | |
| 	MOVQ ctx+16(FP), AX
 | |
| 	MOVQ 24(SP), CX
 | |
| 	MOVQ CX, 208(AX)
 | |
| 	MOVQ 16(SP), CX
 | |
| 	MOVQ CX, 216(AX)
 | |
| 	MOVQ R12, 136(AX)
 | |
| 	MOVQ $0x00000005, ret+24(FP)
 | |
| 	RET
 | |
| 
 | |
// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE
//
// NOTE(review): generated code ("DO NOT EDIT" file header) — comments below are
// review annotations; the instructions themselves are unchanged.
//
// Decodes sequences using BMI2 bit-extraction instructions and executes them
// (literal and match copies) in the same pass, with bounds-checked ("safe")
// copy loops. Register roles, as established by the prologue:
//   AX  - bitReader value          DX  - bitReader bit count
//   BX  - bitReader bytes left     CX  - scratch
//   SI  - literal length state     DI  - match length state
//   R8  - offset state             R9  - current output write pointer
//   R10 - literal read pointer     R11 - output position counter
// Stack slots (64-byte frame):
//   (SP)   bitReader input ptr      8(SP)  decoded offset
//   16(SP) decoded match length     24(SP) decoded literal length
//   32(SP) past-end ptr of s.out    40(SP) ctx field at offset 184
//   48(SP) 176(ctx)+184(ctx)        56(SP) ctx field at offset 200
//   (40/48/56(SP) mirror history/window fields of decodeSyncAsmContext —
//   presumably histLen, history end, and windowSize; confirm against the Go
//   struct definition.)
// Return codes stored to ret+24(FP): 0 success, 1 match-len/offset mismatch,
// 2 match length too big, 3 match offset too big, 4 not enough literals,
// 5 not enough output space, 6 bitreader overread.
TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
	MOVQ    br+8(FP), BX
	MOVQ    24(BX), AX
	MOVBQZX 40(BX), DX
	MOVQ    (BX), CX
	MOVQ    32(BX), BX
	ADDQ    BX, CX
	MOVQ    CX, (SP)
	MOVQ    ctx+16(FP), CX
	MOVQ    72(CX), SI
	MOVQ    80(CX), DI
	MOVQ    88(CX), R8
	XORQ    R9, R9
	MOVQ    R9, 8(SP)
	MOVQ    R9, 16(SP)
	MOVQ    R9, 24(SP)
	MOVQ    112(CX), R9
	MOVQ    128(CX), R10
	MOVQ    R10, 32(SP)
	MOVQ    144(CX), R10
	MOVQ    136(CX), R11
	MOVQ    200(CX), R12
	MOVQ    R12, 56(SP)
	MOVQ    176(CX), R12
	MOVQ    R12, 48(SP)
	MOVQ    184(CX), CX
	MOVQ    CX, 40(SP)
	MOVQ    40(SP), CX
	ADDQ    CX, 48(SP)

	// Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
	ADDQ R9, 32(SP)

	// outBase += outPosition
	ADDQ R11, R9

sequenceDecs_decodeSync_safe_bmi2_main_loop:
	MOVQ (SP), R12

	// Fill bitreader to have enough for the offset and match length.
	CMPQ BX, $0x08
	JL   sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte
	MOVQ DX, CX
	SHRQ $0x03, CX
	SUBQ CX, R12
	MOVQ (R12), AX
	SUBQ CX, BX
	ANDQ $0x07, DX
	JMP  sequenceDecs_decodeSync_safe_bmi2_fill_end

	// Slow refill path: fewer than 8 input bytes remain, pull one byte at a time.
sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
	CMPQ    BX, $0x00
	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_check_overread
	CMPQ    DX, $0x07
	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_end
	SHLQ    $0x08, AX
	SUBQ    $0x01, R12
	SUBQ    $0x01, BX
	SUBQ    $0x08, DX
	MOVBQZX (R12), CX
	ORQ     CX, AX
	JMP     sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte

	// A bit count above 64 means more bits were consumed than the stream held.
sequenceDecs_decodeSync_safe_bmi2_fill_check_overread:
	CMPQ DX, $0x40
	JA   error_overread

sequenceDecs_decodeSync_safe_bmi2_fill_end:
	// Update offset
	MOVQ   $0x00000808, CX
	BEXTRQ CX, R8, R13
	MOVQ   AX, R14
	LEAQ   (DX)(R13*1), CX
	ROLQ   CL, R14
	BZHIQ  R13, R14, R14
	MOVQ   CX, DX
	MOVQ   R8, CX
	SHRQ   $0x20, CX
	ADDQ   R14, CX
	MOVQ   CX, 8(SP)

	// Update match length
	MOVQ   $0x00000808, CX
	BEXTRQ CX, DI, R13
	MOVQ   AX, R14
	LEAQ   (DX)(R13*1), CX
	ROLQ   CL, R14
	BZHIQ  R13, R14, R14
	MOVQ   CX, DX
	MOVQ   DI, CX
	SHRQ   $0x20, CX
	ADDQ   R14, CX
	MOVQ   CX, 16(SP)

	// Fill bitreader to have enough for the remaining
	CMPQ BX, $0x08
	JL   sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte
	MOVQ DX, CX
	SHRQ $0x03, CX
	SUBQ CX, R12
	MOVQ (R12), AX
	SUBQ CX, BX
	ANDQ $0x07, DX
	JMP  sequenceDecs_decodeSync_safe_bmi2_fill_2_end

sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
	CMPQ    BX, $0x00
	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread
	CMPQ    DX, $0x07
	JLE     sequenceDecs_decodeSync_safe_bmi2_fill_2_end
	SHLQ    $0x08, AX
	SUBQ    $0x01, R12
	SUBQ    $0x01, BX
	SUBQ    $0x08, DX
	MOVBQZX (R12), CX
	ORQ     CX, AX
	JMP     sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte

sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread:
	CMPQ DX, $0x40
	JA   error_overread

sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
	// Update literal length
	MOVQ   $0x00000808, CX
	BEXTRQ CX, SI, R13
	MOVQ   AX, R14
	LEAQ   (DX)(R13*1), CX
	ROLQ   CL, R14
	BZHIQ  R13, R14, R14
	MOVQ   CX, DX
	MOVQ   SI, CX
	SHRQ   $0x20, CX
	ADDQ   R14, CX
	MOVQ   CX, 24(SP)

	// Fill bitreader for state updates
	MOVQ    R12, (SP)
	MOVQ    $0x00000808, CX
	BEXTRQ  CX, R8, R12
	MOVQ    ctx+16(FP), CX
	// 96(ctx) is the remaining-sequence counter; skip the state update for
	// the final sequence.
	CMPQ    96(CX), $0x00
	JZ      sequenceDecs_decodeSync_safe_bmi2_skip_update
	LEAQ    (SI)(DI*1), R13
	ADDQ    R8, R13
	MOVBQZX R13, R13
	LEAQ    (DX)(R13*1), CX
	MOVQ    AX, R14
	MOVQ    CX, DX
	ROLQ    CL, R14
	BZHIQ   R13, R14, R14

	// Update Offset State
	BZHIQ R8, R14, CX
	SHRXQ R8, R14, R14
	SHRL  $0x10, R8
	ADDQ  CX, R8

	// Load ctx.ofTable
	MOVQ ctx+16(FP), CX
	MOVQ 48(CX), CX
	MOVQ (CX)(R8*8), R8

	// Update Match Length State
	BZHIQ DI, R14, CX
	SHRXQ DI, R14, R14
	SHRL  $0x10, DI
	ADDQ  CX, DI

	// Load ctx.mlTable
	MOVQ ctx+16(FP), CX
	MOVQ 24(CX), CX
	MOVQ (CX)(DI*8), DI

	// Update Literal Length State
	BZHIQ SI, R14, CX
	SHRL  $0x10, SI
	ADDQ  CX, SI

	// Load ctx.llTable
	MOVQ ctx+16(FP), CX
	MOVQ (CX), CX
	MOVQ (CX)(SI*8), SI

sequenceDecs_decodeSync_safe_bmi2_skip_update:
	// Adjust offset
	// Resolves repeat offsets against the three history slots at
	// 144/152/160(s), per the zstd offset-code rules.
	MOVQ   s+0(FP), CX
	MOVQ   8(SP), R13
	CMPQ   R12, $0x01
	JBE    sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0
	MOVUPS 144(CX), X0
	MOVQ   R13, 144(CX)
	MOVUPS X0, 152(CX)
	JMP    sequenceDecs_decodeSync_safe_bmi2_after_adjust

sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0:
	CMPQ 24(SP), $0x00000000
	JNE  sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero
	INCQ R13
	JMP  sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero

sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero:
	TESTQ R13, R13
	JNZ   sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero
	MOVQ  144(CX), R13
	JMP   sequenceDecs_decodeSync_safe_bmi2_after_adjust

sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero:
	// Branchless slot select: for offset==3 use slot 0 minus one (CMOVs pick
	// index 0 and a -1 bias), otherwise use slot [offset-1] unmodified.
	MOVQ    R13, R12
	XORQ    R14, R14
	MOVQ    $-1, R15
	CMPQ    R13, $0x03
	CMOVQEQ R14, R12
	CMOVQEQ R15, R14
	ADDQ    144(CX)(R12*8), R14
	JNZ     sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid
	MOVQ    $0x00000001, R14

sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid:
	CMPQ R13, $0x01
	JZ   sequenceDecs_decodeSync_safe_bmi2_adjust_skip
	MOVQ 152(CX), R12
	MOVQ R12, 160(CX)

sequenceDecs_decodeSync_safe_bmi2_adjust_skip:
	MOVQ 144(CX), R12
	MOVQ R12, 152(CX)
	MOVQ R14, 144(CX)
	MOVQ R14, R13

sequenceDecs_decodeSync_safe_bmi2_after_adjust:
	MOVQ R13, 8(SP)

	// Check values
	MOVQ  16(SP), CX
	MOVQ  24(SP), R12
	LEAQ  (CX)(R12*1), R14
	MOVQ  s+0(FP), R15
	ADDQ  R14, 256(R15)
	MOVQ  ctx+16(FP), R14
	SUBQ  R12, 104(R14)
	JS    error_not_enough_literals
	// Maximum match length: 0x00020002 (128KB + 2, per zstd limits).
	CMPQ  CX, $0x00020002
	JA    sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big
	TESTQ R13, R13
	JNZ   sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok
	TESTQ CX, CX
	JNZ   sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch

sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok:
	MOVQ 24(SP), CX
	MOVQ 8(SP), R12
	MOVQ 16(SP), R13

	// Check if we have enough space in s.out
	LEAQ (CX)(R13*1), R14
	ADDQ R9, R14
	CMPQ R14, 32(SP)
	JA   error_not_enough_space

	// Copy literals
	TESTQ CX, CX
	JZ    check_offset
	MOVQ  CX, R14
	SUBQ  $0x10, R14
	JB    copy_1_small

copy_1_loop:
	MOVUPS (R10), X0
	MOVUPS X0, (R9)
	ADDQ   $0x10, R10
	ADDQ   $0x10, R9
	SUBQ   $0x10, R14
	JAE    copy_1_loop
	LEAQ   16(R10)(R14*1), R10
	LEAQ   16(R9)(R14*1), R9
	MOVUPS -16(R10), X0
	MOVUPS X0, -16(R9)
	JMP    copy_1_end

	// Literal length < 16: dispatch to a length-specialized small copy.
copy_1_small:
	CMPQ CX, $0x03
	JE   copy_1_move_3
	JB   copy_1_move_1or2
	CMPQ CX, $0x08
	JB   copy_1_move_4through7
	JMP  copy_1_move_8through16

copy_1_move_1or2:
	MOVB (R10), R14
	MOVB -1(R10)(CX*1), R15
	MOVB R14, (R9)
	MOVB R15, -1(R9)(CX*1)
	ADDQ CX, R10
	ADDQ CX, R9
	JMP  copy_1_end

copy_1_move_3:
	MOVW (R10), R14
	MOVB 2(R10), R15
	MOVW R14, (R9)
	MOVB R15, 2(R9)
	ADDQ CX, R10
	ADDQ CX, R9
	JMP  copy_1_end

copy_1_move_4through7:
	MOVL (R10), R14
	MOVL -4(R10)(CX*1), R15
	MOVL R14, (R9)
	MOVL R15, -4(R9)(CX*1)
	ADDQ CX, R10
	ADDQ CX, R9
	JMP  copy_1_end

copy_1_move_8through16:
	MOVQ (R10), R14
	MOVQ -8(R10)(CX*1), R15
	MOVQ R14, (R9)
	MOVQ R15, -8(R9)(CX*1)
	ADDQ CX, R10
	ADDQ CX, R9

copy_1_end:
	ADDQ CX, R11

	// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
check_offset:
	MOVQ R11, CX
	ADDQ 40(SP), CX
	CMPQ R12, CX
	JG   error_match_off_too_big
	CMPQ R12, 56(SP)
	JG   error_match_off_too_big

	// Copy match from history
	// CX = offset - outPosition; <= 0 means the match lies entirely in the
	// current output buffer.
	MOVQ R12, CX
	SUBQ R11, CX
	JLS  copy_match
	MOVQ 48(SP), R14
	SUBQ CX, R14
	CMPQ R13, CX
	JG   copy_all_from_history
	MOVQ R13, CX
	SUBQ $0x10, CX
	JB   copy_4_small

copy_4_loop:
	MOVUPS (R14), X0
	MOVUPS X0, (R9)
	ADDQ   $0x10, R14
	ADDQ   $0x10, R9
	SUBQ   $0x10, CX
	JAE    copy_4_loop
	LEAQ   16(R14)(CX*1), R14
	LEAQ   16(R9)(CX*1), R9
	MOVUPS -16(R14), X0
	MOVUPS X0, -16(R9)
	JMP    copy_4_end

copy_4_small:
	CMPQ R13, $0x03
	JE   copy_4_move_3
	CMPQ R13, $0x08
	JB   copy_4_move_4through7
	JMP  copy_4_move_8through16

copy_4_move_3:
	MOVW (R14), CX
	MOVB 2(R14), R12
	MOVW CX, (R9)
	MOVB R12, 2(R9)
	ADDQ R13, R14
	ADDQ R13, R9
	JMP  copy_4_end

copy_4_move_4through7:
	MOVL (R14), CX
	MOVL -4(R14)(R13*1), R12
	MOVL CX, (R9)
	MOVL R12, -4(R9)(R13*1)
	ADDQ R13, R14
	ADDQ R13, R9
	JMP  copy_4_end

copy_4_move_8through16:
	MOVQ (R14), CX
	MOVQ -8(R14)(R13*1), R12
	MOVQ CX, (R9)
	MOVQ R12, -8(R9)(R13*1)
	ADDQ R13, R14
	ADDQ R13, R9

copy_4_end:
	ADDQ R13, R11
	JMP  handle_loop
	JMP loop_finished  // unreachable; dead instruction emitted by the generator

	// Match starts in history and continues into the current output: copy
	// the history part (CX bytes) here, then fall through to copy_match for
	// the remainder.
copy_all_from_history:
	MOVQ CX, R15
	SUBQ $0x10, R15
	JB   copy_5_small

copy_5_loop:
	MOVUPS (R14), X0
	MOVUPS X0, (R9)
	ADDQ   $0x10, R14
	ADDQ   $0x10, R9
	SUBQ   $0x10, R15
	JAE    copy_5_loop
	LEAQ   16(R14)(R15*1), R14
	LEAQ   16(R9)(R15*1), R9
	MOVUPS -16(R14), X0
	MOVUPS X0, -16(R9)
	JMP    copy_5_end

copy_5_small:
	CMPQ CX, $0x03
	JE   copy_5_move_3
	JB   copy_5_move_1or2
	CMPQ CX, $0x08
	JB   copy_5_move_4through7
	JMP  copy_5_move_8through16

copy_5_move_1or2:
	MOVB (R14), R15
	MOVB -1(R14)(CX*1), BP
	MOVB R15, (R9)
	MOVB BP, -1(R9)(CX*1)
	ADDQ CX, R14
	ADDQ CX, R9
	JMP  copy_5_end

copy_5_move_3:
	MOVW (R14), R15
	MOVB 2(R14), BP
	MOVW R15, (R9)
	MOVB BP, 2(R9)
	ADDQ CX, R14
	ADDQ CX, R9
	JMP  copy_5_end

copy_5_move_4through7:
	MOVL (R14), R15
	MOVL -4(R14)(CX*1), BP
	MOVL R15, (R9)
	MOVL BP, -4(R9)(CX*1)
	ADDQ CX, R14
	ADDQ CX, R9
	JMP  copy_5_end

copy_5_move_8through16:
	MOVQ (R14), R15
	MOVQ -8(R14)(CX*1), BP
	MOVQ R15, (R9)
	MOVQ BP, -8(R9)(CX*1)
	ADDQ CX, R14
	ADDQ CX, R9

copy_5_end:
	ADDQ CX, R11
	SUBQ CX, R13

	// Copy match from the current buffer
copy_match:
	MOVQ R9, CX
	SUBQ R12, CX

	// ml <= mo
	CMPQ R13, R12
	JA   copy_overlapping_match

	// Copy non-overlapping match
	ADDQ R13, R11
	MOVQ R13, R12
	SUBQ $0x10, R12
	JB   copy_2_small

copy_2_loop:
	MOVUPS (CX), X0
	MOVUPS X0, (R9)
	ADDQ   $0x10, CX
	ADDQ   $0x10, R9
	SUBQ   $0x10, R12
	JAE    copy_2_loop
	LEAQ   16(CX)(R12*1), CX
	LEAQ   16(R9)(R12*1), R9
	MOVUPS -16(CX), X0
	MOVUPS X0, -16(R9)
	JMP    copy_2_end

copy_2_small:
	CMPQ R13, $0x03
	JE   copy_2_move_3
	JB   copy_2_move_1or2
	CMPQ R13, $0x08
	JB   copy_2_move_4through7
	JMP  copy_2_move_8through16

copy_2_move_1or2:
	MOVB (CX), R12
	MOVB -1(CX)(R13*1), R14
	MOVB R12, (R9)
	MOVB R14, -1(R9)(R13*1)
	ADDQ R13, CX
	ADDQ R13, R9
	JMP  copy_2_end

copy_2_move_3:
	MOVW (CX), R12
	MOVB 2(CX), R14
	MOVW R12, (R9)
	MOVB R14, 2(R9)
	ADDQ R13, CX
	ADDQ R13, R9
	JMP  copy_2_end

copy_2_move_4through7:
	MOVL (CX), R12
	MOVL -4(CX)(R13*1), R14
	MOVL R12, (R9)
	MOVL R14, -4(R9)(R13*1)
	ADDQ R13, CX
	ADDQ R13, R9
	JMP  copy_2_end

copy_2_move_8through16:
	MOVQ (CX), R12
	MOVQ -8(CX)(R13*1), R14
	MOVQ R12, (R9)
	MOVQ R14, -8(R9)(R13*1)
	ADDQ R13, CX
	ADDQ R13, R9

copy_2_end:
	JMP handle_loop

	// Copy overlapping match
	// Source and destination overlap (ml > mo), so a byte-at-a-time copy is
	// required to reproduce the run-extension semantics.
copy_overlapping_match:
	ADDQ R13, R11

copy_slow_3:
	MOVB (CX), R12
	MOVB R12, (R9)
	INCQ CX
	INCQ R9
	DECQ R13
	JNZ  copy_slow_3

	// Decrement the remaining-sequence counter and loop while non-negative.
handle_loop:
	MOVQ ctx+16(FP), CX
	DECQ 96(CX)
	JNS  sequenceDecs_decodeSync_safe_bmi2_main_loop

	// Flush bitreader state back to *br.
loop_finished:
	MOVQ br+8(FP), CX
	MOVQ AX, 24(CX)
	MOVB DL, 40(CX)
	MOVQ BX, 32(CX)

	// Update the context
	MOVQ ctx+16(FP), AX
	MOVQ R11, 136(AX)
	MOVQ 144(AX), CX
	SUBQ CX, R10
	MOVQ R10, 168(AX)

	// Return success
	MOVQ $0x00000000, ret+24(FP)
	RET

	// Return with match length error
	// Offset was zero while match length was non-zero; store ml at 216(ctx).
sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch:
	MOVQ 16(SP), AX
	MOVQ ctx+16(FP), CX
	MOVQ AX, 216(CX)
	MOVQ $0x00000001, ret+24(FP)
	RET

	// Return with match too long error
sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big:
	MOVQ ctx+16(FP), AX
	MOVQ 16(SP), CX
	MOVQ CX, 216(AX)
	MOVQ $0x00000002, ret+24(FP)
	RET

	// Return with match offset too long error
error_match_off_too_big:
	MOVQ ctx+16(FP), AX
	MOVQ 8(SP), CX
	MOVQ CX, 224(AX)
	MOVQ R11, 136(AX)
	MOVQ $0x00000003, ret+24(FP)
	RET

	// Return with not enough literals error
error_not_enough_literals:
	MOVQ ctx+16(FP), AX
	MOVQ 24(SP), CX
	MOVQ CX, 208(AX)
	MOVQ $0x00000004, ret+24(FP)
	RET

	// Return with overread error
error_overread:
	MOVQ $0x00000006, ret+24(FP)
	RET

	// Return with not enough output space error
error_not_enough_space:
	MOVQ ctx+16(FP), AX
	MOVQ 24(SP), CX
	MOVQ CX, 208(AX)
	MOVQ 16(SP), CX
	MOVQ CX, 216(AX)
	MOVQ R11, 136(AX)
	MOVQ $0x00000005, ret+24(FP)
	RET
 |