Mirror of https://github.com/superseriousbusiness/gotosocial.git, synced 2025-10-31 21:22:26 -05:00
[experiment] add alternative wasm sqlite3 implementation available via build-tag (#2863)
This allows for building GoToSocial with [SQLite transpiled to WASM](https://github.com/ncruces/go-sqlite3) and accessed through [Wazero](https://wazero.io/).
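For context, build-tag-gated driver selection in Go usually looks like the sketch below. This is a minimal illustration only: the "wasmsqlite3" tag, the "db" package, and the openDB helper are hypothetical and not taken from this commit, and the import paths follow the go-sqlite3 README as assumptions.

//go:build wasmsqlite3

// Package db sketches how a build tag can select the WASM-based SQLite driver.
// The tag name "wasmsqlite3", the package name, and openDB are placeholders.
package db

import (
    "database/sql"

    // Blank imports register the driver under "sqlite3" and embed the WASM
    // binary that wazero executes at runtime (paths per the go-sqlite3 README).
    _ "github.com/ncruces/go-sqlite3/driver"
    _ "github.com/ncruces/go-sqlite3/embed"
)

// openDB opens a SQLite database through the WASM-backed driver.
func openDB(path string) (*sql.DB, error) {
    return sql.Open("sqlite3", path)
}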
Parent commit: cce21c11cb. This commit: 1e7b32490d.
398 changed files with 86174 additions and 684 deletions
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/abi.go: 170 additions (generated, vendored, normal file)
@@ -0,0 +1,170 @@
package backend

import (
    "fmt"

    "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
    "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

type (
    // FunctionABI represents the ABI information for a function which corresponds to a ssa.Signature.
    FunctionABI struct {
        Initialized bool

        Args, Rets                 []ABIArg
        ArgStackSize, RetStackSize int64

        ArgIntRealRegs   byte
        ArgFloatRealRegs byte
        RetIntRealRegs   byte
        RetFloatRealRegs byte
    }

    // ABIArg represents either argument or return value's location.
    ABIArg struct {
        // Index is the index of the argument.
        Index int
        // Kind is the kind of the argument.
        Kind ABIArgKind
        // Reg is valid if Kind == ABIArgKindReg.
        // This VReg must be based on RealReg.
        Reg regalloc.VReg
        // Offset is valid if Kind == ABIArgKindStack.
        // This is the offset from the beginning of either arg or ret stack slot.
        Offset int64
        // Type is the type of the argument.
        Type ssa.Type
    }

    // ABIArgKind is the kind of ABI argument.
    ABIArgKind byte
)

const (
    // ABIArgKindReg represents an argument passed in a register.
    ABIArgKindReg = iota
    // ABIArgKindStack represents an argument passed in the stack.
    ABIArgKindStack
)

// String implements fmt.Stringer.
func (a *ABIArg) String() string {
    return fmt.Sprintf("args[%d]: %s", a.Index, a.Kind)
}

// String implements fmt.Stringer.
func (a ABIArgKind) String() string {
    switch a {
    case ABIArgKindReg:
        return "reg"
    case ABIArgKindStack:
        return "stack"
    default:
        panic("BUG")
    }
}

// Init initializes the abiImpl for the given signature.
func (a *FunctionABI) Init(sig *ssa.Signature, argResultInts, argResultFloats []regalloc.RealReg) {
    if len(a.Rets) < len(sig.Results) {
        a.Rets = make([]ABIArg, len(sig.Results))
    }
    a.Rets = a.Rets[:len(sig.Results)]
    a.RetStackSize = a.setABIArgs(a.Rets, sig.Results, argResultInts, argResultFloats)
    if argsNum := len(sig.Params); len(a.Args) < argsNum {
        a.Args = make([]ABIArg, argsNum)
    }
    a.Args = a.Args[:len(sig.Params)]
    a.ArgStackSize = a.setABIArgs(a.Args, sig.Params, argResultInts, argResultFloats)

    // Gather the real registers usages in arg/return.
    a.ArgIntRealRegs, a.ArgFloatRealRegs = 0, 0
    a.RetIntRealRegs, a.RetFloatRealRegs = 0, 0
    for i := range a.Rets {
        r := &a.Rets[i]
        if r.Kind == ABIArgKindReg {
            if r.Type.IsInt() {
                a.RetIntRealRegs++
            } else {
                a.RetFloatRealRegs++
            }
        }
    }
    for i := range a.Args {
        arg := &a.Args[i]
        if arg.Kind == ABIArgKindReg {
            if arg.Type.IsInt() {
                a.ArgIntRealRegs++
            } else {
                a.ArgFloatRealRegs++
            }
        }
    }

    a.Initialized = true
}

// setABIArgs sets the ABI arguments in the given slice. This assumes that len(s) >= len(types)
// where if len(s) > len(types), the last elements of s is for the multi-return slot.
func (a *FunctionABI) setABIArgs(s []ABIArg, types []ssa.Type, ints, floats []regalloc.RealReg) (stackSize int64) {
    il, fl := len(ints), len(floats)

    var stackOffset int64
    intParamIndex, floatParamIndex := 0, 0
    for i, typ := range types {
        arg := &s[i]
        arg.Index = i
        arg.Type = typ
        if typ.IsInt() {
            if intParamIndex >= il {
                arg.Kind = ABIArgKindStack
                const slotSize = 8 // Align 8 bytes.
                arg.Offset = stackOffset
                stackOffset += slotSize
            } else {
                arg.Kind = ABIArgKindReg
                arg.Reg = regalloc.FromRealReg(ints[intParamIndex], regalloc.RegTypeInt)
                intParamIndex++
            }
        } else {
            if floatParamIndex >= fl {
                arg.Kind = ABIArgKindStack
                slotSize := int64(8)    // Align at least 8 bytes.
                if typ.Bits() == 128 { // Vector.
                    slotSize = 16
                }
                arg.Offset = stackOffset
                stackOffset += slotSize
            } else {
                arg.Kind = ABIArgKindReg
                arg.Reg = regalloc.FromRealReg(floats[floatParamIndex], regalloc.RegTypeFloat)
                floatParamIndex++
            }
        }
    }
    return stackOffset
}

func (a *FunctionABI) AlignedArgResultStackSlotSize() uint32 {
    stackSlotSize := a.RetStackSize + a.ArgStackSize
    // Align stackSlotSize to 16 bytes.
    stackSlotSize = (stackSlotSize + 15) &^ 15
    // Check overflow 32-bit.
    if stackSlotSize > 0xFFFFFFFF {
        panic("ABI stack slot size overflow")
    }
    return uint32(stackSlotSize)
}

func (a *FunctionABI) ABIInfoAsUint64() uint64 {
    return uint64(a.ArgIntRealRegs)<<56 |
        uint64(a.ArgFloatRealRegs)<<48 |
        uint64(a.RetIntRealRegs)<<40 |
        uint64(a.RetFloatRealRegs)<<32 |
        uint64(a.AlignedArgResultStackSlotSize())
}

func ABIInfoFromUint64(info uint64) (argIntRealRegs, argFloatRealRegs, retIntRealRegs, retFloatRealRegs byte, stackSlotSize uint32) {
    return byte(info >> 56), byte(info >> 48), byte(info >> 40), byte(info >> 32), uint32(info)
}
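Side note, not part of the diff: ABIInfoAsUint64 and ABIInfoFromUint64 above pack the four register counts into the top four bytes of a uint64 and the 16-byte-aligned stack-slot size into the low 32 bits. A small standalone sketch of that round trip, with made-up counts:

package main

import "fmt"

func main() {
    // Made-up values: 2 int arg regs, 1 float arg reg, 1 int ret reg, 0 float ret regs, 32-byte slot.
    const argInts, argFloats, retInts, retFloats, slotSize = 2, 1, 1, 0, 32

    // Pack: counts in the top four bytes, aligned stack slot size in the low 32 bits.
    info := uint64(argInts)<<56 | uint64(argFloats)<<48 |
        uint64(retInts)<<40 | uint64(retFloats)<<32 | uint64(slotSize)

    // Unpack the same way ABIInfoFromUint64 does.
    fmt.Println(byte(info>>56), byte(info>>48), byte(info>>40), byte(info>>32), uint32(info))
    // Output: 2 1 1 0 32
}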
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/backend.go: 3 additions (generated, vendored, normal file)
@@ -0,0 +1,3 @@
// Package backend must be free of Wasm-specific concept. In other words,
// this package must not import internal/wasm package.
package backend
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler.go: 417 additions (generated, vendored, normal file)
@@ -0,0 +1,417 @@
package backend

import (
    "context"
    "fmt"

    "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
    "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
    "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)

// NewCompiler returns a new Compiler that can generate a machine code.
func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler {
    return newCompiler(ctx, mach, builder)
}

func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler {
    argResultInts, argResultFloats := mach.ArgsResultsRegs()
    c := &compiler{
        mach: mach, ssaBuilder: builder,
        nextVRegID:      regalloc.VRegIDNonReservedBegin,
        argResultInts:   argResultInts,
        argResultFloats: argResultFloats,
    }
    mach.SetCompiler(c)
    return c
}

// Compiler is the backend of wazevo which takes ssa.Builder and Machine,
// use the information there to emit the final machine code.
type Compiler interface {
    // SSABuilder returns the ssa.Builder used by this compiler.
    SSABuilder() ssa.Builder

    // Compile executes the following steps:
    // 1. Lower()
    // 2. RegAlloc()
    // 3. Finalize()
    // 4. Encode()
    //
    // Each step can be called individually for testing purpose, therefore they are exposed in this interface too.
    //
    // The returned byte slices are the machine code and the relocation information for the machine code.
    // The caller is responsible for copying them immediately since the compiler may reuse the buffer.
    Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error)

    // Lower lowers the given ssa.Instruction to the machine-specific instructions.
    Lower()

    // RegAlloc performs the register allocation after Lower is called.
    RegAlloc()

    // Finalize performs the finalization of the compilation, including machine code emission.
    // This must be called after RegAlloc.
    Finalize(ctx context.Context) error

    // Buf returns the buffer of the encoded machine code. This is only used for testing purpose.
    Buf() []byte

    BufPtr() *[]byte

    // Format returns the debug string of the current state of the compiler.
    Format() string

    // Init initializes the internal state of the compiler for the next compilation.
    Init()

    // AllocateVReg allocates a new virtual register of the given type.
    AllocateVReg(typ ssa.Type) regalloc.VReg

    // ValueDefinition returns the definition of the given value.
    ValueDefinition(ssa.Value) *SSAValueDefinition

    // VRegOf returns the virtual register of the given ssa.Value.
    VRegOf(value ssa.Value) regalloc.VReg

    // TypeOf returns the ssa.Type of the given virtual register.
    TypeOf(regalloc.VReg) ssa.Type

    // MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,
    // and a refcount of 1. That means, the instruction can be merged/swapped within the current instruction group.
    MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool

    // MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode,
    // this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid.
    //
    // Note: caller should be careful to avoid excessive allocation on opcodes slice.
    MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode

    // AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
    AddRelocationInfo(funcRef ssa.FuncRef)

    // AddSourceOffsetInfo appends the source offset information for the given offset.
    AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)

    // SourceOffsetInfo returns the source offset information for the current buffer offset.
    SourceOffsetInfo() []SourceOffsetInfo

    // EmitByte appends a byte to the buffer. Used during the code emission.
    EmitByte(b byte)

    // Emit4Bytes appends 4 bytes to the buffer. Used during the code emission.
    Emit4Bytes(b uint32)

    // Emit8Bytes appends 8 bytes to the buffer. Used during the code emission.
    Emit8Bytes(b uint64)

    // GetFunctionABI returns the ABI information for the given signature.
    GetFunctionABI(sig *ssa.Signature) *FunctionABI
}

// RelocationInfo represents the relocation information for a call instruction.
type RelocationInfo struct {
    // Offset represents the offset from the beginning of the machine code of either a function or the entire module.
    Offset int64
    // FuncRef is the target function of the call instruction.
    FuncRef ssa.FuncRef
}

// compiler implements Compiler.
type compiler struct {
    mach       Machine
    currentGID ssa.InstructionGroupID
    ssaBuilder ssa.Builder
    // nextVRegID is the next virtual register ID to be allocated.
    nextVRegID regalloc.VRegID
    // ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.
    ssaValueToVRegs [] /* VRegID to */ regalloc.VReg
    // ssaValueDefinitions maps ssa.ValueID to its definition.
    ssaValueDefinitions []SSAValueDefinition
    // ssaValueRefCounts is a cached list obtained by ssa.Builder.ValueRefCounts().
    ssaValueRefCounts []int
    // returnVRegs is the list of virtual registers that store the return values.
    returnVRegs  []regalloc.VReg
    varEdges     [][2]regalloc.VReg
    varEdgeTypes []ssa.Type
    constEdges   []struct {
        cInst *ssa.Instruction
        dst   regalloc.VReg
    }
    vRegSet         []bool
    vRegIDs         []regalloc.VRegID
    tempRegs        []regalloc.VReg
    tmpVals         []ssa.Value
    ssaTypeOfVRegID [] /* VRegID to */ ssa.Type
    buf             []byte
    relocations     []RelocationInfo
    sourceOffsets   []SourceOffsetInfo
    // abis maps ssa.SignatureID to the ABI implementation.
    abis                           []FunctionABI
    argResultInts, argResultFloats []regalloc.RealReg
}

// SourceOffsetInfo is a data to associate the source offset with the executable offset.
type SourceOffsetInfo struct {
    // SourceOffset is the source offset in the original source code.
    SourceOffset ssa.SourceOffset
    // ExecutableOffset is the offset in the compiled executable.
    ExecutableOffset int64
}

// Compile implements Compiler.Compile.
func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
    c.Lower()
    if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) {
        fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
    }
    if wazevoapi.DeterministicCompilationVerifierEnabled {
        wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
    }
    c.RegAlloc()
    if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) {
        fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
    }
    if wazevoapi.DeterministicCompilationVerifierEnabled {
        wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
    }
    if err := c.Finalize(ctx); err != nil {
        return nil, nil, err
    }
    if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) {
        fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
    }
    if wazevoapi.DeterministicCompilationVerifierEnabled {
        wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format())
    }
    return c.buf, c.relocations, nil
}

// RegAlloc implements Compiler.RegAlloc.
func (c *compiler) RegAlloc() {
    c.mach.RegAlloc()
}

// Finalize implements Compiler.Finalize.
func (c *compiler) Finalize(ctx context.Context) error {
    c.mach.PostRegAlloc()
    return c.mach.Encode(ctx)
}

// setCurrentGroupID sets the current instruction group ID.
func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) {
    c.currentGID = gid
}

// assignVirtualRegisters assigns a virtual register to each ssa.ValueID Valid in the ssa.Builder.
func (c *compiler) assignVirtualRegisters() {
    builder := c.ssaBuilder
    refCounts := builder.ValueRefCounts()
    c.ssaValueRefCounts = refCounts

    need := len(refCounts)
    if need >= len(c.ssaValueToVRegs) {
        c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, need+1)...)
    }
    if need >= len(c.ssaValueDefinitions) {
        c.ssaValueDefinitions = append(c.ssaValueDefinitions, make([]SSAValueDefinition, need+1)...)
    }

    for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
        // First we assign a virtual register to each parameter.
        for i := 0; i < blk.Params(); i++ {
            p := blk.Param(i)
            pid := p.ID()
            typ := p.Type()
            vreg := c.AllocateVReg(typ)
            c.ssaValueToVRegs[pid] = vreg
            c.ssaValueDefinitions[pid] = SSAValueDefinition{BlockParamValue: p, BlkParamVReg: vreg}
            c.ssaTypeOfVRegID[vreg.ID()] = p.Type()
        }

        // Assigns each value to a virtual register produced by instructions.
        for cur := blk.Root(); cur != nil; cur = cur.Next() {
            r, rs := cur.Returns()
            var N int
            if r.Valid() {
                id := r.ID()
                ssaTyp := r.Type()
                typ := r.Type()
                vReg := c.AllocateVReg(typ)
                c.ssaValueToVRegs[id] = vReg
                c.ssaValueDefinitions[id] = SSAValueDefinition{
                    Instr:    cur,
                    N:        0,
                    RefCount: refCounts[id],
                }
                c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
                N++
            }
            for _, r := range rs {
                id := r.ID()
                ssaTyp := r.Type()
                vReg := c.AllocateVReg(ssaTyp)
                c.ssaValueToVRegs[id] = vReg
                c.ssaValueDefinitions[id] = SSAValueDefinition{
                    Instr:    cur,
                    N:        N,
                    RefCount: refCounts[id],
                }
                c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
                N++
            }
        }
    }

    for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ {
        typ := retBlk.Param(i).Type()
        vReg := c.AllocateVReg(typ)
        c.returnVRegs = append(c.returnVRegs, vReg)
        c.ssaTypeOfVRegID[vReg.ID()] = typ
    }
}

// AllocateVReg implements Compiler.AllocateVReg.
func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg {
    regType := regalloc.RegTypeOf(typ)
    r := regalloc.VReg(c.nextVRegID).SetRegType(regType)

    id := r.ID()
    if int(id) >= len(c.ssaTypeOfVRegID) {
        c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...)
    }
    c.ssaTypeOfVRegID[id] = typ
    c.nextVRegID++
    return r
}

// Init implements Compiler.Init.
func (c *compiler) Init() {
    c.currentGID = 0
    c.nextVRegID = regalloc.VRegIDNonReservedBegin
    c.returnVRegs = c.returnVRegs[:0]
    c.mach.Reset()
    c.varEdges = c.varEdges[:0]
    c.constEdges = c.constEdges[:0]
    c.buf = c.buf[:0]
    c.sourceOffsets = c.sourceOffsets[:0]
    c.relocations = c.relocations[:0]
}

// ValueDefinition implements Compiler.ValueDefinition.
func (c *compiler) ValueDefinition(value ssa.Value) *SSAValueDefinition {
    return &c.ssaValueDefinitions[value.ID()]
}

// VRegOf implements Compiler.VRegOf.
func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg {
    return c.ssaValueToVRegs[value.ID()]
}

// Format implements Compiler.Format.
func (c *compiler) Format() string {
    return c.mach.Format()
}

// TypeOf implements Compiler.TypeOf.
func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type {
    return c.ssaTypeOfVRegID[v.ID()]
}

// MatchInstr implements Compiler.MatchInstr.
func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool {
    instr := def.Instr
    return def.IsFromInstr() &&
        instr.Opcode() == opcode &&
        instr.GroupID() == c.currentGID &&
        def.RefCount < 2
}

// MatchInstrOneOf implements Compiler.MatchInstrOneOf.
func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode {
    instr := def.Instr
    if !def.IsFromInstr() {
        return ssa.OpcodeInvalid
    }

    if instr.GroupID() != c.currentGID {
        return ssa.OpcodeInvalid
    }

    if def.RefCount >= 2 {
        return ssa.OpcodeInvalid
    }

    opcode := instr.Opcode()
    for _, op := range opcodes {
        if opcode == op {
            return opcode
        }
    }
    return ssa.OpcodeInvalid
}

// SSABuilder implements Compiler.SSABuilder.
func (c *compiler) SSABuilder() ssa.Builder {
    return c.ssaBuilder
}

// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo.
func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) {
    c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{
        SourceOffset:     sourceOffset,
        ExecutableOffset: executableOffset,
    })
}

// SourceOffsetInfo implements Compiler.SourceOffsetInfo.
func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
    return c.sourceOffsets
}

// AddRelocationInfo implements Compiler.AddRelocationInfo.
func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
    c.relocations = append(c.relocations, RelocationInfo{
        Offset:  int64(len(c.buf)),
        FuncRef: funcRef,
    })
}

// Emit8Bytes implements Compiler.Emit8Bytes.
func (c *compiler) Emit8Bytes(b uint64) {
    c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24), byte(b>>32), byte(b>>40), byte(b>>48), byte(b>>56))
}

// Emit4Bytes implements Compiler.Emit4Bytes.
func (c *compiler) Emit4Bytes(b uint32) {
    c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24))
}

// EmitByte implements Compiler.EmitByte.
func (c *compiler) EmitByte(b byte) {
    c.buf = append(c.buf, b)
}

// Buf implements Compiler.Buf.
func (c *compiler) Buf() []byte {
    return c.buf
}

// BufPtr implements Compiler.BufPtr.
func (c *compiler) BufPtr() *[]byte {
    return &c.buf
}

func (c *compiler) GetFunctionABI(sig *ssa.Signature) *FunctionABI {
    if int(sig.ID) >= len(c.abis) {
        c.abis = append(c.abis, make([]FunctionABI, int(sig.ID)+1)...)
    }

    abi := &c.abis[sig.ID]
    if abi.Initialized {
        return abi
    }

    abi.Init(sig, c.argResultInts, c.argResultFloats)
    return abi
}
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/compiler_lower.go: 226 additions (generated, vendored, normal file)
@@ -0,0 +1,226 @@
package backend

import (
    "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
    "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// Lower implements Compiler.Lower.
func (c *compiler) Lower() {
    c.assignVirtualRegisters()
    c.mach.SetCurrentABI(c.GetFunctionABI(c.ssaBuilder.Signature()))
    c.mach.ExecutableContext().StartLoweringFunction(c.ssaBuilder.BlockIDMax())
    c.lowerBlocks()
}

// lowerBlocks lowers each block in the ssa.Builder.
func (c *compiler) lowerBlocks() {
    builder := c.ssaBuilder
    for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
        c.lowerBlock(blk)
    }

    ectx := c.mach.ExecutableContext()
    // After lowering all blocks, we need to link adjacent blocks to layout one single instruction list.
    var prev ssa.BasicBlock
    for next := builder.BlockIteratorReversePostOrderBegin(); next != nil; next = builder.BlockIteratorReversePostOrderNext() {
        if prev != nil {
            ectx.LinkAdjacentBlocks(prev, next)
        }
        prev = next
    }
}

func (c *compiler) lowerBlock(blk ssa.BasicBlock) {
    mach := c.mach
    ectx := mach.ExecutableContext()
    ectx.StartBlock(blk)

    // We traverse the instructions in reverse order because we might want to lower multiple
    // instructions together.
    cur := blk.Tail()

    // First gather the branching instructions at the end of the blocks.
    var br0, br1 *ssa.Instruction
    if cur.IsBranching() {
        br0 = cur
        cur = cur.Prev()
        if cur != nil && cur.IsBranching() {
            br1 = cur
            cur = cur.Prev()
        }
    }

    if br0 != nil {
        c.lowerBranches(br0, br1)
    }

    if br1 != nil && br0 == nil {
        panic("BUG? when a block has conditional branch but doesn't end with an unconditional branch?")
    }

    // Now start lowering the non-branching instructions.
    for ; cur != nil; cur = cur.Prev() {
        c.setCurrentGroupID(cur.GroupID())
        if cur.Lowered() {
            continue
        }

        switch cur.Opcode() {
        case ssa.OpcodeReturn:
            rets := cur.ReturnVals()
            if len(rets) > 0 {
                c.mach.LowerReturns(rets)
            }
            c.mach.InsertReturn()
        default:
            mach.LowerInstr(cur)
        }
        ectx.FlushPendingInstructions()
    }

    // Finally, if this is the entry block, we have to insert copies of arguments from the real location to the VReg.
    if blk.EntryBlock() {
        c.lowerFunctionArguments(blk)
    }

    ectx.EndBlock()
}

// lowerBranches is called right after StartBlock and before any LowerInstr call if
// there are branches to the given block. br0 is the very end of the block and b1 is the before the br0 if it exists.
// At least br0 is not nil, but br1 can be nil if there's no branching before br0.
//
// See ssa.Instruction IsBranching, and the comment on ssa.BasicBlock.
func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) {
    ectx := c.mach.ExecutableContext()

    c.setCurrentGroupID(br0.GroupID())
    c.mach.LowerSingleBranch(br0)
    ectx.FlushPendingInstructions()
    if br1 != nil {
        c.setCurrentGroupID(br1.GroupID())
        c.mach.LowerConditionalBranch(br1)
        ectx.FlushPendingInstructions()
    }

    if br0.Opcode() == ssa.OpcodeJump {
        _, args, target := br0.BranchData()
        argExists := len(args) != 0
        if argExists && br1 != nil {
            panic("BUG: critical edge split failed")
        }
        if argExists && target.ReturnBlock() {
            if len(args) > 0 {
                c.mach.LowerReturns(args)
            }
        } else if argExists {
            c.lowerBlockArguments(args, target)
        }
    }
    ectx.FlushPendingInstructions()
}

func (c *compiler) lowerFunctionArguments(entry ssa.BasicBlock) {
    ectx := c.mach.ExecutableContext()

    c.tmpVals = c.tmpVals[:0]
    for i := 0; i < entry.Params(); i++ {
        p := entry.Param(i)
        if c.ssaValueRefCounts[p.ID()] > 0 {
            c.tmpVals = append(c.tmpVals, p)
        } else {
            // If the argument is not used, we can just pass an invalid value.
            c.tmpVals = append(c.tmpVals, ssa.ValueInvalid)
        }
    }
    c.mach.LowerParams(c.tmpVals)
    ectx.FlushPendingInstructions()
}

// lowerBlockArguments lowers how to pass arguments to the given successor block.
func (c *compiler) lowerBlockArguments(args []ssa.Value, succ ssa.BasicBlock) {
    if len(args) != succ.Params() {
        panic("BUG: mismatched number of arguments")
    }

    c.varEdges = c.varEdges[:0]
    c.varEdgeTypes = c.varEdgeTypes[:0]
    c.constEdges = c.constEdges[:0]
    for i := 0; i < len(args); i++ {
        dst := succ.Param(i)
        src := args[i]

        dstReg := c.VRegOf(dst)
        srcDef := c.ssaValueDefinitions[src.ID()]
        if srcDef.IsFromInstr() && srcDef.Instr.Constant() {
            c.constEdges = append(c.constEdges, struct {
                cInst *ssa.Instruction
                dst   regalloc.VReg
            }{cInst: srcDef.Instr, dst: dstReg})
        } else {
            srcReg := c.VRegOf(src)
            // Even when the src=dst, insert the move so that we can keep such registers keep-alive.
            c.varEdges = append(c.varEdges, [2]regalloc.VReg{srcReg, dstReg})
            c.varEdgeTypes = append(c.varEdgeTypes, src.Type())
        }
    }

    // Check if there's an overlap among the dsts and srcs in varEdges.
    c.vRegIDs = c.vRegIDs[:0]
    for _, edge := range c.varEdges {
        src := edge[0].ID()
        if int(src) >= len(c.vRegSet) {
            c.vRegSet = append(c.vRegSet, make([]bool, src+1)...)
        }
        c.vRegSet[src] = true
        c.vRegIDs = append(c.vRegIDs, src)
    }
    separated := true
    for _, edge := range c.varEdges {
        dst := edge[1].ID()
        if int(dst) >= len(c.vRegSet) {
            c.vRegSet = append(c.vRegSet, make([]bool, dst+1)...)
        } else {
            if c.vRegSet[dst] {
                separated = false
                break
            }
        }
    }
    for _, id := range c.vRegIDs {
        c.vRegSet[id] = false // reset for the next use.
    }

    if separated {
        // If there's no overlap, we can simply move the source to destination.
        for i, edge := range c.varEdges {
            src, dst := edge[0], edge[1]
            c.mach.InsertMove(dst, src, c.varEdgeTypes[i])
        }
    } else {
        // Otherwise, we allocate a temporary registers and move the source to the temporary register,
        //
        // First move all of them to temporary registers.
        c.tempRegs = c.tempRegs[:0]
        for i, edge := range c.varEdges {
            src := edge[0]
            typ := c.varEdgeTypes[i]
            temp := c.AllocateVReg(typ)
            c.tempRegs = append(c.tempRegs, temp)
            c.mach.InsertMove(temp, src, typ)
        }
        // Then move the temporary registers to the destination.
        for i, edge := range c.varEdges {
            temp := c.tempRegs[i]
            dst := edge[1]
            c.mach.InsertMove(dst, temp, c.varEdgeTypes[i])
        }
    }

    // Finally, move the constants.
    for _, edge := range c.constEdges {
        cInst, dst := edge.cInst, edge.dst
        c.mach.InsertLoadConstantBlockArg(cInst, dst)
    }
}
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go: 219 additions (generated, vendored, normal file)
@@ -0,0 +1,219 @@
package backend

import (
    "fmt"
    "math"

    "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
    "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)

type ExecutableContext interface {
    // StartLoweringFunction is called when the lowering of the given function is started.
    // maximumBlockID is the maximum value of ssa.BasicBlockID existing in the function.
    StartLoweringFunction(maximumBlockID ssa.BasicBlockID)

    // LinkAdjacentBlocks is called after finished lowering all blocks in order to create one single instruction list.
    LinkAdjacentBlocks(prev, next ssa.BasicBlock)

    // StartBlock is called when the compilation of the given block is started.
    // The order of this being called is the reverse post order of the ssa.BasicBlock(s) as we iterate with
    // ssa.Builder BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderEnd.
    StartBlock(ssa.BasicBlock)

    // EndBlock is called when the compilation of the current block is finished.
    EndBlock()

    // FlushPendingInstructions flushes the pending instructions to the buffer.
    // This will be called after the lowering of each SSA Instruction.
    FlushPendingInstructions()
}

type ExecutableContextT[Instr any] struct {
    CurrentSSABlk ssa.BasicBlock

    // InstructionPool is the InstructionPool of instructions.
    InstructionPool wazevoapi.Pool[Instr]
    asNop           func(*Instr)
    setNext         func(*Instr, *Instr)
    setPrev         func(*Instr, *Instr)

    // RootInstr is the root instruction of the executable.
    RootInstr         *Instr
    labelPositionPool wazevoapi.Pool[LabelPosition[Instr]]
    NextLabel         Label
    // LabelPositions maps a label to the instructions of the region which the label represents.
    LabelPositions     map[Label]*LabelPosition[Instr]
    OrderedBlockLabels []*LabelPosition[Instr]

    // PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock.
    PerBlockHead, PerBlockEnd *Instr
    // PendingInstructions are the instructions which are not yet emitted into the instruction list.
    PendingInstructions []*Instr

    // SsaBlockIDToLabels maps an SSA block ID to the label.
    SsaBlockIDToLabels []Label
}

func NewExecutableContextT[Instr any](
    resetInstruction func(*Instr),
    setNext func(*Instr, *Instr),
    setPrev func(*Instr, *Instr),
    asNop func(*Instr),
) *ExecutableContextT[Instr] {
    return &ExecutableContextT[Instr]{
        InstructionPool:   wazevoapi.NewPool[Instr](resetInstruction),
        asNop:             asNop,
        setNext:           setNext,
        setPrev:           setPrev,
        labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]),
        LabelPositions:    make(map[Label]*LabelPosition[Instr]),
        NextLabel:         LabelInvalid,
    }
}

func resetLabelPosition[T any](l *LabelPosition[T]) {
    *l = LabelPosition[T]{}
}

// StartLoweringFunction implements ExecutableContext.
func (e *ExecutableContextT[Instr]) StartLoweringFunction(max ssa.BasicBlockID) {
    imax := int(max)
    if len(e.SsaBlockIDToLabels) <= imax {
        // Eagerly allocate labels for the blocks since the underlying slice will be used for the next iteration.
        e.SsaBlockIDToLabels = append(e.SsaBlockIDToLabels, make([]Label, imax+1)...)
    }
}

func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) {
    e.CurrentSSABlk = blk

    l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()]
    if l == LabelInvalid {
        l = e.AllocateLabel()
        e.SsaBlockIDToLabels[blk.ID()] = l
    }

    end := e.allocateNop0()
    e.PerBlockHead, e.PerBlockEnd = end, end

    labelPos, ok := e.LabelPositions[l]
    if !ok {
        labelPos = e.AllocateLabelPosition(l)
        e.LabelPositions[l] = labelPos
    }
    e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos)
    labelPos.Begin, labelPos.End = end, end
    labelPos.SB = blk
}

// EndBlock implements ExecutableContext.
func (e *ExecutableContextT[T]) EndBlock() {
    // Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions.
    e.insertAtPerBlockHead(e.allocateNop0())

    l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()]
    e.LabelPositions[l].Begin = e.PerBlockHead

    if e.CurrentSSABlk.EntryBlock() {
        e.RootInstr = e.PerBlockHead
    }
}

func (e *ExecutableContextT[T]) insertAtPerBlockHead(i *T) {
    if e.PerBlockHead == nil {
        e.PerBlockHead = i
        e.PerBlockEnd = i
        return
    }
    e.setNext(i, e.PerBlockHead)
    e.setPrev(e.PerBlockHead, i)
    e.PerBlockHead = i
}

// FlushPendingInstructions implements ExecutableContext.
func (e *ExecutableContextT[T]) FlushPendingInstructions() {
    l := len(e.PendingInstructions)
    if l == 0 {
        return
    }
    for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order.
        e.insertAtPerBlockHead(e.PendingInstructions[i])
    }
    e.PendingInstructions = e.PendingInstructions[:0]
}

func (e *ExecutableContextT[T]) Reset() {
    e.labelPositionPool.Reset()
    e.InstructionPool.Reset()
    for l := Label(0); l <= e.NextLabel; l++ {
        delete(e.LabelPositions, l)
    }
    e.PendingInstructions = e.PendingInstructions[:0]
    e.OrderedBlockLabels = e.OrderedBlockLabels[:0]
    e.RootInstr = nil
    e.SsaBlockIDToLabels = e.SsaBlockIDToLabels[:0]
    e.PerBlockHead, e.PerBlockEnd = nil, nil
    e.NextLabel = LabelInvalid
}

// AllocateLabel allocates an unused label.
func (e *ExecutableContextT[T]) AllocateLabel() Label {
    e.NextLabel++
    return e.NextLabel
}

func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] {
    l := e.labelPositionPool.Allocate()
    l.L = la
    return l
}

func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label {
    if blk.ReturnBlock() {
        return LabelReturn
    }
    l := e.SsaBlockIDToLabels[blk.ID()]
    if l == LabelInvalid {
        l = e.AllocateLabel()
        e.SsaBlockIDToLabels[blk.ID()] = l
    }
    return l
}

func (e *ExecutableContextT[T]) allocateNop0() *T {
    i := e.InstructionPool.Allocate()
    e.asNop(i)
    return i
}

// LinkAdjacentBlocks implements backend.Machine.
func (e *ExecutableContextT[T]) LinkAdjacentBlocks(prev, next ssa.BasicBlock) {
    prevLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(prev)]
    nextLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(next)]
    e.setNext(prevLabelPos.End, nextLabelPos.Begin)
}

// LabelPosition represents the regions of the generated code which the label represents.
type LabelPosition[Instr any] struct {
    SB           ssa.BasicBlock
    L            Label
    Begin, End   *Instr
    BinaryOffset int64
}

// Label represents a position in the generated code which is either
// a real instruction or the constant InstructionPool (e.g. jump tables).
//
// This is exactly the same as the traditional "label" in assembly code.
type Label uint32

const (
    LabelInvalid Label = 0
    LabelReturn  Label = math.MaxUint32
)

// String implements backend.Machine.
func (l Label) String() string {
    return fmt.Sprintf("L%d", l)
}
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/go_call.go: 33 additions (generated, vendored, normal file)
@@ -0,0 +1,33 @@
package backend

import "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"

// GoFunctionCallRequiredStackSize returns the size of the stack required for the Go function call.
// argBegin is the index of the first argument in the signature which is not either execution context or module context.
func GoFunctionCallRequiredStackSize(sig *ssa.Signature, argBegin int) (ret, retUnaligned int64) {
    var paramNeededInBytes, resultNeededInBytes int64
    for _, p := range sig.Params[argBegin:] {
        s := int64(p.Size())
        if s < 8 {
            s = 8 // We use uint64 for all basic types, except SIMD v128.
        }
        paramNeededInBytes += s
    }
    for _, r := range sig.Results {
        s := int64(r.Size())
        if s < 8 {
            s = 8 // We use uint64 for all basic types, except SIMD v128.
        }
        resultNeededInBytes += s
    }

    if paramNeededInBytes > resultNeededInBytes {
        ret = paramNeededInBytes
    } else {
        ret = resultNeededInBytes
    }
    retUnaligned = ret
    // Align to 16 bytes.
    ret = (ret + 15) &^ 15
    return
}
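To make the sizing rule in GoFunctionCallRequiredStackSize concrete (again, not part of the diff): every non-v128 value is widened to 8 bytes, parameters and results are summed separately, the larger total wins, and the aligned figure rounds up to a 16-byte boundary. A worked example with a hypothetical signature:

package main

import "fmt"

func main() {
    // Hypothetical signature: params (i32, i64, f64), results (i64, v128).
    paramBytes := int64(8 + 8 + 8) // each scalar is widened to 8 bytes
    resultBytes := int64(8 + 16)   // v128 keeps its full 16 bytes

    need := paramBytes
    if resultBytes > need {
        need = resultBytes
    }
    aligned := (need + 15) &^ 15
    fmt.Println(need, aligned) // 24 32
}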
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi.go: 186 additions (generated, vendored, normal file)
@@ -0,0 +1,186 @@
package amd64

import (
    "github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
    "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
    "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// For the details of the ABI, see:
// https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#amd64-architecture

var (
    intArgResultRegs   = []regalloc.RealReg{rax, rbx, rcx, rdi, rsi, r8, r9, r10, r11}
    floatArgResultRegs = []regalloc.RealReg{xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7}
)

var regInfo = &regalloc.RegisterInfo{
    AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
        regalloc.RegTypeInt: {
            rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15,
        },
        regalloc.RegTypeFloat: {
            xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
        },
    },
    CalleeSavedRegisters: regalloc.NewRegSet(
        rdx, r12, r13, r14, r15,
        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
    ),
    CallerSavedRegisters: regalloc.NewRegSet(
        rax, rcx, rbx, rsi, rdi, r8, r9, r10, r11,
        xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
    ),
    RealRegToVReg: []regalloc.VReg{
        rax: raxVReg, rcx: rcxVReg, rdx: rdxVReg, rbx: rbxVReg, rsp: rspVReg, rbp: rbpVReg, rsi: rsiVReg, rdi: rdiVReg,
        r8: r8VReg, r9: r9VReg, r10: r10VReg, r11: r11VReg, r12: r12VReg, r13: r13VReg, r14: r14VReg, r15: r15VReg,
        xmm0: xmm0VReg, xmm1: xmm1VReg, xmm2: xmm2VReg, xmm3: xmm3VReg, xmm4: xmm4VReg, xmm5: xmm5VReg, xmm6: xmm6VReg,
        xmm7: xmm7VReg, xmm8: xmm8VReg, xmm9: xmm9VReg, xmm10: xmm10VReg, xmm11: xmm11VReg, xmm12: xmm12VReg,
        xmm13: xmm13VReg, xmm14: xmm14VReg, xmm15: xmm15VReg,
    },
    RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
    RealRegType: func(r regalloc.RealReg) regalloc.RegType {
        if r < xmm0 {
            return regalloc.RegTypeInt
        }
        return regalloc.RegTypeFloat
    },
}

// ArgsResultsRegs implements backend.Machine.
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
    return intArgResultRegs, floatArgResultRegs
}

// LowerParams implements backend.Machine.
func (m *machine) LowerParams(args []ssa.Value) {
    a := m.currentABI

    for i, ssaArg := range args {
        if !ssaArg.Valid() {
            continue
        }
        reg := m.c.VRegOf(ssaArg)
        arg := &a.Args[i]
        if arg.Kind == backend.ABIArgKindReg {
            m.InsertMove(reg, arg.Reg, arg.Type)
        } else {
            //
            //            (high address)
            //         +-----------------+
            //         |     .......     |
            //         |      ret Y      |
            //         |     .......     |
            //         |      ret 0      |
            //         |      arg X      |
            //         |     .......     |
            //         |      arg 1      |
            //         |      arg 0      |
            //         |  ReturnAddress  |
            //         |   Caller_RBP    |
            //         +-----------------+ <-- RBP
            //         |   ...........   |
            //         |   clobbered M   |
            //         |   ............  |
            //         |   clobbered 0   |
            //         |   spill slot N  |
            //         |   ...........   |
            //         |   spill slot 0  |
            // RSP --> +-----------------+
            //            (low address)

            // Load the value from the arg stack slot above the current RBP.
            load := m.allocateInstr()
            mem := newOperandMem(m.newAmodeImmRBPReg(uint32(arg.Offset + 16)))
            switch arg.Type {
            case ssa.TypeI32:
                load.asMovzxRmR(extModeLQ, mem, reg)
            case ssa.TypeI64:
                load.asMov64MR(mem, reg)
            case ssa.TypeF32:
                load.asXmmUnaryRmR(sseOpcodeMovss, mem, reg)
            case ssa.TypeF64:
                load.asXmmUnaryRmR(sseOpcodeMovsd, mem, reg)
            case ssa.TypeV128:
                load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, reg)
            default:
                panic("BUG")
            }
            m.insert(load)
        }
    }
}

// LowerReturns implements backend.Machine.
func (m *machine) LowerReturns(rets []ssa.Value) {
    // Load the XMM registers first as it might need a temporary register to inline
    // constant return.
    a := m.currentABI
    for i, ret := range rets {
        r := &a.Rets[i]
        if !r.Type.IsInt() {
            m.LowerReturn(ret, r)
        }
    }
    // Then load the GPR registers.
    for i, ret := range rets {
        r := &a.Rets[i]
        if r.Type.IsInt() {
            m.LowerReturn(ret, r)
        }
    }
}

func (m *machine) LowerReturn(ret ssa.Value, r *backend.ABIArg) {
    reg := m.c.VRegOf(ret)
    if def := m.c.ValueDefinition(ret); def.IsFromInstr() {
        // Constant instructions are inlined.
        if inst := def.Instr; inst.Constant() {
            m.insertLoadConstant(inst, reg)
        }
    }
    if r.Kind == backend.ABIArgKindReg {
        m.InsertMove(r.Reg, reg, ret.Type())
    } else {
        //
        //            (high address)
        //         +-----------------+
        //         |     .......     |
        //         |      ret Y      |
        //         |     .......     |
        //         |      ret 0      |
        //         |      arg X      |
        //         |     .......     |
        //         |      arg 1      |
        //         |      arg 0      |
        //         |  ReturnAddress  |
        //         |   Caller_RBP    |
        //         +-----------------+ <-- RBP
        //         |   ...........   |
        //         |   clobbered M   |
        //         |   ............  |
        //         |   clobbered 0   |
        //         |   spill slot N  |
        //         |   ...........   |
        //         |   spill slot 0  |
        // RSP --> +-----------------+
        //            (low address)

        // Store the value to the return stack slot above the current RBP.
        store := m.allocateInstr()
        mem := newOperandMem(m.newAmodeImmRBPReg(uint32(m.currentABI.ArgStackSize + 16 + r.Offset)))
        switch r.Type {
        case ssa.TypeI32:
            store.asMovRM(reg, mem, 4)
        case ssa.TypeI64:
            store.asMovRM(reg, mem, 8)
        case ssa.TypeF32:
            store.asXmmMovRM(sseOpcodeMovss, reg, mem)
        case ssa.TypeF64:
            store.asXmmMovRM(sseOpcodeMovsd, reg, mem)
        case ssa.TypeV128:
            store.asXmmMovRM(sseOpcodeMovdqu, reg, mem)
        }
        m.insert(store)
    }
}
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.go: 9 additions (generated, vendored, normal file)
@@ -0,0 +1,9 @@
package amd64

// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below.
// This implements wazevo.entrypoint, and see the comments there for detail.
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)

// afterGoFunctionCallEntrypoint enters the machine code after growing the stack.
// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail.
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_amd64.s: 29 additions (generated, vendored, normal file)
@@ -0,0 +1,29 @@
#include "funcdata.h"
#include "textflag.h"

// entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)
TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48
	MOVQ preambleExecutable+0(FP), R11
	MOVQ functionExectuable+8(FP), R14
	MOVQ executionContextPtr+16(FP), AX // First argument is passed in AX.
	MOVQ moduleContextPtr+24(FP), BX    // Second argument is passed in BX.
	MOVQ paramResultSlicePtr+32(FP), R12
	MOVQ goAllocatedStackSlicePtr+40(FP), R13
	JMP  R11

// afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32
	MOVQ executable+0(FP), CX
	MOVQ executionContextPtr+8(FP), AX // First argument is passed in AX.

	// Save the stack pointer and frame pointer.
	MOVQ BP, 16(AX) // 16 == ExecutionContextOffsetOriginalFramePointer
	MOVQ SP, 24(AX) // 24 == ExecutionContextOffsetOriginalStackPointer

	// Then set the stack pointer and frame pointer to the values we got from the Go runtime.
	MOVQ framePointer+24(FP), BP

	// WARNING: do not update SP before BP, because the Go translates (FP) as (SP) + 8.
	MOVQ stackPointer+16(FP), SP

	JMP CX
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_entry_preamble.go: 248 additions (generated, vendored, normal file)
|
|
@ -0,0 +1,248 @@
|
|||
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
var (
|
||||
executionContextPtrReg = raxVReg
|
||||
|
||||
// Followings are callee saved registers. They can be used freely in the entry preamble
|
||||
// since the preamble is called via Go assembly function which has stack-based ABI.
|
||||
|
||||
// savedExecutionContextPtr also must be a callee-saved reg so that they can be used in the prologue and epilogue.
|
||||
savedExecutionContextPtr = rdxVReg
|
||||
// paramResultSlicePtr must match with entrypoint function in abi_entry_amd64.s.
|
||||
paramResultSlicePtr = r12VReg
|
||||
// goAllocatedStackPtr must match with entrypoint function in abi_entry_amd64.s.
|
||||
goAllocatedStackPtr = r13VReg
|
||||
// functionExecutable must match with entrypoint function in abi_entry_amd64.s.
|
||||
functionExecutable = r14VReg
|
||||
tmpIntReg = r15VReg
|
||||
tmpXmmReg = xmm15VReg
|
||||
)
|
||||
|
||||
// CompileEntryPreamble implements backend.Machine.
|
||||
func (m *machine) CompileEntryPreamble(sig *ssa.Signature) []byte {
|
||||
root := m.compileEntryPreamble(sig)
|
||||
m.encodeWithoutSSA(root)
|
||||
buf := m.c.Buf()
|
||||
return buf
|
||||
}
|
||||
|
||||
func (m *machine) compileEntryPreamble(sig *ssa.Signature) *instruction {
|
||||
abi := backend.FunctionABI{}
|
||||
abi.Init(sig, intArgResultRegs, floatArgResultRegs)
|
||||
|
||||
root := m.allocateNop()
|
||||
|
||||
//// ----------------------------------- prologue ----------------------------------- ////
|
||||
|
||||
// First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well.
|
||||
// mov %executionContextPtrReg, %savedExecutionContextPtr
|
||||
cur := m.move64(executionContextPtrReg, savedExecutionContextPtr, root)
|
||||
|
||||
// Next is to save the original RBP and RSP into the execution context.
|
||||
cur = m.saveOriginalRSPRBP(cur)
|
||||
|
||||
// Now set the RSP to the Go-allocated stack pointer.
|
||||
// mov %goAllocatedStackPtr, %rsp
|
||||
cur = m.move64(goAllocatedStackPtr, rspVReg, cur)
|
||||
|
||||
if stackSlotSize := abi.AlignedArgResultStackSlotSize(); stackSlotSize > 0 {
|
||||
// Allocate stack slots for the arguments and return values.
|
||||
// sub $stackSlotSize, %rsp
|
||||
spDec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(stackSlotSize)), rspVReg, true)
|
||||
cur = linkInstr(cur, spDec)
|
||||
}
|
||||
|
||||
var offset uint32
|
||||
for i := range abi.Args {
|
||||
if i < 2 {
|
||||
// module context ptr and execution context ptr are passed in rax and rbx by the Go assembly function.
|
||||
continue
|
||||
}
|
||||
arg := &abi.Args[i]
|
||||
cur = m.goEntryPreamblePassArg(cur, paramResultSlicePtr, offset, arg)
|
||||
if arg.Type == ssa.TypeV128 {
|
||||
offset += 16
|
||||
} else {
|
||||
offset += 8
|
||||
}
|
||||
}
|
||||
|
||||
// Zero out RBP so that the unwind/stack growth code can correctly detect the end of the stack.
|
||||
zerosRbp := m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(rbpVReg), rbpVReg, true)
|
||||
    cur = linkInstr(cur, zerosRbp)

    // Now ready to call the real function. Note that at this point the stack pointer is already set to the Go-allocated stack,
    // which is aligned to 16 bytes.
    call := m.allocateInstr().asCallIndirect(newOperandReg(functionExecutable), &abi)
    cur = linkInstr(cur, call)

    //// ----------------------------------- epilogue ----------------------------------- ////

    // Read the results from regs and the stack, and set them correctly into the paramResultSlicePtr.
    offset = 0
    for i := range abi.Rets {
        r := &abi.Rets[i]
        cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, offset, r, uint32(abi.ArgStackSize))
        if r.Type == ssa.TypeV128 {
            offset += 16
        } else {
            offset += 8
        }
    }

    // Finally, restore the original RBP and RSP.
    cur = m.restoreOriginalRSPRBP(cur)

    ret := m.allocateInstr().asRet()
    linkInstr(cur, ret)
    return root
}

// saveOriginalRSPRBP saves the original RSP and RBP into the execution context.
func (m *machine) saveOriginalRSPRBP(cur *instruction) *instruction {
    // mov %rbp, wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg)
    // mov %rsp, wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg)
    cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, true, cur)
    cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, true, cur)
    return cur
}

// restoreOriginalRSPRBP restores the original RSP and RBP from the execution context.
func (m *machine) restoreOriginalRSPRBP(cur *instruction) *instruction {
    // mov wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg), %rbp
    // mov wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg), %rsp
    cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, false, cur)
    cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, false, cur)
    return cur
}

func (m *machine) move64(src, dst regalloc.VReg, prev *instruction) *instruction {
    mov := m.allocateInstr().asMovRR(src, dst, true)
    return linkInstr(prev, mov)
}

func (m *machine) loadOrStore64AtExecutionCtx(execCtx regalloc.VReg, offset wazevoapi.Offset, r regalloc.VReg, store bool, prev *instruction) *instruction {
    mem := newOperandMem(m.newAmodeImmReg(offset.U32(), execCtx))
    instr := m.allocateInstr()
    if store {
        instr.asMovRM(r, mem, 8)
    } else {
        instr.asMov64MR(mem, r)
    }
    return linkInstr(prev, instr)
}

// This is for debugging.
func (m *machine) linkUD2(cur *instruction) *instruction { //nolint
    return linkInstr(cur, m.allocateInstr().asUD2())
}

func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, offsetInParamSlice uint32, arg *backend.ABIArg) *instruction {
    var dst regalloc.VReg
    argTyp := arg.Type
    if arg.Kind == backend.ABIArgKindStack {
        // Caller saved registers ca
        switch argTyp {
        case ssa.TypeI32, ssa.TypeI64:
            dst = tmpIntReg
        case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
            dst = tmpXmmReg
        default:
            panic("BUG")
        }
    } else {
        dst = arg.Reg
    }

    load := m.allocateInstr()
    a := newOperandMem(m.newAmodeImmReg(offsetInParamSlice, paramSlicePtr))
    switch arg.Type {
    case ssa.TypeI32:
        load.asMovzxRmR(extModeLQ, a, dst)
    case ssa.TypeI64:
        load.asMov64MR(a, dst)
    case ssa.TypeF32:
        load.asXmmUnaryRmR(sseOpcodeMovss, a, dst)
    case ssa.TypeF64:
        load.asXmmUnaryRmR(sseOpcodeMovsd, a, dst)
    case ssa.TypeV128:
        load.asXmmUnaryRmR(sseOpcodeMovdqu, a, dst)
    }

    cur = linkInstr(cur, load)
    if arg.Kind == backend.ABIArgKindStack {
        // Store back to the stack.
        store := m.allocateInstr()
        a := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset), rspVReg))
        switch arg.Type {
        case ssa.TypeI32:
            store.asMovRM(dst, a, 4)
        case ssa.TypeI64:
            store.asMovRM(dst, a, 8)
        case ssa.TypeF32:
            store.asXmmMovRM(sseOpcodeMovss, dst, a)
        case ssa.TypeF64:
            store.asXmmMovRM(sseOpcodeMovsd, dst, a)
        case ssa.TypeV128:
            store.asXmmMovRM(sseOpcodeMovdqu, dst, a)
        }
        cur = linkInstr(cur, store)
    }
    return cur
}

func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, offsetInResultSlice uint32, result *backend.ABIArg, resultStackSlotBeginOffset uint32) *instruction {
    var r regalloc.VReg
    if result.Kind == backend.ABIArgKindStack {
        // Load the value to the temporary.
        load := m.allocateInstr()
        offset := resultStackSlotBeginOffset + uint32(result.Offset)
        a := newOperandMem(m.newAmodeImmReg(offset, rspVReg))
        switch result.Type {
        case ssa.TypeI32:
            r = tmpIntReg
            load.asMovzxRmR(extModeLQ, a, r)
        case ssa.TypeI64:
            r = tmpIntReg
            load.asMov64MR(a, r)
        case ssa.TypeF32:
            r = tmpXmmReg
            load.asXmmUnaryRmR(sseOpcodeMovss, a, r)
        case ssa.TypeF64:
            r = tmpXmmReg
            load.asXmmUnaryRmR(sseOpcodeMovsd, a, r)
        case ssa.TypeV128:
            r = tmpXmmReg
            load.asXmmUnaryRmR(sseOpcodeMovdqu, a, r)
        default:
            panic("BUG")
        }
        cur = linkInstr(cur, load)
    } else {
        r = result.Reg
    }

    store := m.allocateInstr()
    a := newOperandMem(m.newAmodeImmReg(offsetInResultSlice, resultSlicePtr))
    switch result.Type {
    case ssa.TypeI32:
        store.asMovRM(r, a, 4)
    case ssa.TypeI64:
        store.asMovRM(r, a, 8)
    case ssa.TypeF32:
        store.asXmmMovRM(sseOpcodeMovss, r, a)
    case ssa.TypeF64:
        store.asXmmMovRM(sseOpcodeMovsd, r, a)
    case ssa.TypeV128:
        store.asXmmMovRM(sseOpcodeMovdqu, r, a)
    }

    return linkInstr(cur, store)
}
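The offset bookkeeping above fixes the layout of the buffer behind paramResultSlicePtr: every scalar parameter or result occupies one 8-byte slot (i32 values are zero-extended into theirs), while a v128 value occupies 16 bytes. A minimal stand-alone sketch of that packing, assuming little-endian layout; the helper below is illustrative only and not part of wazero's API:

package main

import (
    "encoding/binary"
    "fmt"
    "math"
)

// packParams lays values out the way the entry preamble reads them:
// 8 bytes per scalar (i32/i64/f32/f64), 16 bytes per v128.
func packParams(i32 uint32, f64 float64, v128 [16]byte) []byte {
    buf := make([]byte, 8+8+16)
    binary.LittleEndian.PutUint64(buf[0:], uint64(i32))           // i32 zero-extended to a full slot
    binary.LittleEndian.PutUint64(buf[8:], math.Float64bits(f64)) // f64 stored as raw bits
    copy(buf[16:], v128[:])                                       // v128 takes a 16-byte slot
    return buf
}

func main() {
    fmt.Println(len(packParams(1, 2.5, [16]byte{}))) // 32
}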
443 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/abi_go_call.go generated vendored Normal file
@@ -0,0 +1,443 @@
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
var calleeSavedVRegs = []regalloc.VReg{
|
||||
rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
|
||||
xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
|
||||
}
|
||||
|
||||
// CompileGoFunctionTrampoline implements backend.Machine.
|
||||
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
|
||||
ectx := m.ectx
|
||||
argBegin := 1 // Skips exec context by default.
|
||||
if needModuleContextPtr {
|
||||
argBegin++
|
||||
}
|
||||
|
||||
abi := &backend.FunctionABI{}
|
||||
abi.Init(sig, intArgResultRegs, floatArgResultRegs)
|
||||
m.currentABI = abi
|
||||
|
||||
cur := m.allocateNop()
|
||||
ectx.RootInstr = cur
|
||||
|
||||
// Execution context is always the first argument.
|
||||
execCtrPtr := raxVReg
|
||||
|
||||
// First we update RBP and RSP just like the normal prologue.
|
||||
//
|
||||
// (high address) (high address)
|
||||
// RBP ----> +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | ====> | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | Return Addr | | Return Addr |
|
||||
// RSP ----> +-----------------+ | Caller_RBP |
|
||||
// (low address) +-----------------+ <----- RSP, RBP
|
||||
//
|
||||
cur = m.setupRBPRSP(cur)
|
||||
|
||||
goSliceSizeAligned, goSliceSizeAlignedUnaligned := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
|
||||
cur = m.insertStackBoundsCheck(goSliceSizeAligned+8 /* size of the Go slice */, cur)
|
||||
|
||||
// Save the callee saved registers.
|
||||
cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
|
||||
|
||||
if needModuleContextPtr {
|
||||
moduleCtrPtr := rbxVReg // Module context is always the second argument.
|
||||
mem := m.newAmodeImmReg(
|
||||
wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.U32(),
|
||||
execCtrPtr)
|
||||
store := m.allocateInstr().asMovRM(moduleCtrPtr, newOperandMem(mem), 8)
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
|
||||
// Now let's advance the RSP to the stack slot for the arguments.
|
||||
//
|
||||
// (high address) (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | =======> | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | Return Addr | | Return Addr |
|
||||
// | Caller_RBP | | Caller_RBP |
|
||||
// RBP,RSP --> +-----------------+ +-----------------+ <----- RBP
|
||||
// (low address) | arg[N]/ret[M] |
|
||||
// | .......... |
|
||||
// | arg[1]/ret[1] |
|
||||
// | arg[0]/ret[0] |
|
||||
// +-----------------+ <----- RSP
|
||||
// (low address)
|
||||
//
|
||||
// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
|
||||
// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
|
||||
// the arguments/return values to/from Go function.
|
||||
cur = m.addRSP(-int32(goSliceSizeAligned), cur)
|
||||
|
||||
// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
|
||||
var offsetInGoSlice int32
|
||||
for i := range abi.Args[argBegin:] {
|
||||
arg := &abi.Args[argBegin+i]
|
||||
var v regalloc.VReg
|
||||
if arg.Kind == backend.ABIArgKindReg {
|
||||
v = arg.Reg
|
||||
} else {
|
||||
// We have saved callee saved registers, so we can use them.
|
||||
if arg.Type.IsInt() {
|
||||
v = r15VReg
|
||||
} else {
|
||||
v = xmm15VReg
|
||||
}
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
|
||||
load := m.allocateInstr()
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32:
|
||||
load.asMovzxRmR(extModeLQ, mem, v)
|
||||
case ssa.TypeI64:
|
||||
load.asMov64MR(mem, v)
|
||||
case ssa.TypeF32:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
|
||||
case ssa.TypeF64:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
|
||||
case ssa.TypeV128:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, load)
|
||||
}
|
||||
|
||||
store := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32:
|
||||
store.asMovRM(v, mem, 4)
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
case ssa.TypeI64:
|
||||
store.asMovRM(v, mem, 8)
|
||||
offsetInGoSlice += 8
|
||||
case ssa.TypeF32:
|
||||
store.asXmmMovRM(sseOpcodeMovss, v, mem)
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
case ssa.TypeF64:
|
||||
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
|
||||
offsetInGoSlice += 8
|
||||
case ssa.TypeV128:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
|
||||
offsetInGoSlice += 16
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
|
||||
// Finally we push the size of the slice to the stack so the stack looks like:
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | Return Addr |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <----- RBP
|
||||
// | arg[N]/ret[M] |
|
||||
// | .......... |
|
||||
// | arg[1]/ret[1] |
|
||||
// | arg[0]/ret[0] |
|
||||
// | slice size |
|
||||
// +-----------------+ <----- RSP
|
||||
// (low address)
|
||||
//
|
||||
// push $sliceSize
|
||||
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandImm32(uint32(goSliceSizeAlignedUnaligned))))
|
||||
|
||||
// Load the exitCode to the register.
|
||||
exitCodeReg := r12VReg // Callee saved which is already saved.
|
||||
cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(exitCode), false))
|
||||
|
||||
saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
|
||||
cur = linkInstr(cur, setExitCode)
|
||||
cur = linkInstr(cur, saveRsp)
|
||||
cur = linkInstr(cur, saveRbp)
|
||||
|
||||
// Ready to exit the execution.
|
||||
cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
|
||||
|
||||
// We don't need the slice size anymore, so pop it.
|
||||
cur = m.addRSP(8, cur)
|
||||
|
||||
// Ready to set up the results.
|
||||
offsetInGoSlice = 0
|
||||
// To avoid overwriting the execution context pointer with a result, we need to track the offset,
|
||||
// and defer the restoration of the result to the end of this function.
|
||||
var argOverlapWithExecCtxOffset int32 = -1
|
||||
for i := range abi.Rets {
|
||||
r := &abi.Rets[i]
|
||||
var v regalloc.VReg
|
||||
isRegResult := r.Kind == backend.ABIArgKindReg
|
||||
if isRegResult {
|
||||
v = r.Reg
|
||||
if v.RealReg() == execCtrPtr.RealReg() {
|
||||
argOverlapWithExecCtxOffset = offsetInGoSlice
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
if r.Type.IsInt() {
|
||||
v = r15VReg
|
||||
} else {
|
||||
v = xmm15VReg
|
||||
}
|
||||
}
|
||||
|
||||
load := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
|
||||
switch r.Type {
|
||||
case ssa.TypeI32:
|
||||
load.asMovzxRmR(extModeLQ, mem, v)
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
case ssa.TypeI64:
|
||||
load.asMov64MR(mem, v)
|
||||
offsetInGoSlice += 8
|
||||
case ssa.TypeF32:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
|
||||
offsetInGoSlice += 8 // always uint64 rep.
|
||||
case ssa.TypeF64:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
|
||||
offsetInGoSlice += 8
|
||||
case ssa.TypeV128:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
|
||||
offsetInGoSlice += 16
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, load)
|
||||
|
||||
if !isRegResult {
|
||||
// We need to store it back to the result slot above rbp.
|
||||
store := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(abi.ArgStackSize+r.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
|
||||
switch r.Type {
|
||||
case ssa.TypeI32:
|
||||
store.asMovRM(v, mem, 4)
|
||||
case ssa.TypeI64:
|
||||
store.asMovRM(v, mem, 8)
|
||||
case ssa.TypeF32:
|
||||
store.asXmmMovRM(sseOpcodeMovss, v, mem)
|
||||
case ssa.TypeF64:
|
||||
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
|
||||
case ssa.TypeV128:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
}
|
||||
|
||||
// Before return, we need to restore the callee saved registers.
|
||||
cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
|
||||
|
||||
if argOverlapWithExecCtxOffset >= 0 {
|
||||
// At this point execCtx is not used anymore, so we can finally store the
|
||||
// result to the register which overlaps with the execution context pointer.
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(argOverlapWithExecCtxOffset), rspVReg))
|
||||
load := m.allocateInstr().asMov64MR(mem, execCtrPtr)
|
||||
cur = linkInstr(cur, load)
|
||||
}
|
||||
|
||||
// Finally ready to return.
|
||||
cur = m.revertRBPRSP(cur)
|
||||
linkInstr(cur, m.allocateInstr().asRet())
|
||||
|
||||
m.encodeWithoutSSA(ectx.RootInstr)
|
||||
return m.c.Buf()
|
||||
}
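Since the trampoline pushes the slot count immediately below arg[0]/ret[0], the Go side can view that region as an ordinary []uint64 of exactly that length. A hedged sketch of that reinterpretation; the helper name is made up and the real wazero code may construct the slice differently:

package sketch

import "unsafe"

// stackSliceView reinterprets the pushed region as a []uint64, mirroring how the
// Go half of the call reads arguments and writes results back. Illustrative only.
func stackSliceView(base *uint64, slots uint64) []uint64 {
    return unsafe.Slice(base, slots)
}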
|
||||
|
||||
func (m *machine) saveRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
|
||||
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
|
||||
for _, v := range regs {
|
||||
store := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
|
||||
switch v.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
store.asMovRM(v, mem, 8)
|
||||
case regalloc.RegTypeFloat:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, store)
|
||||
offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally.
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) restoreRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
|
||||
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
|
||||
for _, v := range regs {
|
||||
load := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
|
||||
switch v.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
load.asMov64MR(mem, v)
|
||||
case regalloc.RegTypeFloat:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
cur = linkInstr(cur, load)
|
||||
offset += 16 // See execution context struct. Each register is 16 bytes-aligned unconditionally.
|
||||
}
|
||||
return cur
|
||||
}
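Because every slot in the saved-registers area is 16-byte aligned regardless of register class, the location of the i-th entry of regs is a simple linear function of the area's starting offset. A tiny sketch of that arithmetic (illustrative only, not part of the vendored file):

// savedRegOffset returns the byte offset of the i-th saved register slot,
// given the offset at which the saved-registers area begins.
func savedRegOffset(begin int64, i int) int64 {
    return begin + int64(i)*16
}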
|
||||
|
||||
func (m *machine) storeReturnAddressAndExit(cur *instruction, execCtx regalloc.VReg) *instruction {
|
||||
readRip := m.allocateInstr()
|
||||
cur = linkInstr(cur, readRip)
|
||||
|
||||
ripReg := r12VReg // Callee saved which is already saved.
|
||||
saveRip := m.allocateInstr().asMovRM(
|
||||
ripReg,
|
||||
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)),
|
||||
8,
|
||||
)
|
||||
cur = linkInstr(cur, saveRip)
|
||||
|
||||
exit := m.allocateExitSeq(execCtx)
|
||||
cur = linkInstr(cur, exit)
|
||||
|
||||
nop, l := m.allocateBrTarget()
|
||||
cur = linkInstr(cur, nop)
|
||||
readRip.asLEA(newOperandLabel(l), ripReg)
|
||||
return cur
|
||||
}
|
||||
|
||||
// saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient
|
||||
// stack space left. Basically this is all the allocatable registers except for RSP, RBP, and RAX, which contains the
|
||||
// execution context pointer. ExecCtx pointer is always the first argument so we don't need to save it.
|
||||
var stackGrowSaveVRegs = []regalloc.VReg{
|
||||
rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
|
||||
rcxVReg, rbxVReg, rsiVReg, rdiVReg, r8VReg, r9VReg, r10VReg, r11VReg,
|
||||
xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
|
||||
xmm0VReg, xmm1VReg, xmm2VReg, xmm3VReg, xmm4VReg, xmm5VReg, xmm6VReg, xmm7VReg,
|
||||
}
|
||||
|
||||
// CompileStackGrowCallSequence implements backend.Machine.
|
||||
func (m *machine) CompileStackGrowCallSequence() []byte {
|
||||
ectx := m.ectx
|
||||
|
||||
cur := m.allocateNop()
|
||||
ectx.RootInstr = cur
|
||||
|
||||
cur = m.setupRBPRSP(cur)
|
||||
|
||||
// Execution context is always the first argument.
|
||||
execCtrPtr := raxVReg
|
||||
|
||||
// Save the callee saved and argument registers.
|
||||
cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
|
||||
|
||||
// Load the exitCode to the register.
|
||||
exitCodeReg := r12VReg // Already saved.
|
||||
cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(wazevoapi.ExitCodeGrowStack), false))
|
||||
|
||||
saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
|
||||
cur = linkInstr(cur, setExitCode)
|
||||
cur = linkInstr(cur, saveRsp)
|
||||
cur = linkInstr(cur, saveRbp)
|
||||
|
||||
// Ready to exit the execution.
|
||||
cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
|
||||
|
||||
// After the exit, restore the saved registers.
|
||||
cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
|
||||
|
||||
// Finally ready to return.
|
||||
cur = m.revertRBPRSP(cur)
|
||||
linkInstr(cur, m.allocateInstr().asRet())
|
||||
|
||||
m.encodeWithoutSSA(ectx.RootInstr)
|
||||
return m.c.Buf()
|
||||
}
|
||||
|
||||
// insertStackBoundsCheck will insert the instructions after `cur` to check the
|
||||
// stack bounds, and if there's no sufficient spaces required for the function,
|
||||
// exit the execution and try growing it in Go world.
|
||||
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
|
||||
// add $requiredStackSize, %rsp ;; Temporarily update the sp.
|
||||
// cmp ExecutionContextOffsetStackBottomPtr(%rax), %rsp ;; Compare the stack bottom and the sp.
|
||||
// ja .ok
|
||||
// sub $requiredStackSize, %rsp ;; Reverse the temporary update.
|
||||
// pushq r15 ;; save the temporary.
|
||||
// mov $requiredStackSize, %r15
|
||||
// mov %r15, ExecutionContextOffsetStackGrowRequiredSize(%rax) ;; Set the required size in the execution context.
|
||||
// popq r15 ;; restore the temporary.
|
||||
// callq *ExecutionContextOffsetStackGrowCallTrampolineAddress(%rax) ;; Call the Go function to grow the stack.
|
||||
// jmp .cont
|
||||
// .ok:
|
||||
// sub $requiredStackSize, %rsp ;; Reverse the temporary update.
|
||||
// .cont:
|
||||
cur = m.addRSP(-int32(requiredStackSize), cur)
|
||||
cur = linkInstr(cur, m.allocateInstr().asCmpRmiR(true,
|
||||
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackBottomPtr.U32(), raxVReg)),
|
||||
rspVReg, true))
|
||||
|
||||
ja := m.allocateInstr()
|
||||
cur = linkInstr(cur, ja)
|
||||
|
||||
cur = m.addRSP(int32(requiredStackSize), cur)
|
||||
|
||||
// Save the temporary.
|
||||
|
||||
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r15VReg)))
|
||||
// Load the required size to the temporary.
|
||||
cur = linkInstr(cur, m.allocateInstr().asImm(r15VReg, uint64(requiredStackSize), true))
|
||||
// Set the required size in the execution context.
|
||||
cur = linkInstr(cur, m.allocateInstr().asMovRM(r15VReg,
|
||||
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.U32(), raxVReg)), 8))
|
||||
// Restore the temporary.
|
||||
cur = linkInstr(cur, m.allocateInstr().asPop64(r15VReg))
|
||||
// Call the Go function to grow the stack.
|
||||
cur = linkInstr(cur, m.allocateInstr().asCallIndirect(newOperandMem(m.newAmodeImmReg(
|
||||
wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.U32(), raxVReg)), nil))
|
||||
// Jump to the continuation.
|
||||
jmpToCont := m.allocateInstr()
|
||||
cur = linkInstr(cur, jmpToCont)
|
||||
|
||||
// .ok:
|
||||
okInstr, ok := m.allocateBrTarget()
|
||||
cur = linkInstr(cur, okInstr)
|
||||
ja.asJmpIf(condNBE, newOperandLabel(ok))
|
||||
// On the ok path, we only need to reverse the temporary update.
|
||||
cur = m.addRSP(int32(requiredStackSize), cur)
|
||||
|
||||
// .cont:
|
||||
contInstr, cont := m.allocateBrTarget()
|
||||
cur = linkInstr(cur, contInstr)
|
||||
jmpToCont.asJmp(newOperandLabel(cont))
|
||||
|
||||
return cur
|
||||
}
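Rendered as ordinary Go, the emitted sequence amounts to the following check. The struct and field names below are illustrative stand-ins for the execution-context offsets referenced above, not wazero's actual types:

type execCtxSketch struct {
    stackBottomPtr        uintptr
    stackGrowRequiredSize uint64
    growStack             func() // stands in for the indirect call through the trampoline address
}

func stackBoundsCheck(rsp, required uintptr, ctx *execCtxSketch) {
    if rsp-required > ctx.stackBottomPtr { // the "ja .ok" path
        return // enough space; continue on the current stack
    }
    ctx.stackGrowRequiredSize = uint64(required)
    ctx.growStack() // exit to Go, grow the stack, then resume
}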
|
||||
168 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/cond.go generated vendored Normal file
@@ -0,0 +1,168 @@
package amd64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
type cond byte
|
||||
|
||||
const (
|
||||
// condO represents (overflow) condition.
|
||||
condO cond = iota
|
||||
// condNO represents (no overflow) condition.
|
||||
condNO
|
||||
// condB represents (< unsigned) condition.
|
||||
condB
|
||||
// condNB represents (>= unsigned) condition.
|
||||
condNB
|
||||
// condZ represents (zero) condition.
|
||||
condZ
|
||||
// condNZ represents (not-zero) condition.
|
||||
condNZ
|
||||
// condBE represents (<= unsigned) condition.
|
||||
condBE
|
||||
// condNBE represents (> unsigned) condition.
|
||||
condNBE
|
||||
// condS represents (negative) condition.
|
||||
condS
|
||||
// condNS represents (not-negative) condition.
|
||||
condNS
|
||||
// condP represents (parity) condition.
|
||||
condP
|
||||
// condNP represents (not parity) condition.
|
||||
condNP
|
||||
// condL represents (< signed) condition.
|
||||
condL
|
||||
// condNL represents (>= signed) condition.
|
||||
condNL
|
||||
// condLE represents (<= signed) condition.
|
||||
condLE
|
||||
// condNLE represents (> signed) condition.
|
||||
condNLE
|
||||
|
||||
condInvalid
|
||||
)
|
||||
|
||||
func (c cond) String() string {
|
||||
switch c {
|
||||
case condO:
|
||||
return "o"
|
||||
case condNO:
|
||||
return "no"
|
||||
case condB:
|
||||
return "b"
|
||||
case condNB:
|
||||
return "nb"
|
||||
case condZ:
|
||||
return "z"
|
||||
case condNZ:
|
||||
return "nz"
|
||||
case condBE:
|
||||
return "be"
|
||||
case condNBE:
|
||||
return "nbe"
|
||||
case condS:
|
||||
return "s"
|
||||
case condNS:
|
||||
return "ns"
|
||||
case condL:
|
||||
return "l"
|
||||
case condNL:
|
||||
return "nl"
|
||||
case condLE:
|
||||
return "le"
|
||||
case condNLE:
|
||||
return "nle"
|
||||
case condP:
|
||||
return "p"
|
||||
case condNP:
|
||||
return "np"
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
func condFromSSAIntCmpCond(origin ssa.IntegerCmpCond) cond {
|
||||
switch origin {
|
||||
case ssa.IntegerCmpCondEqual:
|
||||
return condZ
|
||||
case ssa.IntegerCmpCondNotEqual:
|
||||
return condNZ
|
||||
case ssa.IntegerCmpCondSignedLessThan:
|
||||
return condL
|
||||
case ssa.IntegerCmpCondSignedGreaterThanOrEqual:
|
||||
return condNL
|
||||
case ssa.IntegerCmpCondSignedGreaterThan:
|
||||
return condNLE
|
||||
case ssa.IntegerCmpCondSignedLessThanOrEqual:
|
||||
return condLE
|
||||
case ssa.IntegerCmpCondUnsignedLessThan:
|
||||
return condB
|
||||
case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual:
|
||||
return condNB
|
||||
case ssa.IntegerCmpCondUnsignedGreaterThan:
|
||||
return condNBE
|
||||
case ssa.IntegerCmpCondUnsignedLessThanOrEqual:
|
||||
return condBE
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
func condFromSSAFloatCmpCond(origin ssa.FloatCmpCond) cond {
|
||||
switch origin {
|
||||
case ssa.FloatCmpCondGreaterThanOrEqual:
|
||||
return condNB
|
||||
case ssa.FloatCmpCondGreaterThan:
|
||||
return condNBE
|
||||
case ssa.FloatCmpCondEqual, ssa.FloatCmpCondNotEqual, ssa.FloatCmpCondLessThan, ssa.FloatCmpCondLessThanOrEqual:
|
||||
panic(fmt.Sprintf("cond %s must be treated as a special case", origin))
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
func (c cond) encoding() byte {
|
||||
return byte(c)
|
||||
}
|
||||
|
||||
func (c cond) invert() cond {
|
||||
switch c {
|
||||
case condO:
|
||||
return condNO
|
||||
case condNO:
|
||||
return condO
|
||||
case condB:
|
||||
return condNB
|
||||
case condNB:
|
||||
return condB
|
||||
case condZ:
|
||||
return condNZ
|
||||
case condNZ:
|
||||
return condZ
|
||||
case condBE:
|
||||
return condNBE
|
||||
case condNBE:
|
||||
return condBE
|
||||
case condS:
|
||||
return condNS
|
||||
case condNS:
|
||||
return condS
|
||||
case condP:
|
||||
return condNP
|
||||
case condNP:
|
||||
return condP
|
||||
case condL:
|
||||
return condNL
|
||||
case condNL:
|
||||
return condL
|
||||
case condLE:
|
||||
return condNLE
|
||||
case condNLE:
|
||||
return condLE
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
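As a quick in-package usage sketch (not part of the vendored file), this is how a signed less-than comparison maps to a branch condition and how its negation is obtained:

func exampleSignedLessThan() (taken, notTaken cond) {
    taken = condFromSSAIntCmpCond(ssa.IntegerCmpCondSignedLessThan) // condL
    notTaken = taken.invert()                                       // condNL: branch when x >= y (signed)
    return
}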
|
||||
35 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/ext.go generated vendored Normal file
@@ -0,0 +1,35 @@
package amd64

// extMode represents the mode of extension in movzx/movsx.
type extMode byte

const (
    // extModeBL represents Byte -> Longword.
    extModeBL extMode = iota
    // extModeBQ represents Byte -> Quadword.
    extModeBQ
    // extModeWL represents Word -> Longword.
    extModeWL
    // extModeWQ represents Word -> Quadword.
    extModeWQ
    // extModeLQ represents Longword -> Quadword.
    extModeLQ
)

// String implements fmt.Stringer.
func (e extMode) String() string {
    switch e {
    case extModeBL:
        return "bl"
    case extModeBQ:
        return "bq"
    case extModeWL:
        return "wl"
    case extModeWQ:
        return "wq"
    case extModeLQ:
        return "lq"
    default:
        panic("BUG: invalid ext mode")
    }
}
2472 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr.go generated vendored Normal file
File diff suppressed because it is too large
1683 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/instr_encoding.go generated vendored Normal file
File diff suppressed because it is too large
71 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_constant.go generated vendored Normal file
@@ -0,0 +1,71 @@
package amd64

import (
    "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
    "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
    val := instr.Return()
    valType := val.Type()

    vr = m.c.AllocateVReg(valType)
    m.insertLoadConstant(instr, vr)
    return
}

// InsertLoadConstantBlockArg implements backend.Machine.
func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
    m.insertLoadConstant(instr, vr)
}

func (m *machine) insertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) {
    val := instr.Return()
    valType := val.Type()
    v := instr.ConstantVal()

    bits := valType.Bits()
    if bits < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
        v = v & ((1 << valType.Bits()) - 1)
    }

    switch valType {
    case ssa.TypeF32, ssa.TypeF64:
        m.lowerFconst(vr, v, bits == 64)
    case ssa.TypeI32, ssa.TypeI64:
        m.lowerIconst(vr, v, bits == 64)
    default:
        panic("BUG")
    }
}

func (m *machine) lowerFconst(dst regalloc.VReg, c uint64, _64 bool) {
    if c == 0 {
        xor := m.allocateInstr().asZeros(dst)
        m.insert(xor)
    } else {
        var tmpType ssa.Type
        if _64 {
            tmpType = ssa.TypeI64
        } else {
            tmpType = ssa.TypeI32
        }
        tmpInt := m.c.AllocateVReg(tmpType)
        loadToGP := m.allocateInstr().asImm(tmpInt, c, _64)
        m.insert(loadToGP)

        movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpInt), dst, _64)
        m.insert(movToXmm)
    }
}

func (m *machine) lowerIconst(dst regalloc.VReg, c uint64, _64 bool) {
    i := m.allocateInstr()
    if c == 0 {
        i.asZeros(dst)
    } else {
        i.asImm(dst, c, _64)
    }
    m.insert(i)
}
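The masking in insertLoadConstant can be checked in isolation: a 32-bit constant that arrives sign-extended to 64 bits is truncated back to its low 32 bits before lowering. A stand-alone illustration (not part of the vendored file):

// maskToWidth clears everything above the given bit width, e.g.
// maskToWidth(0xffffffff80000000, 32) == 0x80000000.
func maskToWidth(v uint64, bits uint) uint64 {
    if bits >= 64 {
        return v
    }
    return v & ((1 << bits) - 1)
}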
187 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/lower_mem.go generated vendored Normal file
@@ -0,0 +1,187 @@
package amd64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
var addendsMatchOpcodes = [...]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst, ssa.OpcodeIshl}
|
||||
|
||||
type addend struct {
|
||||
r regalloc.VReg
|
||||
off int64
|
||||
shift byte
|
||||
}
|
||||
|
||||
func (a addend) String() string {
|
||||
return fmt.Sprintf("addend{r=%s, off=%d, shift=%d}", a.r, a.off, a.shift)
|
||||
}
|
||||
|
||||
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
|
||||
func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32) (am *amode) {
|
||||
def := m.c.ValueDefinition(ptr)
|
||||
|
||||
if offsetBase&0x80000000 != 0 {
|
||||
// Special casing the huge base offset whose MSB is set. In x64, the immediate is always
|
||||
// sign-extended, but our IR semantics requires the offset base is always unsigned.
|
||||
// Note that this should be extremely rare, or may never be hit in a real application,
|
||||
// therefore we don't need to optimize this case in my opinion.
|
||||
|
||||
a := m.lowerAddend(def)
|
||||
off64 := a.off + int64(offsetBase)
|
||||
offsetBaseReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(offsetBaseReg, uint64(off64), true)
|
||||
if a.r != regalloc.VRegInvalid {
|
||||
return m.newAmodeRegRegShift(0, offsetBaseReg, a.r, a.shift)
|
||||
} else {
|
||||
return m.newAmodeImmReg(0, offsetBaseReg)
|
||||
}
|
||||
}
|
||||
|
||||
if op := m.c.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op == ssa.OpcodeIadd {
|
||||
add := def.Instr
|
||||
x, y := add.Arg2()
|
||||
xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y)
|
||||
ax := m.lowerAddend(xDef)
|
||||
ay := m.lowerAddend(yDef)
|
||||
add.MarkLowered()
|
||||
return m.lowerAddendsToAmode(ax, ay, offsetBase)
|
||||
} else {
|
||||
// If it is not an Iadd, then we lower the one addend.
|
||||
a := m.lowerAddend(def)
|
||||
// off is always 0 if r is valid.
|
||||
if a.r != regalloc.VRegInvalid {
|
||||
if a.shift != 0 {
|
||||
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(tmpReg, 0, true)
|
||||
return m.newAmodeRegRegShift(offsetBase, tmpReg, a.r, a.shift)
|
||||
}
|
||||
return m.newAmodeImmReg(offsetBase, a.r)
|
||||
} else {
|
||||
off64 := a.off + int64(offsetBase)
|
||||
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(tmpReg, uint64(off64), true)
|
||||
return m.newAmodeImmReg(0, tmpReg)
|
||||
}
|
||||
}
|
||||
}
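The address modes produced here correspond to the standard x86-64 form base + (index << shift) + disp32, with the displacement sign-extended. A small sketch spelling out the address such a mode denotes (illustrative only, not part of the vendored file):

// effectiveAddr computes base + (index << shift) + sign-extended 32-bit displacement,
// i.e. the address an amode built above ultimately resolves to on the CPU.
func effectiveAddr(base, index uint64, shift byte, disp32 uint32) uint64 {
    return base + (index << shift) + uint64(int64(int32(disp32)))
}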
|
||||
|
||||
func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode {
|
||||
if x.r != regalloc.VRegInvalid && x.off != 0 || y.r != regalloc.VRegInvalid && y.off != 0 {
|
||||
panic("invalid input")
|
||||
}
|
||||
|
||||
u64 := uint64(x.off+y.off) + uint64(offBase)
|
||||
if u64 != 0 {
|
||||
if _, ok := asImm32(u64, false); !ok {
|
||||
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(tmpReg, u64, true)
|
||||
// Blank u64 as it has been already lowered.
|
||||
u64 = 0
|
||||
|
||||
if x.r == regalloc.VRegInvalid {
|
||||
x.r = tmpReg
|
||||
} else if y.r == regalloc.VRegInvalid {
|
||||
y.r = tmpReg
|
||||
} else {
|
||||
// We already know that either rx or ry is invalid,
|
||||
// so we overwrite it with the temporary register.
|
||||
panic("BUG")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u32 := uint32(u64)
|
||||
switch {
|
||||
// We assume rx, ry are valid iff offx, offy are 0.
|
||||
case x.r != regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
|
||||
switch {
|
||||
case x.shift != 0 && y.shift != 0:
|
||||
// Cannot absorb two shifted registers, must lower one to a shift instruction.
|
||||
shifted := m.allocateInstr()
|
||||
shifted.asShiftR(shiftROpShiftLeft, newOperandImm32(uint32(x.shift)), x.r, true)
|
||||
m.insert(shifted)
|
||||
|
||||
return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
|
||||
case x.shift != 0 && y.shift == 0:
|
||||
// Swap base and index.
|
||||
x, y = y, x
|
||||
fallthrough
|
||||
default:
|
||||
return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
|
||||
}
|
||||
case x.r == regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
|
||||
x, y = y, x
|
||||
fallthrough
|
||||
case x.r != regalloc.VRegInvalid && y.r == regalloc.VRegInvalid:
|
||||
if x.shift != 0 {
|
||||
zero := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(zero, 0, true)
|
||||
return m.newAmodeRegRegShift(u32, zero, x.r, x.shift)
|
||||
}
|
||||
return m.newAmodeImmReg(u32, x.r)
|
||||
default: // Both are invalid: use the offset.
|
||||
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerIconst(tmpReg, u64, true)
|
||||
return m.newAmodeImmReg(0, tmpReg)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerAddend(x *backend.SSAValueDefinition) addend {
|
||||
if x.IsFromBlockParam() {
|
||||
return addend{x.BlkParamVReg, 0, 0}
|
||||
}
|
||||
// Ensure the addend is not referenced in multiple places; we will discard nested Iadds.
|
||||
op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:])
|
||||
if op != ssa.OpcodeInvalid && op != ssa.OpcodeIadd {
|
||||
return m.lowerAddendFromInstr(x.Instr)
|
||||
}
|
||||
p := m.getOperand_Reg(x)
|
||||
return addend{p.reg(), 0, 0}
|
||||
}
|
||||
|
||||
// lowerAddendFromInstr takes an instruction and returns a VReg and an offset that can be used in an address mode.
|
||||
// The Vreg is regalloc.VRegInvalid if the addend cannot be lowered to a register.
|
||||
// The offset is 0 if the addend can be lowered to a register.
|
||||
func (m *machine) lowerAddendFromInstr(instr *ssa.Instruction) addend {
|
||||
instr.MarkLowered()
|
||||
switch op := instr.Opcode(); op {
|
||||
case ssa.OpcodeIconst:
|
||||
u64 := instr.ConstantVal()
|
||||
if instr.Return().Type().Bits() == 32 {
|
||||
return addend{regalloc.VRegInvalid, int64(int32(u64)), 0} // sign-extend.
|
||||
} else {
|
||||
return addend{regalloc.VRegInvalid, int64(u64), 0}
|
||||
}
|
||||
case ssa.OpcodeUExtend, ssa.OpcodeSExtend:
|
||||
input := instr.Arg()
|
||||
inputDef := m.c.ValueDefinition(input)
|
||||
if input.Type().Bits() != 32 {
|
||||
panic("BUG: invalid input type " + input.Type().String())
|
||||
}
|
||||
constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant()
|
||||
switch {
|
||||
case constInst && op == ssa.OpcodeSExtend:
|
||||
return addend{regalloc.VRegInvalid, int64(uint32(inputDef.Instr.ConstantVal())), 0}
|
||||
case constInst && op == ssa.OpcodeUExtend:
|
||||
return addend{regalloc.VRegInvalid, int64(int32(inputDef.Instr.ConstantVal())), 0} // sign-extend!
|
||||
default:
|
||||
r := m.getOperand_Reg(inputDef)
|
||||
return addend{r.reg(), 0, 0}
|
||||
}
|
||||
case ssa.OpcodeIshl:
|
||||
// If the addend is a shift, we can only handle it if the shift amount is a constant.
|
||||
x, amount := instr.Arg2()
|
||||
amountDef := m.c.ValueDefinition(amount)
|
||||
if amountDef.IsFromInstr() && amountDef.Instr.Constant() && amountDef.Instr.ConstantVal() <= 3 {
|
||||
r := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
return addend{r.reg(), 0, uint8(amountDef.Instr.ConstantVal())}
|
||||
}
|
||||
r := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
return addend{r.reg(), 0, 0}
|
||||
}
|
||||
panic("BUG: invalid opcode")
|
||||
}
|
||||
3611 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go generated vendored Normal file
File diff suppressed because it is too large
304 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_pro_epi_logue.go generated vendored Normal file
@@ -0,0 +1,304 @@
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
)
|
||||
|
||||
// PostRegAlloc implements backend.Machine.
|
||||
func (m *machine) PostRegAlloc() {
|
||||
m.setupPrologue()
|
||||
m.postRegAlloc()
|
||||
}
|
||||
|
||||
func (m *machine) setupPrologue() {
|
||||
cur := m.ectx.RootInstr
|
||||
prevInitInst := cur.next
|
||||
|
||||
// At this point, we have the stack layout as follows:
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+ <----- RBP (somewhere in the middle of the stack)
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | Return Addr |
|
||||
// RSP ----> +-----------------+
|
||||
// (low address)
|
||||
|
||||
// First, we push the RBP, and update the RBP to the current RSP.
|
||||
//
|
||||
// (high address) (high address)
|
||||
// RBP ----> +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | ====> | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | Return Addr | | Return Addr |
|
||||
// RSP ----> +-----------------+ | Caller_RBP |
|
||||
// (low address) +-----------------+ <----- RSP, RBP
|
||||
//
|
||||
cur = m.setupRBPRSP(cur)
|
||||
|
||||
if !m.stackBoundsCheckDisabled {
|
||||
cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
|
||||
}
|
||||
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | xxxxx | | xxxxx |
|
||||
// | Return Addr | | Return Addr |
|
||||
// | Caller_RBP | ====> | Caller_RBP |
|
||||
// RBP,RSP->+-----------------+ +-----------------+ <----- RBP
|
||||
// (low address) | clobbered M |
|
||||
// | clobbered 1 |
|
||||
// | ........... |
|
||||
// | clobbered 0 |
|
||||
// +-----------------+ <----- RSP
|
||||
//
|
||||
if regs := m.clobberedRegs; len(regs) > 0 {
|
||||
for i := range regs {
|
||||
r := regs[len(regs)-1-i] // Reverse order.
|
||||
if r.RegType() == regalloc.RegTypeInt {
|
||||
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r)))
|
||||
} else {
|
||||
// Pushing an XMM register is not supported by the PUSH instruction.
|
||||
cur = m.addRSP(-16, cur)
|
||||
push := m.allocateInstr().asXmmMovRM(
|
||||
sseOpcodeMovdqu, r, newOperandMem(m.newAmodeImmReg(0, rspVReg)),
|
||||
)
|
||||
cur = linkInstr(cur, push)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if size := m.spillSlotSize; size > 0 {
|
||||
// Simply decrease the RSP to allocate the spill slots.
|
||||
// sub $size, %rsp
|
||||
cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(size)), rspVReg, true))
|
||||
|
||||
// At this point, we have the stack layout as follows:
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <--- RBP
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ............ |
|
||||
// | spill slot 0 |
|
||||
// +-----------------+ <--- RSP
|
||||
// (low address)
|
||||
}
|
||||
|
||||
linkInstr(cur, prevInitInst)
|
||||
}
|
||||
|
||||
// postRegAlloc does multiple things while walking through the instructions:
|
||||
// 1. Inserts the epilogue code.
|
||||
// 2. Removes the redundant copy instruction.
|
||||
// 3. Inserts the dec/inc RSP instruction right before/after the call instruction.
|
||||
// 4. Lowering that is supposed to be done after regalloc.
|
||||
func (m *machine) postRegAlloc() {
|
||||
ectx := m.ectx
|
||||
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
|
||||
switch k := cur.kind; k {
|
||||
case ret:
|
||||
m.setupEpilogueAfter(cur.prev)
|
||||
continue
|
||||
case fcvtToSintSequence, fcvtToUintSequence:
|
||||
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
|
||||
if k == fcvtToSintSequence {
|
||||
m.lowerFcvtToSintSequenceAfterRegalloc(cur)
|
||||
} else {
|
||||
m.lowerFcvtToUintSequenceAfterRegalloc(cur)
|
||||
}
|
||||
prev := cur.prev
|
||||
next := cur.next
|
||||
cur := prev
|
||||
for _, instr := range m.ectx.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
linkInstr(cur, next)
|
||||
continue
|
||||
case xmmCMov:
|
||||
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
|
||||
m.lowerXmmCmovAfterRegAlloc(cur)
|
||||
prev := cur.prev
|
||||
next := cur.next
|
||||
cur := prev
|
||||
for _, instr := range m.ectx.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
linkInstr(cur, next)
|
||||
continue
|
||||
case idivRemSequence:
|
||||
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
|
||||
m.lowerIDivRemSequenceAfterRegAlloc(cur)
|
||||
prev := cur.prev
|
||||
next := cur.next
|
||||
cur := prev
|
||||
for _, instr := range m.ectx.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
linkInstr(cur, next)
|
||||
continue
|
||||
case call, callIndirect:
|
||||
// At this point, reg alloc is done, therefore we can safely insert the dec/inc RSP instructions
|
||||
// right before/after the call instruction. If this is done before reg alloc, the stack slot
|
||||
// can point to the wrong location and therefore results in a wrong value.
|
||||
call := cur
|
||||
next := call.next
|
||||
_, _, _, _, size := backend.ABIInfoFromUint64(call.u2)
|
||||
if size > 0 {
|
||||
dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
|
||||
linkInstr(call.prev, dec)
|
||||
linkInstr(dec, call)
|
||||
inc := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(size), rspVReg, true)
|
||||
linkInstr(call, inc)
|
||||
linkInstr(inc, next)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Removes the redundant copy instruction.
|
||||
if cur.IsCopy() && cur.op1.reg().RealReg() == cur.op2.reg().RealReg() {
|
||||
prev, next := cur.prev, cur.next
|
||||
// Remove the copy instruction.
|
||||
prev.next = next
|
||||
if next != nil {
|
||||
next.prev = prev
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) setupEpilogueAfter(cur *instruction) {
|
||||
prevNext := cur.next
|
||||
|
||||
// At this point, we have the stack layout as follows:
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <--- RBP
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ............ |
|
||||
// | spill slot 0 |
|
||||
// +-----------------+ <--- RSP
|
||||
// (low address)
|
||||
|
||||
if size := m.spillSlotSize; size > 0 {
|
||||
// Simply increase the RSP to free the spill slots.
|
||||
// add $size, %rsp
|
||||
cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(uint32(size)), rspVReg, true))
|
||||
}
|
||||
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// | Caller_RBP | | Caller_RBP |
|
||||
// RBP ---> +-----------------+ ========> +-----------------+ <---- RSP, RBP
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// RSP ---> +-----------------+
|
||||
// (low address)
|
||||
//
|
||||
if regs := m.clobberedRegs; len(regs) > 0 {
|
||||
for _, r := range regs {
|
||||
if r.RegType() == regalloc.RegTypeInt {
|
||||
cur = linkInstr(cur, m.allocateInstr().asPop64(r))
|
||||
} else {
|
||||
// Popping an XMM register is not supported by the POP instruction.
|
||||
pop := m.allocateInstr().asXmmUnaryRmR(
|
||||
sseOpcodeMovdqu, newOperandMem(m.newAmodeImmReg(0, rspVReg)), r,
|
||||
)
|
||||
cur = linkInstr(cur, pop)
|
||||
cur = m.addRSP(16, cur)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now roll back the RSP to RBP, and pop the caller's RBP.
|
||||
cur = m.revertRBPRSP(cur)
|
||||
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
func (m *machine) addRSP(offset int32, cur *instruction) *instruction {
|
||||
if offset == 0 {
|
||||
return cur
|
||||
}
|
||||
opcode := aluRmiROpcodeAdd
|
||||
if offset < 0 {
|
||||
opcode = aluRmiROpcodeSub
|
||||
offset = -offset
|
||||
}
|
||||
return linkInstr(cur, m.allocateInstr().asAluRmiR(opcode, newOperandImm32(uint32(offset)), rspVReg, true))
|
||||
}
|
||||
|
||||
func (m *machine) setupRBPRSP(cur *instruction) *instruction {
|
||||
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(rbpVReg)))
|
||||
cur = linkInstr(cur, m.allocateInstr().asMovRR(rspVReg, rbpVReg, true))
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) revertRBPRSP(cur *instruction) *instruction {
|
||||
cur = linkInstr(cur, m.allocateInstr().asMovRR(rbpVReg, rspVReg, true))
|
||||
cur = linkInstr(cur, m.allocateInstr().asPop64(rbpVReg))
|
||||
return cur
|
||||
}
|
||||
153 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_regalloc.go generated vendored Normal file
@@ -0,0 +1,153 @@
package amd64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// InsertMoveBefore implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
|
||||
typ := src.RegType()
|
||||
if typ != dst.RegType() {
|
||||
panic("BUG: src and dst must have the same type")
|
||||
}
|
||||
|
||||
mov := m.allocateInstr()
|
||||
if typ == regalloc.RegTypeInt {
|
||||
mov.asMovRR(src, dst, true)
|
||||
} else {
|
||||
mov.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), dst)
|
||||
}
|
||||
|
||||
cur := instr.prev
|
||||
prevNext := cur.next
|
||||
cur = linkInstr(cur, mov)
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
|
||||
if !v.IsRealReg() {
|
||||
panic("BUG: VReg must be backed by real reg to be stored")
|
||||
}
|
||||
|
||||
typ := m.c.TypeOf(v)
|
||||
|
||||
var prevNext, cur *instruction
|
||||
if after {
|
||||
cur, prevNext = instr, instr.next
|
||||
} else {
|
||||
cur, prevNext = instr.prev, instr
|
||||
}
|
||||
|
||||
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
|
||||
store := m.allocateInstr()
|
||||
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
|
||||
switch typ {
|
||||
case ssa.TypeI32:
|
||||
store.asMovRM(v, mem, 4)
|
||||
case ssa.TypeI64:
|
||||
store.asMovRM(v, mem, 8)
|
||||
case ssa.TypeF32:
|
||||
store.asXmmMovRM(sseOpcodeMovss, v, mem)
|
||||
case ssa.TypeF64:
|
||||
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
|
||||
case ssa.TypeV128:
|
||||
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
|
||||
}
|
||||
|
||||
cur = linkInstr(cur, store)
|
||||
return linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
|
||||
if !v.IsRealReg() {
|
||||
panic("BUG: VReg must be backed by real reg to be stored")
|
||||
}
|
||||
|
||||
typ := m.c.TypeOf(v)
|
||||
var prevNext, cur *instruction
|
||||
if after {
|
||||
cur, prevNext = instr, instr.next
|
||||
} else {
|
||||
cur, prevNext = instr.prev, instr
|
||||
}
|
||||
|
||||
// Load the value to the temporary.
|
||||
load := m.allocateInstr()
|
||||
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
|
||||
a := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
|
||||
switch typ {
|
||||
case ssa.TypeI32:
|
||||
load.asMovzxRmR(extModeLQ, a, v)
|
||||
case ssa.TypeI64:
|
||||
load.asMov64MR(a, v)
|
||||
case ssa.TypeF32:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovss, a, v)
|
||||
case ssa.TypeF64:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovsd, a, v)
|
||||
case ssa.TypeV128:
|
||||
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, v)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
|
||||
cur = linkInstr(cur, load)
|
||||
return linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// ClobberedRegisters implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) ClobberedRegisters(regs []regalloc.VReg) {
|
||||
m.clobberedRegs = append(m.clobberedRegs[:0], regs...)
|
||||
}
|
||||
|
||||
// Swap implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {
|
||||
if x1.RegType() == regalloc.RegTypeInt {
|
||||
prevNext := cur.next
|
||||
xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8)
|
||||
cur = linkInstr(cur, xc)
|
||||
linkInstr(cur, prevNext)
|
||||
} else {
|
||||
if tmp.Valid() {
|
||||
prevNext := cur.next
|
||||
m.InsertMoveBefore(tmp, x1, prevNext)
|
||||
m.InsertMoveBefore(x1, x2, prevNext)
|
||||
m.InsertMoveBefore(x2, tmp, prevNext)
|
||||
} else {
|
||||
prevNext := cur.next
|
||||
r2 := x2.RealReg()
|
||||
// Temporarily spill x1 to stack.
|
||||
cur = m.InsertStoreRegisterAt(x1, cur, true).prev
|
||||
// Then move x2 to x1.
|
||||
cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1))
|
||||
linkInstr(cur, prevNext)
|
||||
// Then reload the original value on x1 from stack to r2.
|
||||
m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LastInstrForInsertion implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {
|
||||
cur := end
|
||||
for cur.kind == nop0 {
|
||||
cur = cur.prev
|
||||
if cur == begin {
|
||||
return end
|
||||
}
|
||||
}
|
||||
switch cur.kind {
|
||||
case jmp:
|
||||
return cur
|
||||
default:
|
||||
return end
|
||||
}
|
||||
}
|
||||
|
||||
// SSABlockLabel implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label {
|
||||
return m.ectx.SsaBlockIDToLabels[id]
|
||||
}
|
||||
992 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine_vec.go generated vendored Normal file
@@ -0,0 +1,992 @@
package amd64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
var swizzleMask = [16]byte{
|
||||
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
|
||||
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
|
||||
}
|
||||
|
||||
func (m *machine) lowerSwizzle(x, y ssa.Value, ret ssa.Value) {
|
||||
masklabel := m.getOrAllocateConstLabel(&m.constSwizzleMaskConstIndex, swizzleMask[:])
|
||||
|
||||
// Load mask to maskReg.
|
||||
maskReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(masklabel)), maskReg)
|
||||
m.insert(loadMask)
|
||||
|
||||
// Copy x and y to tmp registers.
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
tmpDst := m.copyToTmp(xx.reg())
|
||||
yy := m.getOperand_Reg(m.c.ValueDefinition(y))
|
||||
tmpX := m.copyToTmp(yy.reg())
|
||||
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddusb, newOperandReg(maskReg), tmpX))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpX), tmpDst))
|
||||
|
||||
// Copy the result to the destination register.
|
||||
m.copyTo(tmpDst, m.c.VRegOf(ret))
|
||||
}
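The 0x70 bias works because PADDUSB saturates and PSHUFB zeroes any lane whose index byte has bit 7 set, so every out-of-range Wasm swizzle index (>= 16) ends up selecting zero. A stand-alone sketch of that per-byte behaviour (illustrative only, not part of the vendored file):

// swizzleLane models one byte of PADDUSB with 0x70 followed by PSHUFB's selection rule.
func swizzleLane(idx byte) (selects byte, zeroed bool) {
    biased := idx + 0x70
    if biased < idx { // unsigned saturation: PADDUSB clamps instead of wrapping
        biased = 0xff
    }
    return biased & 0x0f, biased&0x80 != 0
}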
|
||||
|
||||
func (m *machine) lowerInsertLane(x, y ssa.Value, index byte, ret ssa.Value, lane ssa.VecLane) {
|
||||
// Copy x to tmp.
|
||||
tmpDst := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), tmpDst))
|
||||
|
||||
yy := m.getOperand_Reg(m.c.ValueDefinition(y))
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, index, yy, tmpDst))
|
||||
case ssa.VecLaneI16x8:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, index, yy, tmpDst))
|
||||
case ssa.VecLaneI32x4:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, index, yy, tmpDst))
|
||||
case ssa.VecLaneI64x2:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, index, yy, tmpDst))
|
||||
case ssa.VecLaneF32x4:
|
||||
// In the INSERTPS instruction, the destination index is encoded in bits 4 and 5 of the immediate.
|
||||
// See https://www.felixcloutier.com/x86/insertps
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, index<<4, yy, tmpDst))
|
||||
case ssa.VecLaneF64x2:
|
||||
if index == 0 {
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, yy, tmpDst))
|
||||
} else {
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, yy, tmpDst))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.copyTo(tmpDst, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerExtractLane(x ssa.Value, index byte, signed bool, ret ssa.Value, lane ssa.VecLane) {
|
||||
// Pextr variants are used to extract a lane from a vector register.
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
|
||||
tmpDst := m.c.AllocateVReg(ret.Type())
|
||||
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrb, index, xx, tmpDst))
|
||||
if signed {
|
||||
m.insert(m.allocateInstr().asMovsxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
|
||||
} else {
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
|
||||
}
|
||||
case ssa.VecLaneI16x8:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrw, index, xx, tmpDst))
|
||||
if signed {
|
||||
m.insert(m.allocateInstr().asMovsxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
|
||||
} else {
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrd, index, xx, tmpDst))
|
||||
case ssa.VecLaneI64x2:
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, index, xx, tmpDst))
|
||||
case ssa.VecLaneF32x4:
|
||||
if index == 0 {
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovss, xx, tmpDst))
|
||||
} else {
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, index, xx, tmpDst))
|
||||
}
|
||||
case ssa.VecLaneF64x2:
|
||||
if index == 0 {
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst))
|
||||
} else {
|
||||
m.copyTo(xx.reg(), tmpDst)
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0b00_00_11_10, newOperandReg(tmpDst), tmpDst))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.copyTo(tmpDst, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
var sqmulRoundSat = [16]byte{
|
||||
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
|
||||
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
|
||||
}
|
||||
|
||||
func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) {
|
||||
// See https://github.com/WebAssembly/simd/pull/365 for the following logic.
|
||||
maskLabel := m.getOrAllocateConstLabel(&m.constSqmulRoundSatIndex, sqmulRoundSat[:])
|
||||
|
||||
tmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp)
|
||||
m.insert(loadMask)
|
||||
|
||||
xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
|
||||
tmpX := m.copyToTmp(xx.reg())
|
||||
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmpX), tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX))
|
||||
|
||||
m.copyTo(tmpX, m.c.VRegOf(ret))
|
||||
}
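
// PMULHRSW performs a rounding Q15 multiply per signed 16-bit lane, which matches Wasm's
// q15mulr_sat_s except for the single overflow case 0x8000*0x8000 (i.e. -1.0*-1.0), where it
// yields 0x8000 instead of the saturated 0x7fff; the compare-and-xor above flips exactly those
// lanes. A scalar model of one lane (an illustrative sketch, not used by the backend):
func q15MulRSatScalarModel(a, b int16) int16 {
	p := (int32(a)*int32(b) + 0x4000) >> 15 // the rounding Q15 multiply computed by PMULHRSW
	if p > 0x7fff {                         // only reachable for a == b == -0x8000
		p = 0x7fff
	}
	return int16(p)
}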
|
||||
|
||||
func (m *machine) lowerVUshr(x, y, ret ssa.Value, lane ssa.VecLane) {
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.lowerVUshri8x16(x, y, ret)
|
||||
case ssa.VecLaneI16x8, ssa.VecLaneI32x4, ssa.VecLaneI64x2:
|
||||
m.lowerShr(x, y, ret, lane, false)
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
}
|
||||
|
||||
// i8x16LogicalSHRMaskTable is necessary for emulating non-existent packed bytes logical right shifts on amd64.
|
||||
// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
|
||||
var i8x16LogicalSHRMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes.
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
|
||||
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // for 1 shift
|
||||
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, // for 2 shift
|
||||
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, // for 3 shift
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // for 4 shift
|
||||
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // for 5 shift
|
||||
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // for 6 shift
|
||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // for 7 shift
|
||||
}
|
||||
|
||||
func (m *machine) lowerVUshri8x16(x, y, ret ssa.Value) {
|
||||
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
|
||||
// Load the modulo 8 mask to tmpReg.
|
||||
m.lowerIconst(tmpGpReg, 0x7, false)
|
||||
// Take the modulo 8 of the shift amount.
|
||||
shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, tmpGpReg, false))
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
vecTmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), vecTmp, false))
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlw, newOperandReg(vecTmp), xx))
|
||||
|
||||
maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16LogicalSHRMaskTableIndex, i8x16LogicalSHRMaskTable[:])
|
||||
base := m.c.AllocateVReg(ssa.TypeI64)
|
||||
lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
|
||||
m.insert(lea)
|
||||
|
||||
// Shift tmpGpReg by 4 to multiply the shift amount by 16.
|
||||
m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
|
||||
|
||||
mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
|
||||
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), vecTmp)
|
||||
m.insert(loadMask)
|
||||
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(vecTmp), xx))
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
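
// Worked example for the lowering above (illustrative): with a shift amount of 3, PSRLW
// shifts each 16-bit word right by 3, so the top 3 bits of every low byte receive bits from
// its neighboring high byte. Row 3 of i8x16LogicalSHRMaskTable (0x1f repeated) is then loaded
// via base + shiftAmt*16 and ANDed in, clearing those bits: the word 0xabcd becomes 0x1579
// after PSRLW, and masking each byte with 0x1f yields 0x1519, i.e. 0xab>>3 = 0x15 and
// 0xcd>>3 = 0x19.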
|
||||
|
||||
func (m *machine) lowerVSshr(x, y, ret ssa.Value, lane ssa.VecLane) {
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.lowerVSshri8x16(x, y, ret)
|
||||
case ssa.VecLaneI16x8, ssa.VecLaneI32x4:
|
||||
m.lowerShr(x, y, ret, lane, true)
|
||||
case ssa.VecLaneI64x2:
|
||||
m.lowerVSshri64x2(x, y, ret)
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerVSshri8x16(x, y, ret ssa.Value) {
|
||||
shiftAmtReg := m.c.AllocateVReg(ssa.TypeI32)
|
||||
// Load the modulo 8 mask to tmpReg.
|
||||
m.lowerIconst(shiftAmtReg, 0x7, false)
|
||||
// Take the modulo 8 of the shift amount.
|
||||
shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, shiftAmtReg, false))
|
||||
|
||||
// Copy the x value to two temporary registers.
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
vecTmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.copyTo(xx, vecTmp)
|
||||
|
||||
// Assuming that we have
|
||||
// xx = [b1, ..., b16]
|
||||
// vecTmp = [b1, ..., b16]
|
||||
// at this point, then we use PUNPCKLBW and PUNPCKHBW to produce:
|
||||
// xx = [b1, b1, b2, b2, ..., b8, b8]
|
||||
// vecTmp = [b9, b9, b10, b10, ..., b16, b16]
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpcklbw, newOperandReg(xx), xx))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpckhbw, newOperandReg(vecTmp), vecTmp))
|
||||
|
||||
// Add 8 to the shift amount, and then move the amount to vecTmp2.
|
||||
vecTmp2 := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(8), shiftAmtReg, false))
|
||||
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(shiftAmtReg), vecTmp2, false))
|
||||
|
||||
// Perform the word packed arithmetic right shifts on vreg and vecTmp.
|
||||
// This changes these two registers as:
|
||||
// xx = [xxx, b1 >> s, xxx, b2 >> s, ..., xxx, b8 >> s]
|
||||
// vecTmp = [xxx, b9 >> s, xxx, b10 >> s, ..., xxx, b16 >> s]
|
||||
// where xxx is 1 or 0 depending on each byte's sign, and ">>" is the arithmetic shift on a byte.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), xx))
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), vecTmp))
|
||||
|
||||
// Finally, we can get the result by packing these two word vectors.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePacksswb, newOperandReg(vecTmp), xx))
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerVSshri64x2(x, y, ret ssa.Value) {
|
||||
// Load the shift amount to RCX.
|
||||
shiftAmt := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, shiftAmt, rcxVReg))
|
||||
|
||||
tmpGp := m.c.AllocateVReg(ssa.TypeI64)
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xxReg := m.copyToTmp(_xx.reg())
|
||||
|
||||
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 0, newOperandReg(xxReg), tmpGp))
|
||||
m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), xxReg))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 1, newOperandReg(xxReg), tmpGp))
|
||||
m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), xxReg))
|
||||
|
||||
m.copyTo(xxReg, m.c.VRegOf(ret))
|
||||
}
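
// Note on the lowering above: a packed 64-bit arithmetic right shift is not available in
// SSE/AVX2 (VPSRAQ only arrived with AVX-512), so each lane is extracted with PEXTRQ, shifted
// with a scalar SAR (whose variable count must live in CL, hence RCX), and written back with
// PINSRQ. A scalar model (an illustrative sketch, not used by the backend):
func sshr64x2ScalarModel(v [2]int64, amt uint64) [2]int64 {
	s := amt & 63 // SAR masks the count to 6 bits, matching Wasm's modulo semantics
	return [2]int64{v[0] >> s, v[1] >> s}
}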
|
||||
|
||||
func (m *machine) lowerShr(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
var modulo uint64
|
||||
var shiftOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI16x8:
|
||||
modulo = 0xf
|
||||
if signed {
|
||||
shiftOp = sseOpcodePsraw
|
||||
} else {
|
||||
shiftOp = sseOpcodePsrlw
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
modulo = 0x1f
|
||||
if signed {
|
||||
shiftOp = sseOpcodePsrad
|
||||
} else {
|
||||
shiftOp = sseOpcodePsrld
|
||||
}
|
||||
case ssa.VecLaneI64x2:
|
||||
modulo = 0x3f
|
||||
if signed {
|
||||
panic("BUG")
|
||||
}
|
||||
shiftOp = sseOpcodePsrlq
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
|
||||
// Load the modulo 8 mask to tmpReg.
|
||||
m.lowerIconst(tmpGpReg, modulo, false)
|
||||
// Take the modulo 8 of the shift amount.
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
|
||||
m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
|
||||
// And move it to a xmm register.
|
||||
tmpVec := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
|
||||
|
||||
// Then do the actual shift.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerVIshl(x, y, ret ssa.Value, lane ssa.VecLane) {
|
||||
var modulo uint64
|
||||
var shiftOp sseOpcode
|
||||
var isI8x16 bool
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
isI8x16 = true
|
||||
modulo = 0x7
|
||||
shiftOp = sseOpcodePsllw
|
||||
case ssa.VecLaneI16x8:
|
||||
modulo = 0xf
|
||||
shiftOp = sseOpcodePsllw
|
||||
case ssa.VecLaneI32x4:
|
||||
modulo = 0x1f
|
||||
shiftOp = sseOpcodePslld
|
||||
case ssa.VecLaneI64x2:
|
||||
modulo = 0x3f
|
||||
shiftOp = sseOpcodePsllq
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
|
||||
// Load the modulo 8 mask to tmpReg.
|
||||
m.lowerIconst(tmpGpReg, modulo, false)
|
||||
// Take the modulo 8 of the shift amount.
|
||||
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
|
||||
m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
|
||||
// And move it to a xmm register.
|
||||
tmpVec := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
|
||||
|
||||
// Then do the actual shift.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
|
||||
|
||||
if isI8x16 {
|
||||
maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16SHLMaskTableIndex, i8x16SHLMaskTable[:])
|
||||
base := m.c.AllocateVReg(ssa.TypeI64)
|
||||
lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
|
||||
m.insert(lea)
|
||||
|
||||
// Shift tmpGpReg by 4 to multiply the shift amount by 16.
|
||||
m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
|
||||
|
||||
mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
|
||||
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), tmpVec)
|
||||
m.insert(loadMask)
|
||||
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(tmpVec), xx))
|
||||
}
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
// i8x16SHLMaskTable is necessary for emulating non-existent packed bytes left shifts on amd64.
|
||||
// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
|
||||
var i8x16SHLMaskTable = [8 * 16]byte{ // (the number of possible shift amount 0, 1, ..., 7.) * 16 bytes.
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
|
||||
0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, // for 1 shift
|
||||
0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, // for 2 shift
|
||||
0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, // for 3 shift
|
||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // for 4 shift
|
||||
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, // for 5 shift
|
||||
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, // for 6 shift
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // for 7 shift
|
||||
}
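
// Worked example (illustrative): to shift bytes left by 3, lowerVIshl above performs PSLLW
// by 3, so the bottom 3 bits of every high byte receive bits from its neighboring low byte.
// Row 3 of this table (0xf8 repeated) is then loaded via base + shiftAmt*16 and ANDed in to
// clear them: the word 0xabcd becomes 0x5e68 after PSLLW, and masking each byte with 0xf8
// yields 0x5868, i.e. (0xab<<3)&0xff = 0x58 and (0xcd<<3)&0xff = 0x68.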
|
||||
|
||||
func (m *machine) lowerVRound(x, ret ssa.Value, imm byte, _64 bool) {
|
||||
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
|
||||
var round sseOpcode
|
||||
if _64 {
|
||||
round = sseOpcodeRoundpd
|
||||
} else {
|
||||
round = sseOpcodeRoundps
|
||||
}
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmRImm(round, imm, xx, m.c.VRegOf(ret)))
|
||||
}
|
||||
|
||||
var (
|
||||
allOnesI8x16 = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}
|
||||
allOnesI16x8 = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0}
|
||||
extAddPairwiseI16x8uMask1 = [16]byte{0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80}
|
||||
extAddPairwiseI16x8uMask2 = [16]byte{0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00}
|
||||
)
|
||||
|
||||
func (m *machine) lowerExtIaddPairwise(x, ret ssa.Value, srcLane ssa.VecLane, signed bool) {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
switch srcLane {
|
||||
case ssa.VecLaneI8x16:
|
||||
allOneReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
mask := m.getOrAllocateConstLabel(&m.constAllOnesI8x16Index, allOnesI8x16[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOneReg))
|
||||
|
||||
var resultReg regalloc.VReg
|
||||
if signed {
|
||||
resultReg = allOneReg
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(xx), resultReg))
|
||||
} else {
|
||||
// Interpret tmp (all ones) as signed bytes, meaning that all the multiply-adds are effectively unsigned.
|
||||
resultReg = xx
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(allOneReg), resultReg))
|
||||
}
|
||||
m.copyTo(resultReg, m.c.VRegOf(ret))
|
||||
|
||||
case ssa.VecLaneI16x8:
|
||||
if signed {
|
||||
allOnesReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
mask := m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOnesReg))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(allOnesReg), xx))
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
} else {
|
||||
maskReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
mask := m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask1Index, extAddPairwiseI16x8uMask1[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
|
||||
|
||||
// Flip the sign bits on xx.
|
||||
//
|
||||
// Assuming that xx = [w1, ..., w8] (unsigned 16-bit lanes), we now have
// xx[i] = int16(wi - 0x8000) for i = 0...7, i.e. each lane is biased down by 0x8000.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(maskReg), xx))
|
||||
|
||||
mask = m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
|
||||
|
||||
// For i = 0,...,3 (as this results in i32x4 lanes), PMADDWD against the all-ones i16 mask gives
// xx[i] = int32((w2i - 0x8000) + (w2i+1 - 0x8000)) = int32(w2i + w2i+1) - 0x10000
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(maskReg), xx))
|
||||
|
||||
mask = m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask2Index, extAddPairwiseI16x8uMask2[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
|
||||
|
||||
// Adding 0x10000 to each 32-bit lane restores the unsigned sum:
// xx[i] = (int32(w2i + w2i+1) - 0x10000) + 0x10000 = uint32(w2i + w2i+1).
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(maskReg), xx))
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", srcLane))
|
||||
}
|
||||
}
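
// Worked example for the unsigned i16x8 path above (illustrative): for a pair of lanes
// w0 = w1 = 0xffff, the expected extended pairwise sum is 0x1fffe. After the PXOR bias each
// lane reads as 0x7fff (= 0xffff - 0x8000) when interpreted as signed, PMADDWD gives
// 0x7fff + 0x7fff = 0xfffe, and PADDD with 0x10000 restores 0x1fffe, the correct uint32 result.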
|
||||
|
||||
func (m *machine) lowerWidenLow(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
var sseOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxbw
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxbw
|
||||
}
|
||||
case ssa.VecLaneI16x8:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxwd
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxwd
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxdq
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxdq
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, xx, m.c.VRegOf(ret)))
|
||||
}
|
||||
|
||||
func (m *machine) lowerWidenHigh(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
tmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
m.copyTo(xx.reg(), tmp)
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePalignr, 8, newOperandReg(tmp), tmp))
|
||||
|
||||
var sseOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxbw
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxbw
|
||||
}
|
||||
case ssa.VecLaneI16x8:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxwd
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxwd
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
if signed {
|
||||
sseOp = sseOpcodePmovsxdq
|
||||
} else {
|
||||
sseOp = sseOpcodePmovzxdq
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, newOperandReg(tmp), m.c.VRegOf(ret)))
|
||||
}
|
||||
|
||||
func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, ret ssa.Value, lane ssa.VecLane) {
|
||||
tmpDst, tmpGp := m.c.AllocateVReg(ssa.TypeV128), m.c.AllocateVReg(ssa.TypeI64)
|
||||
am := newOperandMem(m.lowerToAddressMode(ptr, offset))
|
||||
|
||||
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, am, tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, newOperandReg(tmpGp), tmpDst))
|
||||
tmpZeroVec := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asZeros(tmpZeroVec))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpZeroVec), tmpDst))
|
||||
case ssa.VecLaneI16x8:
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, am, tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, newOperandReg(tmpGp), tmpDst))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, newOperandReg(tmpGp), tmpDst))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
|
||||
case ssa.VecLaneI32x4:
|
||||
m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, am, tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, newOperandReg(tmpGp), tmpDst))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
|
||||
case ssa.VecLaneI64x2:
|
||||
m.insert(m.allocateInstr().asMov64MR(am, tmpGp))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), tmpDst))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), tmpDst))
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.copyTo(tmpDst, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
var f64x2CvtFromIMask = [16]byte{
|
||||
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
}
|
||||
|
||||
func (m *machine) lowerVFcvtFromInt(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
switch lane {
|
||||
case ssa.VecLaneF32x4:
|
||||
if signed {
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, xx, m.c.VRegOf(ret)))
|
||||
} else {
|
||||
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
// Copy the value to two temporary registers.
|
||||
tmp := m.copyToTmp(xx.reg())
|
||||
tmp2 := m.copyToTmp(xx.reg())
|
||||
|
||||
// Clear the higher 10 bits of each 32-bit element, keeping the lower 22 bits in tmp.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePslld, newOperandImm32(0xa), tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0xa), tmp))
|
||||
|
||||
// Subtract tmp (the lower 22 bits) from tmp2, leaving only the higher 10 bits in tmp2.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubd, newOperandReg(tmp), tmp2))
|
||||
|
||||
// Convert the lower 22 bits in tmp; this conversion is exact.
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
|
||||
|
||||
// Logical right shift by one and convert tmp2, so that tmp2 holds the halved conversion result of the higher bits.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp2), tmp2))
|
||||
|
||||
// Double the converted halved higher bits.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp2), tmp2))
|
||||
|
||||
// Get the conversion result by adding tmp (holding the lower-bits conversion) into tmp2.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp), tmp2))
|
||||
|
||||
m.copyTo(tmp2, m.c.VRegOf(ret))
|
||||
}
|
||||
case ssa.VecLaneF64x2:
|
||||
if signed {
|
||||
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2pd, xx, m.c.VRegOf(ret)))
|
||||
} else {
|
||||
maskReg := m.c.AllocateVReg(ssa.TypeV128)
|
||||
maskLabel := m.getOrAllocateConstLabel(&m.constF64x2CvtFromIMaskIndex, f64x2CvtFromIMask[:])
|
||||
// maskReg = [0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
|
||||
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
// Given that we have xx = [d1, d2, d3, d4], this results in
|
||||
// xx = [d1, [0x00, 0x00, 0x30, 0x43], d2, [0x00, 0x00, 0x30, 0x43]]
|
||||
// = [float64(uint32(d1)) + 0x1.0p52, float64(uint32(d2)) + 0x1.0p52]
|
||||
// ^See https://stackoverflow.com/questions/13269523/can-all-32-bit-ints-be-exactly-represented-as-a-double
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeUnpcklps, newOperandReg(maskReg), xx))
|
||||
|
||||
// maskReg = [float64(0x1.0p52), float64(0x1.0p52)]
|
||||
maskLabel = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
|
||||
|
||||
// Now, we get the result as
|
||||
// xx = [float64(uint32(d1)), float64(uint32(d2))]
|
||||
// because the following equality always satisfies:
|
||||
// float64(0x1.0p52 + float64(uint32(x))) - float64(0x1.0p52 + float64(uint32(y))) = float64(uint32(x)) - float64(uint32(y))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubpd, newOperandReg(maskReg), xx))
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
}
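
// The f64x2 unsigned path above uses the usual 0x1.0p52 trick: placing a uint32 value in the
// low mantissa bits of a float64 whose exponent encodes 2^52 yields exactly 2^52 + float64(u),
// so subtracting 2^52 recovers float64(u) with no rounding. A scalar model of one lane
// (an illustrative sketch, not used by the backend):
//
//	bits := uint64(0x4330000000000000) | uint64(u) // 0x1.0p52 with u in the low 32 bits
//	f := math.Float64frombits(bits) - 0x1.0p52     // == float64(u), exactly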
|
||||
|
||||
var (
|
||||
// i32sMaxOnF64x2 holds math.MaxInt32(=2147483647.0) on two f64 lanes.
|
||||
i32sMaxOnF64x2 = [16]byte{
|
||||
0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
|
||||
0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
|
||||
}
|
||||
|
||||
// i32uMaxOnF64x2 holds math.MaxUint32(=4294967295.0) on two f64 lanes.
|
||||
i32uMaxOnF64x2 = [16]byte{
|
||||
0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
|
||||
0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
|
||||
}
|
||||
|
||||
// twop52 holds two float64(0x1.0p52) values on two f64 lanes. 0x1.0p52 is special in that,
// with this exponent, the low bits of the mantissa directly encode a uint32 value, so after
// arithmetic such as addition or subtraction the resulting float still holds exactly the
// bit representation of the corresponding 32-bit integer in its mantissa.
//
// Note: the name twop52 is common across various compiler ecosystems.
|
||||
// E.g. https://github.com/llvm/llvm-project/blob/92ab024f81e5b64e258b7c3baaf213c7c26fcf40/compiler-rt/lib/builtins/floatdidf.c#L28
|
||||
// E.g. https://opensource.apple.com/source/clang/clang-425.0.24/src/projects/compiler-rt/lib/floatdidf.c.auto.html
|
||||
twop52 = [16]byte{
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
|
||||
}
|
||||
)
|
||||
|
||||
func (m *machine) lowerVFcvtToIntSat(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
switch lane {
|
||||
case ssa.VecLaneF32x4:
|
||||
if signed {
|
||||
tmp := m.copyToTmp(xx)
|
||||
|
||||
// Assuming we have xx = [v1, v2, v3, v4].
|
||||
//
|
||||
// Set all bits if lane is not NaN on tmp.
|
||||
// tmp[i] = 0xffffffff if vi != NaN
|
||||
// = 0 if vi == NaN
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
|
||||
|
||||
// Clear NaN lanes on xx, meaning that
|
||||
// xx[i] = vi if vi != NaN
|
||||
// 0 if vi == NaN
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp), xx))
|
||||
|
||||
// tmp[i] = ^vi if vi != NaN
|
||||
// = 0xffffffff if vi == NaN
|
||||
// which means that tmp[i] & 0x80000000 != 0 if and only if vi is negative.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeXorps, newOperandReg(xx), tmp))
|
||||
|
||||
// xx[i] = int32(vi) if vi != NaN and xx is not overflowing.
|
||||
// = 0x80000000 if vi != NaN and xx is overflowing (See https://www.felixcloutier.com/x86/cvttps2dq)
|
||||
// = 0 if vi == NaN
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
|
||||
|
||||
// Below, we have to convert 0x80000000 into 0x7FFFFFFF for positive overflowing lane.
|
||||
//
|
||||
// tmp[i] = 0x80000000 if vi is positive
|
||||
// = some value whose sign bit is clear (any & 0x80000000 == 0) if vi is negative or zero.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(xx), tmp))
|
||||
|
||||
// Arithmetic right shifting tmp by 31, meaning that we have
|
||||
// tmp[i] = 0xffffffff if vi is positive, 0 otherwise.
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrad, newOperandImm32(0x1f), tmp))
|
||||
|
||||
// Flipping 0x80000000 if vi is positive, otherwise keep intact.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), xx))
|
||||
} else {
|
||||
tmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asZeros(tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxps, newOperandReg(tmp), xx))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0x1), tmp))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
|
||||
tmp2 := m.copyToTmp(xx)
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubps, newOperandReg(tmp), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredLE_OS), newOperandReg(tmp2), tmp))
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(tmp2), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaxsd, newOperandReg(tmp), tmp2))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(tmp2), xx))
|
||||
}
|
||||
|
||||
case ssa.VecLaneF64x2:
|
||||
tmp2 := m.c.AllocateVReg(ssa.TypeV128)
|
||||
if signed {
|
||||
tmp := m.copyToTmp(xx)
|
||||
|
||||
// Set all bits for non-NaN lanes, zeros otherwise.
|
||||
// I.e. tmp[i] = 0xffffffff_ffffffff if vi != NaN, 0 otherwise.
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
|
||||
|
||||
maskLabel := m.getOrAllocateConstLabel(&m.constI32sMaxOnF64x2Index, i32sMaxOnF64x2[:])
|
||||
// Load the 2147483647 into tmp2's each lane.
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp2))
|
||||
|
||||
// tmp[i] = 2147483647 if vi != NaN, 0 otherwise.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp2), tmp))
|
||||
|
||||
// MINPD returns the source register's value as-is, so we have
|
||||
// xx[i] = vi if vi != NaN
|
||||
// = 0 if vi == NaN
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp), xx))
|
||||
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttpd2dq, newOperandReg(xx), xx))
|
||||
} else {
|
||||
tmp := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.insert(m.allocateInstr().asZeros(tmp))
|
||||
|
||||
// xx[i] = vi if vi != NaN && vi > 0
|
||||
// = 0 if vi == NaN || vi <= 0
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxpd, newOperandReg(tmp), xx))
|
||||
|
||||
// tmp2[i] = float64(math.MaxUint32) = 4294967295.0 (exactly representable in float64)
|
||||
maskIndex := m.getOrAllocateConstLabel(&m.constI32uMaxOnF64x2Index, i32uMaxOnF64x2[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
|
||||
|
||||
// xx[i] = vi if vi != NaN && vi > 0 && vi <= math.MaxUint32
|
||||
// = 0 otherwise
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp2), xx))
|
||||
|
||||
// Round the floating points into integer.
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeRoundpd, 0x3, newOperandReg(xx), xx))
|
||||
|
||||
// tmp2[i] = float64(0x1.0p52)
|
||||
maskIndex = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
|
||||
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
|
||||
|
||||
// xx[i] = float64(0x1.0p52) + float64(uint32(vi)) if vi != NaN && vi > 0 && vi <= math.MaxUint32
|
||||
// = 0 otherwise
|
||||
//
|
||||
// This means that xx[i] holds exactly the same bit of uint32(vi) in its lower 32-bits.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddpd, newOperandReg(tmp2), xx))
|
||||
|
||||
// At this point, we have
|
||||
// xx = [uint32(v0), float64(0x1.0p52), uint32(v1), float64(0x1.0p52)]
|
||||
// tmp = [0, 0, 0, 0]
|
||||
// as 32x4 lanes. Therefore, SHUFPS with 0b00_00_10_00 results in
|
||||
// xx = [xx[00], xx[10], tmp[00], tmp[00]] = [xx[00], xx[10], 0, 0]
|
||||
// meaning that for i = 0 and 1, we have
|
||||
// xx[i] = uint32(vi) if vi != NaN && vi > 0 && vi <= math.MaxUint32
|
||||
// = 0 otherwise.
|
||||
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeShufps, 0b00_00_10_00, newOperandReg(tmp), xx))
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerNarrow(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
|
||||
|
||||
var sseOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI16x8:
|
||||
if signed {
|
||||
sseOp = sseOpcodePacksswb
|
||||
} else {
|
||||
sseOp = sseOpcodePackuswb
|
||||
}
|
||||
case ssa.VecLaneI32x4:
|
||||
if signed {
|
||||
sseOp = sseOpcodePackssdw
|
||||
} else {
|
||||
sseOp = sseOpcodePackusdw
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid lane type: %s", lane))
|
||||
}
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOp, yy, xx))
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerWideningPairwiseDotProductS(x, y, ret ssa.Value) {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, yy, xx))
|
||||
m.copyTo(xx, m.c.VRegOf(ret))
|
||||
}
|
||||
|
||||
func (m *machine) lowerVIabs(instr *ssa.Instruction) {
|
||||
x, lane := instr.ArgWithLane()
|
||||
rd := m.c.VRegOf(instr.Return())
|
||||
|
||||
if lane == ssa.VecLaneI64x2 {
|
||||
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
|
||||
blendReg := xmm0VReg
|
||||
m.insert(m.allocateInstr().asDefineUninitializedReg(blendReg))
|
||||
|
||||
tmp := m.copyToTmp(_xx.reg())
|
||||
xx := m.copyToTmp(_xx.reg())
|
||||
|
||||
// Clear all bits on blendReg.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(blendReg), blendReg))
|
||||
// Subtract xx from blendMaskReg.
|
||||
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubq, newOperandReg(xx), blendReg))
|
||||
// Copy the subtracted value ^^ back into tmp.
|
||||
m.copyTo(blendReg, xx)
|
||||
|
||||
m.insert(m.allocateInstr().asBlendvpd(newOperandReg(tmp), xx))
|
||||
|
||||
m.copyTo(xx, rd)
|
||||
} else {
|
||||
var vecOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI8x16:
|
||||
vecOp = sseOpcodePabsb
|
||||
case ssa.VecLaneI16x8:
|
||||
vecOp = sseOpcodePabsw
|
||||
case ssa.VecLaneI32x4:
|
||||
vecOp = sseOpcodePabsd
|
||||
}
|
||||
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
|
||||
i := m.allocateInstr()
|
||||
i.asXmmUnaryRmR(vecOp, rn, rd)
|
||||
m.insert(i)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerVIpopcnt(instr *ssa.Instruction) {
|
||||
x := instr.Arg()
|
||||
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
rd := m.c.VRegOf(instr.Return())
|
||||
|
||||
tmp1 := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.lowerVconst(tmp1, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f)
|
||||
|
||||
// Copy input into tmp2.
|
||||
tmp2 := m.copyToTmp(rn.reg())
|
||||
|
||||
// Given that we have:
|
||||
// rm = [b1, ..., b16] where bn = hn:ln and hn and ln are higher and lower 4-bits of bn.
|
||||
//
|
||||
// Take PAND on tmp1 and tmp2, so that we mask out all the higher bits.
|
||||
// tmp2 = [l1, ..., l16].
|
||||
pand := m.allocateInstr()
|
||||
pand.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp2)
|
||||
m.insert(pand)
|
||||
|
||||
// Do logical (packed word) right shift by 4 on rm and PAND against the mask (tmp1); meaning that we have
|
||||
// tmp3 = [h1, ...., h16].
|
||||
tmp3 := m.copyToTmp(rn.reg())
|
||||
psrlw := m.allocateInstr()
|
||||
psrlw.asXmmRmiReg(sseOpcodePsrlw, newOperandImm32(4), tmp3)
|
||||
m.insert(psrlw)
|
||||
|
||||
pand2 := m.allocateInstr()
|
||||
pand2.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp3)
|
||||
m.insert(pand2)
|
||||
|
||||
// Read the popcntTable into tmp4, and we have
|
||||
// tmp4 = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04]
|
||||
tmp4 := m.c.AllocateVReg(ssa.TypeV128)
|
||||
m.lowerVconst(tmp4, 0x03_02_02_01_02_01_01_00, 0x04_03_03_02_03_02_02_01)
|
||||
|
||||
// Make a copy for later.
|
||||
tmp5 := m.copyToTmp(tmp4)
|
||||
|
||||
// tmp4 = [popcnt(l1), ..., popcnt(l16)].
|
||||
pshufb := m.allocateInstr()
|
||||
pshufb.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp2), tmp4)
|
||||
m.insert(pshufb)
|
||||
|
||||
pshufb2 := m.allocateInstr()
|
||||
pshufb2.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp3), tmp5)
|
||||
m.insert(pshufb2)
|
||||
|
||||
// tmp4 + tmp5 is the result.
|
||||
paddb := m.allocateInstr()
|
||||
paddb.asXmmRmR(sseOpcodePaddb, newOperandReg(tmp4), tmp5)
|
||||
m.insert(paddb)
|
||||
|
||||
m.copyTo(tmp5, rd)
|
||||
}
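
// Worked example (illustrative): for the input byte 0xb5 = 0b1011_0101, the low nibble is 0x5
// and the high nibble is 0xb, so the two PSHUFB lookups into the table
// [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] return 2 and 3 respectively, and PADDB yields
// popcount(0xb5) = 5. A scalar model of one byte (a sketch, not used by the backend):
func popcnt8NibbleModel(b byte) byte {
	lut := [16]byte{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}
	return lut[b&0x0f] + lut[b>>4]
}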
|
||||
|
||||
func (m *machine) lowerVImul(instr *ssa.Instruction) {
|
||||
x, y, lane := instr.Arg2WithLane()
|
||||
rd := m.c.VRegOf(instr.Return())
|
||||
if lane == ssa.VecLaneI64x2 {
|
||||
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
|
||||
rm := m.getOperand_Reg(m.c.ValueDefinition(y))
|
||||
// Assuming that we have
|
||||
// rm = [p1, p2] = [p1_lo, p1_high, p2_lo, p2_high]
// rn = [q1, q2] = [q1_lo, q1_high, q2_lo, q2_high]
// where pN and qN are 64-bit (quad word) lanes, and pN_lo, pN_high, qN_lo and qN_high are 32-bit (double word) lanes.
|
||||
|
||||
// Copy rn into tmp1.
|
||||
tmp1 := m.copyToTmp(rn.reg())
|
||||
|
||||
// And do the logical right shift by 32-bit on tmp1, which makes tmp1 = [0, p1_high, 0, p2_high]
|
||||
shift := m.allocateInstr()
|
||||
shift.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp1)
|
||||
m.insert(shift)
|
||||
|
||||
// Execute "pmuludq rm,tmp1", which makes tmp1 = [p1_high*q1_lo, p2_high*q2_lo] where each lane is 64-bit.
|
||||
mul := m.allocateInstr()
|
||||
mul.asXmmRmR(sseOpcodePmuludq, rm, tmp1)
|
||||
m.insert(mul)
|
||||
|
||||
// Copy rm value into tmp2.
|
||||
tmp2 := m.copyToTmp(rm.reg())
|
||||
|
||||
// And do the logical right shift by 32-bit on tmp2, which makes tmp2 = [0, q1_high, 0, q2_high]
|
||||
shift2 := m.allocateInstr()
|
||||
shift2.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp2)
|
||||
m.insert(shift2)
|
||||
|
||||
// Execute "pmuludq rm,tmp2", which makes tmp2 = [p1_lo*q1_high, p2_lo*q2_high] where each lane is 64-bit.
|
||||
mul2 := m.allocateInstr()
|
||||
mul2.asXmmRmR(sseOpcodePmuludq, rn, tmp2)
|
||||
m.insert(mul2)
|
||||
|
||||
// Adds tmp1 and tmp2 and do the logical left shift by 32-bit,
|
||||
// which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32, (p2_lo*q2_high+p2_high*q2_lo)<<32]
|
||||
add := m.allocateInstr()
|
||||
add.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp2), tmp1)
|
||||
m.insert(add)
|
||||
|
||||
shift3 := m.allocateInstr()
|
||||
shift3.asXmmRmiReg(sseOpcodePsllq, newOperandImm32(32), tmp1)
|
||||
m.insert(shift3)
|
||||
|
||||
// Copy rm value into tmp3.
|
||||
tmp3 := m.copyToTmp(rm.reg())
|
||||
|
||||
// "pmuludq rm,tmp3" makes tmp3 = [p1_lo*q1_lo, p2_lo*q2_lo] where each lane is 64-bit.
|
||||
mul3 := m.allocateInstr()
|
||||
mul3.asXmmRmR(sseOpcodePmuludq, rn, tmp3)
|
||||
m.insert(mul3)
|
||||
|
||||
// Finally, we get the result by computing tmp1 + tmp3,
|
||||
// which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32+p1_lo*q1_lo, (p2_lo*q2_high+p2_high*q2_lo)<<32+p2_lo*q2_lo]
|
||||
add2 := m.allocateInstr()
|
||||
add2.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp3), tmp1)
|
||||
m.insert(add2)
|
||||
|
||||
m.copyTo(tmp1, rd)
|
||||
|
||||
} else {
|
||||
var vecOp sseOpcode
|
||||
switch lane {
|
||||
case ssa.VecLaneI16x8:
|
||||
vecOp = sseOpcodePmullw
|
||||
case ssa.VecLaneI32x4:
|
||||
vecOp = sseOpcodePmulld
|
||||
default:
|
||||
panic("unsupported: " + lane.String())
|
||||
}
|
||||
m.lowerVbBinOp(vecOp, x, y, instr.Return())
|
||||
}
|
||||
}
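
// The i64x2 path above is the standard 32x32 decomposition: with p = p_lo + 2^32*p_high and
// q = q_lo + 2^32*q_high, the low 64 bits of p*q are
//
//	p*q mod 2^64 = p_lo*q_lo + 2^32*(p_lo*q_high + p_high*q_lo)  (mod 2^64)
//
// since the 2^64*p_high*q_high term vanishes, and PMULUDQ supplies the 32x32->64 partial
// products. A scalar model (an illustrative sketch, not used by the backend):
func mul64ScalarModel(p, q uint64) uint64 {
	pLo, pHi := p&0xffffffff, p>>32
	qLo, qHi := q&0xffffffff, q>>32
	return pLo*qLo + ((pLo*qHi + pHi*qLo) << 32) // all arithmetic is naturally mod 2^64
}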
|
||||
346
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/operands.go
generated
vendored
Normal file
@@ -0,0 +1,346 @@
|
|||
package amd64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
type operand struct {
|
||||
kind operandKind
|
||||
data uint64
|
||||
}
|
||||
|
||||
type operandKind byte
|
||||
|
||||
const (
|
||||
// operandKindReg is an operand which is an integer Register.
|
||||
operandKindReg operandKind = iota + 1
|
||||
|
||||
// operandKindMem is a value in Memory.
|
||||
// 32, 64, or 128 bit value.
|
||||
operandKindMem
|
||||
|
||||
// operandKindImm32 is a signed-32-bit integer immediate value.
|
||||
operandKindImm32
|
||||
|
||||
// operandKindLabel is a label.
|
||||
operandKindLabel
|
||||
)
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
func (o operandKind) String() string {
|
||||
switch o {
|
||||
case operandKindReg:
|
||||
return "reg"
|
||||
case operandKindMem:
|
||||
return "mem"
|
||||
case operandKindImm32:
|
||||
return "imm32"
|
||||
case operandKindLabel:
|
||||
return "label"
|
||||
default:
|
||||
panic("BUG: invalid operand kind")
|
||||
}
|
||||
}
|
||||
|
||||
// format returns the string representation of the operand.
|
||||
// _64 is only for the case where the operand is a register, and it's integer.
|
||||
func (o *operand) format(_64 bool) string {
|
||||
switch o.kind {
|
||||
case operandKindReg:
|
||||
return formatVRegSized(o.reg(), _64)
|
||||
case operandKindMem:
|
||||
return o.addressMode().String()
|
||||
case operandKindImm32:
|
||||
return fmt.Sprintf("$%d", int32(o.imm32()))
|
||||
case operandKindLabel:
|
||||
return backend.Label(o.imm32()).String()
|
||||
default:
|
||||
panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind))
|
||||
}
|
||||
}
|
||||
|
||||
//go:inline
|
||||
func (o *operand) reg() regalloc.VReg {
|
||||
return regalloc.VReg(o.data)
|
||||
}
|
||||
|
||||
//go:inline
|
||||
func (o *operand) setReg(r regalloc.VReg) {
|
||||
o.data = uint64(r)
|
||||
}
|
||||
|
||||
//go:inline
|
||||
func (o *operand) addressMode() *amode {
|
||||
return wazevoapi.PtrFromUintptr[amode](uintptr(o.data))
|
||||
}
|
||||
|
||||
//go:inline
|
||||
func (o *operand) imm32() uint32 {
|
||||
return uint32(o.data)
|
||||
}
|
||||
|
||||
func (o *operand) label() backend.Label {
|
||||
switch o.kind {
|
||||
case operandKindLabel:
|
||||
return backend.Label(o.data)
|
||||
case operandKindMem:
|
||||
mem := o.addressMode()
|
||||
if mem.kind() != amodeRipRel {
|
||||
panic("BUG: invalid label")
|
||||
}
|
||||
return backend.Label(mem.imm32)
|
||||
default:
|
||||
panic("BUG: invalid operand kind")
|
||||
}
|
||||
}
|
||||
|
||||
func newOperandLabel(label backend.Label) operand {
|
||||
return operand{kind: operandKindLabel, data: uint64(label)}
|
||||
}
|
||||
|
||||
func newOperandReg(r regalloc.VReg) operand {
|
||||
return operand{kind: operandKindReg, data: uint64(r)}
|
||||
}
|
||||
|
||||
func newOperandImm32(imm32 uint32) operand {
|
||||
return operand{kind: operandKindImm32, data: uint64(imm32)}
|
||||
}
|
||||
|
||||
func newOperandMem(amode *amode) operand {
|
||||
return operand{kind: operandKindMem, data: uint64(uintptr(unsafe.Pointer(amode)))}
|
||||
}
|
||||
|
||||
// amode is a memory operand (addressing mode).
|
||||
type amode struct {
|
||||
kindWithShift uint32
|
||||
imm32 uint32
|
||||
base regalloc.VReg
|
||||
|
||||
// For amodeRegRegShift:
|
||||
index regalloc.VReg
|
||||
}
|
||||
|
||||
type amodeKind byte
|
||||
|
||||
const (
|
||||
// amodeImmReg calculates sign-extend-32-to-64(Immediate) + base
|
||||
amodeImmReg amodeKind = iota + 1
|
||||
|
||||
// amodeImmRBP is the same as amodeImmReg, but the base register is fixed to RBP.
|
||||
// The only difference is that it doesn't report RBP as a use to the register allocator, since
// that would only distract the allocator.
|
||||
amodeImmRBP
|
||||
|
||||
// amodeRegRegShift calculates sign-extend-32-to-64(Immediate) + base + (Register2 << Shift)
|
||||
amodeRegRegShift
|
||||
|
||||
// amodeRipRel is a RIP-relative addressing mode specified by the label.
|
||||
amodeRipRel
|
||||
|
||||
// TODO: there are other addressing modes such as the one without base register.
|
||||
)
|
||||
|
||||
func (a *amode) kind() amodeKind {
|
||||
return amodeKind(a.kindWithShift & 0xff)
|
||||
}
|
||||
|
||||
func (a *amode) shift() byte {
|
||||
return byte(a.kindWithShift >> 8)
|
||||
}
|
||||
|
||||
func (a *amode) uses(rs *[]regalloc.VReg) {
|
||||
switch a.kind() {
|
||||
case amodeImmReg:
|
||||
*rs = append(*rs, a.base)
|
||||
case amodeRegRegShift:
|
||||
*rs = append(*rs, a.base, a.index)
|
||||
case amodeImmRBP, amodeRipRel:
|
||||
default:
|
||||
panic("BUG: invalid amode kind")
|
||||
}
|
||||
}
|
||||
|
||||
func (a *amode) nregs() int {
|
||||
switch a.kind() {
|
||||
case amodeImmReg:
|
||||
return 1
|
||||
case amodeRegRegShift:
|
||||
return 2
|
||||
case amodeImmRBP, amodeRipRel:
|
||||
return 0
|
||||
default:
|
||||
panic("BUG: invalid amode kind")
|
||||
}
|
||||
}
|
||||
|
||||
func (a *amode) assignUses(i int, reg regalloc.VReg) {
|
||||
switch a.kind() {
|
||||
case amodeImmReg:
|
||||
if i == 0 {
|
||||
a.base = reg
|
||||
} else {
|
||||
panic("BUG: invalid amode assignment")
|
||||
}
|
||||
case amodeRegRegShift:
|
||||
if i == 0 {
|
||||
a.base = reg
|
||||
} else if i == 1 {
|
||||
a.index = reg
|
||||
} else {
|
||||
panic("BUG: invalid amode assignment")
|
||||
}
|
||||
default:
|
||||
panic("BUG: invalid amode assignment")
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) newAmodeImmReg(imm32 uint32, base regalloc.VReg) *amode {
|
||||
ret := m.amodePool.Allocate()
|
||||
*ret = amode{kindWithShift: uint32(amodeImmReg), imm32: imm32, base: base}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (m *machine) newAmodeImmRBPReg(imm32 uint32) *amode {
|
||||
ret := m.amodePool.Allocate()
|
||||
*ret = amode{kindWithShift: uint32(amodeImmRBP), imm32: imm32, base: rbpVReg}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, shift byte) *amode {
|
||||
if shift > 3 {
|
||||
panic(fmt.Sprintf("BUG: invalid shift (must be 3>=): %d", shift))
|
||||
}
|
||||
ret := m.amodePool.Allocate()
|
||||
*ret = amode{kindWithShift: uint32(amodeRegRegShift) | uint32(shift)<<8, imm32: imm32, base: base, index: index}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (m *machine) newAmodeRipRel(label backend.Label) *amode {
|
||||
ret := m.amodePool.Allocate()
|
||||
*ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)}
|
||||
return ret
|
||||
}
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
func (a *amode) String() string {
|
||||
switch a.kind() {
|
||||
case amodeImmReg, amodeImmRBP:
|
||||
if a.imm32 == 0 {
|
||||
return fmt.Sprintf("(%s)", formatVRegSized(a.base, true))
|
||||
}
|
||||
return fmt.Sprintf("%d(%s)", int32(a.imm32), formatVRegSized(a.base, true))
|
||||
case amodeRegRegShift:
|
||||
shift := 1 << a.shift()
|
||||
if a.imm32 == 0 {
|
||||
return fmt.Sprintf(
|
||||
"(%s,%s,%d)",
|
||||
formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)
|
||||
}
|
||||
return fmt.Sprintf(
|
||||
"%d(%s,%s,%d)",
|
||||
int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)
|
||||
case amodeRipRel:
|
||||
return fmt.Sprintf("%s(%%rip)", backend.Label(a.imm32))
|
||||
default:
|
||||
panic("BUG: invalid amode kind")
|
||||
}
|
||||
}
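
// For example (illustrative), an amodeRegRegShift with imm32=8, base=%rax, index=%rcx and
// shift=2 formats as "8(%rax,%rcx,4)", i.e. AT&T-style displacement(base,index,scale) with
// scale = 1 << shift.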
|
||||
|
||||
func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return newOperandReg(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
if def.SSAValue().Type() == ssa.TypeV128 {
|
||||
// SIMD instructions require strict memory alignment, so we don't support the memory operand for V128 at the moment.
|
||||
return m.getOperand_Reg(def)
|
||||
}
|
||||
|
||||
if m.c.MatchInstr(def, ssa.OpcodeLoad) {
|
||||
instr := def.Instr
|
||||
ptr, offset, _ := instr.LoadData()
|
||||
op = newOperandMem(m.lowerToAddressMode(ptr, offset))
|
||||
instr.MarkLowered()
|
||||
return op
|
||||
}
|
||||
return m.getOperand_Reg(def)
|
||||
}
|
||||
|
||||
func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return newOperandReg(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
if m.c.MatchInstr(def, ssa.OpcodeLoad) {
|
||||
instr := def.Instr
|
||||
ptr, offset, _ := instr.LoadData()
|
||||
op = newOperandMem(m.lowerToAddressMode(ptr, offset))
|
||||
instr.MarkLowered()
|
||||
return op
|
||||
}
|
||||
return m.getOperand_Imm32_Reg(def)
|
||||
}
|
||||
|
||||
func (m *machine) getOperand_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return newOperandReg(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
instr := def.Instr
|
||||
if instr.Constant() {
|
||||
// If the operation is 64-bit, x64 sign-extends the 32-bit immediate value.
|
||||
// Therefore, we need to check if the immediate value is within the 32-bit range and if the sign bit is set,
|
||||
// we should not use the immediate value.
|
||||
if op, ok := asImm32Operand(instr.ConstantVal(), instr.Return().Type() == ssa.TypeI32); ok {
|
||||
instr.MarkLowered()
|
||||
return op
|
||||
}
|
||||
}
|
||||
return m.getOperand_Reg(def)
|
||||
}
|
||||
|
||||
func asImm32Operand(val uint64, allowSignExt bool) (operand, bool) {
|
||||
if imm32, ok := asImm32(val, allowSignExt); ok {
|
||||
return newOperandImm32(imm32), true
|
||||
}
|
||||
return operand{}, false
|
||||
}
|
||||
|
||||
func asImm32(val uint64, allowSignExt bool) (uint32, bool) {
|
||||
u32val := uint32(val)
|
||||
if uint64(u32val) != val {
|
||||
return 0, false
|
||||
}
|
||||
if !allowSignExt && u32val&0x80000000 != 0 {
|
||||
return 0, false
|
||||
}
|
||||
return u32val, true
|
||||
}
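
// For example (illustrative): the constant 0x80000000 used by a 64-bit instruction is rejected
// here (allowSignExt is false and bit 31 is set), because x64 would sign-extend the 32-bit
// immediate to 0xffffffff80000000; the constant is materialized into a register instead. The
// same constant on an i32 operation passes with allowSignExt=true, since the sign extension is
// then harmless.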
|
||||
|
||||
func (m *machine) getOperand_Reg(def *backend.SSAValueDefinition) (op operand) {
|
||||
var v regalloc.VReg
|
||||
if def.IsFromBlockParam() {
|
||||
v = def.BlkParamVReg
|
||||
} else {
|
||||
instr := def.Instr
|
||||
if instr.Constant() {
|
||||
// We inline all the constant instructions so that we could reduce the register usage.
|
||||
v = m.lowerConstant(instr)
|
||||
instr.MarkLowered()
|
||||
} else {
|
||||
if n := def.N; n == 0 {
|
||||
v = m.c.VRegOf(instr.Return())
|
||||
} else {
|
||||
_, rs := instr.Returns()
|
||||
v = m.c.VRegOf(rs[n-1])
|
||||
}
|
||||
}
|
||||
}
|
||||
return newOperandReg(v)
|
||||
}
|
||||
11
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect.go
generated
vendored
Normal file
@@ -0,0 +1,11 @@
|
|||
//go:build !tinygo
|
||||
|
||||
package amd64
|
||||
|
||||
import "reflect"
|
||||
|
||||
// setSliceLimits sets both Cap and Len for the given reflected slice.
|
||||
func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
|
||||
s.Len = int(limit)
|
||||
s.Cap = int(limit)
|
||||
}
|
||||
11
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reflect_tinygo.go
generated
vendored
Normal file
@@ -0,0 +1,11 @@
|
|||
//go:build tinygo
|
||||
|
||||
package amd64
|
||||
|
||||
import "reflect"
|
||||
|
||||
// setSliceLimits sets both Cap and Len for the given reflected slice.
|
||||
func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
|
||||
s.Len = limit
|
||||
s.Cap = limit
|
||||
}
|
||||
181
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/reg.go
generated
vendored
Normal file
@@ -0,0 +1,181 @@
|
|||
package amd64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
)
|
||||
|
||||
// Amd64-specific registers.
|
||||
const (
|
||||
// rax is a gp register.
|
||||
rax = regalloc.RealRegInvalid + 1 + iota
|
||||
// rcx is a gp register.
|
||||
rcx
|
||||
// rdx is a gp register.
|
||||
rdx
|
||||
// rbx is a gp register.
|
||||
rbx
|
||||
// rsp is a gp register.
|
||||
rsp
|
||||
// rbp is a gp register.
|
||||
rbp
|
||||
// rsi is a gp register.
|
||||
rsi
|
||||
// rdi is a gp register.
|
||||
rdi
|
||||
// r8 is a gp register.
|
||||
r8
|
||||
// r9 is a gp register.
|
||||
r9
|
||||
// r10 is a gp register.
|
||||
r10
|
||||
// r11 is a gp register.
|
||||
r11
|
||||
// r12 is a gp register.
|
||||
r12
|
||||
// r13 is a gp register.
|
||||
r13
|
||||
// r14 is a gp register.
|
||||
r14
|
||||
// r15 is a gp register.
|
||||
r15
|
||||
|
||||
// xmm0 is a vector register.
|
||||
xmm0
|
||||
// xmm1 is a vector register.
|
||||
xmm1
|
||||
// xmm2 is a vector register.
|
||||
xmm2
|
||||
// xmm3 is a vector register.
|
||||
xmm3
|
||||
// xmm4 is a vector register.
|
||||
xmm4
|
||||
// xmm5 is a vector register.
|
||||
xmm5
|
||||
// xmm6 is a vector register.
|
||||
xmm6
|
||||
// xmm7 is a vector register.
|
||||
xmm7
|
||||
// xmm8 is a vector register.
|
||||
xmm8
|
||||
// xmm9 is a vector register.
|
||||
xmm9
|
||||
// xmm10 is a vector register.
|
||||
xmm10
|
||||
// xmm11 is a vector register.
|
||||
xmm11
|
||||
// xmm12 is a vector register.
|
||||
xmm12
|
||||
// xmm13 is a vector register.
|
||||
xmm13
|
||||
// xmm14 is a vector register.
|
||||
xmm14
|
||||
// xmm15 is a vector register.
|
||||
xmm15
|
||||
)
|
||||
|
||||
var (
|
||||
raxVReg = regalloc.FromRealReg(rax, regalloc.RegTypeInt)
|
||||
rcxVReg = regalloc.FromRealReg(rcx, regalloc.RegTypeInt)
|
||||
rdxVReg = regalloc.FromRealReg(rdx, regalloc.RegTypeInt)
|
||||
rbxVReg = regalloc.FromRealReg(rbx, regalloc.RegTypeInt)
|
||||
rspVReg = regalloc.FromRealReg(rsp, regalloc.RegTypeInt)
|
||||
rbpVReg = regalloc.FromRealReg(rbp, regalloc.RegTypeInt)
|
||||
rsiVReg = regalloc.FromRealReg(rsi, regalloc.RegTypeInt)
|
||||
rdiVReg = regalloc.FromRealReg(rdi, regalloc.RegTypeInt)
|
||||
r8VReg = regalloc.FromRealReg(r8, regalloc.RegTypeInt)
|
||||
r9VReg = regalloc.FromRealReg(r9, regalloc.RegTypeInt)
|
||||
r10VReg = regalloc.FromRealReg(r10, regalloc.RegTypeInt)
|
||||
r11VReg = regalloc.FromRealReg(r11, regalloc.RegTypeInt)
|
||||
r12VReg = regalloc.FromRealReg(r12, regalloc.RegTypeInt)
|
||||
r13VReg = regalloc.FromRealReg(r13, regalloc.RegTypeInt)
|
||||
r14VReg = regalloc.FromRealReg(r14, regalloc.RegTypeInt)
|
||||
r15VReg = regalloc.FromRealReg(r15, regalloc.RegTypeInt)
|
||||
|
||||
xmm0VReg = regalloc.FromRealReg(xmm0, regalloc.RegTypeFloat)
|
||||
xmm1VReg = regalloc.FromRealReg(xmm1, regalloc.RegTypeFloat)
|
||||
xmm2VReg = regalloc.FromRealReg(xmm2, regalloc.RegTypeFloat)
|
||||
xmm3VReg = regalloc.FromRealReg(xmm3, regalloc.RegTypeFloat)
|
||||
xmm4VReg = regalloc.FromRealReg(xmm4, regalloc.RegTypeFloat)
|
||||
xmm5VReg = regalloc.FromRealReg(xmm5, regalloc.RegTypeFloat)
|
||||
xmm6VReg = regalloc.FromRealReg(xmm6, regalloc.RegTypeFloat)
|
||||
xmm7VReg = regalloc.FromRealReg(xmm7, regalloc.RegTypeFloat)
|
||||
xmm8VReg = regalloc.FromRealReg(xmm8, regalloc.RegTypeFloat)
|
||||
xmm9VReg = regalloc.FromRealReg(xmm9, regalloc.RegTypeFloat)
|
||||
xmm10VReg = regalloc.FromRealReg(xmm10, regalloc.RegTypeFloat)
|
||||
xmm11VReg = regalloc.FromRealReg(xmm11, regalloc.RegTypeFloat)
|
||||
xmm12VReg = regalloc.FromRealReg(xmm12, regalloc.RegTypeFloat)
|
||||
xmm13VReg = regalloc.FromRealReg(xmm13, regalloc.RegTypeFloat)
|
||||
xmm14VReg = regalloc.FromRealReg(xmm14, regalloc.RegTypeFloat)
|
||||
xmm15VReg = regalloc.FromRealReg(xmm15, regalloc.RegTypeFloat)
|
||||
)
|
||||
|
||||
var regNames = [...]string{
|
||||
rax: "rax",
|
||||
rcx: "rcx",
|
||||
rdx: "rdx",
|
||||
rbx: "rbx",
|
||||
rsp: "rsp",
|
||||
rbp: "rbp",
|
||||
rsi: "rsi",
|
||||
rdi: "rdi",
|
||||
r8: "r8",
|
||||
r9: "r9",
|
||||
r10: "r10",
|
||||
r11: "r11",
|
||||
r12: "r12",
|
||||
r13: "r13",
|
||||
r14: "r14",
|
||||
r15: "r15",
|
||||
xmm0: "xmm0",
|
||||
xmm1: "xmm1",
|
||||
xmm2: "xmm2",
|
||||
xmm3: "xmm3",
|
||||
xmm4: "xmm4",
|
||||
xmm5: "xmm5",
|
||||
xmm6: "xmm6",
|
||||
xmm7: "xmm7",
|
||||
xmm8: "xmm8",
|
||||
xmm9: "xmm9",
|
||||
xmm10: "xmm10",
|
||||
xmm11: "xmm11",
|
||||
xmm12: "xmm12",
|
||||
xmm13: "xmm13",
|
||||
xmm14: "xmm14",
|
||||
xmm15: "xmm15",
|
||||
}
|
||||
|
||||
func formatVRegSized(r regalloc.VReg, _64 bool) string {
|
||||
if r.IsRealReg() {
|
||||
if r.RegType() == regalloc.RegTypeInt {
|
||||
rr := r.RealReg()
|
||||
orig := regNames[rr]
|
||||
if rr <= rdi {
|
||||
if _64 {
|
||||
return "%" + orig
|
||||
} else {
|
||||
return "%e" + orig[1:]
|
||||
}
|
||||
} else {
|
||||
if _64 {
|
||||
return "%" + orig
|
||||
} else {
|
||||
return "%" + orig + "d"
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return "%" + regNames[r.RealReg()]
|
||||
}
|
||||
} else {
|
||||
if r.RegType() == regalloc.RegTypeInt {
|
||||
if _64 {
|
||||
return fmt.Sprintf("%%r%d?", r.ID())
|
||||
} else {
|
||||
return fmt.Sprintf("%%r%dd?", r.ID())
|
||||
}
|
||||
} else {
|
||||
return fmt.Sprintf("%%xmm%d?", r.ID())
|
||||
}
|
||||
}
|
||||
}
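For reference, formatVRegSized above maps 64-bit GP names onto their 32-bit aliases in two ways: the eight legacy registers swap the leading "r" for "e" (rax becomes eax), while r8..r15 simply gain a "d" suffix (r8 becomes r8d). A minimal standalone sketch of that naming rule, with an invented helper name and no dependency on the wazero types:

package main

import "fmt"

// name32 returns the 32-bit alias of a 64-bit GP register name: the eight
// legacy registers swap their leading "r" for "e", while r8..r15 gain "d".
func name32(name64 string) string {
	switch name64 {
	case "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi":
		return "e" + name64[1:]
	default: // r8..r15
		return name64 + "d"
	}
}

func main() {
	for _, r := range []string{"rax", "rdi", "r8", "r15"} {
		fmt.Printf("%%%s -> %%%s\n", r, name32(r))
	}
}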
128
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/stack.go
generated
vendored
Normal file
@@ -0,0 +1,128 @@
package amd64
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"reflect"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/wasmdebug"
|
||||
)
|
||||
|
||||
func stackView(rbp, top uintptr) []byte {
|
||||
var stackBuf []byte
|
||||
{
|
||||
// TODO: use unsafe.Slice after floor version is set to Go 1.20.
|
||||
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))
|
||||
hdr.Data = rbp
|
||||
setSliceLimits(hdr, top-rbp)
|
||||
}
|
||||
return stackBuf
|
||||
}
|
||||
|
||||
// UnwindStack implements wazevo.unwindStack.
|
||||
func UnwindStack(_, rbp, top uintptr, returnAddresses []uintptr) []uintptr {
|
||||
stackBuf := stackView(rbp, top)
|
||||
|
||||
for i := uint64(0); i < uint64(len(stackBuf)); {
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <---- Caller_RBP
|
||||
// | ........... |
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ............ |
|
||||
// | spill slot 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <---- RBP
|
||||
// (low address)
|
||||
|
||||
callerRBP := binary.LittleEndian.Uint64(stackBuf[i:])
|
||||
retAddr := binary.LittleEndian.Uint64(stackBuf[i+8:])
|
||||
returnAddresses = append(returnAddresses, uintptr(retAddr))
|
||||
i = callerRBP - uint64(rbp)
|
||||
if len(returnAddresses) == wasmdebug.MaxFrames {
|
||||
break
|
||||
}
|
||||
}
|
||||
return returnAddresses
|
||||
}
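UnwindStack above walks the saved-RBP chain: within the viewed region, the first 8 bytes of each frame hold the caller's RBP and the next 8 the return address, and subtracting the base address from the caller's RBP yields the offset of the next frame. A toy, self-contained walk over a fabricated buffer; the addresses, frame sizes, and the zero terminator here are invented for illustration and are not the real layout:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// Fabricated stack image: three 32-byte frames, pretend base "address" 0x1000.
	const base = uint64(0x1000)
	buf := make([]byte, 96)
	writeFrame := func(off, callerRBP, retAddr uint64) {
		binary.LittleEndian.PutUint64(buf[off:], callerRBP)
		binary.LittleEndian.PutUint64(buf[off+8:], retAddr)
	}
	writeFrame(0, base+32, 0xaaaa)  // frame 0 -> caller's frame starts at offset 32
	writeFrame(32, base+64, 0xbbbb) // frame 1 -> caller's frame starts at offset 64
	writeFrame(64, 0, 0xcccc)       // frame 2 -> chain ends (zero sentinel)

	var rets []uint64
	for i := uint64(0); i < uint64(len(buf)); {
		callerRBP := binary.LittleEndian.Uint64(buf[i:])
		rets = append(rets, binary.LittleEndian.Uint64(buf[i+8:]))
		if callerRBP == 0 {
			break
		}
		i = callerRBP - base // hop to the caller's frame
	}
	fmt.Printf("%#x\n", rets) // [0xaaaa 0xbbbb 0xcccc]
}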
|
||||
|
||||
// GoCallStackView implements wazevo.goCallStackView.
|
||||
func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
|
||||
// (high address)
|
||||
// +-----------------+ <----+
|
||||
// | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned.
|
||||
// ^ | arg[N]/ret[M] | |
|
||||
// sliceSize | | ............ | | SizeInBytes/8
|
||||
// | | arg[1]/ret[1] | |
|
||||
// v | arg[0]/ret[0] | <----+
|
||||
// | SizeInBytes |
|
||||
// +-----------------+ <---- stackPointerBeforeGoCall
|
||||
// (low address)
|
||||
data := unsafe.Pointer(uintptr(unsafe.Pointer(stackPointerBeforeGoCall)) + 8)
|
||||
size := *stackPointerBeforeGoCall / 8
|
||||
return unsafe.Slice((*uint64)(data), int(size))
|
||||
}
|
||||
|
||||
func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) {
|
||||
diff := uint64(rsp - oldRsp)
|
||||
|
||||
newBuf := stackView(rbp, top)
|
||||
for i := uint64(0); i < uint64(len(newBuf)); {
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <---- Caller_RBP
|
||||
// | ........... |
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ............ |
|
||||
// | spill slot 0 |
|
||||
// | ReturnAddress |
|
||||
// | Caller_RBP |
|
||||
// +-----------------+ <---- RBP
|
||||
// (low address)
|
||||
|
||||
callerRBP := binary.LittleEndian.Uint64(newBuf[i:])
|
||||
if callerRBP == 0 {
|
||||
// End of stack.
|
||||
break
|
||||
}
|
||||
if i64 := int64(callerRBP); i64 < int64(oldRsp) || i64 >= int64(oldTop) {
|
||||
panic("BUG: callerRBP is out of range")
|
||||
}
|
||||
if int(callerRBP) < 0 {
|
||||
panic("BUG: callerRBP is negative")
|
||||
}
|
||||
adjustedCallerRBP := callerRBP + diff
|
||||
if int(adjustedCallerRBP) < 0 {
|
||||
panic("BUG: adjustedCallerRBP is negative")
|
||||
}
|
||||
binary.LittleEndian.PutUint64(newBuf[i:], adjustedCallerRBP)
|
||||
i = adjustedCallerRBP - uint64(rbp)
|
||||
}
|
||||
}
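AdjustClonedStack above exists because the Go runtime may relocate the goroutine stack: every saved RBP in the cloned region still points into the old stack and must be shifted by the relocation delta. A condensed sketch of that rebasing step, using invented names and plain integers rather than real stack addresses:

package main

import "fmt"

// rebase sketches AdjustClonedStack's core step: a saved frame pointer that
// pointed into the old stack region is shifted by the distance the stack moved.
func rebase(savedRBP, oldBase, newBase uint64) uint64 {
	return savedRBP + (newBase - oldBase)
}

func main() {
	oldBase, newBase := uint64(0x1000), uint64(0x8000)
	fmt.Printf("%#x\n", rebase(0x1040, oldBase, newBase)) // 0x8040
}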
332
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
generated
vendored
Normal file
@@ -0,0 +1,332 @@
package arm64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// References:
|
||||
// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture
|
||||
// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard
|
||||
|
||||
var (
|
||||
intParamResultRegs = []regalloc.RealReg{x0, x1, x2, x3, x4, x5, x6, x7}
|
||||
floatParamResultRegs = []regalloc.RealReg{v0, v1, v2, v3, v4, v5, v6, v7}
|
||||
)
|
||||
|
||||
var regInfo = ®alloc.RegisterInfo{
|
||||
AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
|
||||
// We don't allocate:
|
||||
// - x18: Reserved by the macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
|
||||
// - x28: Reserved by Go runtime.
|
||||
// - x27(=tmpReg): because of the reason described on tmpReg.
|
||||
regalloc.RegTypeInt: {
|
||||
x8, x9, x10, x11, x12, x13, x14, x15,
|
||||
x16, x17, x19, x20, x21, x22, x23, x24, x25,
|
||||
x26, x29, x30,
|
||||
// These are the argument/return registers. Less preferred in the allocation.
|
||||
x7, x6, x5, x4, x3, x2, x1, x0,
|
||||
},
|
||||
regalloc.RegTypeFloat: {
|
||||
v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
|
||||
v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
|
||||
// These are the argument/return registers. Less preferred in the allocation.
|
||||
v7, v6, v5, v4, v3, v2, v1, v0,
|
||||
},
|
||||
},
|
||||
CalleeSavedRegisters: regalloc.NewRegSet(
|
||||
x19, x20, x21, x22, x23, x24, x25, x26, x28,
|
||||
v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
|
||||
),
|
||||
CallerSavedRegisters: regalloc.NewRegSet(
|
||||
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x29, x30,
|
||||
v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
|
||||
),
|
||||
RealRegToVReg: []regalloc.VReg{
|
||||
x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg,
|
||||
v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg,
|
||||
},
|
||||
RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
|
||||
RealRegType: func(r regalloc.RealReg) regalloc.RegType {
|
||||
if r < v0 {
|
||||
return regalloc.RegTypeInt
|
||||
}
|
||||
return regalloc.RegTypeFloat
|
||||
},
|
||||
}
|
||||
|
||||
// ArgsResultsRegs implements backend.Machine.
|
||||
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
|
||||
return intParamResultRegs, floatParamResultRegs
|
||||
}
|
||||
|
||||
// LowerParams implements backend.FunctionABI.
|
||||
func (m *machine) LowerParams(args []ssa.Value) {
|
||||
a := m.currentABI
|
||||
|
||||
for i, ssaArg := range args {
|
||||
if !ssaArg.Valid() {
|
||||
continue
|
||||
}
|
||||
reg := m.compiler.VRegOf(ssaArg)
|
||||
arg := &a.Args[i]
|
||||
if arg.Kind == backend.ABIArgKindReg {
|
||||
m.InsertMove(reg, arg.Reg, arg.Type)
|
||||
} else {
|
||||
// TODO: we could use pair load if there's consecutive loads for the same type.
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 | <-|
|
||||
// | ReturnAddress | |
|
||||
// +-----------------+ |
|
||||
// | ........... | |
|
||||
// | clobbered M | | argStackOffset: is unknown at this point of compilation.
|
||||
// | ............ | |
|
||||
// | clobbered 0 | |
|
||||
// | spill slot N | |
|
||||
// | ........... | |
|
||||
// | spill slot 0 | |
|
||||
// SP---> +-----------------+ <-+
|
||||
// (low address)
|
||||
|
||||
bits := arg.Type.Bits()
|
||||
// At this point of compilation, we don't yet know how much space exists below the return address.
|
||||
// So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation.
|
||||
amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
|
||||
load := m.allocateInstr()
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
load.asULoad(operandNR(reg), amode, bits)
|
||||
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
|
||||
load.asFpuLoad(operandNR(reg), amode, bits)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
m.insert(load)
|
||||
m.unresolvedAddressModes = append(m.unresolvedAddressModes, load)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LowerReturns lowers the given returns.
|
||||
func (m *machine) LowerReturns(rets []ssa.Value) {
|
||||
a := m.currentABI
|
||||
|
||||
l := len(rets) - 1
|
||||
for i := range rets {
|
||||
// Reverse order in order to avoid overwriting the stack returns existing in the return registers.
|
||||
ret := rets[l-i]
|
||||
r := &a.Rets[l-i]
|
||||
reg := m.compiler.VRegOf(ret)
|
||||
if def := m.compiler.ValueDefinition(ret); def.IsFromInstr() {
|
||||
// Constant instructions are inlined.
|
||||
if inst := def.Instr; inst.Constant() {
|
||||
val := inst.Return()
|
||||
valType := val.Type()
|
||||
v := inst.ConstantVal()
|
||||
m.insertLoadConstant(v, valType, reg)
|
||||
}
|
||||
}
|
||||
if r.Kind == backend.ABIArgKindReg {
|
||||
m.InsertMove(r.Reg, reg, ret.Type())
|
||||
} else {
|
||||
// TODO: we could use pair store if there's consecutive stores for the same type.
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 | <-+
|
||||
// | arg X | |
|
||||
// | ....... | |
|
||||
// | arg 1 | |
|
||||
// | arg 0 | |
|
||||
// | ReturnAddress | |
|
||||
// +-----------------+ |
|
||||
// | ........... | |
|
||||
// | spill slot M | | retStackOffset: is unknown at this point of compilation.
|
||||
// | ............ | |
|
||||
// | spill slot 2 | |
|
||||
// | spill slot 1 | |
|
||||
// | clobbered 0 | |
|
||||
// | clobbered 1 | |
|
||||
// | ........... | |
|
||||
// | clobbered N | |
|
||||
// SP---> +-----------------+ <-+
|
||||
// (low address)
|
||||
|
||||
bits := r.Type.Bits()
|
||||
|
||||
// At this point of compilation, we don't yet know how much space exists below the return address.
|
||||
// So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation.
|
||||
amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
|
||||
store := m.allocateInstr()
|
||||
store.asStore(operandNR(reg), amode, bits)
|
||||
m.insert(store)
|
||||
m.unresolvedAddressModes = append(m.unresolvedAddressModes, store)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the
|
||||
// caller side of the function call.
|
||||
func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) {
|
||||
arg := &a.Args[argIndex]
|
||||
if def != nil && def.IsFromInstr() {
|
||||
// Constant instructions are inlined.
|
||||
if inst := def.Instr; inst.Constant() {
|
||||
val := inst.Return()
|
||||
valType := val.Type()
|
||||
v := inst.ConstantVal()
|
||||
m.insertLoadConstant(v, valType, reg)
|
||||
}
|
||||
}
|
||||
if arg.Kind == backend.ABIArgKindReg {
|
||||
m.InsertMove(arg.Reg, reg, arg.Type)
|
||||
} else {
|
||||
// TODO: we could use pair store if there's consecutive stores for the same type.
|
||||
//
|
||||
// Note that at this point, stack pointer is already adjusted.
|
||||
bits := arg.Type.Bits()
|
||||
amode := m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false)
|
||||
store := m.allocateInstr()
|
||||
store.asStore(operandNR(reg), amode, bits)
|
||||
m.insert(store)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, slotBegin int64) {
|
||||
r := &a.Rets[retIndex]
|
||||
if r.Kind == backend.ABIArgKindReg {
|
||||
m.InsertMove(reg, r.Reg, r.Type)
|
||||
} else {
|
||||
// TODO: we could use pair load if there's consecutive loads for the same type.
|
||||
amode := m.resolveAddressModeForOffset(a.ArgStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false)
|
||||
ldr := m.allocateInstr()
|
||||
switch r.Type {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
|
||||
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
|
||||
ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
m.insert(ldr)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
|
||||
exct := m.executableContext
|
||||
exct.PendingInstructions = exct.PendingInstructions[:0]
|
||||
mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
|
||||
for _, instr := range exct.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
return cur, mode
|
||||
}
|
||||
|
||||
func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
|
||||
if rn.RegType() != regalloc.RegTypeInt {
|
||||
panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
|
||||
}
|
||||
var amode addressMode
|
||||
if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
|
||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
|
||||
} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
|
||||
amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
|
||||
} else {
|
||||
var indexReg regalloc.VReg
|
||||
if allowTmpRegUse {
|
||||
m.lowerConstantI64(tmpRegVReg, offset)
|
||||
indexReg = tmpRegVReg
|
||||
} else {
|
||||
indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerConstantI64(indexReg, offset)
|
||||
}
|
||||
amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
|
||||
}
|
||||
return amode
|
||||
}
|
||||
|
||||
func (m *machine) lowerCall(si *ssa.Instruction) {
|
||||
isDirectCall := si.Opcode() == ssa.OpcodeCall
|
||||
var indirectCalleePtr ssa.Value
|
||||
var directCallee ssa.FuncRef
|
||||
var sigID ssa.SignatureID
|
||||
var args []ssa.Value
|
||||
if isDirectCall {
|
||||
directCallee, sigID, args = si.CallData()
|
||||
} else {
|
||||
indirectCalleePtr, sigID, args, _ /* on arm64, the calling convention is compatible with the Go runtime */ = si.CallIndirectData()
|
||||
}
|
||||
calleeABI := m.compiler.GetFunctionABI(m.compiler.SSABuilder().ResolveSignature(sigID))
|
||||
|
||||
stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
|
||||
if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
|
||||
m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame.
|
||||
}
|
||||
|
||||
for i, arg := range args {
|
||||
reg := m.compiler.VRegOf(arg)
|
||||
def := m.compiler.ValueDefinition(arg)
|
||||
m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
|
||||
}
|
||||
|
||||
if isDirectCall {
|
||||
call := m.allocateInstr()
|
||||
call.asCall(directCallee, calleeABI)
|
||||
m.insert(call)
|
||||
} else {
|
||||
ptr := m.compiler.VRegOf(indirectCalleePtr)
|
||||
callInd := m.allocateInstr()
|
||||
callInd.asCallIndirect(ptr, calleeABI)
|
||||
m.insert(callInd)
|
||||
}
|
||||
|
||||
var index int
|
||||
r1, rs := si.Returns()
|
||||
if r1.Valid() {
|
||||
m.callerGenFunctionReturnVReg(calleeABI, 0, m.compiler.VRegOf(r1), stackSlotSize)
|
||||
index++
|
||||
}
|
||||
|
||||
for _, r := range rs {
|
||||
m.callerGenFunctionReturnVReg(calleeABI, index, m.compiler.VRegOf(r), stackSlotSize)
|
||||
index++
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
|
||||
if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
|
||||
alu := m.allocateInstr()
|
||||
var ao aluOp
|
||||
if add {
|
||||
ao = aluOpAdd
|
||||
} else {
|
||||
ao = aluOpSub
|
||||
}
|
||||
alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
|
||||
m.insert(alu)
|
||||
} else {
|
||||
m.lowerConstantI64(tmpRegVReg, diff)
|
||||
alu := m.allocateInstr()
|
||||
var ao aluOp
|
||||
if add {
|
||||
ao = aluOpAdd
|
||||
} else {
|
||||
ao = aluOpSub
|
||||
}
|
||||
alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
|
||||
m.insert(alu)
|
||||
}
|
||||
}
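The ABI code above hands the first eight integer and first eight floating-point values to x0..x7 / v0..v7 and pushes the rest into stack slots. A simplified classifier that mirrors that first-eight-in-registers rule; the types and names are stand-ins, and details such as v128 sizing and final alignment are deliberately omitted:

package main

import "fmt"

type kind int

const (
	kindInt kind = iota
	kindFloat
)

// loc says where one parameter lives: a register index or a stack offset.
type loc struct {
	inReg  bool
	reg    int
	offset int64
}

// assign mirrors the scheme above: the first 8 int values go to x0..x7, the
// first 8 float values to v0..v7, and everything else to 8-byte stack slots.
func assign(kinds []kind) []loc {
	locs := make([]loc, len(kinds))
	nextReg := map[kind]int{}
	var stack int64
	for i, k := range kinds {
		if nextReg[k] < 8 {
			locs[i] = loc{inReg: true, reg: nextReg[k]}
			nextReg[k]++
		} else {
			locs[i] = loc{offset: stack}
			stack += 8
		}
	}
	return locs
}

func main() {
	kinds := make([]kind, 10) // ten integer params: the last two spill to the stack
	for _, l := range assign(kinds) {
		fmt.Println(l)
	}
}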
9
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.go
generated
vendored
Normal file
@@ -0,0 +1,9 @@
package arm64

// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below.
// This implements wazevo.entrypoint, and see the comments there for detail.
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)

// afterGoFunctionCallEntrypoint enters the machine code after growing the stack.
// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail.
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
29
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_arm64.s
generated
vendored
Normal file
@@ -0,0 +1,29 @@
//go:build arm64

#include "funcdata.h"
#include "textflag.h"

// See the comments on EmitGoEntryPreamble for what this function is supposed to do.
TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48
	MOVD preambleExecutable+0(FP), R27
	MOVD functionExectuable+8(FP), R24
	MOVD executionContextPtr+16(FP), R0
	MOVD moduleContextPtr+24(FP), R1
	MOVD paramResultSlicePtr+32(FP), R19
	MOVD goAllocatedStackSlicePtr+40(FP), R26
	JMP (R27)

TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32
	MOVD goCallReturnAddress+0(FP), R20
	MOVD executionContextPtr+8(FP), R0
	MOVD stackPointer+16(FP), R19

	// Save the current FP(R29), SP and LR(R30) into the wazevo.executionContext (stored in R0).
	MOVD R29, 16(R0) // Store FP(R29) into [R0, #ExecutionContextOffsets.OriginalFramePointer]
	MOVD RSP, R27    // Move SP to R27 (temporary register) since SP cannot be stored directly in str instructions.
	MOVD R27, 24(R0) // Store R27 into [R0, #ExecutionContextOffsets.OriginalStackPointer]
	MOVD R30, 32(R0) // Store R30 into [R0, #ExecutionContextOffsets.GoReturnAddress]

	// Load the new stack pointer (which sits somewhere in Go-allocated stack) into SP.
	MOVD R19, RSP
	JMP (R20)
230
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go
generated
vendored
Normal file
@@ -0,0 +1,230 @@
package arm64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
// CompileEntryPreamble implements backend.Machine. This assumes `entrypoint` function (in abi_go_entry_arm64.s) passes:
|
||||
//
|
||||
// 1. First (execution context ptr) and Second arguments are already passed in x0, and x1.
|
||||
// 2. param/result slice ptr in x19; the pointer to []uint64{} which is used to pass arguments and accept return values.
|
||||
// 3. Go-allocated stack slice ptr in x26.
|
||||
// 4. Function executable in x24.
|
||||
//
|
||||
// also SP and FP are correct Go-runtime-based values, and LR is the return address to the Go-side caller.
|
||||
func (m *machine) CompileEntryPreamble(signature *ssa.Signature) []byte {
|
||||
root := m.constructEntryPreamble(signature)
|
||||
m.encode(root)
|
||||
return m.compiler.Buf()
|
||||
}
|
||||
|
||||
var (
|
||||
executionContextPtrReg = x0VReg
|
||||
// callee-saved regs so that they can be used in the prologue and epilogue.
|
||||
paramResultSlicePtr = x19VReg
|
||||
savedExecutionContextPtr = x20VReg
|
||||
// goAllocatedStackPtr is not used in the epilogue.
|
||||
goAllocatedStackPtr = x26VReg
|
||||
// paramResultSliceCopied is not used in the epilogue.
|
||||
paramResultSliceCopied = x25VReg
|
||||
// functionExecutable is not used in the epilogue.
|
||||
functionExecutable = x24VReg
|
||||
)
|
||||
|
||||
func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, arg *backend.ABIArg, argStartOffsetFromSP int64) *instruction {
|
||||
typ := arg.Type
|
||||
bits := typ.Bits()
|
||||
isStackArg := arg.Kind == backend.ABIArgKindStack
|
||||
|
||||
var loadTargetReg operand
|
||||
if !isStackArg {
|
||||
loadTargetReg = operandNR(arg.Reg)
|
||||
} else {
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
loadTargetReg = operandNR(x15VReg)
|
||||
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
|
||||
loadTargetReg = operandNR(v15VReg)
|
||||
default:
|
||||
panic("TODO?")
|
||||
}
|
||||
}
|
||||
|
||||
var postIndexImm int64
|
||||
if typ == ssa.TypeV128 {
|
||||
postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice.
|
||||
} else {
|
||||
postIndexImm = 8
|
||||
}
|
||||
loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}
|
||||
|
||||
instr := m.allocateInstr()
|
||||
switch typ {
|
||||
case ssa.TypeI32:
|
||||
instr.asULoad(loadTargetReg, loadMode, 32)
|
||||
case ssa.TypeI64:
|
||||
instr.asULoad(loadTargetReg, loadMode, 64)
|
||||
case ssa.TypeF32:
|
||||
instr.asFpuLoad(loadTargetReg, loadMode, 32)
|
||||
case ssa.TypeF64:
|
||||
instr.asFpuLoad(loadTargetReg, loadMode, 64)
|
||||
case ssa.TypeV128:
|
||||
instr.asFpuLoad(loadTargetReg, loadMode, 128)
|
||||
}
|
||||
cur = linkInstr(cur, instr)
|
||||
|
||||
if isStackArg {
|
||||
var storeMode addressMode
|
||||
cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true)
|
||||
toStack := m.allocateInstr()
|
||||
toStack.asStore(loadTargetReg, storeMode, bits)
|
||||
cur = linkInstr(cur, toStack)
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, result *backend.ABIArg, resultStartOffsetFromSP int64) *instruction {
|
||||
isStackArg := result.Kind == backend.ABIArgKindStack
|
||||
typ := result.Type
|
||||
bits := typ.Bits()
|
||||
|
||||
var storeTargetReg operand
|
||||
if !isStackArg {
|
||||
storeTargetReg = operandNR(result.Reg)
|
||||
} else {
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
storeTargetReg = operandNR(x15VReg)
|
||||
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
|
||||
storeTargetReg = operandNR(v15VReg)
|
||||
default:
|
||||
panic("TODO?")
|
||||
}
|
||||
}
|
||||
|
||||
var postIndexImm int64
|
||||
if typ == ssa.TypeV128 {
|
||||
postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice.
|
||||
} else {
|
||||
postIndexImm = 8
|
||||
}
|
||||
|
||||
if isStackArg {
|
||||
var loadMode addressMode
|
||||
cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true)
|
||||
toReg := m.allocateInstr()
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
toReg.asULoad(storeTargetReg, loadMode, bits)
|
||||
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
|
||||
toReg.asFpuLoad(storeTargetReg, loadMode, bits)
|
||||
default:
|
||||
panic("TODO?")
|
||||
}
|
||||
cur = linkInstr(cur, toReg)
|
||||
}
|
||||
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
|
||||
instr := m.allocateInstr()
|
||||
instr.asStore(storeTargetReg, mode, bits)
|
||||
cur = linkInstr(cur, instr)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) constructEntryPreamble(sig *ssa.Signature) (root *instruction) {
|
||||
abi := backend.FunctionABI{}
|
||||
abi.Init(sig, intParamResultRegs, floatParamResultRegs)
|
||||
|
||||
root = m.allocateNop()
|
||||
|
||||
//// ----------------------------------- prologue ----------------------------------- ////
|
||||
|
||||
// First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well.
|
||||
// mov savedExecutionContextPtr, x0
|
||||
cur := m.move64(savedExecutionContextPtr, executionContextPtrReg, root)
|
||||
|
||||
// Next, save the current FP, SP and LR into the wazevo.executionContext:
|
||||
// str fp, [savedExecutionContextPtr, #OriginalFramePointer]
|
||||
// mov tmp, sp ;; sp cannot be str'ed directly.
|
||||
// str sp, [savedExecutionContextPtr, #OriginalStackPointer]
|
||||
// str lr, [savedExecutionContextPtr, #GoReturnAddress]
|
||||
cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, true, cur)
|
||||
cur = m.move64(tmpRegVReg, spVReg, cur)
|
||||
cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, true, cur)
|
||||
cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, true, cur)
|
||||
|
||||
// Then, move the Go-allocated stack pointer to SP:
|
||||
// mov sp, goAllocatedStackPtr
|
||||
cur = m.move64(spVReg, goAllocatedStackPtr, cur)
|
||||
|
||||
prReg := paramResultSlicePtr
|
||||
if len(abi.Args) > 2 && len(abi.Rets) > 0 {
|
||||
// paramResultSlicePtr is modified during the execution of goEntryPreamblePassArg,
|
||||
// so copy it to another reg.
|
||||
cur = m.move64(paramResultSliceCopied, paramResultSlicePtr, cur)
|
||||
prReg = paramResultSliceCopied
|
||||
}
|
||||
|
||||
stackSlotSize := int64(abi.AlignedArgResultStackSlotSize())
|
||||
for i := range abi.Args {
|
||||
if i < 2 {
|
||||
// module context ptr and execution context ptr are passed in x0 and x1 by the Go assembly function.
|
||||
continue
|
||||
}
|
||||
arg := &abi.Args[i]
|
||||
cur = m.goEntryPreamblePassArg(cur, prReg, arg, -stackSlotSize)
|
||||
}
|
||||
|
||||
// Call the real function.
|
||||
bl := m.allocateInstr()
|
||||
bl.asCallIndirect(functionExecutable, &abi)
|
||||
cur = linkInstr(cur, bl)
|
||||
|
||||
///// ----------------------------------- epilogue ----------------------------------- /////
|
||||
|
||||
// Store the register results into paramResultSlicePtr.
|
||||
for i := range abi.Rets {
|
||||
cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, &abi.Rets[i], abi.ArgStackSize-stackSlotSize)
|
||||
}
|
||||
|
||||
// Finally, restore the FP, SP and LR, and return to the Go code.
|
||||
// ldr fp, [savedExecutionContextPtr, #OriginalFramePointer]
|
||||
// ldr tmp, [savedExecutionContextPtr, #OriginalStackPointer]
|
||||
// mov sp, tmp ;; sp cannot be str'ed directly.
|
||||
// ldr lr, [savedExecutionContextPtr, #GoReturnAddress]
|
||||
// ret ;; --> return to the Go code
|
||||
cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, false, cur)
|
||||
cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, false, cur)
|
||||
cur = m.move64(spVReg, tmpRegVReg, cur)
|
||||
cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, false, cur)
|
||||
retInst := m.allocateInstr()
|
||||
retInst.asRet()
|
||||
linkInstr(cur, retInst)
|
||||
return
|
||||
}
|
||||
|
||||
func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction {
|
||||
instr := m.allocateInstr()
|
||||
instr.asMove64(dst, src)
|
||||
return linkInstr(prev, instr)
|
||||
}
|
||||
|
||||
func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction {
|
||||
instr := m.allocateInstr()
|
||||
mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
|
||||
if store {
|
||||
instr.asStore(operandNR(d), mode, 64)
|
||||
} else {
|
||||
instr.asULoad(operandNR(d), mode, 64)
|
||||
}
|
||||
return linkInstr(prev, instr)
|
||||
}
|
||||
|
||||
func linkInstr(prev, next *instruction) *instruction {
|
||||
prev.next = next
|
||||
next.prev = prev
|
||||
return next
|
||||
}
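The entry preamble above reads arguments from, and writes results back into, a single []uint64 whose slots are 8 bytes each (16 for v128), with floats carried as their bit patterns. A hedged sketch of that packing convention from the Go side; the layout is inferred from the comments above and is not an exported wazero API:

package main

import (
	"fmt"
	"math"
)

func main() {
	// Pack an (i64, f64) argument pair into the shared param/result slice.
	params := []uint64{42, math.Float64bits(3.5)}

	// The callee would overwrite the same slots with its results; here we
	// just pretend it returned (i64+1, f64*2).
	params[0] = params[0] + 1
	params[1] = math.Float64bits(math.Float64frombits(params[1]) * 2)

	fmt.Println(params[0], math.Float64frombits(params[1])) // 43 7
}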
428
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go
generated
vendored
Normal file
@@ -0,0 +1,428 @@
package arm64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
var calleeSavedRegistersSorted = []regalloc.VReg{
|
||||
x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg,
|
||||
v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
|
||||
}
|
||||
|
||||
// CompileGoFunctionTrampoline implements backend.Machine.
|
||||
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
|
||||
exct := m.executableContext
|
||||
argBegin := 1 // Skips exec context by default.
|
||||
if needModuleContextPtr {
|
||||
argBegin++
|
||||
}
|
||||
|
||||
abi := &backend.FunctionABI{}
|
||||
abi.Init(sig, intParamResultRegs, floatParamResultRegs)
|
||||
m.currentABI = abi
|
||||
|
||||
cur := m.allocateInstr()
|
||||
cur.asNop0()
|
||||
exct.RootInstr = cur
|
||||
|
||||
// Execution context is always the first argument.
|
||||
execCtrPtr := x0VReg
|
||||
|
||||
// In the following, we create the following stack layout:
|
||||
//
|
||||
// (high address)
|
||||
// SP ------> +-----------------+ <----+
|
||||
// | ....... | |
|
||||
// | ret Y | |
|
||||
// | ....... | |
|
||||
// | ret 0 | |
|
||||
// | arg X | | size_of_arg_ret
|
||||
// | ....... | |
|
||||
// | arg 1 | |
|
||||
// | arg 0 | <----+ <-------- originalArg0Reg
|
||||
// | size_of_arg_ret |
|
||||
// | ReturnAddress |
|
||||
// +-----------------+ <----+
|
||||
// | xxxx | | ;; might be padded to make it 16-byte aligned.
|
||||
// +--->| arg[N]/ret[M] | |
|
||||
// sliceSize| | ............ | | goCallStackSize
|
||||
// | | arg[1]/ret[1] | |
|
||||
// +--->| arg[0]/ret[0] | <----+ <-------- arg0ret0AddrReg
|
||||
// | sliceSize |
|
||||
// | frame_size |
|
||||
// +-----------------+
|
||||
// (low address)
|
||||
//
|
||||
// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
|
||||
// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
|
||||
// the arguments/return values.
|
||||
|
||||
// First of all, update the SP and create the "ReturnAddress + size_of_arg_ret" slot.
|
||||
cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)
|
||||
|
||||
const frameInfoSize = 16 // == frame_size + sliceSize.
|
||||
|
||||
// Next, we should allocate the stack for the Go function call if necessary.
|
||||
goCallStackSize, sliceSizeInBytes := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
|
||||
cur = m.insertStackBoundsCheck(goCallStackSize+frameInfoSize, cur)
|
||||
|
||||
originalArg0Reg := x17VReg // Caller save, so we can use it for whatever we want.
|
||||
if m.currentABI.AlignedArgResultStackSlotSize() > 0 {
|
||||
// At this point, SP points to `ReturnAddress`, so add 16 to get the original arg 0 slot.
|
||||
cur = m.addsAddOrSubStackPointer(cur, originalArg0Reg, frameInfoSize, true)
|
||||
}
|
||||
|
||||
// Save the callee saved registers.
|
||||
cur = m.saveRegistersInExecutionContext(cur, calleeSavedRegistersSorted)
|
||||
|
||||
if needModuleContextPtr {
|
||||
offset := wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.I64()
|
||||
if !offsetFitsInAddressModeKindRegUnsignedImm12(64, offset) {
|
||||
panic("BUG: too large or un-aligned offset for goFunctionCallCalleeModuleContextOpaque in execution context")
|
||||
}
|
||||
|
||||
// Module context is always the second argument.
|
||||
moduleCtrPtr := x1VReg
|
||||
store := m.allocateInstr()
|
||||
amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
|
||||
store.asStore(operandNR(moduleCtrPtr), amode, 64)
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
|
||||
// Advances the stack pointer.
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, goCallStackSize, false)
|
||||
|
||||
// Copy the pointer to x15VReg.
|
||||
arg0ret0AddrReg := x15VReg // Caller save, so we can use it for whatever we want.
|
||||
copySp := m.allocateInstr()
|
||||
copySp.asMove64(arg0ret0AddrReg, spVReg)
|
||||
cur = linkInstr(cur, copySp)
|
||||
|
||||
// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
|
||||
for i := range abi.Args[argBegin:] {
|
||||
arg := &abi.Args[argBegin+i]
|
||||
store := m.allocateInstr()
|
||||
var v regalloc.VReg
|
||||
if arg.Kind == backend.ABIArgKindReg {
|
||||
v = arg.Reg
|
||||
} else {
|
||||
cur, v = m.goFunctionCallLoadStackArg(cur, originalArg0Reg, arg,
|
||||
// Caller save, so we can use it for whatever we want.
|
||||
x11VReg, v11VReg)
|
||||
}
|
||||
|
||||
var sizeInBits byte
|
||||
if arg.Type == ssa.TypeV128 {
|
||||
sizeInBits = 128
|
||||
} else {
|
||||
sizeInBits = 64
|
||||
}
|
||||
store.asStore(operandNR(v),
|
||||
addressMode{
|
||||
kind: addressModeKindPostIndex,
|
||||
rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8),
|
||||
}, sizeInBits)
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
|
||||
// Finally, now that we've advanced SP to arg[0]/ret[0], we allocate `frame_size + sliceSize`.
|
||||
var frameSizeReg, sliceSizeReg regalloc.VReg
|
||||
if goCallStackSize > 0 {
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, goCallStackSize)
|
||||
frameSizeReg = tmpRegVReg
|
||||
cur = m.lowerConstantI64AndInsert(cur, x16VReg, sliceSizeInBytes/8)
|
||||
sliceSizeReg = x16VReg
|
||||
} else {
|
||||
frameSizeReg = xzrVReg
|
||||
sliceSizeReg = xzrVReg
|
||||
}
|
||||
_amode := addressModePreOrPostIndex(spVReg, -16, true)
|
||||
storeP := m.allocateInstr()
|
||||
storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)
|
||||
cur = linkInstr(cur, storeP)
|
||||
|
||||
// Set the exit status on the execution context.
|
||||
cur = m.setExitCode(cur, x0VReg, exitCode)
|
||||
|
||||
// Save the current stack pointer.
|
||||
cur = m.saveCurrentStackPointer(cur, x0VReg)
|
||||
|
||||
// Exit the execution.
|
||||
cur = m.storeReturnAddressAndExit(cur)
|
||||
|
||||
// After the call, we need to restore the callee saved registers.
|
||||
cur = m.restoreRegistersInExecutionContext(cur, calleeSavedRegistersSorted)
|
||||
|
||||
// Get the pointer to the arg[0]/ret[0]: We need to skip `frame_size + sliceSize`.
|
||||
if len(abi.Rets) > 0 {
|
||||
cur = m.addsAddOrSubStackPointer(cur, arg0ret0AddrReg, frameInfoSize, true)
|
||||
}
|
||||
|
||||
// Advances the SP so that it points to `ReturnAddress`.
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)
|
||||
ldr := m.allocateInstr()
|
||||
// And load the return address.
|
||||
ldr.asULoad(operandNR(lrVReg),
|
||||
addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
|
||||
cur = linkInstr(cur, ldr)
|
||||
|
||||
originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want.
|
||||
if m.currentABI.RetStackSize > 0 {
|
||||
cur = m.addsAddOrSubStackPointer(cur, originalRet0Reg, m.currentABI.ArgStackSize, true)
|
||||
}
|
||||
|
||||
// Make the SP point to the original address (above the result slot).
|
||||
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
|
||||
}
|
||||
|
||||
for i := range abi.Rets {
|
||||
r := &abi.Rets[i]
|
||||
if r.Kind == backend.ABIArgKindReg {
|
||||
loadIntoReg := m.allocateInstr()
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
|
||||
switch r.Type {
|
||||
case ssa.TypeI32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoReg.asULoad(operandNR(r.Reg), mode, 32)
|
||||
case ssa.TypeI64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoReg.asULoad(operandNR(r.Reg), mode, 64)
|
||||
case ssa.TypeF32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32)
|
||||
case ssa.TypeF64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64)
|
||||
case ssa.TypeV128:
|
||||
mode.imm = 16
|
||||
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128)
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
cur = linkInstr(cur, loadIntoReg)
|
||||
} else {
|
||||
// First we need to load the value to a temporary just like ^^.
|
||||
intTmp, floatTmp := x11VReg, v11VReg
|
||||
loadIntoTmpReg := m.allocateInstr()
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
|
||||
var resultReg regalloc.VReg
|
||||
switch r.Type {
|
||||
case ssa.TypeI32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32)
|
||||
resultReg = intTmp
|
||||
case ssa.TypeI64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64)
|
||||
resultReg = intTmp
|
||||
case ssa.TypeF32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32)
|
||||
resultReg = floatTmp
|
||||
case ssa.TypeF64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64)
|
||||
resultReg = floatTmp
|
||||
case ssa.TypeV128:
|
||||
mode.imm = 16
|
||||
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128)
|
||||
resultReg = floatTmp
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
cur = linkInstr(cur, loadIntoTmpReg)
|
||||
cur = m.goFunctionCallStoreStackResult(cur, originalRet0Reg, r, resultReg)
|
||||
}
|
||||
}
|
||||
|
||||
ret := m.allocateInstr()
|
||||
ret.asRet()
|
||||
linkInstr(cur, ret)
|
||||
|
||||
m.encode(m.executableContext.RootInstr)
|
||||
return m.compiler.Buf()
|
||||
}
|
||||
|
||||
func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
|
||||
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
|
||||
for _, v := range regs {
|
||||
store := m.allocateInstr()
|
||||
var sizeInBits byte
|
||||
switch v.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
sizeInBits = 64
|
||||
case regalloc.RegTypeFloat:
|
||||
sizeInBits = 128
|
||||
}
|
||||
store.asStore(operandNR(v),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
// Execution context is always the first argument.
|
||||
rn: x0VReg, imm: offset,
|
||||
}, sizeInBits)
|
||||
store.prev = cur
|
||||
cur.next = store
|
||||
cur = store
|
||||
offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally store regs at the offset of multiple of 16.
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
|
||||
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
|
||||
for _, v := range regs {
|
||||
load := m.allocateInstr()
|
||||
var as func(dst operand, amode addressMode, sizeInBits byte)
|
||||
var sizeInBits byte
|
||||
switch v.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
as = load.asULoad
|
||||
sizeInBits = 64
|
||||
case regalloc.RegTypeFloat:
|
||||
as = load.asFpuLoad
|
||||
sizeInBits = 128
|
||||
}
|
||||
as(operandNR(v),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
// Execution context is always the first argument.
|
||||
rn: x0VReg, imm: offset,
|
||||
}, sizeInBits)
|
||||
cur = linkInstr(cur, load)
|
||||
offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16.
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction {
|
||||
exct := m.executableContext
|
||||
exct.PendingInstructions = exct.PendingInstructions[:0]
|
||||
m.lowerConstantI64(dst, v)
|
||||
for _, instr := range exct.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction {
|
||||
exct := m.executableContext
|
||||
exct.PendingInstructions = exct.PendingInstructions[:0]
|
||||
m.lowerConstantI32(dst, v)
|
||||
for _, instr := range exct.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode wazevoapi.ExitCode) *instruction {
|
||||
constReg := x17VReg // caller-saved, so we can use it.
|
||||
cur = m.lowerConstantI32AndInsert(cur, constReg, int32(exitCode))
|
||||
|
||||
// Set the exit status on the execution context.
|
||||
setExistStatus := m.allocateInstr()
|
||||
setExistStatus.asStore(operandNR(constReg),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
|
||||
}, 32)
|
||||
cur = linkInstr(cur, setExistStatus)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {
|
||||
// Read the return address into tmp, and store it in the execution context.
|
||||
adr := m.allocateInstr()
|
||||
adr.asAdr(tmpRegVReg, exitSequenceSize+8)
|
||||
cur = linkInstr(cur, adr)
|
||||
|
||||
storeReturnAddr := m.allocateInstr()
|
||||
storeReturnAddr.asStore(operandNR(tmpRegVReg),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
// Execution context is always the first argument.
|
||||
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
|
||||
}, 64)
|
||||
cur = linkInstr(cur, storeReturnAddr)
|
||||
|
||||
// Exit the execution.
|
||||
trapSeq := m.allocateInstr()
|
||||
trapSeq.asExitSequence(x0VReg)
|
||||
cur = linkInstr(cur, trapSeq)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VReg) *instruction {
|
||||
// Save the current stack pointer:
|
||||
// mov tmp, sp,
|
||||
// str tmp, [exec_ctx, #stackPointerBeforeGoCall]
|
||||
movSp := m.allocateInstr()
|
||||
movSp.asMove64(tmpRegVReg, spVReg)
|
||||
cur = linkInstr(cur, movSp)
|
||||
|
||||
strSp := m.allocateInstr()
|
||||
strSp.asStore(operandNR(tmpRegVReg),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
|
||||
}, 64)
|
||||
cur = linkInstr(cur, strSp)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {
|
||||
load := m.allocateInstr()
|
||||
var result regalloc.VReg
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
|
||||
switch arg.Type {
|
||||
case ssa.TypeI32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
load.asULoad(operandNR(intVReg), mode, 32)
|
||||
result = intVReg
|
||||
case ssa.TypeI64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
load.asULoad(operandNR(intVReg), mode, 64)
|
||||
result = intVReg
|
||||
case ssa.TypeF32:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
load.asFpuLoad(operandNR(floatVReg), mode, 32)
|
||||
result = floatVReg
|
||||
case ssa.TypeF64:
|
||||
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
|
||||
load.asFpuLoad(operandNR(floatVReg), mode, 64)
|
||||
result = floatVReg
|
||||
case ssa.TypeV128:
|
||||
mode.imm = 16
|
||||
load.asFpuLoad(operandNR(floatVReg), mode, 128)
|
||||
result = floatVReg
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
|
||||
cur = linkInstr(cur, load)
|
||||
return cur, result
|
||||
}
|
||||
|
||||
func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {
|
||||
store := m.allocateInstr()
|
||||
mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
|
||||
var sizeInBits byte
|
||||
switch result.Type {
|
||||
case ssa.TypeI32, ssa.TypeF32:
|
||||
mode.imm = 8
|
||||
sizeInBits = 32
|
||||
case ssa.TypeI64, ssa.TypeF64:
|
||||
mode.imm = 8
|
||||
sizeInBits = 64
|
||||
case ssa.TypeV128:
|
||||
mode.imm = 16
|
||||
sizeInBits = 128
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
store.asStore(operandNR(resultVReg), mode, sizeInBits)
|
||||
return linkInstr(cur, store)
|
||||
}
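GoFunctionCallRequiredStackSize, called above, has to reserve room for whichever of the argument list or the result list is larger, in 8-byte slots (16 for v128), padded to 16-byte alignment. A standalone sketch of that sizing rule under those assumptions; the real helper lives in the backend package and may differ in detail:

package main

import "fmt"

// requiredSize sketches the sizing rule: every value takes one 8-byte slot
// except v128 which takes two, the arg and result regions share the same
// space, and the total is padded to 16-byte alignment.
func requiredSize(argSlots, retSlots []int64) int64 {
	sum := func(slots []int64) (n int64) {
		for _, s := range slots {
			n += s
		}
		return
	}
	size := sum(argSlots)
	if r := sum(retSlots); r > size {
		size = r
	}
	return (size + 15) &^ 15
}

func main() {
	// Three i64 args (8 bytes each), one v128 result (16 bytes).
	fmt.Println(requiredSize([]int64{8, 8, 8}, []int64{16})) // 32
}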
215
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/cond.go
generated
vendored
Normal file
@@ -0,0 +1,215 @@
package arm64
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
type (
|
||||
cond uint64
|
||||
condKind byte
|
||||
)
|
||||
|
||||
const (
|
||||
// condKindRegisterZero represents a condition which checks if the register is zero.
|
||||
// This indicates that the instruction must be encoded as CBZ:
|
||||
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero-
|
||||
condKindRegisterZero condKind = iota
|
||||
// condKindRegisterNotZero indicates that the instruction must be encoded as CBNZ:
|
||||
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero-
|
||||
condKindRegisterNotZero
|
||||
// condKindCondFlagSet indicates that the instruction must be encoded as B.cond:
|
||||
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-
|
||||
condKindCondFlagSet
|
||||
)
|
||||
|
||||
// kind returns the kind of condition which is stored in the first two bits.
|
||||
func (c cond) kind() condKind {
|
||||
return condKind(c & 0b11)
|
||||
}
|
||||
|
||||
func (c cond) asUint64() uint64 {
|
||||
return uint64(c)
|
||||
}
|
||||
|
||||
// register returns the register for register conditions.
|
||||
// This panics if the condition is not a register condition (condKindRegisterZero or condKindRegisterNotZero).
|
||||
func (c cond) register() regalloc.VReg {
|
||||
if c.kind() != condKindRegisterZero && c.kind() != condKindRegisterNotZero {
|
||||
panic("condition is not a register")
|
||||
}
|
||||
return regalloc.VReg(c >> 2)
|
||||
}
|
||||
|
||||
func registerAsRegZeroCond(r regalloc.VReg) cond {
|
||||
return cond(r)<<2 | cond(condKindRegisterZero)
|
||||
}
|
||||
|
||||
func registerAsRegNotZeroCond(r regalloc.VReg) cond {
|
||||
return cond(r)<<2 | cond(condKindRegisterNotZero)
|
||||
}
|
||||
|
||||
func (c cond) flag() condFlag {
|
||||
if c.kind() != condKindCondFlagSet {
|
||||
panic("condition is not a flag")
|
||||
}
|
||||
return condFlag(c >> 2)
|
||||
}
|
||||
|
||||
func (c condFlag) asCond() cond {
|
||||
return cond(c)<<2 | cond(condKindCondFlagSet)
|
||||
}
|
||||
|
||||
// condFlag represents a condition flag for conditional branches.
|
||||
// The value matches the encoding of condition flags in the ARM64 instruction set.
|
||||
// https://developer.arm.com/documentation/den0024/a/The-A64-instruction-set/Data-processing-instructions/Conditional-instructions
|
||||
type condFlag uint8
|
||||
|
||||
const (
|
||||
eq condFlag = iota // eq represents "equal"
|
||||
ne // ne represents "not equal"
|
||||
hs // hs represents "higher or same"
|
||||
lo // lo represents "lower"
|
||||
mi // mi represents "minus or negative result"
|
||||
pl // pl represents "plus or positive result"
|
||||
vs // vs represents "overflow set"
|
||||
vc // vc represents "overflow clear"
|
||||
hi // hi represents "higher"
|
||||
ls // ls represents "lower or same"
|
||||
ge // ge represents "greater or equal"
|
||||
lt // lt represents "less than"
|
||||
gt // gt represents "greater than"
|
||||
le // le represents "less than or equal"
|
||||
al // al represents "always"
|
||||
nv // nv represents "never"
|
||||
)
|
||||
|
||||
// invert returns the inverted condition.
|
||||
func (c condFlag) invert() condFlag {
|
||||
switch c {
|
||||
case eq:
|
||||
return ne
|
||||
case ne:
|
||||
return eq
|
||||
case hs:
|
||||
return lo
|
||||
case lo:
|
||||
return hs
|
||||
case mi:
|
||||
return pl
|
||||
case pl:
|
||||
return mi
|
||||
case vs:
|
||||
return vc
|
||||
case vc:
|
||||
return vs
|
||||
case hi:
|
||||
return ls
|
||||
case ls:
|
||||
return hi
|
||||
case ge:
|
||||
return lt
|
||||
case lt:
|
||||
return ge
|
||||
case gt:
|
||||
return le
|
||||
case le:
|
||||
return gt
|
||||
case al:
|
||||
return nv
|
||||
case nv:
|
||||
return al
|
||||
default:
|
||||
panic(c)
|
||||
}
|
||||
}
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
func (c condFlag) String() string {
|
||||
switch c {
|
||||
case eq:
|
||||
return "eq"
|
||||
case ne:
|
||||
return "ne"
|
||||
case hs:
|
||||
return "hs"
|
||||
case lo:
|
||||
return "lo"
|
||||
case mi:
|
||||
return "mi"
|
||||
case pl:
|
||||
return "pl"
|
||||
case vs:
|
||||
return "vs"
|
||||
case vc:
|
||||
return "vc"
|
||||
case hi:
|
||||
return "hi"
|
||||
case ls:
|
||||
return "ls"
|
||||
case ge:
|
||||
return "ge"
|
||||
case lt:
|
||||
return "lt"
|
||||
case gt:
|
||||
return "gt"
|
||||
case le:
|
||||
return "le"
|
||||
case al:
|
||||
return "al"
|
||||
case nv:
|
||||
return "nv"
|
||||
default:
|
||||
panic(strconv.Itoa(int(c)))
|
||||
}
|
||||
}
|
||||
|
||||
// condFlagFromSSAIntegerCmpCond returns the condition flag for the given ssa.IntegerCmpCond.
|
||||
func condFlagFromSSAIntegerCmpCond(c ssa.IntegerCmpCond) condFlag {
|
||||
switch c {
|
||||
case ssa.IntegerCmpCondEqual:
|
||||
return eq
|
||||
case ssa.IntegerCmpCondNotEqual:
|
||||
return ne
|
||||
case ssa.IntegerCmpCondSignedLessThan:
|
||||
return lt
|
||||
case ssa.IntegerCmpCondSignedGreaterThanOrEqual:
|
||||
return ge
|
||||
case ssa.IntegerCmpCondSignedGreaterThan:
|
||||
return gt
|
||||
case ssa.IntegerCmpCondSignedLessThanOrEqual:
|
||||
return le
|
||||
case ssa.IntegerCmpCondUnsignedLessThan:
|
||||
return lo
|
||||
case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual:
|
||||
return hs
|
||||
case ssa.IntegerCmpCondUnsignedGreaterThan:
|
||||
return hi
|
||||
case ssa.IntegerCmpCondUnsignedLessThanOrEqual:
|
||||
return ls
|
||||
default:
|
||||
panic(c)
|
||||
}
|
||||
}
|
||||
|
||||
// condFlagFromSSAFloatCmpCond returns the condition flag for the given ssa.FloatCmpCond.
|
||||
func condFlagFromSSAFloatCmpCond(c ssa.FloatCmpCond) condFlag {
|
||||
switch c {
|
||||
case ssa.FloatCmpCondEqual:
|
||||
return eq
|
||||
case ssa.FloatCmpCondNotEqual:
|
||||
return ne
|
||||
case ssa.FloatCmpCondLessThan:
|
||||
return mi
|
||||
case ssa.FloatCmpCondLessThanOrEqual:
|
||||
return ls
|
||||
case ssa.FloatCmpCondGreaterThan:
|
||||
return gt
|
||||
case ssa.FloatCmpCondGreaterThanOrEqual:
|
||||
return ge
|
||||
default:
|
||||
panic(c)
|
||||
}
|
||||
}
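cond above packs its kind into the low two bits and the payload (a register or a condition flag) into the remaining bits, which is why kind() masks with 0b11 and the constructors shift by 2. A tiny round-trip check of the same shift/mask layout, using plain integers rather than the wazero types:

package main

import "fmt"

const (
	kindRegisterZero    = 0
	kindRegisterNotZero = 1
	kindCondFlagSet     = 2
)

// encode/decode mirror the cond layout above: payload<<2 | kind.
func encode(payload, kind uint64) uint64 { return payload<<2 | kind }
func kindOf(c uint64) uint64             { return c & 0b11 }
func payloadOf(c uint64) uint64          { return c >> 2 }

func main() {
	c := encode(30, kindRegisterNotZero) // e.g. "branch if register #30 is non-zero"
	fmt.Println(kindOf(c) == kindRegisterNotZero, payloadOf(c)) // true 30
}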
2545
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
generated
vendored
Normal file
File diff suppressed because it is too large
2351
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
generated
vendored
Normal file
File diff suppressed because it is too large
301
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
generated
vendored
Normal file
301
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
generated
vendored
Normal file
|
|
@ -0,0 +1,301 @@
|
|||
package arm64
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
|
||||
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
|
||||
val := instr.Return()
|
||||
valType := val.Type()
|
||||
|
||||
vr = m.compiler.AllocateVReg(valType)
|
||||
v := instr.ConstantVal()
|
||||
m.insertLoadConstant(v, valType, vr)
|
||||
return
|
||||
}
|
||||
|
||||
// InsertLoadConstantBlockArg implements backend.Machine.
|
||||
func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
|
||||
val := instr.Return()
|
||||
valType := val.Type()
|
||||
v := instr.ConstantVal()
|
||||
load := m.allocateInstr()
|
||||
load.asLoadConstBlockArg(v, valType, vr)
|
||||
m.insert(load)
|
||||
}
|
||||
|
||||
func (m *machine) lowerLoadConstantBlockArgAfterRegAlloc(i *instruction) {
|
||||
v, typ, dst := i.loadConstBlockArgData()
|
||||
m.insertLoadConstant(v, typ, dst)
|
||||
}
|
||||
|
||||
func (m *machine) insertLoadConstant(v uint64, valType ssa.Type, vr regalloc.VReg) {
|
||||
if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
|
||||
v = v & ((1 << valType.Bits()) - 1)
|
||||
}
|
||||
|
||||
switch valType {
|
||||
case ssa.TypeF32:
|
||||
loadF := m.allocateInstr()
|
||||
loadF.asLoadFpuConst32(vr, v)
|
||||
m.insert(loadF)
|
||||
case ssa.TypeF64:
|
||||
loadF := m.allocateInstr()
|
||||
loadF.asLoadFpuConst64(vr, v)
|
||||
m.insert(loadF)
|
||||
case ssa.TypeI32:
|
||||
if v == 0 {
|
||||
m.InsertMove(vr, xzrVReg, ssa.TypeI32)
|
||||
} else {
|
||||
m.lowerConstantI32(vr, int32(v))
|
||||
}
|
||||
case ssa.TypeI64:
|
||||
if v == 0 {
|
||||
m.InsertMove(vr, xzrVReg, ssa.TypeI64)
|
||||
} else {
|
||||
m.lowerConstantI64(vr, int64(v))
|
||||
}
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
}
|
||||
|
||||
// The following logic is based on the old asm/arm64 package.
|
||||
// https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go
|
||||
|
||||
func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) {
|
||||
// Following the logic here:
|
||||
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
|
||||
ic := int64(uint32(c))
|
||||
if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
|
||||
if isBitMaskImmediate(uint64(c), false) {
|
||||
m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if t := const16bitAligned(int64(uint32(c))); t >= 0 {
|
||||
// If the const fits within a single 16-bit aligned chunk, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
// we can load it with a single MOVZ.
|
||||
m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false)
|
||||
} else if t := const16bitAligned(int64(^c)); t >= 0 {
|
||||
// Also, if the bitwise inverse of the const fits within a single 16-bit aligned chunk, do the same with MOVN.
|
||||
m.insertMOVN(dst, uint64(^c>>(16*t)), t, false)
|
||||
} else if isBitMaskImmediate(uint64(uint32(c)), false) {
|
||||
m.lowerConstViaBitMaskImmediate(uint64(c), dst, false)
|
||||
} else {
|
||||
// Otherwise, we use MOVZ and MOVK to load it.
|
||||
c16 := uint16(c)
|
||||
m.insertMOVZ(dst, uint64(c16), 0, false)
|
||||
c16 = uint16(uint32(c) >> 16)
|
||||
m.insertMOVK(dst, uint64(c16), 1, false)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) {
|
||||
// Following the logic here:
|
||||
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
|
||||
if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
|
||||
if isBitMaskImmediate(uint64(c), true) {
|
||||
m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if t := const16bitAligned(c); t >= 0 {
|
||||
// If the const fits within a single 16-bit aligned chunk, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
// we can load it with a single MOVZ.
|
||||
m.insertMOVZ(dst, uint64(c)>>(16*t), t, true)
|
||||
} else if t := const16bitAligned(^c); t >= 0 {
|
||||
// Also, if the bitwise inverse of the const fits within a single 16-bit aligned chunk, do the same with MOVN.
|
||||
m.insertMOVN(dst, uint64(^c)>>(16*t), t, true)
|
||||
} else if isBitMaskImmediate(uint64(c), true) {
|
||||
m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
|
||||
} else {
|
||||
m.load64bitConst(c, dst)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) {
|
||||
instr := m.allocateInstr()
|
||||
instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64)
|
||||
m.insert(instr)
|
||||
}
|
||||
|
||||
// isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
|
||||
//
|
||||
// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
|
||||
// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
|
||||
//
|
||||
// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
|
||||
func isBitMaskImmediate(x uint64, _64 bool) bool {
|
||||
// All zeros and ones are not "bitmask immediate" by definition.
|
||||
if x == 0 || (_64 && x == 0xffff_ffff_ffff_ffff) || (!_64 && x == 0xffff_ffff) {
|
||||
return false
|
||||
}
|
||||
|
||||
switch {
|
||||
case x != x>>32|x<<32:
|
||||
// e = 64
|
||||
case x != x>>16|x<<48:
|
||||
// e = 32 (x == x>>32|x<<32).
|
||||
// e.g. 0x00ff_ff00_00ff_ff00
|
||||
x = uint64(int32(x))
|
||||
case x != x>>8|x<<56:
|
||||
// e = 16 (x == x>>16|x<<48).
|
||||
// e.g. 0x00ff_00ff_00ff_00ff
|
||||
x = uint64(int16(x))
|
||||
case x != x>>4|x<<60:
|
||||
// e = 8 (x == x>>8|x<<56).
|
||||
// e.g. 0x0f0f_0f0f_0f0f_0f0f
|
||||
x = uint64(int8(x))
|
||||
default:
|
||||
// e = 4 or 2.
|
||||
return true
|
||||
}
|
||||
return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
|
||||
}
|
||||
|
||||
// sequenceOfSetbits returns true if the number's set bits form one contiguous sequence of 1s.
// For example: 0b1110 -> true, 0b1010 -> false
|
||||
func sequenceOfSetbits(x uint64) bool {
|
||||
y := getLowestBit(x)
|
||||
// If x is a sequence of set bits, this results in a number
// with only one set bit (i.e. a power of two).
|
||||
y += x
|
||||
return (y-1)&y == 0
|
||||
}
|
||||
|
||||
func getLowestBit(x uint64) uint64 {
|
||||
return x & (^x + 1)
|
||||
}
|
||||
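For readers unfamiliar with AArch64 bitmask immediates, here is a minimal standalone sketch that mirrors the folding steps of isBitMaskImmediate and the contiguous-run test of sequenceOfSetbits/getLowestBit above, for 64-bit operands only; contiguousOnes and isBitMask64 are names local to this sketch.

package main

import "fmt"

// contiguousOnes reports whether v's set bits form one contiguous run (e.g. 0b0111_1000).
func contiguousOnes(v uint64) bool {
	y := v&(^v+1) + v // add the lowest set bit; a single run becomes a power of two.
	return (y-1)&y == 0
}

// isBitMask64 mirrors the folding steps of isBitMaskImmediate above for 64-bit operands.
func isBitMask64(x uint64) bool {
	if x == 0 || x == 0xffff_ffff_ffff_ffff {
		return false // all-zeros and all-ones are excluded by definition.
	}
	switch {
	case x != x>>32|x<<32: // element size e = 64: nothing to fold.
	case x != x>>16|x<<48: // e = 32: keep a single (sign-extended) element.
		x = uint64(int32(x))
	case x != x>>8|x<<56: // e = 16.
		x = uint64(int16(x))
	case x != x>>4|x<<60: // e = 8.
		x = uint64(int8(x))
	default: // e = 4 or 2 always satisfies the rotated-run rule.
		return true
	}
	return contiguousOnes(x) || contiguousOnes(^x)
}

func main() {
	for _, v := range []uint64{
		0x0000_0000_0000_00ff, // one run of 8 ones: encodable.
		0x00ff_00ff_00ff_00ff, // repeating 16-bit element: encodable.
		0x0000_0000_dead_beef, // arbitrary pattern: not encodable.
	} {
		fmt.Printf("%#x -> %v\n", v, isBitMask64(v))
	}
}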
|
||||
// const16bitAligned checks if the value fits entirely within a single 16-bit aligned chunk.
// If so, it returns the shift amount divided by 16; otherwise it returns -1.
|
||||
func const16bitAligned(v int64) (ret int) {
|
||||
ret = -1
|
||||
for s := 0; s < 64; s += 16 {
|
||||
if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
|
||||
ret = s / 16
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// load64bitConst loads a 64-bit constant into the register, following the same logic to decide how to load large 64-bit
|
||||
// consts as in the Go assembler.
|
||||
//
|
||||
// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
|
||||
func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
|
||||
var bits [4]uint64
|
||||
var zeros, negs int
|
||||
for i := 0; i < 4; i++ {
|
||||
bits[i] = uint64(c) >> uint(i*16) & 0xffff
|
||||
if v := bits[i]; v == 0 {
|
||||
zeros++
|
||||
} else if v == 0xffff {
|
||||
negs++
|
||||
}
|
||||
}
|
||||
|
||||
if zeros == 3 {
|
||||
// one MOVZ instruction.
|
||||
for i, v := range bits {
|
||||
if v != 0 {
|
||||
m.insertMOVZ(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
} else if negs == 3 {
|
||||
// one MOVN instruction.
|
||||
for i, v := range bits {
|
||||
if v != 0xffff {
|
||||
v = ^v
|
||||
m.insertMOVN(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
} else if zeros == 2 {
|
||||
// one MOVZ then one MOVK.
|
||||
var movz bool
|
||||
for i, v := range bits {
|
||||
if !movz && v != 0 { // MOVZ.
|
||||
m.insertMOVZ(dst, v, i, true)
|
||||
movz = true
|
||||
} else if v != 0 {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
|
||||
} else if negs == 2 {
|
||||
// one MOVN then one MOVK.
|
||||
var movn bool
|
||||
for i, v := range bits { // Emit MOVN.
|
||||
if !movn && v != 0xffff {
|
||||
v = ^v
|
||||
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
|
||||
m.insertMOVN(dst, v, i, true)
|
||||
movn = true
|
||||
} else if v != 0xffff {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
|
||||
} else if zeros == 1 {
|
||||
// one MOVZ then two MOVK.
|
||||
var movz bool
|
||||
for i, v := range bits {
|
||||
if !movz && v != 0 { // MOVZ.
|
||||
m.insertMOVZ(dst, v, i, true)
|
||||
movz = true
|
||||
} else if v != 0 {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
|
||||
} else if negs == 1 {
|
||||
// one MOVN then two MOVK.
|
||||
var movn bool
|
||||
for i, v := range bits { // Emit MOVN.
|
||||
if !movn && v != 0xffff {
|
||||
v = ^v
|
||||
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
|
||||
m.insertMOVN(dst, v, i, true)
|
||||
movn = true
|
||||
} else if v != 0xffff {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// one MOVZ then up to three MOVK.
|
||||
var movz bool
|
||||
for i, v := range bits {
|
||||
if !movz && v != 0 { // MOVZ.
|
||||
m.insertMOVZ(dst, v, i, true)
|
||||
movz = true
|
||||
} else if v != 0 {
|
||||
m.insertMOVK(dst, v, i, true)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
|
||||
instr := m.allocateInstr()
|
||||
instr.asMOVZ(dst, v, uint64(shift), dst64)
|
||||
m.insert(instr)
|
||||
}
|
||||
|
||||
func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
|
||||
instr := m.allocateInstr()
|
||||
instr.asMOVK(dst, v, uint64(shift), dst64)
|
||||
m.insert(instr)
|
||||
}
|
||||
|
||||
func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
|
||||
instr := m.allocateInstr()
|
||||
instr.asMOVN(dst, v, uint64(shift), dst64)
|
||||
m.insert(instr)
|
||||
}
|
||||
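To make the halfword bookkeeping in load64bitConst above easier to follow, the sketch below prints one possible MOVZ/MOVN/MOVK sequence for a constant. It collapses the backend's five cases into two plans (MOVZ-first vs MOVN-first), so treat it as a simplified illustration rather than the exact emission logic.

package main

import "fmt"

// plan prints a MOVZ/MOVN/MOVK sequence that materializes c in x0.
func plan(c uint64) {
	var bits [4]uint64
	var zeros, negs int
	for i := 0; i < 4; i++ {
		bits[i] = c >> (16 * i) & 0xffff
		switch bits[i] {
		case 0:
			zeros++
		case 0xffff:
			negs++
		}
	}

	useMOVN := negs > zeros // mostly all-ones halfwords: start from ^0 via MOVN.
	skip := uint64(0)
	if useMOVN {
		skip = 0xffff
	}

	first := true
	for i, v := range bits {
		if v == skip {
			continue // this halfword already has the right value after the first instruction.
		}
		op := "movk"
		if first {
			first = false
			if useMOVN {
				op, v = "movn", ^v&0xffff // MOVN writes the bitwise NOT of its (shifted) immediate.
			} else {
				op = "movz"
			}
		}
		fmt.Printf("%s x0, #%#x, lsl #%d\n", op, v, 16*i)
	}
	if first { // c is 0 (or all-ones under the MOVN plan): still emit one instruction.
		if useMOVN {
			fmt.Println("movn x0, #0x0")
		} else {
			fmt.Println("movz x0, #0x0")
		}
	}
}

func main() {
	plan(0x0000_1234_0000_5678) // -> movz #0x5678, then movk #0x1234, lsl #32.
	plan(0xffff_ffff_ffff_89ab) // -> a single movn #0x7654, lsl #0.
}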
2221 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go generated vendored Normal file (diff suppressed because it is too large)
350 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go generated vendored Normal file
@@ -0,0 +1,350 @@
package arm64
|
||||
|
||||
// This file contains the logic to "find and determine operands" for instructions.
|
||||
// In order to finalize the form of an operand, we might end up merging/eliminating
|
||||
// the source instructions into an operand whenever possible.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
type (
|
||||
// operand represents an operand of an instruction whose type is determined by the kind.
|
||||
operand struct {
|
||||
kind operandKind
|
||||
data, data2 uint64
|
||||
}
|
||||
operandKind byte
|
||||
)
|
||||
|
||||
// Here's the list of operand kinds. We use the abbreviation of the kind name not only for these consts,
|
||||
// but also names of functions which return the operand of the kind.
|
||||
const (
|
||||
// operandKindNR represents "NormalRegister" (NR). This is literally the register without any special operation unlike others.
|
||||
operandKindNR operandKind = iota
|
||||
// operandKindSR represents "Shifted Register" (SR). This is a register which is shifted by a constant.
|
||||
// Some of the arm64 instructions can take this kind of operand.
|
||||
operandKindSR
|
||||
// operandKindER represents "Extended Register" (ER). This is a register which is sign/zero-extended to a larger size.
|
||||
// Some of the arm64 instructions can take this kind of operand.
|
||||
operandKindER
|
||||
// operandKindImm12 represents "Immediate 12" (Imm12). This is a 12-bit immediate value which can be either shifted by 12 or not.
|
||||
// See asImm12 function for detail.
|
||||
operandKindImm12
|
||||
// operandKindShiftImm represents "Shifted Immediate" (ShiftImm) used by shift operations.
|
||||
operandKindShiftImm
|
||||
)
|
||||
|
||||
// format returns a string representation of the operand, with registers printed at the given size, for debugging.
|
||||
func (o operand) format(size byte) string {
|
||||
switch o.kind {
|
||||
case operandKindNR:
|
||||
return formatVRegSized(o.nr(), size)
|
||||
case operandKindSR:
|
||||
r, amt, sop := o.sr()
|
||||
return fmt.Sprintf("%s, %s #%d", formatVRegSized(r, size), sop, amt)
|
||||
case operandKindER:
|
||||
r, eop, _ := o.er()
|
||||
return fmt.Sprintf("%s %s", formatVRegSized(r, size), eop)
|
||||
case operandKindImm12:
|
||||
imm12, shiftBit := o.imm12()
|
||||
if shiftBit == 1 {
|
||||
return fmt.Sprintf("#%#x", uint64(imm12)<<12)
|
||||
} else {
|
||||
return fmt.Sprintf("#%#x", imm12)
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown operand kind: %d", o.kind))
|
||||
}
|
||||
}
|
||||
|
||||
// operandNR encodes the given VReg as an operand of operandKindNR.
|
||||
func operandNR(r regalloc.VReg) operand {
|
||||
return operand{kind: operandKindNR, data: uint64(r)}
|
||||
}
|
||||
|
||||
// nr decodes the underlying VReg assuming the operand is of operandKindNR.
|
||||
func (o operand) nr() regalloc.VReg {
|
||||
return regalloc.VReg(o.data)
|
||||
}
|
||||
|
||||
// operandER encodes the given VReg as an operand of operandKindER.
|
||||
func operandER(r regalloc.VReg, eop extendOp, to byte) operand {
|
||||
if to < 32 {
|
||||
panic("TODO?BUG?: when we need to extend to less than 32 bits?")
|
||||
}
|
||||
return operand{kind: operandKindER, data: uint64(r), data2: uint64(eop)<<32 | uint64(to)}
|
||||
}
|
||||
|
||||
// er decodes the underlying VReg, extend operation, and the target size assuming the operand is of operandKindER.
|
||||
func (o operand) er() (r regalloc.VReg, eop extendOp, to byte) {
|
||||
return regalloc.VReg(o.data), extendOp(o.data2>>32) & 0xff, byte(o.data2 & 0xff)
|
||||
}
|
||||
|
||||
// operandSR encodes the given VReg as an operand of operandKindSR.
|
||||
func operandSR(r regalloc.VReg, amt byte, sop shiftOp) operand {
|
||||
return operand{kind: operandKindSR, data: uint64(r), data2: uint64(amt)<<32 | uint64(sop)}
|
||||
}
|
||||
|
||||
// sr decodes the underlying VReg, shift amount, and shift operation assuming the operand is of operandKindSR.
|
||||
func (o operand) sr() (r regalloc.VReg, amt byte, sop shiftOp) {
|
||||
return regalloc.VReg(o.data), byte(o.data2>>32) & 0xff, shiftOp(o.data2) & 0xff
|
||||
}
|
||||
|
||||
// operandImm12 encodes the given imm12 as an operand of operandKindImm12.
|
||||
func operandImm12(imm12 uint16, shiftBit byte) operand {
|
||||
return operand{kind: operandKindImm12, data: uint64(imm12) | uint64(shiftBit)<<32}
|
||||
}
|
||||
|
||||
// imm12 decodes the underlying imm12 data assuming the operand is of operandKindImm12.
|
||||
func (o operand) imm12() (v uint16, shiftBit byte) {
|
||||
return uint16(o.data), byte(o.data >> 32)
|
||||
}
|
||||
|
||||
// operandShiftImm encodes the given amount as an operand of operandKindShiftImm.
|
||||
func operandShiftImm(amount byte) operand {
|
||||
return operand{kind: operandKindShiftImm, data: uint64(amount)}
|
||||
}
|
||||
|
||||
// shiftImm decodes the underlying shift amount data assuming the operand is of operandKindShiftImm.
|
||||
func (o operand) shiftImm() byte {
|
||||
return byte(o.data)
|
||||
}
|
||||
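The operand encoders above pack several fields into data/data2; as a sanity check of that layout, here is a standalone round-trip sketch of the shifted-register packing used by operandSR and sr. packedSR and the value 0 standing in for shiftOpLSL are assumptions local to this sketch.

package main

import "fmt"

// packedSR keeps the shift amount in bits 32..39 of data2 and the shift-op code in the low byte.
type packedSR struct{ data2 uint64 }

func pack(amt, sop byte) packedSR {
	return packedSR{data2: uint64(amt)<<32 | uint64(sop)}
}

func (p packedSR) unpack() (amt, sop byte) {
	// The &0xff mirrors the decode above; the byte conversion already truncates.
	return byte(p.data2>>32) & 0xff, byte(p.data2) & 0xff
}

func main() {
	p := pack(12, 0) // 0 stands in for shiftOpLSL; the real constant lives in the instruction definitions.
	amt, sop := p.unpack()
	fmt.Println(amt, sop) // 12 0: the round trip is lossless because the fields don't overlap.
}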
|
||||
// reg returns the register of the operand if applicable.
|
||||
func (o operand) reg() regalloc.VReg {
|
||||
switch o.kind {
|
||||
case operandKindNR:
|
||||
return o.nr()
|
||||
case operandKindSR:
|
||||
r, _, _ := o.sr()
|
||||
return r
|
||||
case operandKindER:
|
||||
r, _, _ := o.er()
|
||||
return r
|
||||
case operandKindImm12:
|
||||
// Does not have a register.
|
||||
case operandKindShiftImm:
|
||||
// Does not have a register.
|
||||
default:
|
||||
panic(o.kind)
|
||||
}
|
||||
return regalloc.VRegInvalid
|
||||
}
|
||||
|
||||
func (o operand) realReg() regalloc.RealReg {
|
||||
return o.nr().RealReg()
|
||||
}
|
||||
|
||||
func (o operand) assignReg(v regalloc.VReg) operand {
|
||||
switch o.kind {
|
||||
case operandKindNR:
|
||||
return operandNR(v)
|
||||
case operandKindSR:
|
||||
_, amt, sop := o.sr()
|
||||
return operandSR(v, amt, sop)
|
||||
case operandKindER:
|
||||
_, eop, to := o.er()
|
||||
return operandER(v, eop, to)
|
||||
case operandKindImm12:
|
||||
// Does not have a register.
|
||||
case operandKindShiftImm:
|
||||
// Does not have a register.
|
||||
}
|
||||
panic(o.kind)
|
||||
}
|
||||
|
||||
// getOperand_Imm12_ER_SR_NR returns an operand of operandKindImm12, operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
// If the operand can be expressed as operandKindImm12, `mode` is ignored.
|
||||
func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
instr := def.Instr
|
||||
if instr.Opcode() == ssa.OpcodeIconst {
|
||||
if imm12Op, ok := asImm12Operand(instr.ConstantVal()); ok {
|
||||
instr.MarkLowered()
|
||||
return imm12Op
|
||||
}
|
||||
}
|
||||
return m.getOperand_ER_SR_NR(def, mode)
|
||||
}
|
||||
|
||||
// getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value.
|
||||
// If the immediate value is negated, the second return value is true, otherwise always false.
|
||||
func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg), false
|
||||
}
|
||||
|
||||
instr := def.Instr
|
||||
if instr.Opcode() == ssa.OpcodeIconst {
|
||||
c := instr.ConstantVal()
|
||||
if imm12Op, ok := asImm12Operand(c); ok {
|
||||
instr.MarkLowered()
|
||||
return imm12Op, false
|
||||
}
|
||||
|
||||
signExtended := int64(c)
|
||||
if def.SSAValue().Type().Bits() == 32 {
|
||||
signExtended = (signExtended << 32) >> 32
|
||||
}
|
||||
negatedWithoutSign := -signExtended
|
||||
if imm12Op, ok := asImm12Operand(uint64(negatedWithoutSign)); ok {
|
||||
instr.MarkLowered()
|
||||
return imm12Op, true
|
||||
}
|
||||
}
|
||||
return m.getOperand_ER_SR_NR(def, mode), false
|
||||
}
|
||||
|
||||
// getOperand_ER_SR_NR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
|
||||
func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) {
|
||||
extInstr := def.Instr
|
||||
|
||||
signed := extInstr.Opcode() == ssa.OpcodeSExtend
|
||||
innerExtFromBits, innerExtToBits := extInstr.ExtendFromToBits()
|
||||
modeBits, modeSigned := mode.bits(), mode.signed()
|
||||
if mode == extModeNone || innerExtToBits == modeBits {
|
||||
eop := extendOpFrom(signed, innerExtFromBits)
|
||||
extArg := m.getOperand_NR(m.compiler.ValueDefinition(extInstr.Arg()), extModeNone)
|
||||
op = operandER(extArg.nr(), eop, innerExtToBits)
|
||||
extInstr.MarkLowered()
|
||||
return
|
||||
}
|
||||
|
||||
if innerExtToBits > modeBits {
|
||||
panic("BUG?TODO?: need the results of inner extension to be larger than the mode")
|
||||
}
|
||||
|
||||
switch {
|
||||
case (!signed && !modeSigned) || (signed && modeSigned):
|
||||
// Two sign/zero extensions are equivalent to one sign/zero extension for the larger size.
|
||||
eop := extendOpFrom(modeSigned, innerExtFromBits)
|
||||
op = operandER(m.compiler.VRegOf(extInstr.Arg()), eop, modeBits)
|
||||
extInstr.MarkLowered()
|
||||
case (signed && !modeSigned) || (!signed && modeSigned):
|
||||
// We need to {sign, zero}-extend the result of the {zero,sign} extension.
|
||||
eop := extendOpFrom(modeSigned, innerExtToBits)
|
||||
op = operandER(m.compiler.VRegOf(extInstr.Return()), eop, modeBits)
|
||||
// Note that we failed to merge the inner extension instruction this case.
|
||||
}
|
||||
return
|
||||
}
|
||||
return m.getOperand_SR_NR(def, mode)
|
||||
}
|
||||
|
||||
// getOperand_SR_NR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
|
||||
func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
if m.compiler.MatchInstr(def, ssa.OpcodeIshl) {
|
||||
// Check if the shift amount is defined by a constant instruction.
|
||||
targetVal, amountVal := def.Instr.Arg2()
|
||||
targetVReg := m.getOperand_NR(m.compiler.ValueDefinition(targetVal), extModeNone).nr()
|
||||
amountDef := m.compiler.ValueDefinition(amountVal)
|
||||
if amountDef.IsFromInstr() && amountDef.Instr.Constant() {
|
||||
// If that is the case, we can use the shifted register operand (SR).
|
||||
c := byte(amountDef.Instr.ConstantVal()) & (targetVal.Type().Bits() - 1) // Clears the unnecessary bits.
|
||||
def.Instr.MarkLowered()
|
||||
amountDef.Instr.MarkLowered()
|
||||
return operandSR(targetVReg, c, shiftOpLSL)
|
||||
}
|
||||
}
|
||||
return m.getOperand_NR(def, mode)
|
||||
}
|
||||
|
||||
// getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def`).
|
||||
func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) {
|
||||
if def.IsFromBlockParam() {
|
||||
return operandNR(def.BlkParamVReg)
|
||||
}
|
||||
|
||||
instr := def.Instr
|
||||
if instr.Constant() {
|
||||
amount := byte(instr.ConstantVal()) & (shiftBitWidth - 1) // Clears the unnecessary bits.
|
||||
return operandShiftImm(amount)
|
||||
}
|
||||
return m.getOperand_NR(def, mode)
|
||||
}
|
||||
|
||||
// getOperand_NR returns an operand of operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
|
||||
func (m *machine) getOperand_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
|
||||
var v regalloc.VReg
|
||||
if def.IsFromBlockParam() {
|
||||
v = def.BlkParamVReg
|
||||
} else {
|
||||
instr := def.Instr
|
||||
if instr.Constant() {
|
||||
// We inline all the constant instructions so that we could reduce the register usage.
|
||||
v = m.lowerConstant(instr)
|
||||
instr.MarkLowered()
|
||||
} else {
|
||||
if n := def.N; n == 0 {
|
||||
v = m.compiler.VRegOf(instr.Return())
|
||||
} else {
|
||||
_, rs := instr.Returns()
|
||||
v = m.compiler.VRegOf(rs[n-1])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r := v
|
||||
switch inBits := def.SSAValue().Type().Bits(); {
|
||||
case mode == extModeNone:
|
||||
case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32):
|
||||
case inBits == 32 && mode == extModeZeroExtend64:
|
||||
extended := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
ext := m.allocateInstr()
|
||||
ext.asExtend(extended, v, 32, 64, false)
|
||||
m.insert(ext)
|
||||
r = extended
|
||||
case inBits == 32 && mode == extModeSignExtend64:
|
||||
extended := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
ext := m.allocateInstr()
|
||||
ext.asExtend(extended, v, 32, 64, true)
|
||||
m.insert(ext)
|
||||
r = extended
|
||||
case inBits == 64 && (mode == extModeZeroExtend64 || mode == extModeSignExtend64):
|
||||
}
|
||||
return operandNR(r)
|
||||
}
|
||||
|
||||
func asImm12Operand(val uint64) (op operand, ok bool) {
|
||||
v, shiftBit, ok := asImm12(val)
|
||||
if !ok {
|
||||
return operand{}, false
|
||||
}
|
||||
return operandImm12(v, shiftBit), true
|
||||
}
|
||||
|
||||
func asImm12(val uint64) (v uint16, shiftBit byte, ok bool) {
|
||||
const mask1, mask2 uint64 = 0xfff, 0xfff_000
|
||||
if val&^mask1 == 0 {
|
||||
return uint16(val), 0, true
|
||||
} else if val&^mask2 == 0 {
|
||||
return uint16(val >> 12), 1, true
|
||||
} else {
|
||||
return 0, 0, false
|
||||
}
|
||||
}
|
||||
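asImm12 above implements the standard ARM64 ALU-immediate rule: a 12-bit value, optionally shifted left by 12. Here is a standalone sketch of the same check run over a few sample constants (the function name is reused locally for illustration only).

package main

import "fmt"

// asImm12 reports whether val can be encoded as a 12-bit immediate, possibly shifted by 12.
func asImm12(val uint64) (v uint16, shift byte, ok bool) {
	const mask1, mask2 uint64 = 0xfff, 0xfff_000
	switch {
	case val&^mask1 == 0:
		return uint16(val), 0, true // fits in the low 12 bits.
	case val&^mask2 == 0:
		return uint16(val >> 12), 1, true // fits in bits 12..23: encode with LSL #12.
	default:
		return 0, 0, false
	}
}

func main() {
	for _, c := range []uint64{0xabc, 0xabc000, 0xabc123, 0x1_000_000} {
		v, s, ok := asImm12(c)
		fmt.Printf("%#x -> imm12=%#x shift=%d ok=%v\n", c, v, s, ok)
	}
}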
440 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go generated vendored Normal file
@@ -0,0 +1,440 @@
package arm64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
type (
|
||||
// addressMode represents an ARM64 addressing mode.
|
||||
//
|
||||
// https://developer.arm.com/documentation/102374/0101/Loads-and-stores---addressing
|
||||
// TODO: use the bit-packed layout like operand struct.
|
||||
addressMode struct {
|
||||
kind addressModeKind
|
||||
rn, rm regalloc.VReg
|
||||
extOp extendOp
|
||||
imm int64
|
||||
}
|
||||
|
||||
// addressModeKind represents the kind of ARM64 addressing mode.
|
||||
addressModeKind byte
|
||||
)
|
||||
|
||||
const (
|
||||
// addressModeKindRegScaledExtended takes a base register and an index register. The index register is sign/zero-extended,
|
||||
// and then scaled by bits(type)/8.
|
||||
//
|
||||
// e.g.
|
||||
// - ldrh w1, [x2, w3, SXTW #1] ;; sign-extended and scaled by 2 (== LSL #1)
|
||||
// - strh w1, [x2, w3, UXTW #1] ;; zero-extended and scaled by 2 (== LSL #1)
|
||||
// - ldr w1, [x2, w3, SXTW #2] ;; sign-extended and scaled by 4 (== LSL #2)
|
||||
// - str x1, [x2, w3, UXTW #3] ;; zero-extended and scaled by 8 (== LSL #3)
|
||||
//
|
||||
// See the following pages:
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--register---Load-Register-Halfword--register--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--
|
||||
addressModeKindRegScaledExtended addressModeKind = iota
|
||||
|
||||
// addressModeKindRegScaled is the same as addressModeKindRegScaledExtended, but without the extension.
|
||||
addressModeKindRegScaled
|
||||
|
||||
// addressModeKindRegExtended is the same as addressModeKindRegScaledExtended, but without the scale factor.
|
||||
addressModeKindRegExtended
|
||||
|
||||
// addressModeKindRegReg takes a base register and an index register. The index register is not either scaled or extended.
|
||||
addressModeKindRegReg
|
||||
|
||||
// addressModeKindRegSignedImm9 takes a base register and a 9-bit "signed" immediate offset (-256 to 255).
|
||||
// The immediate will be sign-extended, and be added to the base register.
|
||||
// This is a.k.a. "unscaled" since the immediate is not scaled.
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--
|
||||
addressModeKindRegSignedImm9
|
||||
|
||||
// addressModeKindRegUnsignedImm12 takes a base register and a 12-bit "unsigned" immediate offset. scaled by
|
||||
// the size of the type. In other words, the actual offset will be imm12 * bits(type)/8.
|
||||
// See "Unsigned offset" in the following pages:
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
|
||||
addressModeKindRegUnsignedImm12
|
||||
|
||||
// addressModeKindPostIndex takes a base register and a 9-bit "signed" immediate offset.
|
||||
// After the load/store, the base register will be updated by the offset.
|
||||
//
|
||||
// Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset.
|
||||
//
|
||||
// See "Post-index" in the following pages for examples:
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
|
||||
addressModeKindPostIndex
|
||||
|
||||
// addressModeKindPreIndex takes a base register and a 9-bit "signed" immediate offset.
|
||||
// Before the load/store, the base register will be updated by the offset.
|
||||
//
|
||||
// Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset.
|
||||
//
|
||||
// See "Pre-index" in the following pages for examples:
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
|
||||
addressModeKindPreIndex
|
||||
|
||||
// addressModeKindArgStackSpace is used to resolve the address of the argument stack space
// existing right above the stack pointer. Since we don't know the exact stack space needed for a function
// at this compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above.
|
||||
addressModeKindArgStackSpace
|
||||
|
||||
// addressModeKindResultStackSpace is used to resolve the address of the result stack space
// existing right above the stack pointer. Since we don't know the exact stack space needed for a function
// at this compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above.
|
||||
addressModeKindResultStackSpace
|
||||
)
|
||||
|
||||
func (a addressMode) format(dstSizeBits byte) (ret string) {
|
||||
base := formatVRegSized(a.rn, 64)
|
||||
if rn := a.rn; rn.RegType() != regalloc.RegTypeInt {
|
||||
panic("invalid base register type: " + a.rn.RegType().String())
|
||||
} else if rn.IsRealReg() && v0 <= a.rn.RealReg() && a.rn.RealReg() <= v30 {
|
||||
panic("BUG: likely a bug in reg alloc or reset behavior")
|
||||
}
|
||||
|
||||
switch a.kind {
|
||||
case addressModeKindRegScaledExtended:
|
||||
amount := a.sizeInBitsToShiftAmount(dstSizeBits)
|
||||
ret = fmt.Sprintf("[%s, %s, %s #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp, amount)
|
||||
case addressModeKindRegScaled:
|
||||
amount := a.sizeInBitsToShiftAmount(dstSizeBits)
|
||||
ret = fmt.Sprintf("[%s, %s, lsl #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), amount)
|
||||
case addressModeKindRegExtended:
|
||||
ret = fmt.Sprintf("[%s, %s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp)
|
||||
case addressModeKindRegReg:
|
||||
ret = fmt.Sprintf("[%s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()))
|
||||
case addressModeKindRegSignedImm9:
|
||||
if a.imm != 0 {
|
||||
ret = fmt.Sprintf("[%s, #%#x]", base, a.imm)
|
||||
} else {
|
||||
ret = fmt.Sprintf("[%s]", base)
|
||||
}
|
||||
case addressModeKindRegUnsignedImm12:
|
||||
if a.imm != 0 {
|
||||
ret = fmt.Sprintf("[%s, #%#x]", base, a.imm)
|
||||
} else {
|
||||
ret = fmt.Sprintf("[%s]", base)
|
||||
}
|
||||
case addressModeKindPostIndex:
|
||||
ret = fmt.Sprintf("[%s], #%#x", base, a.imm)
|
||||
case addressModeKindPreIndex:
|
||||
ret = fmt.Sprintf("[%s, #%#x]!", base, a.imm)
|
||||
case addressModeKindArgStackSpace:
|
||||
ret = fmt.Sprintf("[#arg_space, #%#x]", a.imm)
|
||||
case addressModeKindResultStackSpace:
|
||||
ret = fmt.Sprintf("[#ret_space, #%#x]", a.imm)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode {
|
||||
if !offsetFitsInAddressModeKindRegSignedImm9(imm) {
|
||||
panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))
|
||||
}
|
||||
if preIndex {
|
||||
return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
|
||||
} else {
|
||||
return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
|
||||
}
|
||||
}
|
||||
|
||||
func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool {
|
||||
divisor := int64(dstSizeInBits) / 8
|
||||
return 0 < offset && offset%divisor == 0 && offset/divisor < 4096
|
||||
}
|
||||
|
||||
func offsetFitsInAddressModeKindRegSignedImm9(offset int64) bool {
|
||||
return -256 <= offset && offset <= 255
|
||||
}
|
||||
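The two predicates above decide between the scaled unsigned-imm12 form and the unscaled signed-imm9 form. A standalone sketch showing the same arithmetic on a few offsets (function names are local to the sketch):

package main

import "fmt"

// fitsUImm12 mirrors offsetFitsInAddressModeKindRegUnsignedImm12: the offset must be a
// positive multiple of the access size, and the scaled value must fit in 12 bits.
func fitsUImm12(sizeInBits byte, offset int64) bool {
	d := int64(sizeInBits) / 8
	return 0 < offset && offset%d == 0 && offset/d < 4096
}

// fitsSImm9 mirrors offsetFitsInAddressModeKindRegSignedImm9 (the "unscaled" form).
func fitsSImm9(offset int64) bool { return -256 <= offset && offset <= 255 }

func main() {
	fmt.Println(fitsUImm12(64, 32760)) // true: 32760/8 = 4095, the largest scaled offset for 8-byte accesses.
	fmt.Println(fitsUImm12(64, 4))     // false: not a multiple of 8, so imm9 or a register index is used instead.
	fmt.Println(fitsSImm9(-16))        // true: small negative offsets use the unscaled form.
}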
|
||||
func (a addressMode) indexRegBits() byte {
|
||||
bits := a.extOp.srcBits()
|
||||
if bits != 32 && bits != 64 {
|
||||
panic("invalid index register for address mode. it must be either 32 or 64 bits")
|
||||
}
|
||||
return bits
|
||||
}
|
||||
|
||||
func (a addressMode) sizeInBitsToShiftAmount(sizeInBits byte) (lsl byte) {
|
||||
switch sizeInBits {
|
||||
case 8:
|
||||
lsl = 0
|
||||
case 16:
|
||||
lsl = 1
|
||||
case 32:
|
||||
lsl = 2
|
||||
case 64:
|
||||
lsl = 3
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func extLoadSignSize(op ssa.Opcode) (size byte, signed bool) {
|
||||
switch op {
|
||||
case ssa.OpcodeUload8:
|
||||
size, signed = 8, false
|
||||
case ssa.OpcodeUload16:
|
||||
size, signed = 16, false
|
||||
case ssa.OpcodeUload32:
|
||||
size, signed = 32, false
|
||||
case ssa.OpcodeSload8:
|
||||
size, signed = 8, true
|
||||
case ssa.OpcodeSload16:
|
||||
size, signed = 16, true
|
||||
case ssa.OpcodeSload32:
|
||||
size, signed = 32, true
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret regalloc.VReg) {
|
||||
size, signed := extLoadSignSize(op)
|
||||
amode := m.lowerToAddressMode(ptr, offset, size)
|
||||
load := m.allocateInstr()
|
||||
if signed {
|
||||
load.asSLoad(operandNR(ret), amode, size)
|
||||
} else {
|
||||
load.asULoad(operandNR(ret), amode, size)
|
||||
}
|
||||
m.insert(load)
|
||||
}
|
||||
|
||||
func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.Value) {
|
||||
amode := m.lowerToAddressMode(ptr, offset, typ.Bits())
|
||||
|
||||
dst := m.compiler.VRegOf(ret)
|
||||
load := m.allocateInstr()
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
load.asULoad(operandNR(dst), amode, typ.Bits())
|
||||
case ssa.TypeF32, ssa.TypeF64:
|
||||
load.asFpuLoad(operandNR(dst), amode, typ.Bits())
|
||||
case ssa.TypeV128:
|
||||
load.asFpuLoad(operandNR(dst), amode, 128)
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
m.insert(load)
|
||||
}
|
||||
|
||||
func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane, ret ssa.Value) {
|
||||
// vecLoad1R has offset address mode (base+imm) only for post index, so we simply add the offset to the base.
|
||||
base := m.getOperand_NR(m.compiler.ValueDefinition(ptr), extModeNone).nr()
|
||||
offsetReg := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerConstantI64(offsetReg, int64(offset))
|
||||
addedBase := m.addReg64ToReg64(base, offsetReg)
|
||||
|
||||
rd := operandNR(m.compiler.VRegOf(ret))
|
||||
|
||||
ld1r := m.allocateInstr()
|
||||
ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane))
|
||||
m.insert(ld1r)
|
||||
}
|
||||
|
||||
func (m *machine) lowerStore(si *ssa.Instruction) {
|
||||
// TODO: merge consecutive stores into a single pair store instruction.
|
||||
value, ptr, offset, storeSizeInBits := si.StoreData()
|
||||
amode := m.lowerToAddressMode(ptr, offset, storeSizeInBits)
|
||||
|
||||
valueOp := m.getOperand_NR(m.compiler.ValueDefinition(value), extModeNone)
|
||||
store := m.allocateInstr()
|
||||
store.asStore(valueOp, amode, storeSizeInBits)
|
||||
m.insert(store)
|
||||
}
|
||||
|
||||
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
|
||||
func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) {
|
||||
// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and
|
||||
// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed
|
||||
// to support more efficient address resolution.
|
||||
|
||||
a32s, a64s, offset := m.collectAddends(ptr)
|
||||
offset += int64(offsetBase)
|
||||
return m.lowerToAddressModeFromAddends(a32s, a64s, size, offset)
|
||||
}
|
||||
|
||||
// lowerToAddressModeFromAddends creates an addressMode from a list of addends collected by collectAddends.
|
||||
// During the construction, this might emit additional instructions.
|
||||
//
|
||||
// Extracted as a separate function for easy testing.
|
||||
func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
|
||||
switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); {
|
||||
case a64sExist && a32sExist:
|
||||
var base regalloc.VReg
|
||||
base = a64s.Dequeue()
|
||||
var a32 addend32
|
||||
a32 = a32s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
|
||||
case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
|
||||
var base regalloc.VReg
|
||||
base = a64s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
|
||||
offset = 0
|
||||
case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
|
||||
var base regalloc.VReg
|
||||
base = a64s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
|
||||
offset = 0
|
||||
case a64sExist:
|
||||
var base regalloc.VReg
|
||||
base = a64s.Dequeue()
|
||||
if !a64s.Empty() {
|
||||
index := a64s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
|
||||
} else {
|
||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
|
||||
}
|
||||
case a32sExist:
|
||||
base32 := a32s.Dequeue()
|
||||
|
||||
// First we need a 64-bit base register.
|
||||
base := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
baseExt := m.allocateInstr()
|
||||
var signed bool
|
||||
if base32.ext == extendOpSXTW {
|
||||
signed = true
|
||||
}
|
||||
baseExt.asExtend(base, base32.r, 32, 64, signed)
|
||||
m.insert(baseExt)
|
||||
|
||||
if !a32s.Empty() {
|
||||
index := a32s.Dequeue()
|
||||
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
|
||||
} else {
|
||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
|
||||
}
|
||||
default: // Only static offsets.
|
||||
tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
m.lowerConstantI64(tmpReg, offset)
|
||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
|
||||
offset = 0
|
||||
}
|
||||
|
||||
baseReg := amode.rn
|
||||
if offset > 0 {
|
||||
baseReg = m.addConstToReg64(baseReg, offset) // baseReg += offset
|
||||
}
|
||||
|
||||
for !a64s.Empty() {
|
||||
a64 := a64s.Dequeue()
|
||||
baseReg = m.addReg64ToReg64(baseReg, a64) // baseReg += a64
|
||||
}
|
||||
|
||||
for !a32s.Empty() {
|
||||
a32 := a32s.Dequeue()
|
||||
baseReg = m.addRegToReg64Ext(baseReg, a32.r, a32.ext) // baseReg += (a32 extended to 64-bit)
|
||||
}
|
||||
amode.rn = baseReg
|
||||
return
|
||||
}
|
||||
|
||||
var addendsMatchOpcodes = [4]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst}
|
||||
|
||||
func (m *machine) collectAddends(ptr ssa.Value) (addends32 *wazevoapi.Queue[addend32], addends64 *wazevoapi.Queue[regalloc.VReg], offset int64) {
|
||||
m.addendsWorkQueue.Reset()
|
||||
m.addends32.Reset()
|
||||
m.addends64.Reset()
|
||||
m.addendsWorkQueue.Enqueue(ptr)
|
||||
|
||||
for !m.addendsWorkQueue.Empty() {
|
||||
v := m.addendsWorkQueue.Dequeue()
|
||||
|
||||
def := m.compiler.ValueDefinition(v)
|
||||
switch op := m.compiler.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op {
|
||||
case ssa.OpcodeIadd:
|
||||
// If the addend is an add, we recursively collect its operands.
|
||||
x, y := def.Instr.Arg2()
|
||||
m.addendsWorkQueue.Enqueue(x)
|
||||
m.addendsWorkQueue.Enqueue(y)
|
||||
def.Instr.MarkLowered()
|
||||
case ssa.OpcodeIconst:
|
||||
// If the addend is constant, we just statically merge it into the offset.
|
||||
ic := def.Instr
|
||||
u64 := ic.ConstantVal()
|
||||
if ic.Return().Type().Bits() == 32 {
|
||||
offset += int64(int32(u64)) // sign-extend.
|
||||
} else {
|
||||
offset += int64(u64)
|
||||
}
|
||||
def.Instr.MarkLowered()
|
||||
case ssa.OpcodeUExtend, ssa.OpcodeSExtend:
|
||||
input := def.Instr.Arg()
|
||||
if input.Type().Bits() != 32 {
|
||||
panic("illegal size: " + input.Type().String())
|
||||
}
|
||||
|
||||
var ext extendOp
|
||||
if op == ssa.OpcodeUExtend {
|
||||
ext = extendOpUXTW
|
||||
} else {
|
||||
ext = extendOpSXTW
|
||||
}
|
||||
|
||||
inputDef := m.compiler.ValueDefinition(input)
|
||||
constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant()
|
||||
switch {
|
||||
case constInst && ext == extendOpUXTW:
|
||||
// Zero-extension of a 32-bit constant can be merged into the offset.
|
||||
offset += int64(uint32(inputDef.Instr.ConstantVal()))
|
||||
case constInst && ext == extendOpSXTW:
|
||||
// Sign-extension of a 32-bit constant can be merged into the offset.
|
||||
offset += int64(int32(inputDef.Instr.ConstantVal())) // sign-extend!
|
||||
default:
|
||||
m.addends32.Enqueue(addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext})
|
||||
}
|
||||
def.Instr.MarkLowered()
|
||||
continue
|
||||
default:
|
||||
// If the addend is not one of them, we simply use it as-is (without merging!), optionally zero-extending it.
|
||||
m.addends64.Enqueue(m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr())
|
||||
}
|
||||
}
|
||||
return &m.addends32, &m.addends64, offset
|
||||
}
|
||||
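One subtle point in collectAddends above is the ssa.OpcodeIconst case: a 32-bit constant is sign-extended before being folded into the static offset, while a 64-bit constant is added as-is. A minimal sketch of just that folding step (foldConst is a local name, not a backend function):

package main

import "fmt"

// foldConst adds a constant addend into the running offset, sign-extending 32-bit values.
func foldConst(offset int64, raw uint64, is32 bool) int64 {
	if is32 {
		return offset + int64(int32(raw)) // sign-extend the low 32 bits.
	}
	return offset + int64(raw)
}

func main() {
	fmt.Println(foldConst(100, 0xffff_fff0, true))  // 84: 0xfffffff0 is -16 as a 32-bit constant.
	fmt.Println(foldConst(100, 0xffff_fff0, false)) // 4294967380: as a 64-bit constant it is +0xfffffff0.
}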
|
||||
func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
|
||||
rd = m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
alu := m.allocateInstr()
|
||||
if imm12Op, ok := asImm12Operand(uint64(c)); ok {
|
||||
alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true)
|
||||
} else if imm12Op, ok = asImm12Operand(uint64(-c)); ok {
|
||||
alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true)
|
||||
} else {
|
||||
tmp := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
m.load64bitConst(c, tmp)
|
||||
alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true)
|
||||
}
|
||||
m.insert(alu)
|
||||
return
|
||||
}
|
||||
|
||||
func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
|
||||
rd = m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
alu := m.allocateInstr()
|
||||
alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true)
|
||||
m.insert(alu)
|
||||
return
|
||||
}
|
||||
|
||||
func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) {
|
||||
rd = m.compiler.AllocateVReg(ssa.TypeI64)
|
||||
alu := m.allocateInstr()
|
||||
alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true)
|
||||
m.insert(alu)
|
||||
return
|
||||
}
|
||||
515 vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go generated vendored Normal file
@@ -0,0 +1,515 @@
package arm64
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
type (
|
||||
// machine implements backend.Machine.
|
||||
machine struct {
|
||||
compiler backend.Compiler
|
||||
executableContext *backend.ExecutableContextT[instruction]
|
||||
currentABI *backend.FunctionABI
|
||||
|
||||
regAlloc regalloc.Allocator
|
||||
regAllocFn *backend.RegAllocFunction[*instruction, *machine]
|
||||
|
||||
// addendsWorkQueue is used during address lowering, defined here for reuse.
|
||||
addendsWorkQueue wazevoapi.Queue[ssa.Value]
|
||||
addends32 wazevoapi.Queue[addend32]
|
||||
// addends64 is used during address lowering, defined here for reuse.
|
||||
addends64 wazevoapi.Queue[regalloc.VReg]
|
||||
unresolvedAddressModes []*instruction
|
||||
|
||||
// condBrRelocs holds the conditional branches which need offset relocation.
|
||||
condBrRelocs []condBrReloc
|
||||
|
||||
// jmpTableTargets holds the labels of the jump table targets.
|
||||
jmpTableTargets [][]uint32
|
||||
|
||||
// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
|
||||
// During the execution of the function, the stack looks like:
|
||||
//
|
||||
//
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | xxxxx |
|
||||
// | ReturnAddress |
|
||||
// +-----------------+ <<-|
|
||||
// | ........... | |
|
||||
// | spill slot M | | <--- spillSlotSize
|
||||
// | ............ | |
|
||||
// | spill slot 2 | |
|
||||
// | spill slot 1 | <<-+
|
||||
// | clobbered N |
|
||||
// | ........... |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// SP---> +-----------------+
|
||||
// (low address)
|
||||
//
|
||||
// and it represents the size of the space between FP and the first spilled slot. This must be a multiple of 16.
|
||||
// Also note that this is only known after register allocation.
|
||||
spillSlotSize int64
|
||||
spillSlots map[regalloc.VRegID]int64 // regalloc.VRegID to offset.
|
||||
// clobberedRegs holds real-register backed VRegs saved at the function prologue, and restored at the epilogue.
|
||||
clobberedRegs []regalloc.VReg
|
||||
|
||||
maxRequiredStackSizeForCalls int64
|
||||
stackBoundsCheckDisabled bool
|
||||
|
||||
regAllocStarted bool
|
||||
}
|
||||
|
||||
addend32 struct {
|
||||
r regalloc.VReg
|
||||
ext extendOp
|
||||
}
|
||||
|
||||
condBrReloc struct {
|
||||
cbr *instruction
|
||||
// currentLabelPos is the labelPosition within which condBr is defined.
|
||||
currentLabelPos *labelPosition
|
||||
// Next block's labelPosition.
|
||||
nextLabel label
|
||||
offset int64
|
||||
}
|
||||
|
||||
labelPosition = backend.LabelPosition[instruction]
|
||||
label = backend.Label
|
||||
)
|
||||
|
||||
const (
|
||||
labelReturn = backend.LabelReturn
|
||||
labelInvalid = backend.LabelInvalid
|
||||
)
|
||||
|
||||
// NewBackend returns a new backend for arm64.
|
||||
func NewBackend() backend.Machine {
|
||||
m := &machine{
|
||||
spillSlots: make(map[regalloc.VRegID]int64),
|
||||
executableContext: newExecutableContext(),
|
||||
regAlloc: regalloc.NewAllocator(regInfo),
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func newExecutableContext() *backend.ExecutableContextT[instruction] {
|
||||
return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0)
|
||||
}
|
||||
|
||||
// ExecutableContext implements backend.Machine.
|
||||
func (m *machine) ExecutableContext() backend.ExecutableContext {
|
||||
return m.executableContext
|
||||
}
|
||||
|
||||
// RegAlloc implements backend.Machine Function.
|
||||
func (m *machine) RegAlloc() {
|
||||
rf := m.regAllocFn
|
||||
for _, pos := range m.executableContext.OrderedBlockLabels {
|
||||
rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End)
|
||||
}
|
||||
|
||||
m.regAllocStarted = true
|
||||
m.regAlloc.DoAllocation(rf)
|
||||
// Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes.
|
||||
m.spillSlotSize = (m.spillSlotSize + 15) &^ 15
|
||||
}
|
||||
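The last line of RegAlloc rounds spillSlotSize up to a multiple of 16, since AArch64 requires the stack pointer to stay 16-byte aligned. A tiny standalone sketch of that rounding expression:

package main

import "fmt"

// align16 rounds n up to the next multiple of 16, exactly like (spillSlotSize + 15) &^ 15 above.
func align16(n int64) int64 { return (n + 15) &^ 15 }

func main() {
	for _, n := range []int64{0, 8, 16, 24, 40} {
		fmt.Printf("%d -> %d\n", n, align16(n)) // 0->0, 8->16, 16->16, 24->32, 40->48.
	}
}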
|
||||
// Reset implements backend.Machine.
|
||||
func (m *machine) Reset() {
|
||||
m.clobberedRegs = m.clobberedRegs[:0]
|
||||
for key := range m.spillSlots {
|
||||
m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key))
|
||||
}
|
||||
for _, key := range m.clobberedRegs {
|
||||
delete(m.spillSlots, regalloc.VRegID(key))
|
||||
}
|
||||
m.clobberedRegs = m.clobberedRegs[:0]
|
||||
m.regAllocStarted = false
|
||||
m.regAlloc.Reset()
|
||||
m.regAllocFn.Reset()
|
||||
m.spillSlotSize = 0
|
||||
m.unresolvedAddressModes = m.unresolvedAddressModes[:0]
|
||||
m.maxRequiredStackSizeForCalls = 0
|
||||
m.executableContext.Reset()
|
||||
m.jmpTableTargets = m.jmpTableTargets[:0]
|
||||
}
|
||||
|
||||
// SetCurrentABI implements backend.Machine SetCurrentABI.
|
||||
func (m *machine) SetCurrentABI(abi *backend.FunctionABI) {
|
||||
m.currentABI = abi
|
||||
}
|
||||
|
||||
// DisableStackCheck implements backend.Machine DisableStackCheck.
|
||||
func (m *machine) DisableStackCheck() {
|
||||
m.stackBoundsCheckDisabled = true
|
||||
}
|
||||
|
||||
// SetCompiler implements backend.Machine.
|
||||
func (m *machine) SetCompiler(ctx backend.Compiler) {
|
||||
m.compiler = ctx
|
||||
m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, ctx.SSABuilder(), ctx)
|
||||
}
|
||||
|
||||
func (m *machine) insert(i *instruction) {
|
||||
ectx := m.executableContext
|
||||
ectx.PendingInstructions = append(ectx.PendingInstructions, i)
|
||||
}
|
||||
|
||||
func (m *machine) insertBrTargetLabel() label {
|
||||
nop, l := m.allocateBrTarget()
|
||||
m.insert(nop)
|
||||
return l
|
||||
}
|
||||
|
||||
func (m *machine) allocateBrTarget() (nop *instruction, l label) {
|
||||
ectx := m.executableContext
|
||||
l = ectx.AllocateLabel()
|
||||
nop = m.allocateInstr()
|
||||
nop.asNop0WithLabel(l)
|
||||
pos := ectx.AllocateLabelPosition(l)
|
||||
pos.Begin, pos.End = nop, nop
|
||||
ectx.LabelPositions[l] = pos
|
||||
return
|
||||
}
|
||||
|
||||
// allocateInstr allocates an instruction.
|
||||
func (m *machine) allocateInstr() *instruction {
|
||||
instr := m.executableContext.InstructionPool.Allocate()
|
||||
if !m.regAllocStarted {
|
||||
instr.addedBeforeRegAlloc = true
|
||||
}
|
||||
return instr
|
||||
}
|
||||
|
||||
func resetInstruction(i *instruction) {
|
||||
*i = instruction{}
|
||||
}
|
||||
|
||||
func (m *machine) allocateNop() *instruction {
|
||||
instr := m.allocateInstr()
|
||||
instr.asNop0()
|
||||
return instr
|
||||
}
|
||||
|
||||
func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
|
||||
amode := &i.amode
|
||||
switch amode.kind {
|
||||
case addressModeKindResultStackSpace:
|
||||
amode.imm += ret0offset
|
||||
case addressModeKindArgStackSpace:
|
||||
amode.imm += arg0offset
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
|
||||
var sizeInBits byte
|
||||
switch i.kind {
|
||||
case store8, uLoad8:
|
||||
sizeInBits = 8
|
||||
case store16, uLoad16:
|
||||
sizeInBits = 16
|
||||
case store32, fpuStore32, uLoad32, fpuLoad32:
|
||||
sizeInBits = 32
|
||||
case store64, fpuStore64, uLoad64, fpuLoad64:
|
||||
sizeInBits = 64
|
||||
case fpuStore128, fpuLoad128:
|
||||
sizeInBits = 128
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
|
||||
if offsetFitsInAddressModeKindRegUnsignedImm12(sizeInBits, amode.imm) {
|
||||
amode.kind = addressModeKindRegUnsignedImm12
|
||||
} else {
|
||||
// In this case, we load the offset into the temporary register,
// and then use it as the index register.
|
||||
newPrev := m.lowerConstantI64AndInsert(i.prev, tmpRegVReg, amode.imm)
|
||||
linkInstr(newPrev, i)
|
||||
*amode = addressMode{kind: addressModeKindRegReg, rn: amode.rn, rm: tmpRegVReg, extOp: extendOpUXTX /* indicates rm reg is 64-bit */}
|
||||
}
|
||||
}
|
||||
|
||||
// resolveRelativeAddresses resolves the relative addresses before encoding.
|
||||
func (m *machine) resolveRelativeAddresses(ctx context.Context) {
|
||||
ectx := m.executableContext
|
||||
for {
|
||||
if len(m.unresolvedAddressModes) > 0 {
|
||||
arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP()
|
||||
for _, i := range m.unresolvedAddressModes {
|
||||
m.resolveAddressingMode(arg0offset, ret0offset, i)
|
||||
}
|
||||
}
|
||||
|
||||
// Reuse the slice to gather the unresolved conditional branches.
|
||||
m.condBrRelocs = m.condBrRelocs[:0]
|
||||
|
||||
var fn string
|
||||
var fnIndex int
|
||||
var labelToSSABlockID map[label]ssa.BasicBlockID
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
fn = wazevoapi.GetCurrentFunctionName(ctx)
|
||||
labelToSSABlockID = make(map[label]ssa.BasicBlockID)
|
||||
for i, l := range ectx.SsaBlockIDToLabels {
|
||||
labelToSSABlockID[l] = ssa.BasicBlockID(i)
|
||||
}
|
||||
fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)
|
||||
}
|
||||
|
||||
// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
|
||||
var offset int64
|
||||
for i, pos := range ectx.OrderedBlockLabels {
|
||||
pos.BinaryOffset = offset
|
||||
var size int64
|
||||
for cur := pos.Begin; ; cur = cur.next {
|
||||
switch cur.kind {
|
||||
case nop0:
|
||||
l := cur.nop0Label()
|
||||
if pos, ok := ectx.LabelPositions[l]; ok {
|
||||
pos.BinaryOffset = offset + size
|
||||
}
|
||||
case condBr:
|
||||
if !cur.condBrOffsetResolved() {
|
||||
var nextLabel label
|
||||
if i < len(ectx.OrderedBlockLabels)-1 {
|
||||
// Note: this is only used when the block ends with fallthrough,
// therefore it can safely be assumed that the next block exists when it's needed.
|
||||
nextLabel = ectx.OrderedBlockLabels[i+1].L
|
||||
}
|
||||
m.condBrRelocs = append(m.condBrRelocs, condBrReloc{
|
||||
cbr: cur, currentLabelPos: pos, offset: offset + size,
|
||||
nextLabel: nextLabel,
|
||||
})
|
||||
}
|
||||
}
|
||||
size += cur.size()
|
||||
if cur == pos.End {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
if size > 0 {
|
||||
l := pos.L
|
||||
var labelStr string
|
||||
if blkID, ok := labelToSSABlockID[l]; ok {
|
||||
labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID)
|
||||
} else {
|
||||
labelStr = l.String()
|
||||
}
|
||||
wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr))
|
||||
}
|
||||
}
|
||||
offset += size
|
||||
}
|
||||
|
||||
// Before resolving any offsets, we need to check if all the conditional branches can be resolved.
|
||||
var needRerun bool
|
||||
for i := range m.condBrRelocs {
|
||||
reloc := &m.condBrRelocs[i]
|
||||
cbr := reloc.cbr
|
||||
offset := reloc.offset
|
||||
|
||||
target := cbr.condBrLabel()
|
||||
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
|
||||
diff := offsetOfTarget - offset
|
||||
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
|
||||
// In this case the conditional branch target is too far to encode directly. We place the trampoline instructions at the end of the current block,
// and jump to it.
|
||||
m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel)
|
||||
// Then, we need to rerun this loop to fix up the label offsets
// as they have changed after the trampoline is inserted.
|
||||
needRerun = true
|
||||
}
|
||||
}
|
||||
if needRerun {
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
wazevoapi.PerfMap.Clear()
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
var currentOffset int64
|
||||
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
|
||||
switch cur.kind {
|
||||
case br:
|
||||
target := cur.brLabel()
|
||||
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
|
||||
diff := offsetOfTarget - currentOffset
|
||||
divided := diff >> 2
|
||||
if divided < minSignedInt26 || divided > maxSignedInt26 {
|
||||
// This means the currently compiled single function is extremely large.
|
||||
panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range")
|
||||
}
|
||||
cur.brOffsetResolve(diff)
|
||||
case condBr:
|
||||
if !cur.condBrOffsetResolved() {
|
||||
target := cur.condBrLabel()
|
||||
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
|
||||
diff := offsetOfTarget - currentOffset
|
||||
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
|
||||
panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly")
|
||||
}
|
||||
cur.condBrOffsetResolve(diff)
|
||||
}
|
||||
case brTableSequence:
|
||||
tableIndex := cur.u1
|
||||
targets := m.jmpTableTargets[tableIndex]
|
||||
for i := range targets {
|
||||
l := label(targets[i])
|
||||
offsetOfTarget := ectx.LabelPositions[l].BinaryOffset
|
||||
diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin)
|
||||
targets[i] = uint32(diff)
|
||||
}
|
||||
cur.brTableSequenceOffsetsResolved()
|
||||
case emitSourceOffsetInfo:
|
||||
m.compiler.AddSourceOffsetInfo(currentOffset, cur.sourceOffsetInfo())
|
||||
}
|
||||
currentOffset += cur.size()
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
maxSignedInt26 = 1<<25 - 1
|
||||
minSignedInt26 = -(1 << 25)
|
||||
|
||||
maxSignedInt19 = 1<<18 - 1
|
||||
minSignedInt19 = -(1 << 18)
|
||||
)
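// fitsInBranchRange is an illustrative helper, not part of the vendored wazero
// source: it restates the checks above, where a byte offset is divided by 4
// (AArch64 branch immediates count 4-byte instruction words) and must fit the
// signed immediate width of the branch: 19 bits for conditional branches,
// 26 bits for unconditional ones.
func fitsInBranchRange(byteDiff int64, immBits uint) bool {
	word := byteDiff >> 2 // branch immediates count instruction words, not bytes.
	maxImm := int64(1)<<(immBits-1) - 1
	minImm := -(int64(1) << (immBits - 1))
	return minImm <= word && word <= maxImm
}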
|
||||
|
||||
func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) {
|
||||
cur := currentBlk.End
|
||||
originalTarget := cbr.condBrLabel()
|
||||
endNext := cur.next
|
||||
|
||||
if cur.kind != br {
|
||||
// If the current block ends with an unconditional branch, we can just insert the trampoline after it.
// Otherwise, we need to insert a "skip" instruction to jump over the trampoline instructions.
|
||||
skip := m.allocateInstr()
|
||||
skip.asBr(nextLabel)
|
||||
cur = linkInstr(cur, skip)
|
||||
}
|
||||
|
||||
cbrNewTargetInstr, cbrNewTargetLabel := m.allocateBrTarget()
|
||||
cbr.setCondBrTargets(cbrNewTargetLabel)
|
||||
cur = linkInstr(cur, cbrNewTargetInstr)
|
||||
|
||||
// Then insert the unconditional branch to the original, which should be possible to get encoded
|
||||
// as 26-bit offset should be enough for any practical application.
|
||||
br := m.allocateInstr()
|
||||
br.asBr(originalTarget)
|
||||
cur = linkInstr(cur, br)
|
||||
|
||||
// Update the end of the current block.
|
||||
currentBlk.End = cur
|
||||
|
||||
linkInstr(cur, endNext)
|
||||
}
|
||||
|
||||
// Format implements backend.Machine.
|
||||
func (m *machine) Format() string {
|
||||
ectx := m.executableContext
|
||||
begins := map[*instruction]label{}
|
||||
for l, pos := range ectx.LabelPositions {
|
||||
begins[pos.Begin] = l
|
||||
}
|
||||
|
||||
irBlocks := map[label]ssa.BasicBlockID{}
|
||||
for i, l := range ectx.SsaBlockIDToLabels {
|
||||
irBlocks[l] = ssa.BasicBlockID(i)
|
||||
}
|
||||
|
||||
var lines []string
|
||||
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
|
||||
if l, ok := begins[cur]; ok {
|
||||
var labelStr string
|
||||
if blkID, ok := irBlocks[l]; ok {
|
||||
labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID)
|
||||
} else {
|
||||
labelStr = fmt.Sprintf("%s:", l)
|
||||
}
|
||||
lines = append(lines, labelStr)
|
||||
}
|
||||
if cur.kind == nop0 {
|
||||
continue
|
||||
}
|
||||
lines = append(lines, "\t"+cur.String())
|
||||
}
|
||||
return "\n" + strings.Join(lines, "\n") + "\n"
|
||||
}
|
||||
|
||||
// InsertReturn implements backend.Machine.
|
||||
func (m *machine) InsertReturn() {
|
||||
i := m.allocateInstr()
|
||||
i.asRet()
|
||||
m.insert(i)
|
||||
}
|
||||
|
||||
func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 {
|
||||
offset, ok := m.spillSlots[id]
|
||||
if !ok {
|
||||
offset = m.spillSlotSize
|
||||
// TODO: this should be aligned depending on the `size` to use Imm12 offset load/store as much as possible.
|
||||
m.spillSlots[id] = offset
|
||||
m.spillSlotSize += int64(size)
|
||||
}
|
||||
return offset + 16 // spill slot starts above the clobbered registers and the frame size.
|
||||
}
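// exampleSpillSlotLayout is an illustrative sketch, not part of the vendored
// wazero source: spill slot offsets are handed out bump-allocator style, so
// consecutive requests for fresh (hypothetical) VRegIDs return increasing
// offsets, each biased by the 16-byte frame_size slot that sits below the
// spill area.
func exampleSpillSlotLayout(m *machine) []int64 {
	offsets := make([]int64, 0, 3)
	for _, id := range []regalloc.VRegID{100, 101, 102} {
		offsets = append(offsets, m.getVRegSpillSlotOffsetFromSP(id, 8))
	}
	return offsets // [16, 24, 32] when the spill area starts out empty.
}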
|
||||
|
||||
func (m *machine) clobberedRegSlotSize() int64 {
|
||||
return int64(len(m.clobberedRegs) * 16)
|
||||
}
|
||||
|
||||
func (m *machine) arg0OffsetFromSP() int64 {
|
||||
return m.frameSize() +
|
||||
16 + // 16-byte aligned return address
|
||||
16 // frame size saved below the clobbered registers.
|
||||
}
|
||||
|
||||
func (m *machine) ret0OffsetFromSP() int64 {
|
||||
return m.arg0OffsetFromSP() + m.currentABI.ArgStackSize
|
||||
}
|
||||
|
||||
func (m *machine) requiredStackSize() int64 {
|
||||
return m.maxRequiredStackSizeForCalls +
|
||||
m.frameSize() +
|
||||
16 + // 16-byte aligned return address.
|
||||
16 // frame size saved below the clobbered registers.
|
||||
}
|
||||
|
||||
func (m *machine) frameSize() int64 {
|
||||
s := m.clobberedRegSlotSize() + m.spillSlotSize
|
||||
if s&0xf != 0 {
|
||||
panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s))
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) {
|
||||
// TODO: reuse the slice!
|
||||
labels := make([]uint32, len(targets))
|
||||
for j, target := range targets {
|
||||
labels[j] = uint32(m.executableContext.GetOrAllocateSSABlockLabel(target))
|
||||
}
|
||||
index = len(m.jmpTableTargets)
|
||||
m.jmpTableTargets = append(m.jmpTableTargets, labels)
|
||||
return
|
||||
}
|
||||
469
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
generated
vendored
Normal file
@ -0,0 +1,469 @@
package arm64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
// PostRegAlloc implements backend.Machine.
|
||||
func (m *machine) PostRegAlloc() {
|
||||
m.setupPrologue()
|
||||
m.postRegAlloc()
|
||||
}
|
||||
|
||||
// setupPrologue initializes the prologue of the function.
|
||||
func (m *machine) setupPrologue() {
|
||||
ectx := m.executableContext
|
||||
|
||||
cur := ectx.RootInstr
|
||||
prevInitInst := cur.next
|
||||
|
||||
//
|
||||
// (high address) (high address)
|
||||
// SP----> +-----------------+ +------------------+ <----+
|
||||
// | ....... | | ....... | |
|
||||
// | ret Y | | ret Y | |
|
||||
// | ....... | | ....... | |
|
||||
// | ret 0 | | ret 0 | |
|
||||
// | arg X | | arg X | | size_of_arg_ret.
|
||||
// | ....... | ====> | ....... | |
|
||||
// | arg 1 | | arg 1 | |
|
||||
// | arg 0 | | arg 0 | <----+
|
||||
// |-----------------| | size_of_arg_ret |
|
||||
// | return address |
|
||||
// +------------------+ <---- SP
|
||||
// (low address) (low address)
|
||||
|
||||
// Saves the return address (lr) and the size_of_arg_ret below the SP.
|
||||
// size_of_arg_ret is used for stack unwinding.
|
||||
cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)
|
||||
|
||||
if !m.stackBoundsCheckDisabled {
|
||||
cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
|
||||
}
|
||||
|
||||
// Decrement SP if spillSlotSize > 0.
|
||||
if m.spillSlotSize == 0 && len(m.spillSlots) != 0 {
|
||||
panic(fmt.Sprintf("BUG: spillSlotSize=%d, spillSlots=%v\n", m.spillSlotSize, m.spillSlots))
|
||||
}
|
||||
|
||||
if regs := m.clobberedRegs; len(regs) > 0 {
|
||||
//
|
||||
// (high address) (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | size_of_arg_ret | | size_of_arg_ret |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// SP----> +-----------------+ ====> +-----------------+
|
||||
// (low address) | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 0 |
|
||||
// +-----------------+ <----- SP
|
||||
// (low address)
|
||||
//
|
||||
_amode := addressModePreOrPostIndex(spVReg,
|
||||
-16, // stack pointer must be 16-byte aligned.
|
||||
true, // Decrement before store.
|
||||
)
|
||||
for _, vr := range regs {
|
||||
// TODO: pair stores to reduce the number of instructions.
|
||||
store := m.allocateInstr()
|
||||
store.asStore(operandNR(vr), _amode, regTypeToRegisterSizeInBits(vr.RegType()))
|
||||
cur = linkInstr(cur, store)
|
||||
}
|
||||
}
|
||||
|
||||
if size := m.spillSlotSize; size > 0 {
|
||||
// Check if size is 16-byte aligned.
|
||||
if size&0xf != 0 {
|
||||
panic(fmt.Errorf("BUG: spill slot size %d is not 16-byte aligned", size))
|
||||
}
|
||||
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, size, false)
|
||||
|
||||
// At this point, the stack looks like:
|
||||
//
|
||||
// (high address)
|
||||
// +------------------+
|
||||
// | ....... |
|
||||
// | ret Y |
|
||||
// | ....... |
|
||||
// | ret 0 |
|
||||
// | arg X |
|
||||
// | ....... |
|
||||
// | arg 1 |
|
||||
// | arg 0 |
|
||||
// | size_of_arg_ret |
|
||||
// | ReturnAddress |
|
||||
// +------------------+
|
||||
// | clobbered M |
|
||||
// | ............ |
|
||||
// | clobbered 0 |
|
||||
// | spill slot N |
|
||||
// | ............ |
|
||||
// | spill slot 2 |
|
||||
// | spill slot 0 |
|
||||
// SP----> +------------------+
|
||||
// (low address)
|
||||
}
|
||||
|
||||
// We push the frame size into the stack to make it possible to unwind stack:
|
||||
//
|
||||
//
|
||||
// (high address) (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | size_of_arg_ret | | size_of_arg_ret |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// +-----------------+ ==> +-----------------+ <----+
|
||||
// | clobbered M | | clobbered M | |
|
||||
// | ............ | | ............ | |
|
||||
// | clobbered 2 | | clobbered 2 | |
|
||||
// | clobbered 1 | | clobbered 1 | | frame size
|
||||
// | clobbered 0 | | clobbered 0 | |
|
||||
// | spill slot N | | spill slot N | |
|
||||
// | ............ | | ............ | |
|
||||
// | spill slot 0 | | spill slot 0 | <----+
|
||||
// SP---> +-----------------+ | xxxxxx | ;; unused space to make it 16-byte aligned.
|
||||
// | frame_size |
|
||||
// +-----------------+ <---- SP
|
||||
// (low address)
|
||||
//
|
||||
cur = m.createFrameSizeSlot(cur, m.frameSize())
|
||||
|
||||
linkInstr(cur, prevInitInst)
|
||||
}
|
||||
|
||||
func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruction {
|
||||
// First we decrement the stack pointer to point to the arg0 slot.
|
||||
var sizeOfArgRetReg regalloc.VReg
|
||||
s := int64(m.currentABI.AlignedArgResultStackSlotSize())
|
||||
if s > 0 {
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
|
||||
sizeOfArgRetReg = tmpRegVReg
|
||||
|
||||
subSp := m.allocateInstr()
|
||||
subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
|
||||
cur = linkInstr(cur, subSp)
|
||||
} else {
|
||||
sizeOfArgRetReg = xzrVReg
|
||||
}
|
||||
|
||||
// Saves the return address (lr) and the size_of_arg_ret below the SP.
|
||||
// size_of_arg_ret is used for stack unwinding.
|
||||
pstr := m.allocateInstr()
|
||||
amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */)
|
||||
pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)
|
||||
cur = linkInstr(cur, pstr)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {
|
||||
var frameSizeReg regalloc.VReg
|
||||
if s > 0 {
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
|
||||
frameSizeReg = tmpRegVReg
|
||||
} else {
|
||||
frameSizeReg = xzrVReg
|
||||
}
|
||||
_amode := addressModePreOrPostIndex(spVReg,
|
||||
-16, // stack pointer must be 16-byte aligned.
|
||||
true, // Decrement before store.
|
||||
)
|
||||
store := m.allocateInstr()
|
||||
store.asStore(operandNR(frameSizeReg), _amode, 64)
|
||||
cur = linkInstr(cur, store)
|
||||
return cur
|
||||
}
|
||||
|
||||
// postRegAlloc does multiple things while walking through the instructions:
|
||||
// 1. Removes the redundant copy instruction.
|
||||
// 2. Inserts the epilogue.
|
||||
func (m *machine) postRegAlloc() {
|
||||
ectx := m.executableContext
|
||||
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
|
||||
switch cur.kind {
|
||||
case ret:
|
||||
m.setupEpilogueAfter(cur.prev)
|
||||
case loadConstBlockArg:
|
||||
lc := cur
|
||||
next := lc.next
|
||||
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
|
||||
m.lowerLoadConstantBlockArgAfterRegAlloc(lc)
|
||||
for _, instr := range m.executableContext.PendingInstructions {
|
||||
cur = linkInstr(cur, instr)
|
||||
}
|
||||
linkInstr(cur, next)
|
||||
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
|
||||
default:
|
||||
// Removes the redundant copy instruction.
|
||||
if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() {
|
||||
prev, next := cur.prev, cur.next
|
||||
// Remove the copy instruction.
|
||||
prev.next = next
|
||||
if next != nil {
|
||||
next.prev = prev
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *machine) setupEpilogueAfter(cur *instruction) {
|
||||
prevNext := cur.next
|
||||
|
||||
// We've stored the frame size in the prologue, and now that we are about to return from this function, we won't need it anymore.
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, 16, true)
|
||||
|
||||
if s := m.spillSlotSize; s > 0 {
|
||||
// Adjust SP to the original value:
|
||||
//
|
||||
// (high address) (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | xxxxx | | xxxxx |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// +-----------------+ ====> +-----------------+
|
||||
// | clobbered M | | clobbered M |
|
||||
// | ............ | | ............ |
|
||||
// | clobbered 1 | | clobbered 1 |
|
||||
// | clobbered 0 | | clobbered 0 |
|
||||
// | spill slot N | +-----------------+ <---- SP
|
||||
// | ............ |
|
||||
// | spill slot 0 |
|
||||
// SP---> +-----------------+
|
||||
// (low address)
|
||||
//
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
|
||||
}
|
||||
|
||||
// First we need to restore the clobbered registers.
|
||||
if len(m.clobberedRegs) > 0 {
|
||||
// (high address)
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | xxxxx | | xxxxx |
|
||||
// | ReturnAddress | | ReturnAddress |
|
||||
// +-----------------+ ========> +-----------------+ <---- SP
|
||||
// | clobbered M |
|
||||
// | ........... |
|
||||
// | clobbered 1 |
|
||||
// | clobbered 0 |
|
||||
// SP---> +-----------------+
|
||||
// (low address)
|
||||
|
||||
l := len(m.clobberedRegs) - 1
|
||||
for i := range m.clobberedRegs {
|
||||
vr := m.clobberedRegs[l-i] // reverse order to restore.
|
||||
load := m.allocateInstr()
|
||||
amode := addressModePreOrPostIndex(spVReg,
|
||||
16, // stack pointer must be 16-byte aligned.
|
||||
false, // Increment after load.
|
||||
)
|
||||
// TODO: pair loads to reduce the number of instructions.
|
||||
switch regTypeToRegisterSizeInBits(vr.RegType()) {
|
||||
case 64: // save int reg.
|
||||
load.asULoad(operandNR(vr), amode, 64)
|
||||
case 128: // save vector reg.
|
||||
load.asFpuLoad(operandNR(vr), amode, 128)
|
||||
}
|
||||
cur = linkInstr(cur, load)
|
||||
}
|
||||
}
|
||||
|
||||
// Reload the return address (lr).
|
||||
//
|
||||
// +-----------------+ +-----------------+
|
||||
// | ....... | | ....... |
|
||||
// | ret Y | | ret Y |
|
||||
// | ....... | | ....... |
|
||||
// | ret 0 | | ret 0 |
|
||||
// | arg X | | arg X |
|
||||
// | ....... | ===> | ....... |
|
||||
// | arg 1 | | arg 1 |
|
||||
// | arg 0 | | arg 0 |
|
||||
// | xxxxx | +-----------------+ <---- SP
|
||||
// | ReturnAddress |
|
||||
// SP----> +-----------------+
|
||||
|
||||
ldr := m.allocateInstr()
|
||||
ldr.asULoad(operandNR(lrVReg),
|
||||
addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
|
||||
cur = linkInstr(cur, ldr)
|
||||
|
||||
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
|
||||
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
|
||||
}
|
||||
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// saveRequiredRegs is the set of registers that must be saved/restored during growing stack when there's insufficient
|
||||
// stack space left. Basically this is the combination of CalleeSavedRegisters plus argument registers except for x0,
|
||||
// which always points to the execution context whenever the native code is entered from Go.
|
||||
var saveRequiredRegs = []regalloc.VReg{
|
||||
x1VReg, x2VReg, x3VReg, x4VReg, x5VReg, x6VReg, x7VReg,
|
||||
x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg, lrVReg,
|
||||
v0VReg, v1VReg, v2VReg, v3VReg, v4VReg, v5VReg, v6VReg, v7VReg,
|
||||
v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
|
||||
}
|
||||
|
||||
// insertStackBoundsCheck will insert the instructions after `cur` to check the
|
||||
// stack bounds, and if there is not sufficient space for the function,
// exit the execution and try growing the stack in the Go world.
|
||||
//
|
||||
// TODO: we should be able to share the instructions across all the functions to reduce the size of the compiled executable.
|
||||
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
|
||||
if requiredStackSize%16 != 0 {
|
||||
panic("BUG")
|
||||
}
|
||||
|
||||
if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {
|
||||
// sub tmp, sp, #requiredStackSize
|
||||
sub := m.allocateInstr()
|
||||
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true)
|
||||
cur = linkInstr(cur, sub)
|
||||
} else {
|
||||
// In this case, we first load the requiredStackSize into the temporary register,
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
|
||||
// Then subtract it.
|
||||
sub := m.allocateInstr()
|
||||
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true)
|
||||
cur = linkInstr(cur, sub)
|
||||
}
|
||||
|
||||
tmp2 := x11VReg // Caller save, so it is safe to use it here in the prologue.
|
||||
|
||||
// ldr tmp2, [executionContext #StackBottomPtr]
|
||||
ldr := m.allocateInstr()
|
||||
ldr.asULoad(operandNR(tmp2), addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: x0VReg, // execution context is always the first argument.
|
||||
imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(),
|
||||
}, 64)
|
||||
cur = linkInstr(cur, ldr)
|
||||
|
||||
// subs xzr, tmp, tmp2
|
||||
subs := m.allocateInstr()
|
||||
subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true)
|
||||
cur = linkInstr(cur, subs)
|
||||
|
||||
// b.ge #imm
|
||||
cbr := m.allocateInstr()
|
||||
cbr.asCondBr(ge.asCond(), labelInvalid, false /* ignored */)
|
||||
cur = linkInstr(cur, cbr)
|
||||
|
||||
// Set the required stack size and set it to the exec context.
|
||||
{
|
||||
// First load the requiredStackSize into the temporary register,
|
||||
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
|
||||
setRequiredStackSize := m.allocateInstr()
|
||||
setRequiredStackSize.asStore(operandNR(tmpRegVReg),
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
// Execution context is always the first argument.
|
||||
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
|
||||
}, 64)
|
||||
|
||||
cur = linkInstr(cur, setRequiredStackSize)
|
||||
}
|
||||
|
||||
ldrAddress := m.allocateInstr()
|
||||
ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: x0VReg, // execution context is always the first argument
|
||||
imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
|
||||
}, 64)
|
||||
cur = linkInstr(cur, ldrAddress)
|
||||
|
||||
// Then jumps to the stack grow call sequence's address, meaning
|
||||
// transferring the control to the code compiled by CompileStackGrowCallSequence.
|
||||
bl := m.allocateInstr()
|
||||
bl.asCallIndirect(tmpRegVReg, nil)
|
||||
cur = linkInstr(cur, bl)
|
||||
|
||||
// Now that we know the entire code, we can finalize how many bytes
|
||||
// we have to skip when the stack size is sufficient.
|
||||
var cbrOffset int64
|
||||
for _cur := cbr; ; _cur = _cur.next {
|
||||
cbrOffset += _cur.size()
|
||||
if _cur == cur {
|
||||
break
|
||||
}
|
||||
}
|
||||
cbr.condBrOffsetResolve(cbrOffset)
|
||||
return cur
|
||||
}
|
||||
|
||||
// CompileStackGrowCallSequence implements backend.Machine.
|
||||
func (m *machine) CompileStackGrowCallSequence() []byte {
|
||||
ectx := m.executableContext
|
||||
|
||||
cur := m.allocateInstr()
|
||||
cur.asNop0()
|
||||
ectx.RootInstr = cur
|
||||
|
||||
// Save the callee saved and argument registers.
|
||||
cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs)
|
||||
|
||||
// Save the current stack pointer.
|
||||
cur = m.saveCurrentStackPointer(cur, x0VReg)
|
||||
|
||||
// Set the exit status on the execution context.
|
||||
cur = m.setExitCode(cur, x0VReg, wazevoapi.ExitCodeGrowStack)
|
||||
|
||||
// Exit the execution.
|
||||
cur = m.storeReturnAddressAndExit(cur)
|
||||
|
||||
// After the exit, restore the saved registers.
|
||||
cur = m.restoreRegistersInExecutionContext(cur, saveRequiredRegs)
|
||||
|
||||
// Then go back to the original address of this stack grow call.
|
||||
ret := m.allocateInstr()
|
||||
ret.asRet()
|
||||
linkInstr(cur, ret)
|
||||
|
||||
m.encode(ectx.RootInstr)
|
||||
return m.compiler.Buf()
|
||||
}
|
||||
|
||||
func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction {
|
||||
ectx := m.executableContext
|
||||
|
||||
ectx.PendingInstructions = ectx.PendingInstructions[:0]
|
||||
m.insertAddOrSubStackPointer(rd, diff, add)
|
||||
for _, inserted := range ectx.PendingInstructions {
|
||||
cur = linkInstr(cur, inserted)
|
||||
}
|
||||
return cur
|
||||
}
|
||||
152
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
generated
vendored
Normal file
@ -0,0 +1,152 @@
package arm64
|
||||
|
||||
// This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine.
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
// ClobberedRegisters implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) ClobberedRegisters(regs []regalloc.VReg) {
|
||||
m.clobberedRegs = append(m.clobberedRegs[:0], regs...)
|
||||
}
|
||||
|
||||
// Swap implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {
|
||||
prevNext := cur.next
|
||||
var mov1, mov2, mov3 *instruction
|
||||
if x1.RegType() == regalloc.RegTypeInt {
|
||||
if !tmp.Valid() {
|
||||
tmp = tmpRegVReg
|
||||
}
|
||||
mov1 = m.allocateInstr().asMove64(tmp, x1)
|
||||
mov2 = m.allocateInstr().asMove64(x1, x2)
|
||||
mov3 = m.allocateInstr().asMove64(x2, tmp)
|
||||
cur = linkInstr(cur, mov1)
|
||||
cur = linkInstr(cur, mov2)
|
||||
cur = linkInstr(cur, mov3)
|
||||
linkInstr(cur, prevNext)
|
||||
} else {
|
||||
if !tmp.Valid() {
|
||||
r2 := x2.RealReg()
|
||||
// Temporarily spill x1 to stack.
|
||||
cur = m.InsertStoreRegisterAt(x1, cur, true).prev
|
||||
// Then move x2 to x1.
|
||||
cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2))
|
||||
linkInstr(cur, prevNext)
|
||||
// Then reload the original value on x1 from stack to r2.
|
||||
m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true)
|
||||
} else {
|
||||
mov1 = m.allocateInstr().asFpuMov128(tmp, x1)
|
||||
mov2 = m.allocateInstr().asFpuMov128(x1, x2)
|
||||
mov3 = m.allocateInstr().asFpuMov128(x2, tmp)
|
||||
cur = linkInstr(cur, mov1)
|
||||
cur = linkInstr(cur, mov2)
|
||||
cur = linkInstr(cur, mov3)
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// InsertMoveBefore implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
|
||||
typ := src.RegType()
|
||||
if typ != dst.RegType() {
|
||||
panic("BUG: src and dst must have the same type")
|
||||
}
|
||||
|
||||
mov := m.allocateInstr()
|
||||
if typ == regalloc.RegTypeInt {
|
||||
mov.asMove64(dst, src)
|
||||
} else {
|
||||
mov.asFpuMov128(dst, src)
|
||||
}
|
||||
|
||||
cur := instr.prev
|
||||
prevNext := cur.next
|
||||
cur = linkInstr(cur, mov)
|
||||
linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// SSABlockLabel implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label {
|
||||
return m.executableContext.SsaBlockIDToLabels[id]
|
||||
}
|
||||
|
||||
// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
|
||||
if !v.IsRealReg() {
|
||||
panic("BUG: VReg must be backed by real reg to be stored")
|
||||
}
|
||||
|
||||
typ := m.compiler.TypeOf(v)
|
||||
|
||||
var prevNext, cur *instruction
|
||||
if after {
|
||||
cur, prevNext = instr, instr.next
|
||||
} else {
|
||||
cur, prevNext = instr.prev, instr
|
||||
}
|
||||
|
||||
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
|
||||
var amode addressMode
|
||||
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
|
||||
store := m.allocateInstr()
|
||||
store.asStore(operandNR(v), amode, typ.Bits())
|
||||
|
||||
cur = linkInstr(cur, store)
|
||||
return linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
|
||||
if !v.IsRealReg() {
|
||||
panic("BUG: VReg must be backed by real reg to be stored")
|
||||
}
|
||||
|
||||
typ := m.compiler.TypeOf(v)
|
||||
|
||||
var prevNext, cur *instruction
|
||||
if after {
|
||||
cur, prevNext = instr, instr.next
|
||||
} else {
|
||||
cur, prevNext = instr.prev, instr
|
||||
}
|
||||
|
||||
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
|
||||
var amode addressMode
|
||||
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
|
||||
load := m.allocateInstr()
|
||||
switch typ {
|
||||
case ssa.TypeI32, ssa.TypeI64:
|
||||
load.asULoad(operandNR(v), amode, typ.Bits())
|
||||
case ssa.TypeF32, ssa.TypeF64:
|
||||
load.asFpuLoad(operandNR(v), amode, typ.Bits())
|
||||
case ssa.TypeV128:
|
||||
load.asFpuLoad(operandNR(v), amode, 128)
|
||||
default:
|
||||
panic("TODO")
|
||||
}
|
||||
|
||||
cur = linkInstr(cur, load)
|
||||
return linkInstr(cur, prevNext)
|
||||
}
|
||||
|
||||
// LastInstrForInsertion implements backend.RegAllocFunctionMachine.
|
||||
func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {
|
||||
cur := end
|
||||
for cur.kind == nop0 {
|
||||
cur = cur.prev
|
||||
if cur == begin {
|
||||
return end
|
||||
}
|
||||
}
|
||||
switch cur.kind {
|
||||
case br:
|
||||
return cur
|
||||
default:
|
||||
return end
|
||||
}
|
||||
}
|
||||
117
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_relocation.go
generated
vendored
Normal file
@ -0,0 +1,117 @@
package arm64
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
)
|
||||
|
||||
const (
|
||||
// trampolineCallSize is the size of the trampoline instruction sequence for each function in an island.
|
||||
trampolineCallSize = 4*4 + 4 // Four instructions + 32-bit immediate.
|
||||
|
||||
// Unconditional branch offset is encoded as divided by 4 in imm26.
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en
|
||||
|
||||
maxUnconditionalBranchOffset = maxSignedInt26 * 4
|
||||
minUnconditionalBranchOffset = minSignedInt26 * 4
|
||||
|
||||
// trampolineIslandInterval is the range of the trampoline island.
|
||||
// Half of the range is used for the trampoline island, and the other half is used for the function.
|
||||
trampolineIslandInterval = maxUnconditionalBranchOffset / 2
|
||||
|
||||
// maxNumFunctions explicitly specifies the maximum number of functions that can be allowed in a single executable.
|
||||
maxNumFunctions = trampolineIslandInterval >> 6
|
||||
|
||||
// maxFunctionExecutableSize is the maximum size of a function that can exist in a trampoline island.
|
||||
// Conservatively set to 1/4 of the trampoline island interval.
|
||||
maxFunctionExecutableSize = trampolineIslandInterval >> 2
|
||||
)
|
||||
|
||||
// CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo.
|
||||
func (m *machine) CallTrampolineIslandInfo(numFunctions int) (interval, size int, err error) {
|
||||
if numFunctions > maxNumFunctions {
|
||||
return 0, 0, fmt.Errorf("too many functions: %d > %d", numFunctions, maxNumFunctions)
|
||||
}
|
||||
return trampolineIslandInterval, trampolineCallSize * numFunctions, nil
|
||||
}
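// exampleTrampolineIslandBudget is an illustrative sketch, not part of the
// vendored wazero source: for a hypothetical module with 10,000 functions,
// each island reserves one 20-byte trampoline (trampolineCallSize) per
// function, and an island is emitted every trampolineIslandInterval bytes of
// machine code.
func exampleTrampolineIslandBudget(m *machine) (interval, islandSize int) {
	interval, islandSize, err := m.CallTrampolineIslandInfo(10_000)
	if err != nil {
		panic(err)
	}
	return interval, islandSize
}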
|
||||
|
||||
// ResolveRelocations implements backend.Machine ResolveRelocations.
|
||||
func (m *machine) ResolveRelocations(
|
||||
refToBinaryOffset []int,
|
||||
executable []byte,
|
||||
relocations []backend.RelocationInfo,
|
||||
callTrampolineIslandOffsets []int,
|
||||
) {
|
||||
for _, islandOffset := range callTrampolineIslandOffsets {
|
||||
encodeCallTrampolineIsland(refToBinaryOffset, islandOffset, executable)
|
||||
}
|
||||
|
||||
for _, r := range relocations {
|
||||
instrOffset := r.Offset
|
||||
calleeFnOffset := refToBinaryOffset[r.FuncRef]
|
||||
diff := int64(calleeFnOffset) - (instrOffset)
|
||||
// Check if the diff is within the range of the branch instruction.
|
||||
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
|
||||
// Find the near trampoline island from callTrampolineIslandOffsets.
|
||||
islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset))
|
||||
islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef)
|
||||
diff = int64(islandTargetOffset) - (instrOffset)
|
||||
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
|
||||
panic("BUG in trampoline placement")
|
||||
}
|
||||
}
|
||||
binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff))
|
||||
}
|
||||
}
|
||||
|
||||
// encodeCallTrampolineIsland encodes a trampoline island for the given functions.
|
||||
// Each island consists of a trampoline instruction sequence for each function.
|
||||
// Each trampoline instruction sequence consists of 4 instructions + 32-bit immediate.
|
||||
func encodeCallTrampolineIsland(refToBinaryOffset []int, islandOffset int, executable []byte) {
|
||||
for i := 0; i < len(refToBinaryOffset); i++ {
|
||||
trampolineOffset := islandOffset + trampolineCallSize*i
|
||||
|
||||
fnOffset := refToBinaryOffset[i]
|
||||
diff := fnOffset - (trampolineOffset + 16)
|
||||
if diff > math.MaxInt32 || diff < math.MinInt32 {
|
||||
// Even amd64 cannot handle this case: 4GB is too big.
|
||||
panic("too big binary")
|
||||
}
|
||||
|
||||
// The tmpReg, tmpReg2 is safe to overwrite (in fact any caller-saved register is safe to use).
|
||||
tmpReg, tmpReg2 := regNumberInEncoding[tmpRegVReg.RealReg()], regNumberInEncoding[x11]
|
||||
|
||||
// adr tmpReg, PC+16: load the address of #diff into tmpReg.
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset:], encodeAdr(tmpReg, 16))
|
||||
// ldrsw tmpReg2, [tmpReg]: Load #diff into tmpReg2.
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset+4:],
|
||||
encodeLoadOrStore(sLoad32, tmpReg2, addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpRegVReg}))
|
||||
// add tmpReg, tmpReg2, tmpReg: add #diff to the address of #diff, getting the absolute address of the function.
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset+8:],
|
||||
encodeAluRRR(aluOpAdd, tmpReg, tmpReg, tmpReg2, true, false))
|
||||
// br tmpReg: branch to the function without overwriting the link register.
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset+12:], encodeUnconditionalBranchReg(tmpReg, false))
|
||||
// #diff
|
||||
binary.LittleEndian.PutUint32(executable[trampolineOffset+16:], uint32(diff))
|
||||
}
|
||||
}
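// Illustrative layout note, not part of the vendored wazero source: each
// 20-byte trampoline written above decomposes roughly into four instructions
// followed by the 32-bit displacement they consume (tmpReg is x27, tmpReg2 is x11):
//
//	offset+0:  adr   x27, +16       // x27 = address of the displacement word below
//	offset+4:  ldrsw x11, [x27]     // x11 = sign-extended 32-bit displacement
//	offset+8:  add   x27, x27, x11  // x27 = absolute address of the target function
//	offset+12: br    x27            // jump without clobbering the link register
//	offset+16: .word diff           // diff = fnOffset - (trampolineOffset + 16)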
|
||||
|
||||
// searchTrampolineIsland finds the nearest trampoline island from callTrampolineIslandOffsets.
|
||||
// Note that even if the offset is in the middle of two islands, it returns the latter one.
|
||||
// That is ok because the island is always placed in the middle of the range.
|
||||
//
|
||||
// precondition: callTrampolineIslandOffsets is sorted in ascending order.
|
||||
func searchTrampolineIsland(callTrampolineIslandOffsets []int, offset int) int {
|
||||
l := len(callTrampolineIslandOffsets)
|
||||
n := sort.Search(l, func(i int) bool {
|
||||
return callTrampolineIslandOffsets[i] >= offset
|
||||
})
|
||||
if n == l {
|
||||
n = l - 1
|
||||
}
|
||||
return callTrampolineIslandOffsets[n]
|
||||
}
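// exampleSearchTrampolineIsland is an illustrative usage sketch, not part of
// the vendored wazero source: given islands sorted in ascending order, the
// lookup returns the first island at or after the call site, and clamps to the
// last island when the call site lies beyond all of them.
func exampleSearchTrampolineIsland() (ahead, clamped int) {
	islands := []int{1 << 20, 3 << 20, 5 << 20} // hypothetical offsets, sorted ascending as required.
	ahead = searchTrampolineIsland(islands, 2<<20)   // 3<<20: the next island ahead of the call site.
	clamped = searchTrampolineIsland(islands, 6<<20) // 5<<20: past the last island, so clamp to it.
	return
}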
|
||||
397
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/reg.go
generated
vendored
Normal file
@ -0,0 +1,397 @@
package arm64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
)
|
||||
|
||||
// Arm64-specific registers.
|
||||
//
|
||||
// See https://developer.arm.com/documentation/dui0801/a/Overview-of-AArch64-state/Predeclared-core-register-names-in-AArch64-state
|
||||
|
||||
const (
|
||||
// General purpose registers. Note that we do not distinguish wn and xn registers
|
||||
// because they are the same from the perspective of register allocator, and
|
||||
// the size can be determined by the type of the instruction.
|
||||
|
||||
x0 = regalloc.RealRegInvalid + 1 + iota
|
||||
x1
|
||||
x2
|
||||
x3
|
||||
x4
|
||||
x5
|
||||
x6
|
||||
x7
|
||||
x8
|
||||
x9
|
||||
x10
|
||||
x11
|
||||
x12
|
||||
x13
|
||||
x14
|
||||
x15
|
||||
x16
|
||||
x17
|
||||
x18
|
||||
x19
|
||||
x20
|
||||
x21
|
||||
x22
|
||||
x23
|
||||
x24
|
||||
x25
|
||||
x26
|
||||
x27
|
||||
x28
|
||||
x29
|
||||
x30
|
||||
|
||||
// Vector registers. Note that we do not distinguish vn and dn, ... registers
|
||||
// because they are the same from the perspective of register allocator, and
|
||||
// the size can be determined by the type of the instruction.
|
||||
|
||||
v0
|
||||
v1
|
||||
v2
|
||||
v3
|
||||
v4
|
||||
v5
|
||||
v6
|
||||
v7
|
||||
v8
|
||||
v9
|
||||
v10
|
||||
v11
|
||||
v12
|
||||
v13
|
||||
v14
|
||||
v15
|
||||
v16
|
||||
v17
|
||||
v18
|
||||
v19
|
||||
v20
|
||||
v21
|
||||
v22
|
||||
v23
|
||||
v24
|
||||
v25
|
||||
v26
|
||||
v27
|
||||
v28
|
||||
v29
|
||||
v30
|
||||
v31
|
||||
|
||||
// Special registers
|
||||
|
||||
xzr
|
||||
sp
|
||||
lr = x30
|
||||
fp = x29
|
||||
tmp = x27
|
||||
)
|
||||
|
||||
var (
|
||||
x0VReg = regalloc.FromRealReg(x0, regalloc.RegTypeInt)
|
||||
x1VReg = regalloc.FromRealReg(x1, regalloc.RegTypeInt)
|
||||
x2VReg = regalloc.FromRealReg(x2, regalloc.RegTypeInt)
|
||||
x3VReg = regalloc.FromRealReg(x3, regalloc.RegTypeInt)
|
||||
x4VReg = regalloc.FromRealReg(x4, regalloc.RegTypeInt)
|
||||
x5VReg = regalloc.FromRealReg(x5, regalloc.RegTypeInt)
|
||||
x6VReg = regalloc.FromRealReg(x6, regalloc.RegTypeInt)
|
||||
x7VReg = regalloc.FromRealReg(x7, regalloc.RegTypeInt)
|
||||
x8VReg = regalloc.FromRealReg(x8, regalloc.RegTypeInt)
|
||||
x9VReg = regalloc.FromRealReg(x9, regalloc.RegTypeInt)
|
||||
x10VReg = regalloc.FromRealReg(x10, regalloc.RegTypeInt)
|
||||
x11VReg = regalloc.FromRealReg(x11, regalloc.RegTypeInt)
|
||||
x12VReg = regalloc.FromRealReg(x12, regalloc.RegTypeInt)
|
||||
x13VReg = regalloc.FromRealReg(x13, regalloc.RegTypeInt)
|
||||
x14VReg = regalloc.FromRealReg(x14, regalloc.RegTypeInt)
|
||||
x15VReg = regalloc.FromRealReg(x15, regalloc.RegTypeInt)
|
||||
x16VReg = regalloc.FromRealReg(x16, regalloc.RegTypeInt)
|
||||
x17VReg = regalloc.FromRealReg(x17, regalloc.RegTypeInt)
|
||||
x18VReg = regalloc.FromRealReg(x18, regalloc.RegTypeInt)
|
||||
x19VReg = regalloc.FromRealReg(x19, regalloc.RegTypeInt)
|
||||
x20VReg = regalloc.FromRealReg(x20, regalloc.RegTypeInt)
|
||||
x21VReg = regalloc.FromRealReg(x21, regalloc.RegTypeInt)
|
||||
x22VReg = regalloc.FromRealReg(x22, regalloc.RegTypeInt)
|
||||
x23VReg = regalloc.FromRealReg(x23, regalloc.RegTypeInt)
|
||||
x24VReg = regalloc.FromRealReg(x24, regalloc.RegTypeInt)
|
||||
x25VReg = regalloc.FromRealReg(x25, regalloc.RegTypeInt)
|
||||
x26VReg = regalloc.FromRealReg(x26, regalloc.RegTypeInt)
|
||||
x27VReg = regalloc.FromRealReg(x27, regalloc.RegTypeInt)
|
||||
x28VReg = regalloc.FromRealReg(x28, regalloc.RegTypeInt)
|
||||
x29VReg = regalloc.FromRealReg(x29, regalloc.RegTypeInt)
|
||||
x30VReg = regalloc.FromRealReg(x30, regalloc.RegTypeInt)
|
||||
v0VReg = regalloc.FromRealReg(v0, regalloc.RegTypeFloat)
|
||||
v1VReg = regalloc.FromRealReg(v1, regalloc.RegTypeFloat)
|
||||
v2VReg = regalloc.FromRealReg(v2, regalloc.RegTypeFloat)
|
||||
v3VReg = regalloc.FromRealReg(v3, regalloc.RegTypeFloat)
|
||||
v4VReg = regalloc.FromRealReg(v4, regalloc.RegTypeFloat)
|
||||
v5VReg = regalloc.FromRealReg(v5, regalloc.RegTypeFloat)
|
||||
v6VReg = regalloc.FromRealReg(v6, regalloc.RegTypeFloat)
|
||||
v7VReg = regalloc.FromRealReg(v7, regalloc.RegTypeFloat)
|
||||
v8VReg = regalloc.FromRealReg(v8, regalloc.RegTypeFloat)
|
||||
v9VReg = regalloc.FromRealReg(v9, regalloc.RegTypeFloat)
|
||||
v10VReg = regalloc.FromRealReg(v10, regalloc.RegTypeFloat)
|
||||
v11VReg = regalloc.FromRealReg(v11, regalloc.RegTypeFloat)
|
||||
v12VReg = regalloc.FromRealReg(v12, regalloc.RegTypeFloat)
|
||||
v13VReg = regalloc.FromRealReg(v13, regalloc.RegTypeFloat)
|
||||
v14VReg = regalloc.FromRealReg(v14, regalloc.RegTypeFloat)
|
||||
v15VReg = regalloc.FromRealReg(v15, regalloc.RegTypeFloat)
|
||||
v16VReg = regalloc.FromRealReg(v16, regalloc.RegTypeFloat)
|
||||
v17VReg = regalloc.FromRealReg(v17, regalloc.RegTypeFloat)
|
||||
v18VReg = regalloc.FromRealReg(v18, regalloc.RegTypeFloat)
|
||||
v19VReg = regalloc.FromRealReg(v19, regalloc.RegTypeFloat)
|
||||
v20VReg = regalloc.FromRealReg(v20, regalloc.RegTypeFloat)
|
||||
v21VReg = regalloc.FromRealReg(v21, regalloc.RegTypeFloat)
|
||||
v22VReg = regalloc.FromRealReg(v22, regalloc.RegTypeFloat)
|
||||
v23VReg = regalloc.FromRealReg(v23, regalloc.RegTypeFloat)
|
||||
v24VReg = regalloc.FromRealReg(v24, regalloc.RegTypeFloat)
|
||||
v25VReg = regalloc.FromRealReg(v25, regalloc.RegTypeFloat)
|
||||
v26VReg = regalloc.FromRealReg(v26, regalloc.RegTypeFloat)
|
||||
v27VReg = regalloc.FromRealReg(v27, regalloc.RegTypeFloat)
|
||||
// lr (link register) holds the return address at the function entry.
|
||||
lrVReg = x30VReg
|
||||
// tmpReg is used to perform spill/load on large stack offsets, and load large constants.
|
||||
// Therefore, be cautious about using this register in the middle of compilation, especially before register allocation.
|
||||
// This is the same as golang/go, but it's only described in the source code:
|
||||
// https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L59
|
||||
// https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L13-L15
|
||||
tmpRegVReg = regalloc.FromRealReg(tmp, regalloc.RegTypeInt)
|
||||
v28VReg = regalloc.FromRealReg(v28, regalloc.RegTypeFloat)
|
||||
v29VReg = regalloc.FromRealReg(v29, regalloc.RegTypeFloat)
|
||||
v30VReg = regalloc.FromRealReg(v30, regalloc.RegTypeFloat)
|
||||
v31VReg = regalloc.FromRealReg(v31, regalloc.RegTypeFloat)
|
||||
xzrVReg = regalloc.FromRealReg(xzr, regalloc.RegTypeInt)
|
||||
spVReg = regalloc.FromRealReg(sp, regalloc.RegTypeInt)
|
||||
fpVReg = regalloc.FromRealReg(fp, regalloc.RegTypeInt)
|
||||
)
|
||||
|
||||
var regNames = [...]string{
|
||||
x0: "x0",
|
||||
x1: "x1",
|
||||
x2: "x2",
|
||||
x3: "x3",
|
||||
x4: "x4",
|
||||
x5: "x5",
|
||||
x6: "x6",
|
||||
x7: "x7",
|
||||
x8: "x8",
|
||||
x9: "x9",
|
||||
x10: "x10",
|
||||
x11: "x11",
|
||||
x12: "x12",
|
||||
x13: "x13",
|
||||
x14: "x14",
|
||||
x15: "x15",
|
||||
x16: "x16",
|
||||
x17: "x17",
|
||||
x18: "x18",
|
||||
x19: "x19",
|
||||
x20: "x20",
|
||||
x21: "x21",
|
||||
x22: "x22",
|
||||
x23: "x23",
|
||||
x24: "x24",
|
||||
x25: "x25",
|
||||
x26: "x26",
|
||||
x27: "x27",
|
||||
x28: "x28",
|
||||
x29: "x29",
|
||||
x30: "x30",
|
||||
xzr: "xzr",
|
||||
sp: "sp",
|
||||
v0: "v0",
|
||||
v1: "v1",
|
||||
v2: "v2",
|
||||
v3: "v3",
|
||||
v4: "v4",
|
||||
v5: "v5",
|
||||
v6: "v6",
|
||||
v7: "v7",
|
||||
v8: "v8",
|
||||
v9: "v9",
|
||||
v10: "v10",
|
||||
v11: "v11",
|
||||
v12: "v12",
|
||||
v13: "v13",
|
||||
v14: "v14",
|
||||
v15: "v15",
|
||||
v16: "v16",
|
||||
v17: "v17",
|
||||
v18: "v18",
|
||||
v19: "v19",
|
||||
v20: "v20",
|
||||
v21: "v21",
|
||||
v22: "v22",
|
||||
v23: "v23",
|
||||
v24: "v24",
|
||||
v25: "v25",
|
||||
v26: "v26",
|
||||
v27: "v27",
|
||||
v28: "v28",
|
||||
v29: "v29",
|
||||
v30: "v30",
|
||||
v31: "v31",
|
||||
}
|
||||
|
||||
func formatVRegSized(r regalloc.VReg, size byte) (ret string) {
|
||||
if r.IsRealReg() {
|
||||
ret = regNames[r.RealReg()]
|
||||
switch ret[0] {
|
||||
case 'x':
|
||||
switch size {
|
||||
case 32:
|
||||
ret = strings.Replace(ret, "x", "w", 1)
|
||||
case 64:
|
||||
default:
|
||||
panic("BUG: invalid register size: " + strconv.Itoa(int(size)))
|
||||
}
|
||||
case 'v':
|
||||
switch size {
|
||||
case 32:
|
||||
ret = strings.Replace(ret, "v", "s", 1)
|
||||
case 64:
|
||||
ret = strings.Replace(ret, "v", "d", 1)
|
||||
case 128:
|
||||
ret = strings.Replace(ret, "v", "q", 1)
|
||||
default:
|
||||
panic("BUG: invalid register size")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
switch r.RegType() {
|
||||
case regalloc.RegTypeInt:
|
||||
switch size {
|
||||
case 32:
|
||||
ret = fmt.Sprintf("w%d?", r.ID())
|
||||
case 64:
|
||||
ret = fmt.Sprintf("x%d?", r.ID())
|
||||
default:
|
||||
panic("BUG: invalid register size: " + strconv.Itoa(int(size)))
|
||||
}
|
||||
case regalloc.RegTypeFloat:
|
||||
switch size {
|
||||
case 32:
|
||||
ret = fmt.Sprintf("s%d?", r.ID())
|
||||
case 64:
|
||||
ret = fmt.Sprintf("d%d?", r.ID())
|
||||
case 128:
|
||||
ret = fmt.Sprintf("q%d?", r.ID())
|
||||
default:
|
||||
panic("BUG: invalid register size")
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("BUG: invalid register type: %d for %s", r.RegType(), r))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
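// exampleFormatVRegSized is an illustrative sketch, not part of the vendored
// wazero source: the same real register renders under different names
// depending on the access size passed in.
func exampleFormatVRegSized() []string {
	return []string{
		formatVRegSized(x0VReg, 32),  // "w0"
		formatVRegSized(x0VReg, 64),  // "x0"
		formatVRegSized(v0VReg, 64),  // "d0"
		formatVRegSized(v0VReg, 128), // "q0"
	}
}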
|
||||
|
||||
func formatVRegWidthVec(r regalloc.VReg, width vecArrangement) (ret string) {
|
||||
var id string
|
||||
wspec := strings.ToLower(width.String())
|
||||
if r.IsRealReg() {
|
||||
id = regNames[r.RealReg()][1:]
|
||||
} else {
|
||||
id = fmt.Sprintf("%d?", r.ID())
|
||||
}
|
||||
ret = fmt.Sprintf("%s%s", wspec, id)
|
||||
return
|
||||
}
|
||||
|
||||
func formatVRegVec(r regalloc.VReg, arr vecArrangement, index vecIndex) (ret string) {
|
||||
id := fmt.Sprintf("v%d?", r.ID())
|
||||
if r.IsRealReg() {
|
||||
id = regNames[r.RealReg()]
|
||||
}
|
||||
ret = fmt.Sprintf("%s.%s", id, strings.ToLower(arr.String()))
|
||||
if index != vecIndexNone {
|
||||
ret += fmt.Sprintf("[%d]", index)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func regTypeToRegisterSizeInBits(r regalloc.RegType) byte {
|
||||
switch r {
|
||||
case regalloc.RegTypeInt:
|
||||
return 64
|
||||
case regalloc.RegTypeFloat:
|
||||
return 128
|
||||
default:
|
||||
panic("BUG: invalid register type")
|
||||
}
|
||||
}
|
||||
|
||||
var regNumberInEncoding = [...]uint32{
|
||||
x0: 0,
|
||||
x1: 1,
|
||||
x2: 2,
|
||||
x3: 3,
|
||||
x4: 4,
|
||||
x5: 5,
|
||||
x6: 6,
|
||||
x7: 7,
|
||||
x8: 8,
|
||||
x9: 9,
|
||||
x10: 10,
|
||||
x11: 11,
|
||||
x12: 12,
|
||||
x13: 13,
|
||||
x14: 14,
|
||||
x15: 15,
|
||||
x16: 16,
|
||||
x17: 17,
|
||||
x18: 18,
|
||||
x19: 19,
|
||||
x20: 20,
|
||||
x21: 21,
|
||||
x22: 22,
|
||||
x23: 23,
|
||||
x24: 24,
|
||||
x25: 25,
|
||||
x26: 26,
|
||||
x27: 27,
|
||||
x28: 28,
|
||||
x29: 29,
|
||||
x30: 30,
|
||||
xzr: 31,
|
||||
sp: 31,
|
||||
v0: 0,
|
||||
v1: 1,
|
||||
v2: 2,
|
||||
v3: 3,
|
||||
v4: 4,
|
||||
v5: 5,
|
||||
v6: 6,
|
||||
v7: 7,
|
||||
v8: 8,
|
||||
v9: 9,
|
||||
v10: 10,
|
||||
v11: 11,
|
||||
v12: 12,
|
||||
v13: 13,
|
||||
v14: 14,
|
||||
v15: 15,
|
||||
v16: 16,
|
||||
v17: 17,
|
||||
v18: 18,
|
||||
v19: 19,
|
||||
v20: 20,
|
||||
v21: 21,
|
||||
v22: 22,
|
||||
v23: 23,
|
||||
v24: 24,
|
||||
v25: 25,
|
||||
v26: 26,
|
||||
v27: 27,
|
||||
v28: 28,
|
||||
v29: 29,
|
||||
v30: 30,
|
||||
v31: 31,
|
||||
}
|
||||
90
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/unwind_stack.go
generated
vendored
Normal file
@ -0,0 +1,90 @@
package arm64
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"reflect"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/wasmdebug"
|
||||
)
|
||||
|
||||
// UnwindStack implements wazevo.unwindStack.
|
||||
func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr {
|
||||
l := int(top - sp)
|
||||
|
||||
var stackBuf []byte
|
||||
{
|
||||
// TODO: use unsafe.Slice after floor version is set to Go 1.20.
|
||||
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))
|
||||
hdr.Data = sp
|
||||
hdr.Len = l
|
||||
hdr.Cap = l
|
||||
}
|
||||
|
||||
for i := uint64(0); i < uint64(l); {
|
||||
// (high address)
|
||||
// +-----------------+
|
||||
// | ....... |
|
||||
// | ret Y | <----+
|
||||
// | ....... | |
|
||||
// | ret 0 | |
|
||||
// | arg X | | size_of_arg_ret
|
||||
// | ....... | |
|
||||
// | arg 1 | |
|
||||
// | arg 0 | <----+
|
||||
// | size_of_arg_ret |
|
||||
// | ReturnAddress |
|
||||
// +-----------------+ <----+
|
||||
// | ........... | |
|
||||
// | spill slot M | |
|
||||
// | ............ | |
|
||||
// | spill slot 2 | |
|
||||
// | spill slot 1 | | frame size
|
||||
// | spill slot 0 | |
|
||||
// | clobbered N | |
|
||||
// | ............ | |
|
||||
// | clobbered 0 | <----+
|
||||
// | xxxxxx | ;; unused space to make it 16-byte aligned.
|
||||
// | frame_size |
|
||||
// +-----------------+ <---- SP
|
||||
// (low address)
|
||||
|
||||
frameSize := binary.LittleEndian.Uint64(stackBuf[i:])
|
||||
i += frameSize +
|
||||
16 // frame size + aligned space.
|
||||
retAddr := binary.LittleEndian.Uint64(stackBuf[i:])
|
||||
i += 8 // ret addr.
|
||||
sizeOfArgRet := binary.LittleEndian.Uint64(stackBuf[i:])
|
||||
i += 8 + sizeOfArgRet
|
||||
returnAddresses = append(returnAddresses, uintptr(retAddr))
|
||||
if len(returnAddresses) == wasmdebug.MaxFrames {
|
||||
break
|
||||
}
|
||||
}
|
||||
return returnAddresses
|
||||
}
|
||||
|
||||
// GoCallStackView implements wazevo.goCallStackView.
|
||||
func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
|
||||
// (high address)
|
||||
// +-----------------+ <----+
|
||||
// | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned.
|
||||
// ^ | arg[N]/ret[M] | |
|
||||
// sliceSize | | ............ | | sliceSize
|
||||
// | | arg[1]/ret[1] | |
|
||||
// v | arg[0]/ret[0] | <----+
|
||||
// | sliceSize |
|
||||
// | frame_size |
|
||||
// +-----------------+ <---- stackPointerBeforeGoCall
|
||||
// (low address)
|
||||
ptr := unsafe.Pointer(stackPointerBeforeGoCall)
|
||||
size := *(*uint64)(unsafe.Add(ptr, 8))
|
||||
var view []uint64
|
||||
{
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&view))
|
||||
sh.Data = uintptr(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize).
|
||||
sh.Len = int(size)
|
||||
sh.Cap = int(size)
|
||||
}
|
||||
return view
|
||||
}
|
||||
100
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/machine.go
generated
vendored
Normal file
@ -0,0 +1,100 @@
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
type (
|
||||
// Machine is a backend for a specific ISA machine.
|
||||
Machine interface {
|
||||
ExecutableContext() ExecutableContext
|
||||
|
||||
// DisableStackCheck disables the stack check for the current compilation for debugging/testing.
|
||||
DisableStackCheck()
|
||||
|
||||
// SetCurrentABI initializes the FunctionABI for the given signature.
|
||||
SetCurrentABI(abi *FunctionABI)
|
||||
|
||||
// SetCompiler sets the compilation context used for the lifetime of Machine.
|
||||
// This is only called once per Machine, i.e. before the first compilation.
|
||||
SetCompiler(Compiler)
|
||||
|
||||
// LowerSingleBranch is called when the compilation of the given single branch is started.
|
||||
LowerSingleBranch(b *ssa.Instruction)
|
||||
|
||||
// LowerConditionalBranch is called when the compilation of the given conditional branch is started.
|
||||
LowerConditionalBranch(b *ssa.Instruction)
|
||||
|
||||
// LowerInstr is called for each instruction in the given block except for the ones marked as already lowered
|
||||
// via Compiler.MarkLowered. The order is reverse, i.e. from the last instruction to the first one.
|
||||
//
|
||||
// Note: this can lower multiple instructions (which produce the inputs) at once whenever it's possible
|
||||
// for optimization.
|
||||
LowerInstr(*ssa.Instruction)
|
||||
|
||||
// Reset resets the machine state for the next compilation.
|
||||
Reset()
|
||||
|
||||
// InsertMove inserts a move instruction from src to dst whose type is typ.
|
||||
InsertMove(dst, src regalloc.VReg, typ ssa.Type)
|
||||
|
||||
// InsertReturn inserts the return instruction to return from the current function.
|
||||
InsertReturn()
|
||||
|
||||
// InsertLoadConstantBlockArg inserts the instruction(s) to load the constant value into the given regalloc.VReg.
|
||||
InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg)
|
||||
|
||||
// Format returns the string representation of the currently compiled machine code.
|
||||
// This is only for testing purpose.
|
||||
Format() string
|
||||
|
||||
// RegAlloc does the register allocation after lowering.
|
||||
RegAlloc()
|
||||
|
||||
// PostRegAlloc does the post register allocation, e.g. setting up prologue/epilogue, redundant move elimination, etc.
|
||||
PostRegAlloc()
|
||||
|
||||
// ResolveRelocations resolves the relocations after emitting machine code.
|
||||
// * refToBinaryOffset: the map from the function reference (ssa.FuncRef) to the executable offset.
|
||||
// * executable: the binary to resolve the relocations.
|
||||
// * relocations: the relocations to resolve.
|
||||
// * callTrampolineIslandOffsets: the offsets of the trampoline islands in the executable.
|
||||
ResolveRelocations(
|
||||
refToBinaryOffset []int,
|
||||
executable []byte,
|
||||
relocations []RelocationInfo,
|
||||
callTrampolineIslandOffsets []int,
|
||||
)
|
||||
|
||||
// Encode encodes the machine instructions to the Compiler.
|
||||
Encode(ctx context.Context) error
|
||||
|
||||
// CompileGoFunctionTrampoline compiles the trampoline function to call a Go function of the given exit code and signature.
|
||||
CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte
|
||||
|
||||
// CompileStackGrowCallSequence returns the sequence of instructions shared by all functions to
|
||||
// call the stack grow builtin function.
|
||||
CompileStackGrowCallSequence() []byte
|
||||
|
||||
// CompileEntryPreamble returns the sequence of instructions shared by multiple functions to
|
||||
// enter the function from Go.
|
||||
CompileEntryPreamble(signature *ssa.Signature) []byte
|
||||
|
||||
// LowerParams lowers the given parameters.
|
||||
LowerParams(params []ssa.Value)
|
||||
|
||||
// LowerReturns lowers the given returns.
|
||||
LowerReturns(returns []ssa.Value)
|
||||
|
||||
// ArgsResultsRegs returns the registers used for arguments and return values.
|
||||
ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg)
|
||||
|
||||
// CallTrampolineIslandInfo returns the interval of the offset where the trampoline island is placed, and
|
||||
// the size of the trampoline island. If islandSize is zero, the trampoline island is not used on this machine.
|
||||
CallTrampolineIslandInfo(numFunctions int) (interval, islandSize int, err error)
|
||||
}
|
||||
)
|
||||
319
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
generated
vendored
Normal file
@ -0,0 +1,319 @@
package backend

import (
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// RegAllocFunctionMachine is the interface for the machine specific logic that will be used in RegAllocFunction.
type RegAllocFunctionMachine[I regalloc.InstrConstraint] interface {
	// InsertMoveBefore inserts the move instruction from src to dst before the given instruction.
	InsertMoveBefore(dst, src regalloc.VReg, instr I)
	// InsertStoreRegisterAt inserts the instruction(s) to store the given virtual register at the given instruction.
	// If after is true, the instruction(s) will be inserted after the given instruction, otherwise before.
	InsertStoreRegisterAt(v regalloc.VReg, instr I, after bool) I
	// InsertReloadRegisterAt inserts the instruction(s) to reload the given virtual register at the given instruction.
	// If after is true, the instruction(s) will be inserted after the given instruction, otherwise before.
	InsertReloadRegisterAt(v regalloc.VReg, instr I, after bool) I
	// ClobberedRegisters is called when the register allocation is done and the clobbered registers are known.
	ClobberedRegisters(regs []regalloc.VReg)
	// Swap swaps the two virtual registers after the given instruction.
	Swap(cur I, x1, x2, tmp regalloc.VReg)
	// LastInstrForInsertion implements LastInstrForInsertion of regalloc.Function. See its comment for details.
	LastInstrForInsertion(begin, end I) I
	// SSABlockLabel returns the label of the given ssa.BasicBlockID.
	SSABlockLabel(id ssa.BasicBlockID) Label
}

type (
	// RegAllocFunction implements regalloc.Function.
	RegAllocFunction[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct {
		m   m
		ssb ssa.Builder
		c   Compiler
		// iter is the iterator for reversePostOrderBlocks.
		iter                   int
		reversePostOrderBlocks []RegAllocBlock[I, m]
		// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks.
		labelToRegAllocBlockIndex map[Label]int
		loopNestingForestRoots    []ssa.BasicBlock
	}

	// RegAllocBlock implements regalloc.Block.
	RegAllocBlock[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct {
		// f is the function this block belongs to. Used to reuse the regAllocFunctionImpl.predsSlice slice for Defs() and Uses().
		f                           *RegAllocFunction[I, m]
		sb                          ssa.BasicBlock
		l                           Label
		begin, end                  I
		loopNestingForestChildren   []ssa.BasicBlock
		cur                         I
		id                          int
		cachedLastInstrForInsertion I
	}
)

// NewRegAllocFunction returns a new RegAllocFunction.
func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] {
	return &RegAllocFunction[I, M]{
		m:                         m,
		ssb:                       ssb,
		c:                         c,
		labelToRegAllocBlockIndex: make(map[Label]int),
	}
}

// AddBlock adds a new block to the function.
func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end I) {
	i := len(f.reversePostOrderBlocks)
	f.reversePostOrderBlocks = append(f.reversePostOrderBlocks, RegAllocBlock[I, M]{
		f:     f,
		sb:    sb,
		l:     l,
		begin: begin,
		end:   end,
		id:    int(sb.ID()),
	})
	f.labelToRegAllocBlockIndex[l] = i
}

// Reset resets the function for the next compilation.
func (f *RegAllocFunction[I, M]) Reset() {
	f.reversePostOrderBlocks = f.reversePostOrderBlocks[:0]
	f.iter = 0
}

// StoreRegisterAfter implements regalloc.Function StoreRegisterAfter.
func (f *RegAllocFunction[I, M]) StoreRegisterAfter(v regalloc.VReg, instr regalloc.Instr) {
	m := f.m
	m.InsertStoreRegisterAt(v, instr.(I), true)
}

// ReloadRegisterBefore implements regalloc.Function ReloadRegisterBefore.
func (f *RegAllocFunction[I, M]) ReloadRegisterBefore(v regalloc.VReg, instr regalloc.Instr) {
	m := f.m
	m.InsertReloadRegisterAt(v, instr.(I), false)
}

// ReloadRegisterAfter implements regalloc.Function ReloadRegisterAfter.
func (f *RegAllocFunction[I, M]) ReloadRegisterAfter(v regalloc.VReg, instr regalloc.Instr) {
	m := f.m
	m.InsertReloadRegisterAt(v, instr.(I), true)
}

// StoreRegisterBefore implements regalloc.Function StoreRegisterBefore.
func (f *RegAllocFunction[I, M]) StoreRegisterBefore(v regalloc.VReg, instr regalloc.Instr) {
	m := f.m
	m.InsertStoreRegisterAt(v, instr.(I), false)
}

// ClobberedRegisters implements regalloc.Function ClobberedRegisters.
func (f *RegAllocFunction[I, M]) ClobberedRegisters(regs []regalloc.VReg) {
	f.m.ClobberedRegisters(regs)
}

// SwapBefore implements regalloc.Function SwapBefore.
func (f *RegAllocFunction[I, M]) SwapBefore(x1, x2, tmp regalloc.VReg, instr regalloc.Instr) {
	f.m.Swap(instr.Prev().(I), x1, x2, tmp)
}

// PostOrderBlockIteratorBegin implements regalloc.Function PostOrderBlockIteratorBegin.
func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorBegin() regalloc.Block {
	f.iter = len(f.reversePostOrderBlocks) - 1
	return f.PostOrderBlockIteratorNext()
}

// PostOrderBlockIteratorNext implements regalloc.Function PostOrderBlockIteratorNext.
func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorNext() regalloc.Block {
	if f.iter < 0 {
		return nil
	}
	b := &f.reversePostOrderBlocks[f.iter]
	f.iter--
	return b
}

// ReversePostOrderBlockIteratorBegin implements regalloc.Function ReversePostOrderBlockIteratorBegin.
func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorBegin() regalloc.Block {
	f.iter = 0
	return f.ReversePostOrderBlockIteratorNext()
}

// ReversePostOrderBlockIteratorNext implements regalloc.Function ReversePostOrderBlockIteratorNext.
func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorNext() regalloc.Block {
	if f.iter >= len(f.reversePostOrderBlocks) {
		return nil
	}
	b := &f.reversePostOrderBlocks[f.iter]
	f.iter++
	return b
}

// LoopNestingForestRoots implements regalloc.Function LoopNestingForestRoots.
func (f *RegAllocFunction[I, M]) LoopNestingForestRoots() int {
	f.loopNestingForestRoots = f.ssb.LoopNestingForestRoots()
	return len(f.loopNestingForestRoots)
}

// LoopNestingForestRoot implements regalloc.Function LoopNestingForestRoot.
func (f *RegAllocFunction[I, M]) LoopNestingForestRoot(i int) regalloc.Block {
	blk := f.loopNestingForestRoots[i]
	l := f.m.SSABlockLabel(blk.ID())
	index := f.labelToRegAllocBlockIndex[l]
	return &f.reversePostOrderBlocks[index]
}

// InsertMoveBefore implements regalloc.Function InsertMoveBefore.
func (f *RegAllocFunction[I, M]) InsertMoveBefore(dst, src regalloc.VReg, instr regalloc.Instr) {
	f.m.InsertMoveBefore(dst, src, instr.(I))
}

// LowestCommonAncestor implements regalloc.Function LowestCommonAncestor.
func (f *RegAllocFunction[I, M]) LowestCommonAncestor(blk1, blk2 regalloc.Block) regalloc.Block {
	ret := f.ssb.LowestCommonAncestor(blk1.(*RegAllocBlock[I, M]).sb, blk2.(*RegAllocBlock[I, M]).sb)
	l := f.m.SSABlockLabel(ret.ID())
	index := f.labelToRegAllocBlockIndex[l]
	return &f.reversePostOrderBlocks[index]
}

// Idom implements regalloc.Function Idom.
func (f *RegAllocFunction[I, M]) Idom(blk regalloc.Block) regalloc.Block {
	builder := f.ssb
	idom := builder.Idom(blk.(*RegAllocBlock[I, M]).sb)
	if idom == nil {
		panic("BUG: idom must not be nil")
	}
	l := f.m.SSABlockLabel(idom.ID())
	index := f.labelToRegAllocBlockIndex[l]
	return &f.reversePostOrderBlocks[index]
}

// ID implements regalloc.Block.
func (r *RegAllocBlock[I, m]) ID() int32 { return int32(r.id) }

// BlockParams implements regalloc.Block.
func (r *RegAllocBlock[I, m]) BlockParams(regs *[]regalloc.VReg) []regalloc.VReg {
	c := r.f.c
	*regs = (*regs)[:0]
	for i := 0; i < r.sb.Params(); i++ {
		v := c.VRegOf(r.sb.Param(i))
		*regs = append(*regs, v)
	}
	return *regs
}

// InstrIteratorBegin implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrIteratorBegin() regalloc.Instr {
	r.cur = r.begin
	return r.cur
}

// InstrIteratorNext implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrIteratorNext() regalloc.Instr {
	for {
		if r.cur == r.end {
			return nil
		}
		instr := r.cur.Next()
		r.cur = instr.(I)
		if instr == nil {
			return nil
		} else if instr.AddedBeforeRegAlloc() {
			// Only concerned about the instruction added before regalloc.
			return instr
		}
	}
}

// InstrRevIteratorBegin implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrRevIteratorBegin() regalloc.Instr {
	r.cur = r.end
	return r.cur
}

// InstrRevIteratorNext implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrRevIteratorNext() regalloc.Instr {
	for {
		if r.cur == r.begin {
			return nil
		}
		instr := r.cur.Prev()
		r.cur = instr.(I)
		if instr == nil {
			return nil
		} else if instr.AddedBeforeRegAlloc() {
			// Only concerned about the instruction added before regalloc.
			return instr
		}
	}
}

// FirstInstr implements regalloc.Block.
func (r *RegAllocBlock[I, m]) FirstInstr() regalloc.Instr {
	return r.begin
}

// EndInstr implements regalloc.Block.
func (r *RegAllocBlock[I, m]) EndInstr() regalloc.Instr {
	return r.end
}

// LastInstrForInsertion implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LastInstrForInsertion() regalloc.Instr {
	var nil I
	if r.cachedLastInstrForInsertion == nil {
		r.cachedLastInstrForInsertion = r.f.m.LastInstrForInsertion(r.begin, r.end)
	}
	return r.cachedLastInstrForInsertion
}

// Preds implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Preds() int { return r.sb.Preds() }

// Pred implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Pred(i int) regalloc.Block {
	sb := r.sb
	pred := sb.Pred(i)
	l := r.f.m.SSABlockLabel(pred.ID())
	index := r.f.labelToRegAllocBlockIndex[l]
	return &r.f.reversePostOrderBlocks[index]
}

// Entry implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Entry() bool { return r.sb.EntryBlock() }

// Succs implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Succs() int {
	return r.sb.Succs()
}

// Succ implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Succ(i int) regalloc.Block {
	sb := r.sb
	succ := sb.Succ(i)
	if succ.ReturnBlock() {
		return nil
	}
	l := r.f.m.SSABlockLabel(succ.ID())
	index := r.f.labelToRegAllocBlockIndex[l]
	return &r.f.reversePostOrderBlocks[index]
}

// LoopHeader implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopHeader() bool {
	return r.sb.LoopHeader()
}

// LoopNestingForestChildren implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopNestingForestChildren() int {
	r.loopNestingForestChildren = r.sb.LoopNestingForestChildren()
	return len(r.loopNestingForestChildren)
}

// LoopNestingForestChild implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopNestingForestChild(i int) regalloc.Block {
	blk := r.loopNestingForestChildren[i]
	l := r.f.m.SSABlockLabel(blk.ID())
	index := r.f.labelToRegAllocBlockIndex[l]
	return &r.f.reversePostOrderBlocks[index]
}
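One subtlety worth calling out in LastInstrForInsertion above is the `var nil I` line: because I is only known to be comparable (via regalloc.InstrConstraint), the predeclared nil cannot be used directly in a comparison against a value of type I, so the code shadows nil with I's zero value. The following standalone illustration of the same trick uses a made-up instr type, not the real backend types.

package main

import "fmt"

// instr is a stand-in for a backend instruction type; the real code uses
// machine-specific instruction pointers constrained by regalloc.InstrConstraint.
type instr struct{ name string }

// lastCached mimics the caching pattern in LastInstrForInsertion: I is only
// known to be comparable, so the predeclared nil cannot be compared against it
// directly. Shadowing nil with I's zero value makes `*cached == nil` a
// comparison between two I values, which the comparable constraint allows.
func lastCached[I comparable](cached *I, compute func() I) I {
	var nil I // zero value of I; for pointer instantiations this is the nil pointer.
	if *cached == nil {
		*cached = compute()
	}
	return *cached
}

func main() {
	var cache *instr
	got := lastCached(&cache, func() *instr { return &instr{name: "br"} })
	fmt.Println(got.name) // br, computed once and cached.
	_ = lastCached(&cache, func() *instr { panic("not called again") })
}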
136
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/api.go
generated
vendored
Normal file
@ -0,0 +1,136 @@
package regalloc

import "fmt"

// These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register
// allocators to work on any ISA.
//
// TODO: the interfaces are not stabilized yet, especially x64 will need some changes. E.g. x64 has an addressing mode
// where index can be in memory. That kind of info will be useful to reduce the register pressure, and should be leveraged
// by the register allocators, like https://docs.rs/regalloc2/latest/regalloc2/enum.OperandConstraint.html

type (
	// Function is the top-level interface to do register allocation, which corresponds to a CFG containing
	// Block(s).
	Function interface {
		// PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG.
		// In other words, the last blocks in the CFG will be returned first.
		PostOrderBlockIteratorBegin() Block
		// PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG.
		PostOrderBlockIteratorNext() Block
		// ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG.
		// In other words, the first blocks in the CFG will be returned first.
		ReversePostOrderBlockIteratorBegin() Block
		// ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG.
		ReversePostOrderBlockIteratorNext() Block
		// ClobberedRegisters conveys the registers clobbered by this function.
		ClobberedRegisters([]VReg)
		// LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function.
		LoopNestingForestRoots() int
		// LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function.
		LoopNestingForestRoot(i int) Block
		// LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree.
		LowestCommonAncestor(blk1, blk2 Block) Block
		// Idom returns the immediate dominator of the given block.
		Idom(blk Block) Block

		// The following methods are for rewriting the function.

		// SwapBefore swaps the two virtual registers right before the given instruction.
		SwapBefore(x1, x2, tmp VReg, instr Instr)
		// StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register.
		StoreRegisterBefore(v VReg, instr Instr)
		// StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register.
		StoreRegisterAfter(v VReg, instr Instr)
		// ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register.
		ReloadRegisterBefore(v VReg, instr Instr)
		// ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register.
		ReloadRegisterAfter(v VReg, instr Instr)
		// InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers.
		InsertMoveBefore(dst, src VReg, instr Instr)
	}

	// Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s).
	Block interface {
		// ID returns the unique identifier of this block which is ordered in the reverse post-order traversal of the CFG.
		ID() int32
		// BlockParams returns the virtual registers used as the parameters of this block.
		BlockParams(*[]VReg) []VReg
		// InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped.
		// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
		InstrIteratorBegin() Instr
		// InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped.
		// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
		InstrIteratorNext() Instr
		// InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order.
		InstrRevIteratorBegin() Instr
		// InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order.
		InstrRevIteratorNext() Instr
		// FirstInstr returns the first instruction in this block; instructions will be inserted after it.
		FirstInstr() Instr
		// EndInstr returns the end instruction in this block.
		EndInstr() Instr
		// LastInstrForInsertion returns the last instruction in this block; instructions will be inserted before it.
		// Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges.
		// At the time of register allocation, all the critical edges are already split, so there is no need
		// to worry about the case where a branching instruction has multiple successors.
		// Therefore, usually, it is the nop instruction, but if the block ends with an unconditional branching, then it returns
		// the unconditional branch, not the nop. In other words, it is either a nop or an unconditional branch.
		LastInstrForInsertion() Instr
		// Preds returns the number of predecessors of this block in the CFG.
		Preds() int
		// Pred returns the i-th predecessor of this block in the CFG.
		Pred(i int) Block
		// Entry returns true if this block is the entry block.
		Entry() bool
		// Succs returns the number of successors of this block in the CFG.
		Succs() int
		// Succ returns the i-th successor of this block in the CFG.
		Succ(i int) Block
		// LoopHeader returns true if this block is a loop header.
		LoopHeader() bool
		// LoopNestingForestChildren returns the number of children of this block in the loop nesting forest.
		LoopNestingForestChildren() int
		// LoopNestingForestChild returns the i-th child of this block in the loop nesting forest.
		LoopNestingForestChild(i int) Block
	}

	// Instr is an instruction in a block, abstracting away the underlying ISA.
	Instr interface {
		fmt.Stringer
		// Next returns the next instruction in the same block.
		Next() Instr
		// Prev returns the previous instruction in the same block.
		Prev() Instr
		// Defs returns the virtual registers defined by this instruction.
		Defs(*[]VReg) []VReg
		// Uses returns the virtual registers used by this instruction.
		// Note: multiple returned []VReg will not be held at the same time, so it's safe to use the same slice for this.
		Uses(*[]VReg) []VReg
		// AssignUse assigns the RealReg-allocated virtual register used by this instruction at the given index.
		AssignUse(index int, v VReg)
		// AssignDef assigns a RealReg-allocated virtual register defined by this instruction.
		// This only accepts one register because we don't allocate registers for multi-def instructions (i.e. call instructions).
		AssignDef(VReg)
		// IsCopy returns true if this instruction is a move instruction between two registers.
		// If true, the instruction is of the form dst = src, and if the src and dst do not interfere with each other,
		// we could coalesce them, and hence the copy can be eliminated from the final code.
		IsCopy() bool
		// IsCall returns true if this instruction is a call instruction. The result is used to insert
		// caller saved register spills and restores.
		IsCall() bool
		// IsIndirectCall returns true if this instruction is an indirect call instruction which calls a function pointer.
		// The result is used to insert caller saved register spills and restores.
		IsIndirectCall() bool
		// IsReturn returns true if this instruction is a return instruction.
		IsReturn() bool
		// AddedBeforeRegAlloc returns true if this instruction was added before register allocation.
		AddedBeforeRegAlloc() bool
	}

	// InstrConstraint is an interface for arch-specific instruction constraints.
	InstrConstraint interface {
		comparable
		Instr
	}
)
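The Begin/Next iterator protocol above (return the next element, or nil once exhausted) is shared by the block and instruction iterators. The following self-contained sketch mirrors that protocol over a plain slice with an index cursor, the same shape RegAllocFunction uses for its block iterators; the block and blockIter types here are simplified stand-ins, not the real regalloc interfaces.

package main

import "fmt"

// block is a simplified stand-in for regalloc.Block.
type block struct{ id int32 }

// blockIter mimics the PostOrder iterator pair: Begin resets a cursor and
// returns the first element, Next advances and returns nil once the
// underlying slice (held in reverse post-order) is exhausted.
type blockIter struct {
	reversePostOrder []*block
	cursor           int
}

// postOrderBegin starts from the last block: post-order over a reverse
// post-order slice is just a backwards walk.
func (it *blockIter) postOrderBegin() *block {
	it.cursor = len(it.reversePostOrder) - 1
	return it.postOrderNext()
}

func (it *blockIter) postOrderNext() *block {
	if it.cursor < 0 {
		return nil
	}
	b := it.reversePostOrder[it.cursor]
	it.cursor--
	return b
}

func main() {
	it := &blockIter{reversePostOrder: []*block{{id: 0}, {id: 1}, {id: 2}}}
	for b := it.postOrderBegin(); b != nil; b = it.postOrderNext() {
		fmt.Println(b.id) // 2, 1, 0
	}
}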
123
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/reg.go
generated
vendored
Normal file
@ -0,0 +1,123 @@
package regalloc

import (
	"fmt"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// VReg represents a register which is assigned to an SSA value. This is used to represent a register in the backend.
// A VReg may or may not be a physical register, and the physical register info can be obtained via RealReg.
type VReg uint64

// VRegID is the lower 32 bits of VReg, which is the pure identifier of VReg without RealReg info.
type VRegID uint32

// RealReg returns the RealReg of this VReg.
func (v VReg) RealReg() RealReg {
	return RealReg(v >> 32)
}

// IsRealReg returns true if this VReg is backed by a physical register.
func (v VReg) IsRealReg() bool {
	return v.RealReg() != RealRegInvalid
}

// FromRealReg returns a VReg from the given RealReg and RegType.
// This is used to represent a specific pre-colored register in the backend.
func FromRealReg(r RealReg, typ RegType) VReg {
	rid := VRegID(r)
	if rid > vRegIDReservedForRealNum {
		panic(fmt.Sprintf("invalid real reg %d", r))
	}
	return VReg(r).SetRealReg(r).SetRegType(typ)
}

// SetRealReg sets the RealReg of this VReg and returns the updated VReg.
func (v VReg) SetRealReg(r RealReg) VReg {
	return VReg(r)<<32 | (v & 0xff_00_ffffffff)
}

// RegType returns the RegType of this VReg.
func (v VReg) RegType() RegType {
	return RegType(v >> 40)
}

// SetRegType sets the RegType of this VReg and returns the updated VReg.
func (v VReg) SetRegType(t RegType) VReg {
	return VReg(t)<<40 | (v & 0x00_ff_ffffffff)
}

// ID returns the VRegID of this VReg.
func (v VReg) ID() VRegID {
	return VRegID(v & 0xffffffff)
}

// Valid returns true if this VReg is valid.
func (v VReg) Valid() bool {
	return v.ID() != vRegIDInvalid && v.RegType() != RegTypeInvalid
}

// RealReg represents a physical register.
type RealReg byte

const RealRegInvalid RealReg = 0

const (
	vRegIDInvalid            VRegID = 1 << 31
	VRegIDNonReservedBegin          = vRegIDReservedForRealNum
	vRegIDReservedForRealNum VRegID = 128
	VRegInvalid                     = VReg(vRegIDInvalid)
)

// String implements fmt.Stringer.
func (r RealReg) String() string {
	switch r {
	case RealRegInvalid:
		return "invalid"
	default:
		return fmt.Sprintf("r%d", r)
	}
}

// String implements fmt.Stringer.
func (v VReg) String() string {
	if v.IsRealReg() {
		return fmt.Sprintf("r%d", v.ID())
	}
	return fmt.Sprintf("v%d?", v.ID())
}

// RegType represents the type of a register.
type RegType byte

const (
	RegTypeInvalid RegType = iota
	RegTypeInt
	RegTypeFloat
	NumRegType
)

// String implements fmt.Stringer.
func (r RegType) String() string {
	switch r {
	case RegTypeInt:
		return "int"
	case RegTypeFloat:
		return "float"
	default:
		return "invalid"
	}
}

// RegTypeOf returns the RegType of the given ssa.Type.
func RegTypeOf(p ssa.Type) RegType {
	switch p {
	case ssa.TypeI32, ssa.TypeI64:
		return RegTypeInt
	case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
		return RegTypeFloat
	default:
		panic("invalid type")
	}
}
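The accessors above fix a concrete bit layout for VReg: bits 0-31 hold the VRegID, bits 32-39 the RealReg, and bits 40-47 the RegType. Below is a small standalone check of that packing, using the same shifts and masks as the methods above; the vreg type and the regTypeInt constant are local stand-ins copied here only for illustration.

package main

import "fmt"

// vreg mirrors regalloc.VReg's layout: | unused | RegType (8 bits) | RealReg (8 bits) | VRegID (32 bits) |.
type vreg uint64

func (v vreg) id() uint32             { return uint32(v & 0xffffffff) }
func (v vreg) realReg() byte          { return byte(v >> 32) }
func (v vreg) regType() byte          { return byte(v >> 40) }
func (v vreg) setRealReg(r byte) vreg { return vreg(r)<<32 | (v & 0xff_00_ffffffff) }
func (v vreg) setRegType(t byte) vreg { return vreg(t)<<40 | (v & 0x00_ff_ffffffff) }

func main() {
	const regTypeInt = 1 // mirrors RegTypeInt's iota position.
	v := vreg(42).setRealReg(3).setRegType(regTypeInt)
	fmt.Println(v.id(), v.realReg(), v.regType()) // 42 3 1
}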
1212
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
generated
vendored
Normal file
File diff suppressed because it is too large
108
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
generated
vendored
Normal file
@ -0,0 +1,108 @@
package regalloc

import (
	"fmt"
	"strings"
)

// NewRegSet returns a new RegSet with the given registers.
func NewRegSet(regs ...RealReg) RegSet {
	var ret RegSet
	for _, r := range regs {
		ret = ret.add(r)
	}
	return ret
}

// RegSet represents a set of registers.
type RegSet uint64

func (rs RegSet) format(info *RegisterInfo) string { //nolint:unused
	var ret []string
	for i := 0; i < 64; i++ {
		if rs&(1<<uint(i)) != 0 {
			ret = append(ret, info.RealRegName(RealReg(i)))
		}
	}
	return strings.Join(ret, ", ")
}

func (rs RegSet) has(r RealReg) bool {
	return rs&(1<<uint(r)) != 0
}

func (rs RegSet) add(r RealReg) RegSet {
	if r >= 64 {
		return rs
	}
	return rs | 1<<uint(r)
}

func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
	for i := 0; i < 64; i++ {
		if rs&(1<<uint(i)) != 0 {
			f(RealReg(i))
		}
	}
}

type regInUseSet struct {
	set RegSet
	vrs [64]VReg
}

func (rs *regInUseSet) reset() {
	rs.set = 0
	for i := range rs.vrs {
		rs.vrs[i] = VRegInvalid
	}
}

func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
	var ret []string
	for i := 0; i < 64; i++ {
		if rs.set&(1<<uint(i)) != 0 {
			vr := rs.vrs[i]
			ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
		}
	}
	return strings.Join(ret, ", ")
}

func (rs *regInUseSet) has(r RealReg) bool {
	if r >= 64 {
		return false
	}
	return rs.set&(1<<uint(r)) != 0
}

func (rs *regInUseSet) get(r RealReg) VReg {
	if r >= 64 {
		return VRegInvalid
	}
	return rs.vrs[r]
}

func (rs *regInUseSet) remove(r RealReg) {
	if r >= 64 {
		return
	}
	rs.set &= ^(1 << uint(r))
	rs.vrs[r] = VRegInvalid
}

func (rs *regInUseSet) add(r RealReg, vr VReg) {
	if r >= 64 {
		return
	}
	rs.set |= 1 << uint(r)
	rs.vrs[r] = vr
}

func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
	for i := 0; i < 64; i++ {
		if rs.set&(1<<uint(i)) != 0 {
			f(RealReg(i), rs.vrs[i])
		}
	}
}
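RegSet above is a plain 64-bit bitset keyed by RealReg, so membership, insertion, and iteration are single mask operations. The following minimal standalone mirror of add/has/Range (simplified, without the RegisterInfo-based formatting) shows the semantics; regSet and rangeOver are local stand-ins, not the vendored API.

package main

import "fmt"

// regSet mirrors regalloc.RegSet: bit i set means real register i is present.
type regSet uint64

func (rs regSet) add(r uint) regSet {
	if r >= 64 {
		return rs // out-of-range registers are silently ignored, as in the original.
	}
	return rs | 1<<r
}

func (rs regSet) has(r uint) bool { return r < 64 && rs&(1<<r) != 0 }

func (rs regSet) rangeOver(f func(r uint)) {
	for i := uint(0); i < 64; i++ {
		if rs&(1<<i) != 0 {
			f(i)
		}
	}
}

func main() {
	var rs regSet
	rs = rs.add(1).add(5).add(70)                 // 70 is ignored.
	fmt.Println(rs.has(5), rs.has(70))            // true false
	rs.rangeOver(func(r uint) { fmt.Println(r) }) // 1, 5
}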
43
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/vdef.go
generated
vendored
Normal file
@ -0,0 +1,43 @@
package backend

import (
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// SSAValueDefinition represents a definition of an SSA value.
type SSAValueDefinition struct {
	// BlockParamValue is valid if Instr == nil
	BlockParamValue ssa.Value

	// BlkParamVReg is valid if Instr == nil
	BlkParamVReg regalloc.VReg

	// Instr is not nil if this is a definition from an instruction.
	Instr *ssa.Instruction
	// N is the index of the return value in the instr's return values list.
	N int
	// RefCount is the number of references to the result.
	RefCount int
}

func (d *SSAValueDefinition) IsFromInstr() bool {
	return d.Instr != nil
}

func (d *SSAValueDefinition) IsFromBlockParam() bool {
	return d.Instr == nil
}

func (d *SSAValueDefinition) SSAValue() ssa.Value {
	if d.IsFromBlockParam() {
		return d.BlockParamValue
	} else {
		r, rs := d.Instr.Returns()
		if d.N == 0 {
			return r
		} else {
			return rs[d.N-1]
		}
	}
}
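SSAValue above encodes a small convention: N == 0 selects the instruction's primary return value, while N >= 1 indexes into the secondary returns at rs[N-1]. A tiny stand-in showing that indexing; fakeInstr and returnN are hypothetical, not ssa.Instruction.

package main

import "fmt"

// fakeInstr is a stand-in for ssa.Instruction: one primary return plus extras.
type fakeInstr struct {
	primary string
	rest    []string
}

// returnN mirrors SSAValueDefinition.SSAValue's instruction branch.
func returnN(instr *fakeInstr, n int) string {
	if n == 0 {
		return instr.primary
	}
	return instr.rest[n-1]
}

func main() {
	instr := &fakeInstr{primary: "v10", rest: []string{"v11", "v12"}}
	fmt.Println(returnN(instr, 0), returnN(instr, 2)) // v10 v12
}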
722
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go
generated
vendored
Normal file
@ -0,0 +1,722 @@
package wazevo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"sync/atomic"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/api"
|
||||
"github.com/tetratelabs/wazero/experimental"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
"github.com/tetratelabs/wazero/internal/expctxkeys"
|
||||
"github.com/tetratelabs/wazero/internal/internalapi"
|
||||
"github.com/tetratelabs/wazero/internal/wasm"
|
||||
"github.com/tetratelabs/wazero/internal/wasmdebug"
|
||||
"github.com/tetratelabs/wazero/internal/wasmruntime"
|
||||
)
|
||||
|
||||
type (
|
||||
// callEngine implements api.Function.
|
||||
callEngine struct {
|
||||
internalapi.WazeroOnly
|
||||
stack []byte
|
||||
// stackTop is the pointer to the *aligned* top of the stack. This must be updated
|
||||
// whenever the stack is changed. This is passed to the assembly function
|
||||
// at the very beginning of api.Function Call/CallWithStack.
|
||||
stackTop uintptr
|
||||
// executable is the pointer to the executable code for this function.
|
||||
executable *byte
|
||||
preambleExecutable *byte
|
||||
// parent is the *moduleEngine from which this callEngine is created.
|
||||
parent *moduleEngine
|
||||
// indexInModule is the index of the function in the module.
|
||||
indexInModule wasm.Index
|
||||
// sizeOfParamResultSlice is the size of the parameter/result slice.
|
||||
sizeOfParamResultSlice int
|
||||
requiredParams int
|
||||
// execCtx holds various information to be read/written by assembly functions.
|
||||
execCtx executionContext
|
||||
// execCtxPtr holds the pointer to the executionContext which doesn't change after callEngine is created.
|
||||
execCtxPtr uintptr
|
||||
numberOfResults int
|
||||
stackIteratorImpl stackIterator
|
||||
}
|
||||
|
||||
// executionContext is the struct to be read/written by assembly functions.
|
||||
executionContext struct {
|
||||
// exitCode holds the wazevoapi.ExitCode describing the state of the function execution.
|
||||
exitCode wazevoapi.ExitCode
|
||||
// callerModuleContextPtr holds the moduleContextOpaque for Go function calls.
|
||||
callerModuleContextPtr *byte
|
||||
// originalFramePointer holds the original frame pointer of the caller of the assembly function.
|
||||
originalFramePointer uintptr
|
||||
// originalStackPointer holds the original stack pointer of the caller of the assembly function.
|
||||
originalStackPointer uintptr
|
||||
// goReturnAddress holds the return address to go back to the caller of the assembly function.
|
||||
goReturnAddress uintptr
|
||||
// stackBottomPtr holds the pointer to the bottom of the stack.
|
||||
stackBottomPtr *byte
|
||||
// goCallReturnAddress holds the return address to go back to the caller of the Go function.
|
||||
goCallReturnAddress *byte
|
||||
// stackPointerBeforeGoCall holds the stack pointer before calling a Go function.
|
||||
stackPointerBeforeGoCall *uint64
|
||||
// stackGrowRequiredSize holds the required size of stack grow.
|
||||
stackGrowRequiredSize uintptr
|
||||
// memoryGrowTrampolineAddress holds the address of memory grow trampoline function.
|
||||
memoryGrowTrampolineAddress *byte
|
||||
// stackGrowCallTrampolineAddress holds the address of stack grow trampoline function.
|
||||
stackGrowCallTrampolineAddress *byte
|
||||
// checkModuleExitCodeTrampolineAddress holds the address of check-module-exit-code function.
|
||||
checkModuleExitCodeTrampolineAddress *byte
|
||||
// savedRegisters is the opaque spaces for save/restore registers.
|
||||
// We want to align 16 bytes for each register, so we use [64][2]uint64.
|
||||
savedRegisters [64][2]uint64
|
||||
// goFunctionCallCalleeModuleContextOpaque is the pointer to the target Go function's moduleContextOpaque.
|
||||
goFunctionCallCalleeModuleContextOpaque uintptr
|
||||
// tableGrowTrampolineAddress holds the address of table grow trampoline function.
|
||||
tableGrowTrampolineAddress *byte
|
||||
// refFuncTrampolineAddress holds the address of ref-func trampoline function.
|
||||
refFuncTrampolineAddress *byte
|
||||
// memmoveAddress holds the address of memmove function implemented by Go runtime. See memmove.go.
|
||||
memmoveAddress uintptr
|
||||
// framePointerBeforeGoCall holds the frame pointer before calling a Go function. Note: only used in amd64.
|
||||
framePointerBeforeGoCall uintptr
|
||||
// memoryWait32TrampolineAddress holds the address of memory_wait32 trampoline function.
|
||||
memoryWait32TrampolineAddress *byte
|
||||
// memoryWait64TrampolineAddress holds the address of memory_wait64 trampoline function.
|
||||
memoryWait64TrampolineAddress *byte
|
||||
// memoryNotifyTrampolineAddress holds the address of the memory_notify trampoline function.
|
||||
memoryNotifyTrampolineAddress *byte
|
||||
}
|
||||
)
|
||||
|
||||
func (c *callEngine) requiredInitialStackSize() int {
|
||||
const initialStackSizeDefault = 10240
|
||||
stackSize := initialStackSizeDefault
|
||||
paramResultInBytes := c.sizeOfParamResultSlice * 8 * 2 // * 8 because uint64 is 8 bytes, and *2 because we need both separated param/result slots.
|
||||
required := paramResultInBytes + 32 + 16 // 32 is enough to accommodate the call frame info, and 16 exists just in case when []byte is not aligned to 16 bytes.
|
||||
if required > stackSize {
|
||||
stackSize = required
|
||||
}
|
||||
return stackSize
|
||||
}
|
||||
|
||||
func (c *callEngine) init() {
|
||||
stackSize := c.requiredInitialStackSize()
|
||||
if wazevoapi.StackGuardCheckEnabled {
|
||||
stackSize += wazevoapi.StackGuardCheckGuardPageSize
|
||||
}
|
||||
c.stack = make([]byte, stackSize)
|
||||
c.stackTop = alignedStackTop(c.stack)
|
||||
if wazevoapi.StackGuardCheckEnabled {
|
||||
c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize]
|
||||
} else {
|
||||
c.execCtx.stackBottomPtr = &c.stack[0]
|
||||
}
|
||||
c.execCtxPtr = uintptr(unsafe.Pointer(&c.execCtx))
|
||||
}
|
||||
|
||||
// alignedStackTop returns the 16-byte-aligned top of the given stack.
|
||||
// 16 bytes should be good for all platforms (arm64/amd64).
|
||||
func alignedStackTop(s []byte) uintptr {
|
||||
stackAddr := uintptr(unsafe.Pointer(&s[len(s)-1]))
|
||||
return stackAddr - (stackAddr & (16 - 1))
|
||||
}
|
||||
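alignedStackTop above rounds the address of the last byte of the Go-allocated stack down to a 16-byte boundary with addr - (addr & 15). The rounding can be checked in isolation; the snippet below uses plain integers (no unsafe), since only the arithmetic matters, and alignDown16 is a local stand-in.

package main

import "fmt"

// alignDown16 mirrors the rounding in alignedStackTop: clear the low 4 bits
// so the result is the greatest 16-byte-aligned address not above addr.
func alignDown16(addr uint64) uint64 {
	return addr - (addr & (16 - 1))
}

func main() {
	for _, addr := range []uint64{0x1000, 0x1001, 0x100f, 0x1010} {
		fmt.Printf("%#x -> %#x\n", addr, alignDown16(addr))
	}
	// 0x1000 -> 0x1000, 0x1001 -> 0x1000, 0x100f -> 0x1000, 0x1010 -> 0x1010
}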
|
||||
// Definition implements api.Function.
|
||||
func (c *callEngine) Definition() api.FunctionDefinition {
|
||||
return c.parent.module.Source.FunctionDefinition(c.indexInModule)
|
||||
}
|
||||
|
||||
// Call implements api.Function.
|
||||
func (c *callEngine) Call(ctx context.Context, params ...uint64) ([]uint64, error) {
|
||||
if c.requiredParams != len(params) {
|
||||
return nil, fmt.Errorf("expected %d params, but passed %d", c.requiredParams, len(params))
|
||||
}
|
||||
paramResultSlice := make([]uint64, c.sizeOfParamResultSlice)
|
||||
copy(paramResultSlice, params)
|
||||
if err := c.callWithStack(ctx, paramResultSlice); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return paramResultSlice[:c.numberOfResults], nil
|
||||
}
|
||||
|
||||
func (c *callEngine) addFrame(builder wasmdebug.ErrorBuilder, addr uintptr) (def api.FunctionDefinition, listener experimental.FunctionListener) {
|
||||
eng := c.parent.parent.parent
|
||||
cm := eng.compiledModuleOfAddr(addr)
|
||||
if cm == nil {
|
||||
// In this case, the module might have been closed and deleted from the engine.
|
||||
// We fall back to searching the imported modules that can be referenced from this callEngine.
|
||||
|
||||
// First, we check itself.
|
||||
if checkAddrInBytes(addr, c.parent.parent.executable) {
|
||||
cm = c.parent.parent
|
||||
} else {
|
||||
// Otherwise, search all imported modules. TODO: maybe recursive, but not sure it's useful in practice.
|
||||
p := c.parent
|
||||
for i := range p.importedFunctions {
|
||||
candidate := p.importedFunctions[i].me.parent
|
||||
if checkAddrInBytes(addr, candidate.executable) {
|
||||
cm = candidate
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if cm != nil {
|
||||
index := cm.functionIndexOf(addr)
|
||||
def = cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index)
|
||||
var sources []string
|
||||
if dw := cm.module.DWARFLines; dw != nil {
|
||||
sourceOffset := cm.getSourceOffset(addr)
|
||||
sources = dw.Line(sourceOffset)
|
||||
}
|
||||
builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
|
||||
if len(cm.listeners) > 0 {
|
||||
listener = cm.listeners[index]
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CallWithStack implements api.Function.
|
||||
func (c *callEngine) CallWithStack(ctx context.Context, paramResultStack []uint64) (err error) {
|
||||
if c.sizeOfParamResultSlice > len(paramResultStack) {
|
||||
return fmt.Errorf("need %d params, but stack size is %d", c.sizeOfParamResultSlice, len(paramResultStack))
|
||||
}
|
||||
return c.callWithStack(ctx, paramResultStack)
|
||||
}
|
||||
|
||||
// CallWithStack implements api.Function.
|
||||
func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint64) (err error) {
|
||||
snapshotEnabled := ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil
|
||||
if snapshotEnabled {
|
||||
ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, c)
|
||||
}
|
||||
|
||||
if wazevoapi.StackGuardCheckEnabled {
|
||||
defer func() {
|
||||
wazevoapi.CheckStackGuardPage(c.stack)
|
||||
}()
|
||||
}
|
||||
|
||||
p := c.parent
|
||||
ensureTermination := p.parent.ensureTermination
|
||||
m := p.module
|
||||
if ensureTermination {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// If the provided context is already done, close the module and return the error.
|
||||
m.CloseWithCtxErr(ctx)
|
||||
return m.FailIfClosed()
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
var paramResultPtr *uint64
|
||||
if len(paramResultStack) > 0 {
|
||||
paramResultPtr = ¶mResultStack[0]
|
||||
}
|
||||
defer func() {
|
||||
r := recover()
|
||||
if s, ok := r.(*snapshot); ok {
|
||||
// A snapshot that wasn't handled was created by a different call engine possibly from a nested wasm invocation,
|
||||
// let it propagate up to be handled by the caller.
|
||||
panic(s)
|
||||
}
|
||||
if r != nil {
|
||||
type listenerForAbort struct {
|
||||
def api.FunctionDefinition
|
||||
lsn experimental.FunctionListener
|
||||
}
|
||||
|
||||
var listeners []listenerForAbort
|
||||
builder := wasmdebug.NewErrorBuilder()
|
||||
def, lsn := c.addFrame(builder, uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress)))
|
||||
if lsn != nil {
|
||||
listeners = append(listeners, listenerForAbort{def, lsn})
|
||||
}
|
||||
returnAddrs := unwindStack(
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)),
|
||||
c.execCtx.framePointerBeforeGoCall,
|
||||
c.stackTop,
|
||||
nil,
|
||||
)
|
||||
for _, retAddr := range returnAddrs[:len(returnAddrs)-1] { // the last return addr is the trampoline, so we skip it.
|
||||
def, lsn = c.addFrame(builder, retAddr)
|
||||
if lsn != nil {
|
||||
listeners = append(listeners, listenerForAbort{def, lsn})
|
||||
}
|
||||
}
|
||||
err = builder.FromRecovered(r)
|
||||
|
||||
for _, lsn := range listeners {
|
||||
lsn.lsn.Abort(ctx, m, lsn.def, err)
|
||||
}
|
||||
} else {
|
||||
if err != wasmruntime.ErrRuntimeStackOverflow { // Stack overflow shouldn't be raised as a panic (to avoid extreme stack unwinding).
|
||||
err = c.parent.module.FailIfClosed()
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
// Ensures that we can reuse this callEngine even after an error.
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
}
|
||||
}()
|
||||
|
||||
if ensureTermination {
|
||||
done := m.CloseModuleOnCanceledOrTimeout(ctx)
|
||||
defer done()
|
||||
}
|
||||
|
||||
if c.stackTop&(16-1) != 0 {
|
||||
panic("BUG: stack must be aligned to 16 bytes")
|
||||
}
|
||||
entrypoint(c.preambleExecutable, c.executable, c.execCtxPtr, c.parent.opaquePtr, paramResultPtr, c.stackTop)
|
||||
for {
|
||||
switch ec := c.execCtx.exitCode; ec & wazevoapi.ExitCodeMask {
|
||||
case wazevoapi.ExitCodeOK:
|
||||
return nil
|
||||
case wazevoapi.ExitCodeGrowStack:
|
||||
oldsp := uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
|
||||
oldTop := c.stackTop
|
||||
oldStack := c.stack
|
||||
var newsp, newfp uintptr
|
||||
if wazevoapi.StackGuardCheckEnabled {
|
||||
newsp, newfp, err = c.growStackWithGuarded()
|
||||
} else {
|
||||
newsp, newfp, err = c.growStack()
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
adjustClonedStack(oldsp, oldTop, newsp, newfp, c.stackTop)
|
||||
// Old stack must be alive until the new stack is adjusted.
|
||||
runtime.KeepAlive(oldStack)
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, newsp, newfp)
|
||||
case wazevoapi.ExitCodeGrowMemory:
|
||||
mod := c.callerModuleInstance()
|
||||
mem := mod.MemoryInstance
|
||||
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
argRes := &s[0]
|
||||
if res, ok := mem.Grow(uint32(*argRes)); !ok {
|
||||
*argRes = uint64(0xffffffff) // = -1 in signed 32-bit integer.
|
||||
} else {
|
||||
*argRes = uint64(res)
|
||||
calleeOpaque := opaqueViewFromPtr(uintptr(unsafe.Pointer(c.execCtx.callerModuleContextPtr)))
|
||||
if mod.Source.MemorySection != nil { // Local memory.
|
||||
putLocalMemory(calleeOpaque, 8 /* local memory begins at 8 */, mem)
|
||||
} else {
|
||||
// Imported memory's owner at offset 16 of the callerModuleContextPtr.
|
||||
opaquePtr := uintptr(binary.LittleEndian.Uint64(calleeOpaque[16:]))
|
||||
importedMemOwner := opaqueViewFromPtr(opaquePtr)
|
||||
putLocalMemory(importedMemOwner, 8 /* local memory begins at 8 */, mem)
|
||||
}
|
||||
}
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeTableGrow:
|
||||
mod := c.callerModuleInstance()
|
||||
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
tableIndex, num, ref := uint32(s[0]), uint32(s[1]), uintptr(s[2])
|
||||
table := mod.Tables[tableIndex]
|
||||
s[0] = uint64(uint32(int32(table.Grow(num, ref))))
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeCallGoFunction:
|
||||
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
|
||||
f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
|
||||
func() {
|
||||
if snapshotEnabled {
|
||||
defer snapshotRecoverFn(c)
|
||||
}
|
||||
f.Call(ctx, goCallStackView(c.execCtx.stackPointerBeforeGoCall))
|
||||
}()
|
||||
// Back to the native code.
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeCallGoFunctionWithListener:
|
||||
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
|
||||
f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
|
||||
listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
|
||||
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
// Call Listener.Before.
|
||||
callerModule := c.callerModuleInstance()
|
||||
listener := listeners[index]
|
||||
hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
|
||||
def := hostModule.FunctionDefinition(wasm.Index(index))
|
||||
listener.Before(ctx, callerModule, def, s, c.stackIterator(true))
|
||||
// Call into the Go function.
|
||||
func() {
|
||||
if snapshotEnabled {
|
||||
defer snapshotRecoverFn(c)
|
||||
}
|
||||
f.Call(ctx, s)
|
||||
}()
|
||||
// Call Listener.After.
|
||||
listener.After(ctx, callerModule, def, s)
|
||||
// Back to the native code.
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeCallGoModuleFunction:
|
||||
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
|
||||
f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
|
||||
mod := c.callerModuleInstance()
|
||||
func() {
|
||||
if snapshotEnabled {
|
||||
defer snapshotRecoverFn(c)
|
||||
}
|
||||
f.Call(ctx, mod, goCallStackView(c.execCtx.stackPointerBeforeGoCall))
|
||||
}()
|
||||
// Back to the native code.
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeCallGoModuleFunctionWithListener:
|
||||
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
|
||||
f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
|
||||
listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
|
||||
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
// Call Listener.Before.
|
||||
callerModule := c.callerModuleInstance()
|
||||
listener := listeners[index]
|
||||
hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
|
||||
def := hostModule.FunctionDefinition(wasm.Index(index))
|
||||
listener.Before(ctx, callerModule, def, s, c.stackIterator(true))
|
||||
// Call into the Go function.
|
||||
func() {
|
||||
if snapshotEnabled {
|
||||
defer snapshotRecoverFn(c)
|
||||
}
|
||||
f.Call(ctx, callerModule, s)
|
||||
}()
|
||||
// Call Listener.After.
|
||||
listener.After(ctx, callerModule, def, s)
|
||||
// Back to the native code.
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeCallListenerBefore:
|
||||
stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
index := wasm.Index(stack[0])
|
||||
mod := c.callerModuleInstance()
|
||||
listener := mod.Engine.(*moduleEngine).listeners[index]
|
||||
def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount)
|
||||
listener.Before(ctx, mod, def, stack[1:], c.stackIterator(false))
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeCallListenerAfter:
|
||||
stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
index := wasm.Index(stack[0])
|
||||
mod := c.callerModuleInstance()
|
||||
listener := mod.Engine.(*moduleEngine).listeners[index]
|
||||
def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount)
|
||||
listener.After(ctx, mod, def, stack[1:])
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeCheckModuleExitCode:
|
||||
// Note: this operation must be done in Go, not native code. The reason is that
|
||||
// native code cannot be preempted and that means it can block forever if there are not
|
||||
// enough OS threads (which we don't have control over).
|
||||
if err := m.FailIfClosed(); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeRefFunc:
|
||||
mod := c.callerModuleInstance()
|
||||
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
funcIndex := wasm.Index(s[0])
|
||||
ref := mod.Engine.FunctionInstanceReference(funcIndex)
|
||||
s[0] = uint64(ref)
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeMemoryWait32:
|
||||
mod := c.callerModuleInstance()
|
||||
mem := mod.MemoryInstance
|
||||
if !mem.Shared {
|
||||
panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
|
||||
}
|
||||
|
||||
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
timeout, exp, addr := int64(s[0]), uint32(s[1]), uintptr(s[2])
|
||||
base := uintptr(unsafe.Pointer(&mem.Buffer[0]))
|
||||
|
||||
offset := uint32(addr - base)
|
||||
res := mem.Wait32(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 {
|
||||
addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset)
|
||||
return atomic.LoadUint32((*uint32)(addr))
|
||||
})
|
||||
s[0] = res
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeMemoryWait64:
|
||||
mod := c.callerModuleInstance()
|
||||
mem := mod.MemoryInstance
|
||||
if !mem.Shared {
|
||||
panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
|
||||
}
|
||||
|
||||
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
timeout, exp, addr := int64(s[0]), uint64(s[1]), uintptr(s[2])
|
||||
base := uintptr(unsafe.Pointer(&mem.Buffer[0]))
|
||||
|
||||
offset := uint32(addr - base)
|
||||
res := mem.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 {
|
||||
addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset)
|
||||
return atomic.LoadUint64((*uint64)(addr))
|
||||
})
|
||||
s[0] = uint64(res)
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeMemoryNotify:
|
||||
mod := c.callerModuleInstance()
|
||||
mem := mod.MemoryInstance
|
||||
|
||||
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
|
||||
count, addr := uint32(s[0]), s[1]
|
||||
offset := uint32(uintptr(addr) - uintptr(unsafe.Pointer(&mem.Buffer[0])))
|
||||
res := mem.Notify(offset, count)
|
||||
s[0] = uint64(res)
|
||||
c.execCtx.exitCode = wazevoapi.ExitCodeOK
|
||||
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
|
||||
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
|
||||
case wazevoapi.ExitCodeUnreachable:
|
||||
panic(wasmruntime.ErrRuntimeUnreachable)
|
||||
case wazevoapi.ExitCodeMemoryOutOfBounds:
|
||||
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
|
||||
case wazevoapi.ExitCodeTableOutOfBounds:
|
||||
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
|
||||
case wazevoapi.ExitCodeIndirectCallNullPointer:
|
||||
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
|
||||
case wazevoapi.ExitCodeIndirectCallTypeMismatch:
|
||||
panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
|
||||
case wazevoapi.ExitCodeIntegerOverflow:
|
||||
panic(wasmruntime.ErrRuntimeIntegerOverflow)
|
||||
case wazevoapi.ExitCodeIntegerDivisionByZero:
|
||||
panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
|
||||
case wazevoapi.ExitCodeInvalidConversionToInteger:
|
||||
panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
|
||||
case wazevoapi.ExitCodeUnalignedAtomic:
|
||||
panic(wasmruntime.ErrRuntimeUnalignedAtomic)
|
||||
default:
|
||||
panic("BUG")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *callEngine) callerModuleInstance() *wasm.ModuleInstance {
|
||||
return moduleInstanceFromOpaquePtr(c.execCtx.callerModuleContextPtr)
|
||||
}
|
||||
|
||||
func opaqueViewFromPtr(ptr uintptr) []byte {
|
||||
var opaque []byte
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaque))
|
||||
sh.Data = ptr
|
||||
setSliceLimits(sh, 24, 24)
|
||||
return opaque
|
||||
}
|
||||
|
||||
const callStackCeiling = uintptr(50000000) // in uint64 (8 bytes) == 400000000 bytes in total == 400mb.
|
||||
|
||||
func (c *callEngine) growStackWithGuarded() (newSP uintptr, newFP uintptr, err error) {
|
||||
if wazevoapi.StackGuardCheckEnabled {
|
||||
wazevoapi.CheckStackGuardPage(c.stack)
|
||||
}
|
||||
newSP, newFP, err = c.growStack()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if wazevoapi.StackGuardCheckEnabled {
|
||||
c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// growStack grows the stack, and returns the new stack pointer.
|
||||
func (c *callEngine) growStack() (newSP, newFP uintptr, err error) {
|
||||
currentLen := uintptr(len(c.stack))
|
||||
if callStackCeiling < currentLen {
|
||||
err = wasmruntime.ErrRuntimeStackOverflow
|
||||
return
|
||||
}
|
||||
|
||||
newLen := 2*currentLen + c.execCtx.stackGrowRequiredSize + 16 // Stack might be aligned to 16 bytes, so add 16 bytes just in case.
|
||||
newSP, newFP, c.stackTop, c.stack = c.cloneStack(newLen)
|
||||
c.execCtx.stackBottomPtr = &c.stack[0]
|
||||
return
|
||||
}
|
||||
|
||||
func (c *callEngine) cloneStack(l uintptr) (newSP, newFP, newTop uintptr, newStack []byte) {
|
||||
newStack = make([]byte, l)
|
||||
|
||||
relSp := c.stackTop - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
|
||||
relFp := c.stackTop - c.execCtx.framePointerBeforeGoCall
|
||||
|
||||
// Copy the existing contents in the previous Go-allocated stack into the new one.
|
||||
var prevStackAligned, newStackAligned []byte
|
||||
{
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&prevStackAligned))
|
||||
sh.Data = c.stackTop - relSp
|
||||
setSliceLimits(sh, relSp, relSp)
|
||||
}
|
||||
newTop = alignedStackTop(newStack)
|
||||
{
|
||||
newSP = newTop - relSp
|
||||
newFP = newTop - relFp
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&newStackAligned))
|
||||
sh.Data = newSP
|
||||
setSliceLimits(sh, relSp, relSp)
|
||||
}
|
||||
copy(newStackAligned, prevStackAligned)
|
||||
return
|
||||
}
|
||||
|
||||
func (c *callEngine) stackIterator(onHostCall bool) experimental.StackIterator {
|
||||
c.stackIteratorImpl.reset(c, onHostCall)
|
||||
return &c.stackIteratorImpl
|
||||
}
|
||||
|
||||
// stackIterator implements experimental.StackIterator.
|
||||
type stackIterator struct {
|
||||
retAddrs []uintptr
|
||||
retAddrCursor int
|
||||
eng *engine
|
||||
pc uint64
|
||||
|
||||
currentDef *wasm.FunctionDefinition
|
||||
}
|
||||
|
||||
func (si *stackIterator) reset(c *callEngine, onHostCall bool) {
|
||||
if onHostCall {
|
||||
si.retAddrs = append(si.retAddrs[:0], uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress)))
|
||||
} else {
|
||||
si.retAddrs = si.retAddrs[:0]
|
||||
}
|
||||
si.retAddrs = unwindStack(uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall, c.stackTop, si.retAddrs)
|
||||
si.retAddrs = si.retAddrs[:len(si.retAddrs)-1] // the last return addr is the trampoline, so we skip it.
|
||||
si.retAddrCursor = 0
|
||||
si.eng = c.parent.parent.parent
|
||||
}
|
||||
|
||||
// Next implements the same method as documented on experimental.StackIterator.
|
||||
func (si *stackIterator) Next() bool {
|
||||
if si.retAddrCursor >= len(si.retAddrs) {
|
||||
return false
|
||||
}
|
||||
|
||||
addr := si.retAddrs[si.retAddrCursor]
|
||||
cm := si.eng.compiledModuleOfAddr(addr)
|
||||
if cm != nil {
|
||||
index := cm.functionIndexOf(addr)
|
||||
def := cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index)
|
||||
si.currentDef = def
|
||||
si.retAddrCursor++
|
||||
si.pc = uint64(addr)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ProgramCounter implements the same method as documented on experimental.StackIterator.
|
||||
func (si *stackIterator) ProgramCounter() experimental.ProgramCounter {
|
||||
return experimental.ProgramCounter(si.pc)
|
||||
}
|
||||
|
||||
// Function implements the same method as documented on experimental.StackIterator.
|
||||
func (si *stackIterator) Function() experimental.InternalFunction {
|
||||
return si
|
||||
}
|
||||
|
||||
// Definition implements the same method as documented on experimental.InternalFunction.
|
||||
func (si *stackIterator) Definition() api.FunctionDefinition {
|
||||
return si.currentDef
|
||||
}
|
||||
|
||||
// SourceOffsetForPC implements the same method as documented on experimental.InternalFunction.
|
||||
func (si *stackIterator) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 {
|
||||
upc := uintptr(pc)
|
||||
cm := si.eng.compiledModuleOfAddr(upc)
|
||||
return cm.getSourceOffset(upc)
|
||||
}
|
||||
|
||||
// snapshot implements experimental.Snapshot
|
||||
type snapshot struct {
|
||||
sp, fp, top uintptr
|
||||
returnAddress *byte
|
||||
stack []byte
|
||||
savedRegisters [64][2]uint64
|
||||
ret []uint64
|
||||
c *callEngine
|
||||
}
|
||||
|
||||
// Snapshot implements the same method as documented on experimental.Snapshotter.
|
||||
func (c *callEngine) Snapshot() experimental.Snapshot {
|
||||
returnAddress := c.execCtx.goCallReturnAddress
|
||||
oldTop, oldSp := c.stackTop, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
|
||||
newSP, newFP, newTop, newStack := c.cloneStack(uintptr(len(c.stack)) + 16)
|
||||
adjustClonedStack(oldSp, oldTop, newSP, newFP, newTop)
|
||||
return &snapshot{
|
||||
sp: newSP,
|
||||
fp: newFP,
|
||||
top: newTop,
|
||||
savedRegisters: c.execCtx.savedRegisters,
|
||||
returnAddress: returnAddress,
|
||||
stack: newStack,
|
||||
c: c,
|
||||
}
|
||||
}
|
||||
|
||||
// Restore implements the same method as documented on experimental.Snapshot.
|
||||
func (s *snapshot) Restore(ret []uint64) {
|
||||
s.ret = ret
|
||||
panic(s)
|
||||
}
|
||||
|
||||
func (s *snapshot) doRestore() {
|
||||
spp := *(**uint64)(unsafe.Pointer(&s.sp))
|
||||
view := goCallStackView(spp)
|
||||
copy(view, s.ret)
|
||||
|
||||
c := s.c
|
||||
c.stack = s.stack
|
||||
c.stackTop = s.top
|
||||
ec := &c.execCtx
|
||||
ec.stackBottomPtr = &c.stack[0]
|
||||
ec.stackPointerBeforeGoCall = spp
|
||||
ec.framePointerBeforeGoCall = s.fp
|
||||
ec.goCallReturnAddress = s.returnAddress
|
||||
ec.savedRegisters = s.savedRegisters
|
||||
}
|
||||
|
||||
// Error implements the error interface.
|
||||
func (s *snapshot) Error() string {
|
||||
return "unhandled snapshot restore, this generally indicates restore was called from a different " +
|
||||
"exported function invocation than snapshot"
|
||||
}
|
||||
|
||||
func snapshotRecoverFn(c *callEngine) {
|
||||
if r := recover(); r != nil {
|
||||
if s, ok := r.(*snapshot); ok && s.c == c {
|
||||
s.doRestore()
|
||||
} else {
|
||||
panic(r)
|
||||
}
|
||||
}
|
||||
}
|
||||
843
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine.go
generated
vendored
Normal file
|
|
@ -0,0 +1,843 @@
|
|||
package wazevo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"sort"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/api"
|
||||
"github.com/tetratelabs/wazero/experimental"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/frontend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
"github.com/tetratelabs/wazero/internal/filecache"
|
||||
"github.com/tetratelabs/wazero/internal/platform"
|
||||
"github.com/tetratelabs/wazero/internal/version"
|
||||
"github.com/tetratelabs/wazero/internal/wasm"
|
||||
)
|
||||
|
||||
type (
|
||||
// engine implements wasm.Engine.
|
||||
engine struct {
|
||||
wazeroVersion string
|
||||
fileCache filecache.Cache
|
||||
compiledModules map[wasm.ModuleID]*compiledModule
|
||||
// sortedCompiledModules is a list of compiled modules sorted by the initial address of the executable.
|
||||
sortedCompiledModules []*compiledModule
|
||||
mux sync.RWMutex
|
||||
// sharedFunctions holds compiled functions shared by all modules.
|
||||
sharedFunctions *sharedFunctions
|
||||
// setFinalizer defaults to runtime.SetFinalizer, but is overridable for tests.
|
||||
setFinalizer func(obj interface{}, finalizer interface{})
|
||||
|
||||
// The following are reused for compiling shared functions.
|
||||
machine backend.Machine
|
||||
be backend.Compiler
|
||||
}
|
||||
|
||||
sharedFunctions struct {
|
||||
// memoryGrowExecutable is a compiled trampoline executable for memory.grow builtin function.
|
||||
memoryGrowExecutable []byte
|
||||
// checkModuleExitCode is a compiled trampoline executable for checking module instance exit code. This
|
||||
// is used when ensureTermination is true.
|
||||
checkModuleExitCode []byte
|
||||
// stackGrowExecutable is a compiled executable for growing stack builtin function.
|
||||
stackGrowExecutable []byte
|
||||
// tableGrowExecutable is a compiled trampoline executable for table.grow builtin function.
|
||||
tableGrowExecutable []byte
|
||||
// refFuncExecutable is a compiled trampoline executable for ref.func builtin function.
|
||||
refFuncExecutable []byte
|
||||
// memoryWait32Executable is a compiled trampoline executable for memory.wait32 builtin function
|
||||
memoryWait32Executable []byte
|
||||
// memoryWait64Executable is a compiled trampoline executable for memory.wait64 builtin function
|
||||
memoryWait64Executable []byte
|
||||
// memoryNotifyExecutable is a compiled trampoline executable for memory.notify builtin function
|
||||
memoryNotifyExecutable []byte
|
||||
listenerBeforeTrampolines map[*wasm.FunctionType][]byte
|
||||
listenerAfterTrampolines map[*wasm.FunctionType][]byte
|
||||
}
|
||||
|
||||
// compiledModule is a compiled variant of a wasm.Module, ready to be used for instantiation.
|
||||
compiledModule struct {
|
||||
*executables
|
||||
// functionOffsets maps a local function index to the offset in the executable.
|
||||
functionOffsets []int
|
||||
parent *engine
|
||||
module *wasm.Module
|
||||
ensureTermination bool
|
||||
listeners []experimental.FunctionListener
|
||||
listenerBeforeTrampolines []*byte
|
||||
listenerAfterTrampolines []*byte
|
||||
|
||||
// The following are only available for non-host modules.
|
||||
|
||||
offsets wazevoapi.ModuleContextOffsetData
|
||||
sharedFunctions *sharedFunctions
|
||||
sourceMap sourceMap
|
||||
}
|
||||
|
||||
executables struct {
|
||||
executable []byte
|
||||
entryPreambles [][]byte
|
||||
}
|
||||
)
|
||||
|
||||
// sourceMap is a mapping from the offset of the executable to the offset of the original wasm binary.
|
||||
type sourceMap struct {
|
||||
// executableOffsets is a sorted list of offsets of the executable. This is index-correlated with wasmBinaryOffsets,
|
||||
// in other words executableOffsets[i] is the offset of the executable which corresponds to the offset of a Wasm
|
||||
// binary pointed by wasmBinaryOffsets[i].
|
||||
executableOffsets []uintptr
|
||||
// wasmBinaryOffsets is the counterpart of executableOffsets.
|
||||
wasmBinaryOffsets []uint64
|
||||
}
|
||||
|
||||
var _ wasm.Engine = (*engine)(nil)
|
||||
|
||||
// NewEngine returns the implementation of wasm.Engine.
|
||||
func NewEngine(ctx context.Context, _ api.CoreFeatures, fc filecache.Cache) wasm.Engine {
|
||||
machine := newMachine()
|
||||
be := backend.NewCompiler(ctx, machine, ssa.NewBuilder())
|
||||
e := &engine{
|
||||
compiledModules: make(map[wasm.ModuleID]*compiledModule),
|
||||
setFinalizer: runtime.SetFinalizer,
|
||||
machine: machine,
|
||||
be: be,
|
||||
fileCache: fc,
|
||||
wazeroVersion: version.GetWazeroVersion(),
|
||||
}
|
||||
e.compileSharedFunctions()
|
||||
return e
|
||||
}
|
||||
|
||||
// CompileModule implements wasm.Engine.
|
||||
func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (err error) {
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
wazevoapi.PerfMap.Lock()
|
||||
defer wazevoapi.PerfMap.Unlock()
|
||||
}
|
||||
|
||||
if _, ok, err := e.getCompiledModule(module, listeners, ensureTermination); ok { // cache hit!
|
||||
return nil
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if wazevoapi.DeterministicCompilationVerifierEnabled {
|
||||
ctx = wazevoapi.NewDeterministicCompilationVerifierContext(ctx, len(module.CodeSection))
|
||||
}
|
||||
cm, err := e.compileModule(ctx, module, listeners, ensureTermination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err = e.addCompiledModule(module, cm); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if wazevoapi.DeterministicCompilationVerifierEnabled {
|
||||
for i := 0; i < wazevoapi.DeterministicCompilationVerifyingIter; i++ {
|
||||
_, err := e.compileModule(ctx, module, listeners, ensureTermination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(listeners) > 0 {
|
||||
cm.listeners = listeners
|
||||
cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection))
|
||||
cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection))
|
||||
for i := range module.TypeSection {
|
||||
typ := &module.TypeSection[i]
|
||||
before, after := e.getListenerTrampolineForType(typ)
|
||||
cm.listenerBeforeTrampolines[i] = before
|
||||
cm.listenerAfterTrampolines[i] = after
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (exec *executables) compileEntryPreambles(m *wasm.Module, machine backend.Machine, be backend.Compiler) {
|
||||
exec.entryPreambles = make([][]byte, len(m.TypeSection))
|
||||
for i := range m.TypeSection {
|
||||
typ := &m.TypeSection[i]
|
||||
sig := frontend.SignatureForWasmFunctionType(typ)
|
||||
be.Init()
|
||||
buf := machine.CompileEntryPreamble(&sig)
|
||||
executable := mmapExecutable(buf)
|
||||
exec.entryPreambles[i] = executable
|
||||
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&executable[0])),
|
||||
uint64(len(executable)), fmt.Sprintf("entry_preamble::type=%s", typ.String()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (*compiledModule, error) {
|
||||
withListener := len(listeners) > 0
|
||||
cm := &compiledModule{
|
||||
offsets: wazevoapi.NewModuleContextOffsetData(module, withListener), parent: e, module: module,
|
||||
ensureTermination: ensureTermination,
|
||||
executables: &executables{},
|
||||
}
|
||||
|
||||
if module.IsHostModule {
|
||||
return e.compileHostModule(ctx, module, listeners)
|
||||
}
|
||||
|
||||
importedFns, localFns := int(module.ImportFunctionCount), len(module.FunctionSection)
|
||||
if localFns == 0 {
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
rels := make([]backend.RelocationInfo, 0)
|
||||
refToBinaryOffset := make([]int, importedFns+localFns)
|
||||
|
||||
if wazevoapi.DeterministicCompilationVerifierEnabled {
|
||||
// The compilation must be deterministic regardless of the order of functions being compiled.
|
||||
wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx)
|
||||
}
|
||||
|
||||
needSourceInfo := module.DWARFLines != nil
|
||||
|
||||
// Create new compiler instances, which are reused for each function.
|
||||
ssaBuilder := ssa.NewBuilder()
|
||||
fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo)
|
||||
machine := newMachine()
|
||||
be := backend.NewCompiler(ctx, machine, ssaBuilder)
|
||||
|
||||
cm.executables.compileEntryPreambles(module, machine, be)
|
||||
|
||||
totalSize := 0 // Total binary size of the executable.
|
||||
cm.functionOffsets = make([]int, localFns)
|
||||
bodies := make([][]byte, localFns)
|
||||
|
||||
// Trampoline relocation related variables.
|
||||
trampolineInterval, callTrampolineIslandSize, err := machine.CallTrampolineIslandInfo(localFns)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
needCallTrampoline := callTrampolineIslandSize > 0
|
||||
var callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands.
|
||||
|
||||
for i := range module.CodeSection {
|
||||
if wazevoapi.DeterministicCompilationVerifierEnabled {
|
||||
i = wazevoapi.DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx, i)
|
||||
}
|
||||
|
||||
fidx := wasm.Index(i + importedFns)
|
||||
|
||||
if wazevoapi.NeedFunctionNameInContext {
|
||||
def := module.FunctionDefinition(fidx)
|
||||
name := def.DebugName()
|
||||
if len(def.ExportNames()) > 0 {
|
||||
name = def.ExportNames()[0]
|
||||
}
|
||||
ctx = wazevoapi.SetCurrentFunctionName(ctx, i, fmt.Sprintf("[%d/%d]%s", i, len(module.CodeSection)-1, name))
|
||||
}
|
||||
|
||||
needListener := len(listeners) > 0 && listeners[i] != nil
|
||||
body, relsPerFunc, err := e.compileLocalWasmFunction(ctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err)
|
||||
}
|
||||
|
||||
// Align 16-bytes boundary.
|
||||
totalSize = (totalSize + 15) &^ 15
|
||||
cm.functionOffsets[i] = totalSize
|
||||
|
||||
if needSourceInfo {
|
||||
// At the beginning of the function, we add the offset of the function body so that
|
||||
// we can resolve the source location of the call site for the before-listener call.
|
||||
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize))
|
||||
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[i].BodyOffsetInCodeSection)
|
||||
|
||||
for _, info := range be.SourceOffsetInfo() {
|
||||
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)+uintptr(info.ExecutableOffset))
|
||||
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset))
|
||||
}
|
||||
}
|
||||
|
||||
fref := frontend.FunctionIndexToFuncRef(fidx)
|
||||
refToBinaryOffset[fref] = totalSize
|
||||
|
||||
// At this point, relocation offsets are relative to the start of the function body,
|
||||
// so we adjust it to the start of the executable.
|
||||
for _, r := range relsPerFunc {
|
||||
r.Offset += int64(totalSize)
|
||||
rels = append(rels, r)
|
||||
}
|
||||
|
||||
bodies[i] = body
|
||||
totalSize += len(body)
|
||||
if wazevoapi.PrintMachineCodeHexPerFunction {
|
||||
fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body))
|
||||
}
|
||||
|
||||
if needCallTrampoline {
|
||||
// If the total size exceeds the trampoline interval, we need to add a trampoline island.
|
||||
if totalSize/trampolineInterval > len(callTrampolineIslandOffsets) {
|
||||
callTrampolineIslandOffsets = append(callTrampolineIslandOffsets, totalSize)
|
||||
totalSize += callTrampolineIslandSize
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate executable memory and then copy the generated machine code.
|
||||
executable, err := platform.MmapCodeSegment(totalSize)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
cm.executable = executable
|
||||
|
||||
for i, b := range bodies {
|
||||
offset := cm.functionOffsets[i]
|
||||
copy(executable[offset:], b)
|
||||
}
|
||||
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
|
||||
}
|
||||
|
||||
if needSourceInfo {
|
||||
for i := range cm.sourceMap.executableOffsets {
|
||||
cm.sourceMap.executableOffsets[i] += uintptr(unsafe.Pointer(&cm.executable[0]))
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve relocations for local function calls.
|
||||
if len(rels) > 0 {
|
||||
machine.ResolveRelocations(refToBinaryOffset, executable, rels, callTrampolineIslandOffsets)
|
||||
}
|
||||
|
||||
if runtime.GOARCH == "arm64" {
|
||||
// On arm64, we cannot have the memory mapped rwx at the same time, so we remap it as read-execute.
|
||||
if err = platform.MprotectRX(executable); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
cm.sharedFunctions = e.sharedFunctions
|
||||
e.setFinalizer(cm.executables, executablesFinalizer)
|
||||
return cm, nil
|
||||
}
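The trampoline-island handling inside the loop above inserts an island every time the accumulated code size crosses another multiple of the interval reported by CallTrampolineIslandInfo. A hedged sketch of just that placement logic, using only per-function body sizes, an interval, and an island size (illustration, not part of the vendored file):

// placeTrampolineIslands returns the offsets at which trampoline islands
// would be inserted for the given body sizes; illustration only.
func placeTrampolineIslands(bodySizes []int, interval, islandSize int) (offsets []int) {
	totalSize := 0
	for _, size := range bodySizes {
		totalSize = (totalSize + 15) &^ 15 // 16-byte alignment, as in the loop above
		totalSize += size
		if totalSize/interval > len(offsets) {
			offsets = append(offsets, totalSize)
			totalSize += islandSize
		}
	}
	return offsets
}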
|
||||
|
||||
func (e *engine) compileLocalWasmFunction(
|
||||
ctx context.Context,
|
||||
module *wasm.Module,
|
||||
localFunctionIndex wasm.Index,
|
||||
fe *frontend.Compiler,
|
||||
ssaBuilder ssa.Builder,
|
||||
be backend.Compiler,
|
||||
needListener bool,
|
||||
) (body []byte, rels []backend.RelocationInfo, err error) {
|
||||
typIndex := module.FunctionSection[localFunctionIndex]
|
||||
typ := &module.TypeSection[typIndex]
|
||||
codeSeg := &module.CodeSection[localFunctionIndex]
|
||||
|
||||
// Initializes both frontend and backend compilers.
|
||||
fe.Init(localFunctionIndex, typIndex, typ, codeSeg.LocalTypes, codeSeg.Body, needListener, codeSeg.BodyOffsetInCodeSection)
|
||||
be.Init()
|
||||
|
||||
// Lower Wasm to SSA.
|
||||
fe.LowerToSSA()
|
||||
if wazevoapi.PrintSSA && wazevoapi.PrintEnabledIndex(ctx) {
|
||||
fmt.Printf("[[[SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format())
|
||||
}
|
||||
|
||||
if wazevoapi.DeterministicCompilationVerifierEnabled {
|
||||
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "SSA", ssaBuilder.Format())
|
||||
}
|
||||
|
||||
// Run SSA-level optimization passes.
|
||||
ssaBuilder.RunPasses()
|
||||
|
||||
if wazevoapi.PrintOptimizedSSA && wazevoapi.PrintEnabledIndex(ctx) {
|
||||
fmt.Printf("[[[Optimized SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format())
|
||||
}
|
||||
|
||||
if wazevoapi.DeterministicCompilationVerifierEnabled {
|
||||
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "Optimized SSA", ssaBuilder.Format())
|
||||
}
|
||||
|
||||
// Now our ssaBuilder contains the necessary information to further lower them to
|
||||
// machine code.
|
||||
original, rels, err := be.Compile(ctx)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("ssa->machine code: %v", err)
|
||||
}
|
||||
|
||||
// TODO: optimize as zero copy.
|
||||
copied := make([]byte, len(original))
|
||||
copy(copied, original)
|
||||
return copied, rels, nil
|
||||
}
|
||||
|
||||
func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener) (*compiledModule, error) {
|
||||
machine := newMachine()
|
||||
be := backend.NewCompiler(ctx, machine, ssa.NewBuilder())
|
||||
|
||||
num := len(module.CodeSection)
|
||||
cm := &compiledModule{module: module, listeners: listeners, executables: &executables{}}
|
||||
cm.functionOffsets = make([]int, num)
|
||||
totalSize := 0 // Total binary size of the executable.
|
||||
bodies := make([][]byte, num)
|
||||
var sig ssa.Signature
|
||||
for i := range module.CodeSection {
|
||||
totalSize = (totalSize + 15) &^ 15
|
||||
cm.functionOffsets[i] = totalSize
|
||||
|
||||
typIndex := module.FunctionSection[i]
|
||||
typ := &module.TypeSection[typIndex]
|
||||
|
||||
// This limit can be relaxed as long as the index still fits in an ExitCode, as encoded by wazevoapi.ExitCodeCallGoModuleFunctionWithIndex.
|
||||
// However, 1 << 16 should be large enough for a real use case.
|
||||
const hostFunctionNumMaximum = 1 << 16
|
||||
if i >= hostFunctionNumMaximum {
|
||||
return nil, fmt.Errorf("too many host functions (maximum %d)", hostFunctionNumMaximum)
|
||||
}
|
||||
|
||||
sig.ID = ssa.SignatureID(typIndex) // This is important since we reuse the `machine` which caches the ABI based on the SignatureID.
|
||||
sig.Params = append(sig.Params[:0],
|
||||
ssa.TypeI64, // First argument must be exec context.
|
||||
ssa.TypeI64, // The second argument is the moduleContextOpaque of this host module.
|
||||
)
|
||||
for _, t := range typ.Params {
|
||||
sig.Params = append(sig.Params, frontend.WasmTypeToSSAType(t))
|
||||
}
|
||||
|
||||
sig.Results = sig.Results[:0]
|
||||
for _, t := range typ.Results {
|
||||
sig.Results = append(sig.Results, frontend.WasmTypeToSSAType(t))
|
||||
}
|
||||
|
||||
c := &module.CodeSection[i]
|
||||
if c.GoFunc == nil {
|
||||
panic("BUG: GoFunc must be set for host module")
|
||||
}
|
||||
|
||||
withListener := len(listeners) > 0 && listeners[i] != nil
|
||||
var exitCode wazevoapi.ExitCode
|
||||
fn := c.GoFunc
|
||||
switch fn.(type) {
|
||||
case api.GoModuleFunction:
|
||||
exitCode = wazevoapi.ExitCodeCallGoModuleFunctionWithIndex(i, withListener)
|
||||
case api.GoFunction:
|
||||
exitCode = wazevoapi.ExitCodeCallGoFunctionWithIndex(i, withListener)
|
||||
}
|
||||
|
||||
be.Init()
|
||||
machine.CompileGoFunctionTrampoline(exitCode, &sig, true)
|
||||
if err := be.Finalize(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
body := be.Buf()
|
||||
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
name := module.FunctionDefinition(wasm.Index(i)).DebugName()
|
||||
wazevoapi.PerfMap.AddModuleEntry(i,
|
||||
int64(totalSize),
|
||||
uint64(len(body)),
|
||||
fmt.Sprintf("trampoline:%s", name))
|
||||
}
|
||||
|
||||
// TODO: optimize as zero copy.
|
||||
copied := make([]byte, len(body))
|
||||
copy(copied, body)
|
||||
bodies[i] = copied
|
||||
totalSize += len(body)
|
||||
}
|
||||
|
||||
if totalSize == 0 {
|
||||
// Empty module.
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
// Allocate executable memory and then copy the generated machine code.
|
||||
executable, err := platform.MmapCodeSegment(totalSize)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
cm.executable = executable
|
||||
|
||||
for i, b := range bodies {
|
||||
offset := cm.functionOffsets[i]
|
||||
copy(executable[offset:], b)
|
||||
}
|
||||
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
|
||||
}
|
||||
|
||||
if runtime.GOARCH == "arm64" {
|
||||
// On arm64, we cannot have the memory mapped rwx at the same time, so we remap it as read-execute.
|
||||
if err = platform.MprotectRX(executable); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
e.setFinalizer(cm.executables, executablesFinalizer)
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
// Close implements wasm.Engine.
|
||||
func (e *engine) Close() (err error) {
|
||||
e.mux.Lock()
|
||||
defer e.mux.Unlock()
|
||||
e.sortedCompiledModules = nil
|
||||
e.compiledModules = nil
|
||||
e.sharedFunctions = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// CompiledModuleCount implements wasm.Engine.
|
||||
func (e *engine) CompiledModuleCount() uint32 {
|
||||
e.mux.RLock()
|
||||
defer e.mux.RUnlock()
|
||||
return uint32(len(e.compiledModules))
|
||||
}
|
||||
|
||||
// DeleteCompiledModule implements wasm.Engine.
|
||||
func (e *engine) DeleteCompiledModule(m *wasm.Module) {
|
||||
e.mux.Lock()
|
||||
defer e.mux.Unlock()
|
||||
cm, ok := e.compiledModules[m.ID]
|
||||
if ok {
|
||||
if len(cm.executable) > 0 {
|
||||
e.deleteCompiledModuleFromSortedList(cm)
|
||||
}
|
||||
delete(e.compiledModules, m.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func (e *engine) addCompiledModuleToSortedList(cm *compiledModule) {
|
||||
ptr := uintptr(unsafe.Pointer(&cm.executable[0]))
|
||||
|
||||
index := sort.Search(len(e.sortedCompiledModules), func(i int) bool {
|
||||
return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr
|
||||
})
|
||||
e.sortedCompiledModules = append(e.sortedCompiledModules, nil)
|
||||
copy(e.sortedCompiledModules[index+1:], e.sortedCompiledModules[index:])
|
||||
e.sortedCompiledModules[index] = cm
|
||||
}
|
||||
|
||||
func (e *engine) deleteCompiledModuleFromSortedList(cm *compiledModule) {
|
||||
ptr := uintptr(unsafe.Pointer(&cm.executable[0]))
|
||||
|
||||
index := sort.Search(len(e.sortedCompiledModules), func(i int) bool {
|
||||
return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr
|
||||
})
|
||||
if index >= len(e.sortedCompiledModules) {
|
||||
return
|
||||
}
|
||||
copy(e.sortedCompiledModules[index:], e.sortedCompiledModules[index+1:])
|
||||
e.sortedCompiledModules = e.sortedCompiledModules[:len(e.sortedCompiledModules)-1]
|
||||
}
|
||||
|
||||
func (e *engine) compiledModuleOfAddr(addr uintptr) *compiledModule {
|
||||
e.mux.RLock()
|
||||
defer e.mux.RUnlock()
|
||||
|
||||
index := sort.Search(len(e.sortedCompiledModules), func(i int) bool {
|
||||
return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) > addr
|
||||
})
|
||||
index -= 1
|
||||
if index < 0 {
|
||||
return nil
|
||||
}
|
||||
candidate := e.sortedCompiledModules[index]
|
||||
if checkAddrInBytes(addr, candidate.executable) {
|
||||
// If a module has already been deleted, the module found by the search may be wrong, hence the range check.
|
||||
return candidate
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkAddrInBytes(addr uintptr, b []byte) bool {
|
||||
return uintptr(unsafe.Pointer(&b[0])) <= addr && addr <= uintptr(unsafe.Pointer(&b[len(b)-1]))
|
||||
}
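Both the sorted insert above (addCompiledModuleToSortedList) and the lookup (compiledModuleOfAddr) follow the standard sort.Search pattern over executables ordered by base address. A self-contained sketch of the lookup half over plain address ranges, using only the standard sort package; illustration, not part of the vendored file:

// codeRange is a stand-in for one compiled module's executable mapping.
type codeRange struct{ start, end uintptr } // inclusive [start, end]

// findRange returns the index of the range containing addr, or -1.
// ranges must be sorted by start, like sortedCompiledModules above.
func findRange(ranges []codeRange, addr uintptr) int {
	// Index of the first range whose start is strictly greater than addr...
	i := sort.Search(len(ranges), func(i int) bool { return ranges[i].start > addr })
	i-- // ...so the candidate, if any, is the one just before it.
	if i < 0 || addr > ranges[i].end {
		return -1
	}
	return i
}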
|
||||
|
||||
// NewModuleEngine implements wasm.Engine.
|
||||
func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.ModuleEngine, error) {
|
||||
me := &moduleEngine{}
|
||||
|
||||
// Note: imported functions are resolved in moduleEngine.ResolveImportedFunction.
|
||||
me.importedFunctions = make([]importedFunction, m.ImportFunctionCount)
|
||||
|
||||
compiled, ok := e.getCompiledModuleFromMemory(m)
|
||||
if !ok {
|
||||
return nil, errors.New("source module must be compiled before instantiation")
|
||||
}
|
||||
me.parent = compiled
|
||||
me.module = mi
|
||||
me.listeners = compiled.listeners
|
||||
|
||||
if m.IsHostModule {
|
||||
me.opaque = buildHostModuleOpaque(m, compiled.listeners)
|
||||
me.opaquePtr = &me.opaque[0]
|
||||
} else {
|
||||
if size := compiled.offsets.TotalSize; size != 0 {
|
||||
opaque := newAlignedOpaque(size)
|
||||
me.opaque = opaque
|
||||
me.opaquePtr = &opaque[0]
|
||||
}
|
||||
}
|
||||
return me, nil
|
||||
}
|
||||
|
||||
func (e *engine) compileSharedFunctions() {
|
||||
e.sharedFunctions = &sharedFunctions{
|
||||
listenerBeforeTrampolines: make(map[*wasm.FunctionType][]byte),
|
||||
listenerAfterTrampolines: make(map[*wasm.FunctionType][]byte),
|
||||
}
|
||||
|
||||
e.be.Init()
|
||||
{
|
||||
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{
|
||||
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32},
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}, false)
|
||||
e.sharedFunctions.memoryGrowExecutable = mmapExecutable(src)
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
exe := e.sharedFunctions.memoryGrowExecutable
|
||||
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_grow_trampoline")
|
||||
}
|
||||
}
|
||||
|
||||
e.be.Init()
|
||||
{
|
||||
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{
|
||||
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */},
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}, false)
|
||||
e.sharedFunctions.tableGrowExecutable = mmapExecutable(src)
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
exe := e.sharedFunctions.tableGrowExecutable
|
||||
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "table_grow_trampoline")
|
||||
}
|
||||
}
|
||||
|
||||
e.be.Init()
|
||||
{
|
||||
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{
|
||||
Params: []ssa.Type{ssa.TypeI32 /* exec context */},
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}, false)
|
||||
e.sharedFunctions.checkModuleExitCode = mmapExecutable(src)
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
exe := e.sharedFunctions.checkModuleExitCode
|
||||
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "check_module_exit_code_trampoline")
|
||||
}
|
||||
}
|
||||
|
||||
e.be.Init()
|
||||
{
|
||||
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{
|
||||
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* function index */},
|
||||
Results: []ssa.Type{ssa.TypeI64}, // returns the function reference.
|
||||
}, false)
|
||||
e.sharedFunctions.refFuncExecutable = mmapExecutable(src)
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
exe := e.sharedFunctions.refFuncExecutable
|
||||
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "ref_func_trampoline")
|
||||
}
|
||||
}
|
||||
|
||||
e.be.Init()
|
||||
{
|
||||
src := e.machine.CompileStackGrowCallSequence()
|
||||
e.sharedFunctions.stackGrowExecutable = mmapExecutable(src)
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
exe := e.sharedFunctions.stackGrowExecutable
|
||||
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "stack_grow_trampoline")
|
||||
}
|
||||
}
|
||||
|
||||
e.be.Init()
|
||||
{
|
||||
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{
|
||||
// exec context, timeout, expected, addr
|
||||
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
|
||||
// Returns the status.
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}, false)
|
||||
e.sharedFunctions.memoryWait32Executable = mmapExecutable(src)
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
exe := e.sharedFunctions.memoryWait32Executable
|
||||
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait32_trampoline")
|
||||
}
|
||||
}
|
||||
|
||||
e.be.Init()
|
||||
{
|
||||
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{
|
||||
// exec context, timeout, expected, addr
|
||||
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64},
|
||||
// Returns the status.
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}, false)
|
||||
e.sharedFunctions.memoryWait64Executable = mmapExecutable(src)
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
exe := e.sharedFunctions.memoryWait64Executable
|
||||
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait64_trampoline")
|
||||
}
|
||||
}
|
||||
|
||||
e.be.Init()
|
||||
{
|
||||
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{
|
||||
// exec context, count, addr
|
||||
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
|
||||
// Returns the number notified.
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}, false)
|
||||
e.sharedFunctions.memoryNotifyExecutable = mmapExecutable(src)
|
||||
if wazevoapi.PerfMapEnabled {
|
||||
exe := e.sharedFunctions.memoryNotifyExecutable
|
||||
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_notify_trampoline")
|
||||
}
|
||||
}
|
||||
|
||||
e.setFinalizer(e.sharedFunctions, sharedFunctionsFinalizer)
|
||||
}
|
||||
|
||||
func sharedFunctionsFinalizer(sf *sharedFunctions) {
|
||||
if err := platform.MunmapCodeSegment(sf.memoryGrowExecutable); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := platform.MunmapCodeSegment(sf.checkModuleExitCode); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := platform.MunmapCodeSegment(sf.stackGrowExecutable); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := platform.MunmapCodeSegment(sf.tableGrowExecutable); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := platform.MunmapCodeSegment(sf.refFuncExecutable); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := platform.MunmapCodeSegment(sf.memoryWait32Executable); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := platform.MunmapCodeSegment(sf.memoryWait64Executable); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := platform.MunmapCodeSegment(sf.memoryNotifyExecutable); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
for _, f := range sf.listenerBeforeTrampolines {
|
||||
if err := platform.MunmapCodeSegment(f); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
for _, f := range sf.listenerAfterTrampolines {
|
||||
if err := platform.MunmapCodeSegment(f); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
sf.memoryGrowExecutable = nil
|
||||
sf.checkModuleExitCode = nil
|
||||
sf.stackGrowExecutable = nil
|
||||
sf.tableGrowExecutable = nil
|
||||
sf.refFuncExecutable = nil
|
||||
sf.memoryWait32Executable = nil
|
||||
sf.memoryWait64Executable = nil
|
||||
sf.memoryNotifyExecutable = nil
|
||||
sf.listenerBeforeTrampolines = nil
|
||||
sf.listenerAfterTrampolines = nil
|
||||
}
|
||||
|
||||
func executablesFinalizer(exec *executables) {
|
||||
if len(exec.executable) > 0 {
|
||||
if err := platform.MunmapCodeSegment(exec.executable); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
exec.executable = nil
|
||||
|
||||
for _, f := range exec.entryPreambles {
|
||||
if err := platform.MunmapCodeSegment(f); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
exec.entryPreambles = nil
|
||||
}
|
||||
|
||||
func mmapExecutable(src []byte) []byte {
|
||||
executable, err := platform.MmapCodeSegment(len(src))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
copy(executable, src)
|
||||
|
||||
if runtime.GOARCH == "arm64" {
|
||||
// On arm64, we cannot have the memory mapped rwx at the same time, so we remap it as read-execute.
|
||||
if err = platform.MprotectRX(executable); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
return executable
|
||||
}
|
||||
|
||||
func (cm *compiledModule) functionIndexOf(addr uintptr) wasm.Index {
|
||||
addr -= uintptr(unsafe.Pointer(&cm.executable[0]))
|
||||
offset := cm.functionOffsets
|
||||
index := sort.Search(len(offset), func(i int) bool {
|
||||
return offset[i] > int(addr)
|
||||
})
|
||||
index--
|
||||
if index < 0 {
|
||||
panic("BUG")
|
||||
}
|
||||
return wasm.Index(index)
|
||||
}
|
||||
|
||||
func (e *engine) getListenerTrampolineForType(functionType *wasm.FunctionType) (before, after *byte) {
|
||||
e.mux.Lock()
|
||||
defer e.mux.Unlock()
|
||||
|
||||
beforeBuf, ok := e.sharedFunctions.listenerBeforeTrampolines[functionType]
|
||||
afterBuf := e.sharedFunctions.listenerAfterTrampolines[functionType]
|
||||
if ok {
|
||||
return &beforeBuf[0], &afterBuf[0]
|
||||
}
|
||||
|
||||
beforeSig, afterSig := frontend.SignatureForListener(functionType)
|
||||
|
||||
e.be.Init()
|
||||
buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false)
|
||||
beforeBuf = mmapExecutable(buf)
|
||||
|
||||
e.be.Init()
|
||||
buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false)
|
||||
afterBuf = mmapExecutable(buf)
|
||||
|
||||
e.sharedFunctions.listenerBeforeTrampolines[functionType] = beforeBuf
|
||||
e.sharedFunctions.listenerAfterTrampolines[functionType] = afterBuf
|
||||
return &beforeBuf[0], &afterBuf[0]
|
||||
}
|
||||
|
||||
func (cm *compiledModule) getSourceOffset(pc uintptr) uint64 {
|
||||
offsets := cm.sourceMap.executableOffsets
|
||||
if len(offsets) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
index := sort.Search(len(offsets), func(i int) bool {
|
||||
return offsets[i] >= pc
|
||||
})
|
||||
|
||||
index--
|
||||
if index < 0 {
|
||||
return 0
|
||||
}
|
||||
return cm.sourceMap.wasmBinaryOffsets[index]
|
||||
}
|
||||
296
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go
generated
vendored
Normal file
|
|
@ -0,0 +1,296 @@
|
|||
package wazevo
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/experimental"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
"github.com/tetratelabs/wazero/internal/filecache"
|
||||
"github.com/tetratelabs/wazero/internal/platform"
|
||||
"github.com/tetratelabs/wazero/internal/u32"
|
||||
"github.com/tetratelabs/wazero/internal/u64"
|
||||
"github.com/tetratelabs/wazero/internal/wasm"
|
||||
)
|
||||
|
||||
var crc = crc32.MakeTable(crc32.Castagnoli)
|
||||
|
||||
// fileCacheKey returns a key for the file cache.
|
||||
// In order to avoid collisions with the existing compiler, we do not use m.ID directly,
|
||||
// but instead rehash it together with the magic bytes.
|
||||
func fileCacheKey(m *wasm.Module) (ret filecache.Key) {
|
||||
s := sha256.New()
|
||||
s.Write(m.ID[:])
|
||||
s.Write(magic)
|
||||
s.Sum(ret[:0])
|
||||
return
|
||||
}
|
||||
|
||||
func (e *engine) addCompiledModule(module *wasm.Module, cm *compiledModule) (err error) {
|
||||
e.addCompiledModuleToMemory(module, cm)
|
||||
if !module.IsHostModule && e.fileCache != nil {
|
||||
err = e.addCompiledModuleToCache(module, cm)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (cm *compiledModule, ok bool, err error) {
|
||||
cm, ok = e.getCompiledModuleFromMemory(module)
|
||||
if ok {
|
||||
return
|
||||
}
|
||||
cm, ok, err = e.getCompiledModuleFromCache(module)
|
||||
if ok {
|
||||
cm.parent = e
|
||||
cm.module = module
|
||||
cm.sharedFunctions = e.sharedFunctions
|
||||
cm.ensureTermination = ensureTermination
|
||||
cm.offsets = wazevoapi.NewModuleContextOffsetData(module, len(listeners) > 0)
|
||||
if len(listeners) > 0 {
|
||||
cm.listeners = listeners
|
||||
cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection))
|
||||
cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection))
|
||||
for i := range module.TypeSection {
|
||||
typ := &module.TypeSection[i]
|
||||
before, after := e.getListenerTrampolineForType(typ)
|
||||
cm.listenerBeforeTrampolines[i] = before
|
||||
cm.listenerAfterTrampolines[i] = after
|
||||
}
|
||||
}
|
||||
e.addCompiledModuleToMemory(module, cm)
|
||||
ssaBuilder := ssa.NewBuilder()
|
||||
machine := newMachine()
|
||||
be := backend.NewCompiler(context.Background(), machine, ssaBuilder)
|
||||
cm.executables.compileEntryPreambles(module, machine, be)
|
||||
|
||||
// Set the finalizer.
|
||||
e.setFinalizer(cm.executables, executablesFinalizer)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *engine) addCompiledModuleToMemory(m *wasm.Module, cm *compiledModule) {
|
||||
e.mux.Lock()
|
||||
defer e.mux.Unlock()
|
||||
e.compiledModules[m.ID] = cm
|
||||
if len(cm.executable) > 0 {
|
||||
e.addCompiledModuleToSortedList(cm)
|
||||
}
|
||||
}
|
||||
|
||||
func (e *engine) getCompiledModuleFromMemory(module *wasm.Module) (cm *compiledModule, ok bool) {
|
||||
e.mux.RLock()
|
||||
defer e.mux.RUnlock()
|
||||
cm, ok = e.compiledModules[module.ID]
|
||||
return
|
||||
}
|
||||
|
||||
func (e *engine) addCompiledModuleToCache(module *wasm.Module, cm *compiledModule) (err error) {
|
||||
if e.fileCache == nil || module.IsHostModule {
|
||||
return
|
||||
}
|
||||
err = e.fileCache.Add(fileCacheKey(module), serializeCompiledModule(e.wazeroVersion, cm))
|
||||
return
|
||||
}
|
||||
|
||||
func (e *engine) getCompiledModuleFromCache(module *wasm.Module) (cm *compiledModule, hit bool, err error) {
|
||||
if e.fileCache == nil || module.IsHostModule {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if the entries exist in the external cache.
|
||||
var cached io.ReadCloser
|
||||
cached, hit, err = e.fileCache.Get(fileCacheKey(module))
|
||||
if !hit || err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Otherwise, we have a hit in the external cache.
|
||||
// We deserialize the compiled module from `cached`.
|
||||
var staleCache bool
|
||||
// Note: cached.Close is ensured to be called in deserializeCompiledModule.
|
||||
cm, staleCache, err = deserializeCompiledModule(e.wazeroVersion, cached)
|
||||
if err != nil {
|
||||
hit = false
|
||||
return
|
||||
} else if staleCache {
|
||||
return nil, false, e.fileCache.Delete(fileCacheKey(module))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
var magic = []byte{'W', 'A', 'Z', 'E', 'V', 'O'}
|
||||
|
||||
func serializeCompiledModule(wazeroVersion string, cm *compiledModule) io.Reader {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
// First 6 bytes: WAZEVO header.
|
||||
buf.Write(magic)
|
||||
// Next 1 byte: length of the version string.
|
||||
buf.WriteByte(byte(len(wazeroVersion)))
|
||||
// Version of wazero.
|
||||
buf.WriteString(wazeroVersion)
|
||||
// Number of compiled functions (== locally defined functions in the module): 4 bytes.
|
||||
buf.Write(u32.LeBytes(uint32(len(cm.functionOffsets))))
|
||||
for _, offset := range cm.functionOffsets {
|
||||
// The offset of this function in the executable (8 bytes).
|
||||
buf.Write(u64.LeBytes(uint64(offset)))
|
||||
}
|
||||
// The length of code segment (8 bytes).
|
||||
buf.Write(u64.LeBytes(uint64(len(cm.executable))))
|
||||
// Append the native code.
|
||||
buf.Write(cm.executable)
|
||||
// Append checksum.
|
||||
checksum := crc32.Checksum(cm.executable, crc)
|
||||
buf.Write(u32.LeBytes(checksum))
|
||||
if sm := cm.sourceMap; len(sm.executableOffsets) > 0 {
|
||||
buf.WriteByte(1) // indicates that source map is present.
|
||||
l := len(sm.wasmBinaryOffsets)
|
||||
buf.Write(u64.LeBytes(uint64(l)))
|
||||
executableAddr := uintptr(unsafe.Pointer(&cm.executable[0]))
|
||||
for i := 0; i < l; i++ {
|
||||
buf.Write(u64.LeBytes(sm.wasmBinaryOffsets[i]))
|
||||
// executableOffsets holds absolute addresses, so we subtract executableAddr.
|
||||
buf.Write(u64.LeBytes(uint64(sm.executableOffsets[i] - executableAddr)))
|
||||
}
|
||||
} else {
|
||||
buf.WriteByte(0) // indicates that source map is not present.
|
||||
}
|
||||
return bytes.NewReader(buf.Bytes())
|
||||
}
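For reference, the byte layout written by serializeCompiledModule above (and read back by deserializeCompiledModule below) can be summarized as follows; the field sizes are taken directly from the writes above, and the summary itself is illustrative rather than part of the vendored file:

// Serialized layout, in order:
//
//	magic               6 bytes ("WAZEVO")
//	version length      1 byte
//	version             <version length> bytes
//	function count      4 bytes, little-endian
//	function offsets    8 bytes each, little-endian (one per local function)
//	executable length   8 bytes, little-endian
//	executable          native code bytes
//	checksum            4 bytes, CRC-32 (Castagnoli) of the executable, little-endian
//	source map flag     1 byte (1 if present, else 0)
//	source map entries  pairs of 8-byte wasm offset and 8-byte executable-relative offset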
|
||||
|
||||
func deserializeCompiledModule(wazeroVersion string, reader io.ReadCloser) (cm *compiledModule, staleCache bool, err error) {
|
||||
defer reader.Close()
|
||||
cacheHeaderSize := len(magic) + 1 /* version size */ + len(wazeroVersion) + 4 /* number of functions */
|
||||
|
||||
// Read the header before the native code.
|
||||
header := make([]byte, cacheHeaderSize)
|
||||
n, err := reader.Read(header)
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("compilationcache: error reading header: %v", err)
|
||||
}
|
||||
|
||||
if n != cacheHeaderSize {
|
||||
return nil, false, fmt.Errorf("compilationcache: invalid header length: %d", n)
|
||||
}
|
||||
|
||||
if !bytes.Equal(header[:len(magic)], magic) {
|
||||
return nil, false, fmt.Errorf(
|
||||
"compilationcache: invalid magic number: got %s but want %s", magic, header[:len(magic)])
|
||||
}
|
||||
|
||||
// Check the version compatibility.
|
||||
versionSize := int(header[len(magic)])
|
||||
|
||||
cachedVersionBegin, cachedVersionEnd := len(magic)+1, len(magic)+1+versionSize
|
||||
if cachedVersionEnd >= len(header) {
|
||||
staleCache = true
|
||||
return
|
||||
} else if cachedVersion := string(header[cachedVersionBegin:cachedVersionEnd]); cachedVersion != wazeroVersion {
|
||||
staleCache = true
|
||||
return
|
||||
}
|
||||
|
||||
functionsNum := binary.LittleEndian.Uint32(header[len(header)-4:])
|
||||
cm = &compiledModule{functionOffsets: make([]int, functionsNum), executables: &executables{}}
|
||||
|
||||
var eightBytes [8]byte
|
||||
for i := uint32(0); i < functionsNum; i++ {
|
||||
// Read the offset of each function in the executable.
|
||||
var offset uint64
|
||||
if offset, err = readUint64(reader, &eightBytes); err != nil {
|
||||
err = fmt.Errorf("compilationcache: error reading func[%d] executable offset: %v", i, err)
|
||||
return
|
||||
}
|
||||
cm.functionOffsets[i] = int(offset)
|
||||
}
|
||||
|
||||
executableLen, err := readUint64(reader, &eightBytes)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("compilationcache: error reading executable size: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if executableLen > 0 {
|
||||
executable, err := platform.MmapCodeSegment(int(executableLen))
|
||||
if err != nil {
|
||||
err = fmt.Errorf("compilationcache: error mmapping executable (len=%d): %v", executableLen, err)
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
_, err = io.ReadFull(reader, executable)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("compilationcache: error reading executable (len=%d): %v", executableLen, err)
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
expected := crc32.Checksum(executable, crc)
|
||||
if _, err = io.ReadFull(reader, eightBytes[:4]); err != nil {
|
||||
return nil, false, fmt.Errorf("compilationcache: could not read checksum: %v", err)
|
||||
} else if checksum := binary.LittleEndian.Uint32(eightBytes[:4]); expected != checksum {
|
||||
return nil, false, fmt.Errorf("compilationcache: checksum mismatch (expected %d, got %d)", expected, checksum)
|
||||
}
|
||||
|
||||
if runtime.GOARCH == "arm64" {
|
||||
// On arm64, we cannot have the memory mapped rwx at the same time, so we remap it as read-execute.
|
||||
if err = platform.MprotectRX(executable); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
}
|
||||
cm.executable = executable
|
||||
}
|
||||
|
||||
if _, err := io.ReadFull(reader, eightBytes[:1]); err != nil {
|
||||
return nil, false, fmt.Errorf("compilationcache: error reading source map presence: %v", err)
|
||||
}
|
||||
|
||||
if eightBytes[0] == 1 {
|
||||
sm := &cm.sourceMap
|
||||
sourceMapLen, err := readUint64(reader, &eightBytes)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("compilationcache: error reading source map length: %v", err)
|
||||
return nil, false, err
|
||||
}
|
||||
executableOffset := uintptr(unsafe.Pointer(&cm.executable[0]))
|
||||
for i := uint64(0); i < sourceMapLen; i++ {
|
||||
wasmBinaryOffset, err := readUint64(reader, &eightBytes)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("compilationcache: error reading source map[%d] wasm binary offset: %v", i, err)
|
||||
return nil, false, err
|
||||
}
|
||||
executableRelativeOffset, err := readUint64(reader, &eightBytes)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("compilationcache: error reading source map[%d] executable offset: %v", i, err)
|
||||
return nil, false, err
|
||||
}
|
||||
sm.wasmBinaryOffsets = append(sm.wasmBinaryOffsets, wasmBinaryOffset)
|
||||
// executableOffsets holds absolute addresses, so we add executableOffset back.
|
||||
sm.executableOffsets = append(sm.executableOffsets, uintptr(executableRelativeOffset)+executableOffset)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// readUint64 strictly reads a uint64 in little-endian byte order, using the
|
||||
// given array as a buffer. This returns io.EOF if less than 8 bytes were read.
|
||||
func readUint64(reader io.Reader, b *[8]byte) (uint64, error) {
|
||||
s := b[0:8]
|
||||
n, err := reader.Read(s)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
} else if n < 8 { // more strict than reader.Read
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
// Read the u64 from the underlying buffer.
|
||||
ret := binary.LittleEndian.Uint64(s)
|
||||
return ret, nil
|
||||
}
|
||||
15
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_amd64.go
generated
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
//go:build amd64 && !tinygo
|
||||
|
||||
package wazevo
|
||||
|
||||
import _ "unsafe"
|
||||
|
||||
// entrypoint is implemented by the backend.
|
||||
//
|
||||
//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.entrypoint
|
||||
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr)
|
||||
|
||||
// entrypoint is implemented by the backend.
|
||||
//
|
||||
//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.afterGoFunctionCallEntrypoint
|
||||
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
|
||||
15
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_arm64.go
generated
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
//go:build arm64 && !tinygo
|
||||
|
||||
package wazevo
|
||||
|
||||
import _ "unsafe"
|
||||
|
||||
// entrypoint is implemented by the backend.
|
||||
//
|
||||
//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.entrypoint
|
||||
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr)
|
||||
|
||||
// entrypoint is implemented by the backend.
|
||||
//
|
||||
//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.afterGoFunctionCallEntrypoint
|
||||
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
|
||||
15
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/entrypoint_others.go
generated
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
//go:build (!arm64 && !amd64) || tinygo
|
||||
|
||||
package wazevo
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
)
|
||||
|
||||
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr) {
|
||||
panic(runtime.GOARCH)
|
||||
}
|
||||
|
||||
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) {
|
||||
panic(runtime.GOARCH)
|
||||
}
|
||||
594
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go
generated
vendored
Normal file
|
|
@ -0,0 +1,594 @@
|
|||
// Package frontend implements the translation of WebAssembly to SSA IR using the ssa package.
|
||||
package frontend
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
"github.com/tetratelabs/wazero/internal/wasm"
|
||||
)
|
||||
|
||||
// Compiler is in charge of lowering Wasm to SSA IR, and does the optimization
|
||||
// on top of it in an architecture-independent way.
|
||||
type Compiler struct {
|
||||
// Per-module data that is used across all functions.
|
||||
|
||||
m *wasm.Module
|
||||
offset *wazevoapi.ModuleContextOffsetData
|
||||
// ssaBuilder is a ssa.Builder used by this frontend.
|
||||
ssaBuilder ssa.Builder
|
||||
signatures map[*wasm.FunctionType]*ssa.Signature
|
||||
listenerSignatures map[*wasm.FunctionType][2]*ssa.Signature
|
||||
memoryGrowSig ssa.Signature
|
||||
memoryWait32Sig ssa.Signature
|
||||
memoryWait64Sig ssa.Signature
|
||||
memoryNotifySig ssa.Signature
|
||||
checkModuleExitCodeSig ssa.Signature
|
||||
tableGrowSig ssa.Signature
|
||||
refFuncSig ssa.Signature
|
||||
memmoveSig ssa.Signature
|
||||
ensureTermination bool
|
||||
|
||||
// The following are reset per function.
|
||||
|
||||
// wasmLocalToVariable maps the index (considered as wasm.Index of locals)
|
||||
// to the corresponding ssa.Variable.
|
||||
wasmLocalToVariable [] /* local index to */ ssa.Variable
|
||||
wasmLocalFunctionIndex wasm.Index
|
||||
wasmFunctionTypeIndex wasm.Index
|
||||
wasmFunctionTyp *wasm.FunctionType
|
||||
wasmFunctionLocalTypes []wasm.ValueType
|
||||
wasmFunctionBody []byte
|
||||
wasmFunctionBodyOffsetInCodeSection uint64
|
||||
memoryBaseVariable, memoryLenVariable ssa.Variable
|
||||
needMemory bool
|
||||
memoryShared bool
|
||||
globalVariables []ssa.Variable
|
||||
globalVariablesTypes []ssa.Type
|
||||
mutableGlobalVariablesIndexes []wasm.Index // index to ^.
|
||||
needListener bool
|
||||
needSourceOffsetInfo bool
|
||||
// br is reused during lowering.
|
||||
br *bytes.Reader
|
||||
loweringState loweringState
|
||||
|
||||
knownSafeBounds [] /* ssa.ValueID to */ knownSafeBound
|
||||
knownSafeBoundsSet []ssa.ValueID
|
||||
|
||||
knownSafeBoundsAtTheEndOfBlocks [] /* ssa.BlockID to */ knownSafeBoundsAtTheEndOfBlock
|
||||
varLengthKnownSafeBoundWithIDPool wazevoapi.VarLengthPool[knownSafeBoundWithID]
|
||||
|
||||
execCtxPtrValue, moduleCtxPtrValue ssa.Value
|
||||
|
||||
// The following are reused for the known safe bounds analysis.
|
||||
|
||||
pointers []int
|
||||
bounds [][]knownSafeBoundWithID
|
||||
}
|
||||
|
||||
type (
|
||||
// knownSafeBound represents a known safe bound for a value.
|
||||
knownSafeBound struct {
|
||||
// bound is a constant upper bound for the value.
|
||||
bound uint64
|
||||
// absoluteAddr is the absolute address of the value.
|
||||
absoluteAddr ssa.Value
|
||||
}
|
||||
// knownSafeBoundWithID is a knownSafeBound with the ID of the value.
|
||||
knownSafeBoundWithID struct {
|
||||
knownSafeBound
|
||||
id ssa.ValueID
|
||||
}
|
||||
knownSafeBoundsAtTheEndOfBlock = wazevoapi.VarLength[knownSafeBoundWithID]
|
||||
)
|
||||
|
||||
var knownSafeBoundsAtTheEndOfBlockNil = wazevoapi.NewNilVarLength[knownSafeBoundWithID]()
|
||||
|
||||
// NewFrontendCompiler returns a frontend Compiler.
|
||||
func NewFrontendCompiler(m *wasm.Module, ssaBuilder ssa.Builder, offset *wazevoapi.ModuleContextOffsetData, ensureTermination bool, listenerOn bool, sourceInfo bool) *Compiler {
|
||||
c := &Compiler{
|
||||
m: m,
|
||||
ssaBuilder: ssaBuilder,
|
||||
br: bytes.NewReader(nil),
|
||||
offset: offset,
|
||||
ensureTermination: ensureTermination,
|
||||
needSourceOffsetInfo: sourceInfo,
|
||||
varLengthKnownSafeBoundWithIDPool: wazevoapi.NewVarLengthPool[knownSafeBoundWithID](),
|
||||
}
|
||||
c.declareSignatures(listenerOn)
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *Compiler) declareSignatures(listenerOn bool) {
|
||||
m := c.m
|
||||
c.signatures = make(map[*wasm.FunctionType]*ssa.Signature, len(m.TypeSection)+2)
|
||||
if listenerOn {
|
||||
c.listenerSignatures = make(map[*wasm.FunctionType][2]*ssa.Signature, len(m.TypeSection))
|
||||
}
|
||||
for i := range m.TypeSection {
|
||||
wasmSig := &m.TypeSection[i]
|
||||
sig := SignatureForWasmFunctionType(wasmSig)
|
||||
sig.ID = ssa.SignatureID(i)
|
||||
c.signatures[wasmSig] = &sig
|
||||
c.ssaBuilder.DeclareSignature(&sig)
|
||||
|
||||
if listenerOn {
|
||||
beforeSig, afterSig := SignatureForListener(wasmSig)
|
||||
beforeSig.ID = ssa.SignatureID(i) + ssa.SignatureID(len(m.TypeSection))
|
||||
afterSig.ID = ssa.SignatureID(i) + ssa.SignatureID(len(m.TypeSection))*2
|
||||
c.listenerSignatures[wasmSig] = [2]*ssa.Signature{beforeSig, afterSig}
|
||||
c.ssaBuilder.DeclareSignature(beforeSig)
|
||||
c.ssaBuilder.DeclareSignature(afterSig)
|
||||
}
|
||||
}
|
||||
|
||||
begin := ssa.SignatureID(len(m.TypeSection))
|
||||
if listenerOn {
|
||||
begin *= 3
|
||||
}
|
||||
c.memoryGrowSig = ssa.Signature{
|
||||
ID: begin,
|
||||
// Takes the execution context and the number of pages to grow by.
|
||||
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32},
|
||||
// Returns the previous size in pages.
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}
|
||||
c.ssaBuilder.DeclareSignature(&c.memoryGrowSig)
|
||||
|
||||
c.checkModuleExitCodeSig = ssa.Signature{
|
||||
ID: c.memoryGrowSig.ID + 1,
|
||||
// Only takes execution context.
|
||||
Params: []ssa.Type{ssa.TypeI64},
|
||||
}
|
||||
c.ssaBuilder.DeclareSignature(&c.checkModuleExitCodeSig)
|
||||
|
||||
c.tableGrowSig = ssa.Signature{
|
||||
ID: c.checkModuleExitCodeSig.ID + 1,
|
||||
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */},
|
||||
// Returns the previous size.
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}
|
||||
c.ssaBuilder.DeclareSignature(&c.tableGrowSig)
|
||||
|
||||
c.refFuncSig = ssa.Signature{
|
||||
ID: c.tableGrowSig.ID + 1,
|
||||
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* func index */},
|
||||
// Returns the function reference.
|
||||
Results: []ssa.Type{ssa.TypeI64},
|
||||
}
|
||||
c.ssaBuilder.DeclareSignature(&c.refFuncSig)
|
||||
|
||||
c.memmoveSig = ssa.Signature{
|
||||
ID: c.refFuncSig.ID + 1,
|
||||
// dst, src, and the byte count.
|
||||
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64},
|
||||
}
|
||||
|
||||
c.ssaBuilder.DeclareSignature(&c.memmoveSig)
|
||||
|
||||
c.memoryWait32Sig = ssa.Signature{
|
||||
ID: c.memmoveSig.ID + 1,
|
||||
// exec context, timeout, expected, addr
|
||||
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
|
||||
// Returns the status.
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}
|
||||
c.ssaBuilder.DeclareSignature(&c.memoryWait32Sig)
|
||||
|
||||
c.memoryWait64Sig = ssa.Signature{
|
||||
ID: c.memoryWait32Sig.ID + 1,
|
||||
// exec context, timeout, expected, addr
|
||||
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64},
|
||||
// Returns the status.
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}
|
||||
c.ssaBuilder.DeclareSignature(&c.memoryWait64Sig)
|
||||
|
||||
c.memoryNotifySig = ssa.Signature{
|
||||
ID: c.memoryWait64Sig.ID + 1,
|
||||
// exec context, count, addr
|
||||
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
|
||||
// Returns the number notified.
|
||||
Results: []ssa.Type{ssa.TypeI32},
|
||||
}
|
||||
c.ssaBuilder.DeclareSignature(&c.memoryNotifySig)
|
||||
}
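// Illustrative sketch (not part of the vendored source): a minimal view of the signature-ID layout
// produced by declareSignatures above. Wasm type i gets ID i; with listeners enabled, its before/after
// listener signatures get i+N and i+2N (N = len(TypeSection)); the runtime-helper signatures
// (memory.grow, exit-code check, table.grow, ...) are numbered consecutively after that block.
func exampleSignatureIDLayout(numTypes int, listenerOn bool) (firstHelperID int) {
	firstHelperID = numTypes
	if listenerOn {
		firstHelperID *= 3
	}
	// e.g. 4 Wasm types without listeners -> helper IDs start at 4; with listeners -> 12.
	return firstHelperID
}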
|
||||
|
||||
// SignatureForWasmFunctionType returns the ssa.Signature for the given wasm.FunctionType.
|
||||
func SignatureForWasmFunctionType(typ *wasm.FunctionType) ssa.Signature {
|
||||
sig := ssa.Signature{
|
||||
// +2 to pass moduleContextPtr and executionContextPtr. See the inline comment in LowerToSSA.
|
||||
Params: make([]ssa.Type, len(typ.Params)+2),
|
||||
Results: make([]ssa.Type, len(typ.Results)),
|
||||
}
|
||||
sig.Params[0] = executionContextPtrTyp
|
||||
sig.Params[1] = moduleContextPtrTyp
|
||||
for j, typ := range typ.Params {
|
||||
sig.Params[j+2] = WasmTypeToSSAType(typ)
|
||||
}
|
||||
for j, typ := range typ.Results {
|
||||
sig.Results[j] = WasmTypeToSSAType(typ)
|
||||
}
|
||||
return sig
|
||||
}
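// Illustrative sketch (not part of the vendored source): how a concrete Wasm function type maps
// through SignatureForWasmFunctionType. For (i32, f64) -> i64 the resulting ssa.Signature has
// Params = [I64 /* exec ctx */, I64 /* module ctx */, I32, F64] and Results = [I64].
func exampleSignatureForWasmFunctionType() ssa.Signature {
	return SignatureForWasmFunctionType(&wasm.FunctionType{
		Params:  []wasm.ValueType{wasm.ValueTypeI32, wasm.ValueTypeF64},
		Results: []wasm.ValueType{wasm.ValueTypeI64},
	})
}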
|
||||
|
||||
// Init initializes the state of the frontend Compiler and makes it ready for the next function.
|
||||
func (c *Compiler) Init(idx, typIndex wasm.Index, typ *wasm.FunctionType, localTypes []wasm.ValueType, body []byte, needListener bool, bodyOffsetInCodeSection uint64) {
|
||||
c.ssaBuilder.Init(c.signatures[typ])
|
||||
c.loweringState.reset()
|
||||
|
||||
c.wasmFunctionTypeIndex = typIndex
|
||||
c.wasmLocalFunctionIndex = idx
|
||||
c.wasmFunctionTyp = typ
|
||||
c.wasmFunctionLocalTypes = localTypes
|
||||
c.wasmFunctionBody = body
|
||||
c.wasmFunctionBodyOffsetInCodeSection = bodyOffsetInCodeSection
|
||||
c.needListener = needListener
|
||||
c.clearSafeBounds()
|
||||
c.varLengthKnownSafeBoundWithIDPool.Reset()
|
||||
c.knownSafeBoundsAtTheEndOfBlocks = c.knownSafeBoundsAtTheEndOfBlocks[:0]
|
||||
}
|
||||
|
||||
// Note: this assumes 64-bit platform (I believe we won't have 32-bit backend ;)).
|
||||
const executionContextPtrTyp, moduleContextPtrTyp = ssa.TypeI64, ssa.TypeI64
|
||||
|
||||
// LowerToSSA lowers the current function to SSA function which will be held by ssaBuilder.
|
||||
// After calling this, the caller will be able to access the SSA info in *Compiler.ssaBuilder.
|
||||
//
|
||||
// Note that this only does the naive lowering and does not do any optimization; the caller is expected to do so.
|
||||
func (c *Compiler) LowerToSSA() {
|
||||
builder := c.ssaBuilder
|
||||
|
||||
// Set up the entry block.
|
||||
entryBlock := builder.AllocateBasicBlock()
|
||||
builder.SetCurrentBlock(entryBlock)
|
||||
|
||||
// Functions always take two parameters in addition to Wasm-level parameters:
|
||||
//
|
||||
// 1. executionContextPtr: pointer to the *executionContext in wazevo package.
|
||||
// This is used to exit the execution in the face of a trap, and also for host function calls.
|
||||
//
|
||||
// 2. moduleContextPtr: pointer to the *moduleContextOpaque in wazevo package.
|
||||
// This will be used to access memory, etc. Also, this will be used during host function calls.
|
||||
//
|
||||
// Note: it's clear that sometimes a function won't need them. For example,
|
||||
// if the function doesn't trap and doesn't make function call, then
|
||||
// we might be able to eliminate the parameter. However, if that function
|
||||
// can be called via call_indirect, then we cannot eliminate because the
|
||||
// signature won't match with the expected one.
|
||||
// TODO: maybe there's some way to do this optimization without glitches, but so far I have no clue about the feasibility.
|
||||
//
|
||||
// Note: In Wasmtime or many other runtimes, moduleContextPtr is called "vmContext". Also note that `moduleContextPtr`
|
||||
// is wazero-specific since other runtimes can naturally use the OS-level signal to do this job thanks to the fact that
|
||||
// they can use the native stack, whereas wazero cannot use the goroutine stack and has to use a Go-runtime-allocated []byte as a stack.
|
||||
c.execCtxPtrValue = entryBlock.AddParam(builder, executionContextPtrTyp)
|
||||
c.moduleCtxPtrValue = entryBlock.AddParam(builder, moduleContextPtrTyp)
|
||||
builder.AnnotateValue(c.execCtxPtrValue, "exec_ctx")
|
||||
builder.AnnotateValue(c.moduleCtxPtrValue, "module_ctx")
|
||||
|
||||
for i, typ := range c.wasmFunctionTyp.Params {
|
||||
st := WasmTypeToSSAType(typ)
|
||||
variable := builder.DeclareVariable(st)
|
||||
value := entryBlock.AddParam(builder, st)
|
||||
builder.DefineVariable(variable, value, entryBlock)
|
||||
c.setWasmLocalVariable(wasm.Index(i), variable)
|
||||
}
|
||||
c.declareWasmLocals(entryBlock)
|
||||
c.declareNecessaryVariables()
|
||||
|
||||
c.lowerBody(entryBlock)
|
||||
}
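// Illustrative example (not part of the vendored source): for a Wasm function of type (i32) -> i32,
// the entry block set up by LowerToSSA formats roughly as
//
//	blk0: (exec_ctx:i64, module_ctx:i64, v2:i32)
//
// i.e. the two context pointers always appear as the first two block parameters, followed by the
// Wasm-level parameters. (The exact value numbering here is an assumption for illustration only.)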
|
||||
|
||||
// localVariable returns the SSA variable for the given Wasm local index.
|
||||
func (c *Compiler) localVariable(index wasm.Index) ssa.Variable {
|
||||
return c.wasmLocalToVariable[index]
|
||||
}
|
||||
|
||||
func (c *Compiler) setWasmLocalVariable(index wasm.Index, variable ssa.Variable) {
|
||||
idx := int(index)
|
||||
if idx >= len(c.wasmLocalToVariable) {
|
||||
c.wasmLocalToVariable = append(c.wasmLocalToVariable, make([]ssa.Variable, idx+1-len(c.wasmLocalToVariable))...)
|
||||
}
|
||||
c.wasmLocalToVariable[idx] = variable
|
||||
}
|
||||
|
||||
// declareWasmLocals declares the SSA variables for the Wasm locals.
|
||||
func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) {
|
||||
localCount := wasm.Index(len(c.wasmFunctionTyp.Params))
|
||||
for i, typ := range c.wasmFunctionLocalTypes {
|
||||
st := WasmTypeToSSAType(typ)
|
||||
variable := c.ssaBuilder.DeclareVariable(st)
|
||||
c.setWasmLocalVariable(wasm.Index(i)+localCount, variable)
|
||||
|
||||
zeroInst := c.ssaBuilder.AllocateInstruction()
|
||||
switch st {
|
||||
case ssa.TypeI32:
|
||||
zeroInst.AsIconst32(0)
|
||||
case ssa.TypeI64:
|
||||
zeroInst.AsIconst64(0)
|
||||
case ssa.TypeF32:
|
||||
zeroInst.AsF32const(0)
|
||||
case ssa.TypeF64:
|
||||
zeroInst.AsF64const(0)
|
||||
case ssa.TypeV128:
|
||||
zeroInst.AsVconst(0, 0)
|
||||
default:
|
||||
panic("TODO: " + wasm.ValueTypeName(typ))
|
||||
}
|
||||
|
||||
c.ssaBuilder.InsertInstruction(zeroInst)
|
||||
value := zeroInst.Return()
|
||||
c.ssaBuilder.DefineVariable(variable, value, entry)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Compiler) declareNecessaryVariables() {
|
||||
if c.needMemory = c.m.MemorySection != nil; c.needMemory {
|
||||
c.memoryShared = c.m.MemorySection.IsShared
|
||||
} else if c.needMemory = c.m.ImportMemoryCount > 0; c.needMemory {
|
||||
for _, imp := range c.m.ImportSection {
|
||||
if imp.Type == wasm.ExternTypeMemory {
|
||||
c.memoryShared = imp.DescMem.IsShared
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if c.needMemory {
|
||||
c.memoryBaseVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64)
|
||||
c.memoryLenVariable = c.ssaBuilder.DeclareVariable(ssa.TypeI64)
|
||||
}
|
||||
|
||||
c.globalVariables = c.globalVariables[:0]
|
||||
c.mutableGlobalVariablesIndexes = c.mutableGlobalVariablesIndexes[:0]
|
||||
c.globalVariablesTypes = c.globalVariablesTypes[:0]
|
||||
for _, imp := range c.m.ImportSection {
|
||||
if imp.Type == wasm.ExternTypeGlobal {
|
||||
desc := imp.DescGlobal
|
||||
c.declareWasmGlobal(desc.ValType, desc.Mutable)
|
||||
}
|
||||
}
|
||||
for _, g := range c.m.GlobalSection {
|
||||
desc := g.Type
|
||||
c.declareWasmGlobal(desc.ValType, desc.Mutable)
|
||||
}
|
||||
|
||||
// TODO: add tables.
|
||||
}
|
||||
|
||||
func (c *Compiler) declareWasmGlobal(typ wasm.ValueType, mutable bool) {
|
||||
var st ssa.Type
|
||||
switch typ {
|
||||
case wasm.ValueTypeI32:
|
||||
st = ssa.TypeI32
|
||||
case wasm.ValueTypeI64,
|
||||
// Both externref and funcref are represented as I64 since we only support 64-bit platforms.
|
||||
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
|
||||
st = ssa.TypeI64
|
||||
case wasm.ValueTypeF32:
|
||||
st = ssa.TypeF32
|
||||
case wasm.ValueTypeF64:
|
||||
st = ssa.TypeF64
|
||||
case wasm.ValueTypeV128:
|
||||
st = ssa.TypeV128
|
||||
default:
|
||||
panic("TODO: " + wasm.ValueTypeName(typ))
|
||||
}
|
||||
v := c.ssaBuilder.DeclareVariable(st)
|
||||
index := wasm.Index(len(c.globalVariables))
|
||||
c.globalVariables = append(c.globalVariables, v)
|
||||
c.globalVariablesTypes = append(c.globalVariablesTypes, st)
|
||||
if mutable {
|
||||
c.mutableGlobalVariablesIndexes = append(c.mutableGlobalVariablesIndexes, index)
|
||||
}
|
||||
}
|
||||
|
||||
// WasmTypeToSSAType converts wasm.ValueType to ssa.Type.
|
||||
func WasmTypeToSSAType(vt wasm.ValueType) ssa.Type {
|
||||
switch vt {
|
||||
case wasm.ValueTypeI32:
|
||||
return ssa.TypeI32
|
||||
case wasm.ValueTypeI64,
|
||||
// Both externref and funcref are represented as I64 since we only support 64-bit platforms.
|
||||
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
|
||||
return ssa.TypeI64
|
||||
case wasm.ValueTypeF32:
|
||||
return ssa.TypeF32
|
||||
case wasm.ValueTypeF64:
|
||||
return ssa.TypeF64
|
||||
case wasm.ValueTypeV128:
|
||||
return ssa.TypeV128
|
||||
default:
|
||||
panic("TODO: " + wasm.ValueTypeName(vt))
|
||||
}
|
||||
}
|
||||
|
||||
// addBlockParamsFromWasmTypes adds the block parameters to the given block.
|
||||
func (c *Compiler) addBlockParamsFromWasmTypes(tps []wasm.ValueType, blk ssa.BasicBlock) {
|
||||
for _, typ := range tps {
|
||||
st := WasmTypeToSSAType(typ)
|
||||
blk.AddParam(c.ssaBuilder, st)
|
||||
}
|
||||
}
|
||||
|
||||
// formatBuilder outputs the constructed SSA function as a string with source information.
|
||||
func (c *Compiler) formatBuilder() string {
|
||||
return c.ssaBuilder.Format()
|
||||
}
|
||||
|
||||
// SignatureForListener returns the signatures for the listener functions.
|
||||
func SignatureForListener(wasmSig *wasm.FunctionType) (*ssa.Signature, *ssa.Signature) {
|
||||
beforeSig := &ssa.Signature{}
|
||||
beforeSig.Params = make([]ssa.Type, len(wasmSig.Params)+2)
|
||||
beforeSig.Params[0] = ssa.TypeI64 // Execution context.
|
||||
beforeSig.Params[1] = ssa.TypeI32 // Function index.
|
||||
for i, p := range wasmSig.Params {
|
||||
beforeSig.Params[i+2] = WasmTypeToSSAType(p)
|
||||
}
|
||||
afterSig := &ssa.Signature{}
|
||||
afterSig.Params = make([]ssa.Type, len(wasmSig.Results)+2)
|
||||
afterSig.Params[0] = ssa.TypeI64 // Execution context.
|
||||
afterSig.Params[1] = ssa.TypeI32 // Function index.
|
||||
for i, p := range wasmSig.Results {
|
||||
afterSig.Params[i+2] = WasmTypeToSSAType(p)
|
||||
}
|
||||
return beforeSig, afterSig
|
||||
}
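// Illustrative example (not part of the vendored source): for a Wasm type (i32) -> (i64, f32),
// SignatureForListener yields
//
//	before: Params = [I64 /* exec ctx */, I32 /* func index */, I32]      // the Wasm params
//	after:  Params = [I64 /* exec ctx */, I32 /* func index */, I64, F32] // the Wasm results
//
// Neither listener signature declares SSA results; listeners are called for their side effects only.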
|
||||
|
||||
// getKnownSafeBound returns the knownSafeBound entry recorded for the given value, or nil if no entry has been allocated for it.
|
||||
func (c *Compiler) getKnownSafeBound(v ssa.ValueID) *knownSafeBound {
|
||||
if int(v) >= len(c.knownSafeBounds) {
|
||||
return nil
|
||||
}
|
||||
return &c.knownSafeBounds[v]
|
||||
}
|
||||
|
||||
// recordKnownSafeBound records the given safe bound for the given value.
|
||||
func (c *Compiler) recordKnownSafeBound(v ssa.ValueID, safeBound uint64, absoluteAddr ssa.Value) {
|
||||
if int(v) >= len(c.knownSafeBounds) {
|
||||
c.knownSafeBounds = append(c.knownSafeBounds, make([]knownSafeBound, v+1)...)
|
||||
}
|
||||
|
||||
if existing := c.knownSafeBounds[v]; existing.bound == 0 {
|
||||
c.knownSafeBounds[v] = knownSafeBound{
|
||||
bound: safeBound,
|
||||
absoluteAddr: absoluteAddr,
|
||||
}
|
||||
c.knownSafeBoundsSet = append(c.knownSafeBoundsSet, v)
|
||||
} else if safeBound > existing.bound {
|
||||
c.knownSafeBounds[v].bound = safeBound
|
||||
}
|
||||
}
|
||||
|
||||
// clearSafeBounds clears the known safe bounds.
|
||||
func (c *Compiler) clearSafeBounds() {
|
||||
for _, v := range c.knownSafeBoundsSet {
|
||||
ptr := &c.knownSafeBounds[v]
|
||||
ptr.bound = 0
|
||||
ptr.absoluteAddr = ssa.ValueInvalid
|
||||
}
|
||||
c.knownSafeBoundsSet = c.knownSafeBoundsSet[:0]
|
||||
}
|
||||
|
||||
// resetAbsoluteAddressInSafeBounds resets the absolute addresses recorded in the known safe bounds.
|
||||
func (c *Compiler) resetAbsoluteAddressInSafeBounds() {
|
||||
for _, v := range c.knownSafeBoundsSet {
|
||||
ptr := &c.knownSafeBounds[v]
|
||||
ptr.absoluteAddr = ssa.ValueInvalid
|
||||
}
|
||||
}
|
||||
|
||||
func (k *knownSafeBound) valid() bool {
|
||||
return k != nil && k.bound > 0
|
||||
}
|
||||
|
||||
func (c *Compiler) allocateVarLengthValues(_cap int, vs ...ssa.Value) ssa.Values {
|
||||
builder := c.ssaBuilder
|
||||
pool := builder.VarLengthPool()
|
||||
args := pool.Allocate(_cap)
|
||||
args = args.Append(builder.VarLengthPool(), vs...)
|
||||
return args
|
||||
}
|
||||
|
||||
func (c *Compiler) finalizeKnownSafeBoundsAtTheEndOfBlock(bID ssa.BasicBlockID) {
|
||||
_bID := int(bID)
|
||||
if l := len(c.knownSafeBoundsAtTheEndOfBlocks); _bID >= l {
|
||||
c.knownSafeBoundsAtTheEndOfBlocks = append(c.knownSafeBoundsAtTheEndOfBlocks,
|
||||
make([]knownSafeBoundsAtTheEndOfBlock, _bID+1-len(c.knownSafeBoundsAtTheEndOfBlocks))...)
|
||||
for i := l; i < len(c.knownSafeBoundsAtTheEndOfBlocks); i++ {
|
||||
c.knownSafeBoundsAtTheEndOfBlocks[i] = knownSafeBoundsAtTheEndOfBlockNil
|
||||
}
|
||||
}
|
||||
p := &c.varLengthKnownSafeBoundWithIDPool
|
||||
size := len(c.knownSafeBoundsSet)
|
||||
allocated := c.varLengthKnownSafeBoundWithIDPool.Allocate(size)
|
||||
// Sort the known safe bounds by the value ID so that we can use the intersection algorithm in initializeCurrentBlockKnownBounds.
|
||||
sortSSAValueIDs(c.knownSafeBoundsSet)
|
||||
for _, vID := range c.knownSafeBoundsSet {
|
||||
kb := c.knownSafeBounds[vID]
|
||||
allocated = allocated.Append(p, knownSafeBoundWithID{
|
||||
knownSafeBound: kb,
|
||||
id: vID,
|
||||
})
|
||||
}
|
||||
c.knownSafeBoundsAtTheEndOfBlocks[bID] = allocated
|
||||
c.clearSafeBounds()
|
||||
}
|
||||
|
||||
func (c *Compiler) initializeCurrentBlockKnownBounds() {
|
||||
currentBlk := c.ssaBuilder.CurrentBlock()
|
||||
switch preds := currentBlk.Preds(); preds {
|
||||
case 0:
|
||||
case 1:
|
||||
pred := currentBlk.Pred(0).ID()
|
||||
for _, kb := range c.getKnownSafeBoundsAtTheEndOfBlocks(pred).View() {
|
||||
// Unless the block is sealed, we cannot assume the absolute address is valid:
|
||||
// later we might add another predecessor that has no visibility of that value.
|
||||
addr := ssa.ValueInvalid
|
||||
if currentBlk.Sealed() {
|
||||
addr = kb.absoluteAddr
|
||||
}
|
||||
c.recordKnownSafeBound(kb.id, kb.bound, addr)
|
||||
}
|
||||
default:
|
||||
c.pointers = c.pointers[:0]
|
||||
c.bounds = c.bounds[:0]
|
||||
for i := 0; i < preds; i++ {
|
||||
c.bounds = append(c.bounds, c.getKnownSafeBoundsAtTheEndOfBlocks(currentBlk.Pred(i).ID()).View())
|
||||
c.pointers = append(c.pointers, 0)
|
||||
}
|
||||
|
||||
// If there are multiple predecessors, we need to find the intersection of the known safe bounds.
|
||||
|
||||
outer:
|
||||
for {
|
||||
smallestID := ssa.ValueID(math.MaxUint32)
|
||||
for i, ptr := range c.pointers {
|
||||
if ptr >= len(c.bounds[i]) {
|
||||
break outer
|
||||
}
|
||||
cb := &c.bounds[i][ptr]
|
||||
if id := cb.id; id < smallestID {
|
||||
smallestID = cb.id
|
||||
}
|
||||
}
|
||||
|
||||
// Check if current elements are the same across all lists.
|
||||
same := true
|
||||
minBound := uint64(math.MaxUint64)
|
||||
for i := 0; i < preds; i++ {
|
||||
cb := &c.bounds[i][c.pointers[i]]
|
||||
if cb.id != smallestID {
|
||||
same = false
|
||||
break
|
||||
} else {
|
||||
if cb.bound < minBound {
|
||||
minBound = cb.bound
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if same { // All elements are the same.
|
||||
// Absolute address cannot be used in the intersection since the value might be only defined in one of the predecessors.
|
||||
c.recordKnownSafeBound(smallestID, minBound, ssa.ValueInvalid)
|
||||
}
|
||||
|
||||
// Move pointer(s) for the smallest ID forward (if same, move all).
|
||||
for i := 0; i < preds; i++ {
|
||||
cb := &c.bounds[i][c.pointers[i]]
|
||||
if cb.id == smallestID {
|
||||
c.pointers[i]++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
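// Minimal standalone sketch (not part of the vendored source) of the k-way intersection used in
// the default case above: every per-predecessor list is sorted by ssa.ValueID, so an ID survives
// only when all cursors point at it simultaneously, and the surviving bound is the minimum across
// the lists. exampleIntersectSortedBounds is a hypothetical helper shown purely for illustration.
func exampleIntersectSortedBounds(lists [][]knownSafeBoundWithID) []knownSafeBoundWithID {
	ptrs := make([]int, len(lists))
	var out []knownSafeBoundWithID
	for {
		smallest := ssa.ValueID(math.MaxUint32)
		for i, p := range ptrs {
			if p >= len(lists[i]) {
				return out // Any exhausted list ends the intersection.
			}
			if id := lists[i][p].id; id < smallest {
				smallest = id
			}
		}
		same, minBound := true, uint64(math.MaxUint64)
		for i, p := range ptrs {
			if lists[i][p].id != smallest {
				same = false
			} else if b := lists[i][p].bound; b < minBound {
				minBound = b
			}
		}
		if same {
			// Absolute addresses are dropped, mirroring the use of ssa.ValueInvalid above.
			out = append(out, knownSafeBoundWithID{
				knownSafeBound: knownSafeBound{bound: minBound, absoluteAddr: ssa.ValueInvalid},
				id:             smallest,
			})
		}
		// Advance every cursor currently sitting on the smallest ID.
		for i, p := range ptrs {
			if lists[i][p].id == smallest {
				ptrs[i] = p + 1
			}
		}
	}
}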
|
||||
|
||||
func (c *Compiler) getKnownSafeBoundsAtTheEndOfBlocks(id ssa.BasicBlockID) knownSafeBoundsAtTheEndOfBlock {
|
||||
if int(id) >= len(c.knownSafeBoundsAtTheEndOfBlocks) {
|
||||
return knownSafeBoundsAtTheEndOfBlockNil
|
||||
}
|
||||
return c.knownSafeBoundsAtTheEndOfBlocks[id]
|
||||
}
|
||||
4268
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go
generated
vendored
Normal file
File diff suppressed because it is too large
10
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/misc.go
generated
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
package frontend
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
"github.com/tetratelabs/wazero/internal/wasm"
|
||||
)
|
||||
|
||||
func FunctionIndexToFuncRef(idx wasm.Index) ssa.FuncRef {
|
||||
return ssa.FuncRef(idx)
|
||||
}
|
||||
15
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id.go
generated
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
//go:build go1.21
|
||||
|
||||
package frontend
|
||||
|
||||
import (
|
||||
"slices"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
func sortSSAValueIDs(IDs []ssa.ValueID) {
|
||||
slices.SortFunc(IDs, func(i, j ssa.ValueID) int {
|
||||
return int(i) - int(j)
|
||||
})
|
||||
}
|
||||
17
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/sort_id_old.go
generated
vendored
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
//go:build !go1.21
|
||||
|
||||
// TODO: delete after the floor Go version is 1.21
|
||||
|
||||
package frontend
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
|
||||
)
|
||||
|
||||
func sortSSAValueIDs(IDs []ssa.ValueID) {
|
||||
sort.SliceStable(IDs, func(i, j int) bool {
|
||||
return int(IDs[i]) < int(IDs[j])
|
||||
})
|
||||
}
|
||||
82
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/hostmodule.go
generated
vendored
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
package wazevo
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"reflect"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/experimental"
|
||||
"github.com/tetratelabs/wazero/internal/wasm"
|
||||
)
|
||||
|
||||
func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionListener) moduleContextOpaque {
|
||||
size := len(m.CodeSection)*16 + 32
|
||||
ret := newAlignedOpaque(size)
|
||||
|
||||
binary.LittleEndian.PutUint64(ret[0:], uint64(uintptr(unsafe.Pointer(m))))
|
||||
|
||||
if len(listeners) > 0 {
|
||||
sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&listeners))
|
||||
binary.LittleEndian.PutUint64(ret[8:], uint64(sliceHeader.Data))
|
||||
binary.LittleEndian.PutUint64(ret[16:], uint64(sliceHeader.Len))
|
||||
binary.LittleEndian.PutUint64(ret[24:], uint64(sliceHeader.Cap))
|
||||
}
|
||||
|
||||
offset := 32
|
||||
for i := range m.CodeSection {
|
||||
goFn := m.CodeSection[i].GoFunc
|
||||
writeIface(goFn, ret[offset:])
|
||||
offset += 16
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func hostModuleFromOpaque(opaqueBegin uintptr) *wasm.Module {
|
||||
var opaqueViewOverSlice []byte
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice))
|
||||
sh.Data = opaqueBegin
|
||||
sh.Len = 32
|
||||
sh.Cap = 32
|
||||
return *(**wasm.Module)(unsafe.Pointer(&opaqueViewOverSlice[0]))
|
||||
}
|
||||
|
||||
func hostModuleListenersSliceFromOpaque(opaqueBegin uintptr) []experimental.FunctionListener {
|
||||
var opaqueViewOverSlice []byte
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverSlice))
|
||||
sh.Data = opaqueBegin
|
||||
sh.Len = 32
|
||||
sh.Cap = 32
|
||||
|
||||
b := binary.LittleEndian.Uint64(opaqueViewOverSlice[8:])
|
||||
l := binary.LittleEndian.Uint64(opaqueViewOverSlice[16:])
|
||||
c := binary.LittleEndian.Uint64(opaqueViewOverSlice[24:])
|
||||
var ret []experimental.FunctionListener
|
||||
sh = (*reflect.SliceHeader)(unsafe.Pointer(&ret))
|
||||
sh.Data = uintptr(b)
|
||||
setSliceLimits(sh, uintptr(l), uintptr(c))
|
||||
return ret
|
||||
}
|
||||
|
||||
func hostModuleGoFuncFromOpaque[T any](index int, opaqueBegin uintptr) T {
|
||||
offset := uintptr(index*16) + 32
|
||||
ptr := opaqueBegin + offset
|
||||
|
||||
var opaqueViewOverFunction []byte
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaqueViewOverFunction))
|
||||
sh.Data = ptr
|
||||
sh.Len = 16
|
||||
sh.Cap = 16
|
||||
return readIface(opaqueViewOverFunction).(T)
|
||||
}
|
||||
|
||||
func writeIface(goFn interface{}, buf []byte) {
|
||||
goFnIface := *(*[2]uint64)(unsafe.Pointer(&goFn))
|
||||
binary.LittleEndian.PutUint64(buf, goFnIface[0])
|
||||
binary.LittleEndian.PutUint64(buf[8:], goFnIface[1])
|
||||
}
|
||||
|
||||
func readIface(buf []byte) interface{} {
|
||||
b := binary.LittleEndian.Uint64(buf)
|
||||
s := binary.LittleEndian.Uint64(buf[8:])
|
||||
return *(*interface{})(unsafe.Pointer(&[2]uint64{b, s}))
|
||||
}
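// Hedged note (not part of the vendored source): writeIface/readIface above round-trip a Go
// interface value by copying its two-word header (type word, data word) into 16 bytes, e.g.:
//
//	buf := make([]byte, 16)
//	var fn interface{} = func(int) int { return 42 }
//	writeIface(fn, buf)
//	restored := readIface(buf).(func(int) int) // same function value as fn
//
// This relies on the current Go runtime representation of interfaces and is only safe because the
// original values are kept reachable via the host module's CodeSection.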
|
||||
30
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_amd64.go
generated
vendored
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
//go:build amd64
|
||||
|
||||
package wazevo
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64"
|
||||
)
|
||||
|
||||
func newMachine() backend.Machine {
|
||||
return amd64.NewBackend()
|
||||
}
|
||||
|
||||
// unwindStack unwinds the stack, appending return addresses to the `returnAddresses` slice.
|
||||
// The implementation must be aligned with the ABI/Calling convention.
|
||||
func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr {
|
||||
return amd64.UnwindStack(sp, fp, top, returnAddresses)
|
||||
}
|
||||
|
||||
// goCallStackView is a function to get a view of the stack before a Go call, which
|
||||
// is the view of the stack allocated in CompileGoFunctionTrampoline.
|
||||
func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
|
||||
return amd64.GoCallStackView(stackPointerBeforeGoCall)
|
||||
}
|
||||
|
||||
// adjustClonedStack is a function to adjust the stack after it is grown.
|
||||
// More precisely, absolute addresses (frame pointers) in the stack must be adjusted.
|
||||
func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) {
|
||||
amd64.AdjustClonedStack(oldsp, oldTop, sp, fp, top)
|
||||
}
|
||||
32
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_arm64.go
generated
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
//go:build arm64
|
||||
|
||||
package wazevo
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64"
|
||||
)
|
||||
|
||||
func newMachine() backend.Machine {
|
||||
return arm64.NewBackend()
|
||||
}
|
||||
|
||||
// unwindStack unwinds the stack, appending return addresses to the `returnAddresses` slice.
|
||||
// The implementation must be aligned with the ABI/Calling convention.
|
||||
func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr {
|
||||
return arm64.UnwindStack(sp, fp, top, returnAddresses)
|
||||
}
|
||||
|
||||
// goCallStackView is a function to get a view of the stack before a Go call, which
|
||||
// is the view of the stack allocated in CompileGoFunctionTrampoline.
|
||||
func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
|
||||
return arm64.GoCallStackView(stackPointerBeforeGoCall)
|
||||
}
|
||||
|
||||
// adjustClonedStack is a function to adjust the stack after it is grown.
|
||||
// More precisely, absolute addresses (frame pointers) in the stack must be adjusted.
|
||||
func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) {
|
||||
// TODO: currently, the frame pointers are not used, and saved old sps are relative to the current stack pointer,
|
||||
// so no adjustment is needed on arm64. However, when we make it absolute, which in my opinion is better perf-wise
|
||||
// at the expense of slightly costly stack growth, we need to adjust the pushed frame pointers.
|
||||
}
|
||||
29
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/isa_other.go
generated
vendored
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
//go:build !(amd64 || arm64)
|
||||
|
||||
package wazevo
|
||||
|
||||
import (
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
|
||||
)
|
||||
|
||||
func newMachine() backend.Machine {
|
||||
panic("unsupported architecture")
|
||||
}
|
||||
|
||||
// unwindStack unwinds the stack, appending return addresses to the `returnAddresses` slice.
|
||||
// The implementation must be aligned with the ABI/Calling convention.
|
||||
func unwindStack(sp, fp, top uintptr, returnAddresses []uintptr) []uintptr {
|
||||
panic("unsupported architecture")
|
||||
}
|
||||
|
||||
// goCallStackView is a function to get a view of the stack before a Go call, which
|
||||
// is the view of the stack allocated in CompileGoFunctionTrampoline.
|
||||
func goCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
|
||||
panic("unsupported architecture")
|
||||
}
|
||||
|
||||
// adjustClonedStack is a function to adjust the stack after it is grown.
|
||||
// More precisely, absolute addresses (frame pointers) in the stack must be adjusted.
|
||||
func adjustClonedStack(oldsp, oldTop, sp, fp, top uintptr) {
|
||||
panic("unsupported architecture")
|
||||
}
|
||||
11
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/memmove.go
generated
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
package wazevo
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
//go:linkname memmove runtime.memmove
|
||||
func memmove(_, _ unsafe.Pointer, _ uintptr)
|
||||
|
||||
var memmovPtr = reflect.ValueOf(memmove).Pointer()
|
||||
344
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
generated
vendored
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
package wazevo
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"unsafe"
|
||||
|
||||
"github.com/tetratelabs/wazero/api"
|
||||
"github.com/tetratelabs/wazero/experimental"
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
"github.com/tetratelabs/wazero/internal/wasm"
|
||||
"github.com/tetratelabs/wazero/internal/wasmruntime"
|
||||
)
|
||||
|
||||
type (
|
||||
// moduleEngine implements wasm.ModuleEngine.
|
||||
moduleEngine struct {
|
||||
// opaquePtr equals &opaque[0].
|
||||
opaquePtr *byte
|
||||
parent *compiledModule
|
||||
module *wasm.ModuleInstance
|
||||
opaque moduleContextOpaque
|
||||
localFunctionInstances []*functionInstance
|
||||
importedFunctions []importedFunction
|
||||
listeners []experimental.FunctionListener
|
||||
}
|
||||
|
||||
functionInstance struct {
|
||||
executable *byte
|
||||
moduleContextOpaquePtr *byte
|
||||
typeID wasm.FunctionTypeID
|
||||
indexInModule wasm.Index
|
||||
}
|
||||
|
||||
importedFunction struct {
|
||||
me *moduleEngine
|
||||
indexInModule wasm.Index
|
||||
}
|
||||
|
||||
// moduleContextOpaque is the opaque byte slice of module-instance-specific contents whose size
|
||||
// is only known at Wasm compile time, hence dynamic. Its contents are basically pointers to
|
||||
// module-instance-specific objects as well as functions. This is sometimes called "VMContext" in other Wasm runtimes.
|
||||
//
|
||||
// Internally, the buffer is structured as follows:
|
||||
//
|
||||
// type moduleContextOpaque struct {
|
||||
// moduleInstance *wasm.ModuleInstance
|
||||
// localMemoryBufferPtr *byte (optional)
|
||||
// localMemoryLength uint64 (optional)
|
||||
// importedMemoryInstance *wasm.MemoryInstance (optional)
|
||||
// importedMemoryOwnerOpaqueCtx *byte (optional)
|
||||
// importedFunctions [# of importedFunctions]functionInstance
|
||||
// importedGlobals []ImportedGlobal (optional)
|
||||
// localGlobals []Global (optional)
|
||||
// typeIDsBegin &wasm.ModuleInstance.TypeIDs[0] (optional)
|
||||
// tables []*wasm.TableInstance (optional)
|
||||
// beforeListenerTrampolines1stElement **byte (optional)
|
||||
// afterListenerTrampolines1stElement **byte (optional)
|
||||
// dataInstances1stElement []wasm.DataInstance (optional)
|
||||
// elementInstances1stElement []wasm.ElementInstance (optional)
|
||||
// }
|
||||
//
|
||||
// type ImportedGlobal struct {
|
||||
// *Global
|
||||
// _ uint64 // padding
|
||||
// }
|
||||
//
|
||||
// type Global struct {
|
||||
// Val, ValHi uint64
|
||||
// }
|
||||
//
|
||||
// See wazevoapi.NewModuleContextOffsetData for the details of the offsets.
|
||||
//
|
||||
// Note that for host modules, the structure is entirely different. See buildHostModuleOpaque.
|
||||
moduleContextOpaque []byte
|
||||
)
|
||||
|
||||
func newAlignedOpaque(size int) moduleContextOpaque {
|
||||
// Check if the size is a multiple of 16.
|
||||
if size%16 != 0 {
|
||||
panic("size must be a multiple of 16")
|
||||
}
|
||||
buf := make([]byte, size+16)
|
||||
// Align the buffer to 16 bytes.
|
||||
rem := uintptr(unsafe.Pointer(&buf[0])) % 16
|
||||
buf = buf[16-rem:]
|
||||
return buf
|
||||
}
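// Illustrative note (not part of the vendored source): the over-allocate-then-reslice trick above
// guarantees 16-byte alignment of &buf[0]. For example, if the backing array happens to start at an
// address with remainder 6 modulo 16, the slice is advanced by 16-6 = 10 bytes, so the returned
// buffer starts on a 16-byte boundary and still has at least `size` usable bytes.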
|
||||
|
||||
func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) {
|
||||
s := uint64(len(mem.Buffer))
|
||||
var b uint64
|
||||
if len(mem.Buffer) > 0 {
|
||||
b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0])))
|
||||
}
|
||||
binary.LittleEndian.PutUint64(opaque[offset:], b)
|
||||
binary.LittleEndian.PutUint64(opaque[offset+8:], s)
|
||||
}
|
||||
|
||||
func (m *moduleEngine) setupOpaque() {
|
||||
inst := m.module
|
||||
offsets := &m.parent.offsets
|
||||
opaque := m.opaque
|
||||
|
||||
binary.LittleEndian.PutUint64(opaque[offsets.ModuleInstanceOffset:],
|
||||
uint64(uintptr(unsafe.Pointer(m.module))),
|
||||
)
|
||||
|
||||
if lm := offsets.LocalMemoryBegin; lm >= 0 {
|
||||
putLocalMemory(opaque, lm, inst.MemoryInstance)
|
||||
}
|
||||
|
||||
// Note: imported memory is resolved in ResolveImportedFunction.
|
||||
|
||||
// Note: imported functions are resolved in ResolveImportedFunction.
|
||||
|
||||
if globalOffset := offsets.GlobalsBegin; globalOffset >= 0 {
|
||||
for i, g := range inst.Globals {
|
||||
if i < int(inst.Source.ImportGlobalCount) {
|
||||
importedME := g.Me.(*moduleEngine)
|
||||
offset := importedME.parent.offsets.GlobalInstanceOffset(g.Index)
|
||||
importedMEOpaque := importedME.opaque
|
||||
binary.LittleEndian.PutUint64(opaque[globalOffset:],
|
||||
uint64(uintptr(unsafe.Pointer(&importedMEOpaque[offset]))))
|
||||
} else {
|
||||
binary.LittleEndian.PutUint64(opaque[globalOffset:], g.Val)
|
||||
binary.LittleEndian.PutUint64(opaque[globalOffset+8:], g.ValHi)
|
||||
}
|
||||
globalOffset += 16
|
||||
}
|
||||
}
|
||||
|
||||
if tableOffset := offsets.TablesBegin; tableOffset >= 0 {
|
||||
// First we write the first element's address of typeIDs.
|
||||
if len(inst.TypeIDs) > 0 {
|
||||
binary.LittleEndian.PutUint64(opaque[offsets.TypeIDs1stElement:], uint64(uintptr(unsafe.Pointer(&inst.TypeIDs[0]))))
|
||||
}
|
||||
|
||||
// Then we write the table addresses.
|
||||
for _, table := range inst.Tables {
|
||||
binary.LittleEndian.PutUint64(opaque[tableOffset:], uint64(uintptr(unsafe.Pointer(table))))
|
||||
tableOffset += 8
|
||||
}
|
||||
}
|
||||
|
||||
if beforeListenerOffset := offsets.BeforeListenerTrampolines1stElement; beforeListenerOffset >= 0 {
|
||||
binary.LittleEndian.PutUint64(opaque[beforeListenerOffset:], uint64(uintptr(unsafe.Pointer(&m.parent.listenerBeforeTrampolines[0]))))
|
||||
}
|
||||
if afterListenerOffset := offsets.AfterListenerTrampolines1stElement; afterListenerOffset >= 0 {
|
||||
binary.LittleEndian.PutUint64(opaque[afterListenerOffset:], uint64(uintptr(unsafe.Pointer(&m.parent.listenerAfterTrampolines[0]))))
|
||||
}
|
||||
if len(inst.DataInstances) > 0 {
|
||||
binary.LittleEndian.PutUint64(opaque[offsets.DataInstances1stElement:], uint64(uintptr(unsafe.Pointer(&inst.DataInstances[0]))))
|
||||
}
|
||||
if len(inst.ElementInstances) > 0 {
|
||||
binary.LittleEndian.PutUint64(opaque[offsets.ElementInstances1stElement:], uint64(uintptr(unsafe.Pointer(&inst.ElementInstances[0]))))
|
||||
}
|
||||
}
|
||||
|
||||
// NewFunction implements wasm.ModuleEngine.
|
||||
func (m *moduleEngine) NewFunction(index wasm.Index) api.Function {
|
||||
if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
|
||||
panic("When PrintMachineCodeHexPerFunctionDisassemblable enabled, functions must not be called")
|
||||
}
|
||||
|
||||
localIndex := index
|
||||
if importedFnCount := m.module.Source.ImportFunctionCount; index < importedFnCount {
|
||||
imported := &m.importedFunctions[index]
|
||||
return imported.me.NewFunction(imported.indexInModule)
|
||||
} else {
|
||||
localIndex -= importedFnCount
|
||||
}
|
||||
|
||||
src := m.module.Source
|
||||
typIndex := src.FunctionSection[localIndex]
|
||||
typ := src.TypeSection[typIndex]
|
||||
sizeOfParamResultSlice := typ.ResultNumInUint64
|
||||
if ps := typ.ParamNumInUint64; ps > sizeOfParamResultSlice {
|
||||
sizeOfParamResultSlice = ps
|
||||
}
|
||||
p := m.parent
|
||||
offset := p.functionOffsets[localIndex]
|
||||
|
||||
ce := &callEngine{
|
||||
indexInModule: index,
|
||||
executable: &p.executable[offset],
|
||||
parent: m,
|
||||
preambleExecutable: &m.parent.entryPreambles[typIndex][0],
|
||||
sizeOfParamResultSlice: sizeOfParamResultSlice,
|
||||
requiredParams: typ.ParamNumInUint64,
|
||||
numberOfResults: typ.ResultNumInUint64,
|
||||
}
|
||||
|
||||
ce.execCtx.memoryGrowTrampolineAddress = &m.parent.sharedFunctions.memoryGrowExecutable[0]
|
||||
ce.execCtx.stackGrowCallTrampolineAddress = &m.parent.sharedFunctions.stackGrowExecutable[0]
|
||||
ce.execCtx.checkModuleExitCodeTrampolineAddress = &m.parent.sharedFunctions.checkModuleExitCode[0]
|
||||
ce.execCtx.tableGrowTrampolineAddress = &m.parent.sharedFunctions.tableGrowExecutable[0]
|
||||
ce.execCtx.refFuncTrampolineAddress = &m.parent.sharedFunctions.refFuncExecutable[0]
|
||||
ce.execCtx.memoryWait32TrampolineAddress = &m.parent.sharedFunctions.memoryWait32Executable[0]
|
||||
ce.execCtx.memoryWait64TrampolineAddress = &m.parent.sharedFunctions.memoryWait64Executable[0]
|
||||
ce.execCtx.memoryNotifyTrampolineAddress = &m.parent.sharedFunctions.memoryNotifyExecutable[0]
|
||||
ce.execCtx.memmoveAddress = memmovPtr
|
||||
ce.init()
|
||||
return ce
|
||||
}
|
||||
|
||||
// GetGlobalValue implements the same method as documented on wasm.ModuleEngine.
|
||||
func (m *moduleEngine) GetGlobalValue(i wasm.Index) (lo, hi uint64) {
|
||||
offset := m.parent.offsets.GlobalInstanceOffset(i)
|
||||
buf := m.opaque[offset:]
|
||||
if i < m.module.Source.ImportGlobalCount {
|
||||
panic("GetGlobalValue should not be called for imported globals")
|
||||
}
|
||||
return binary.LittleEndian.Uint64(buf), binary.LittleEndian.Uint64(buf[8:])
|
||||
}
|
||||
|
||||
// SetGlobalValue implements the same method as documented on wasm.ModuleEngine.
|
||||
func (m *moduleEngine) SetGlobalValue(i wasm.Index, lo, hi uint64) {
|
||||
offset := m.parent.offsets.GlobalInstanceOffset(i)
|
||||
buf := m.opaque[offset:]
|
||||
if i < m.module.Source.ImportGlobalCount {
|
||||
panic("GetGlobalValue should not be called for imported globals")
|
||||
}
|
||||
binary.LittleEndian.PutUint64(buf, lo)
|
||||
binary.LittleEndian.PutUint64(buf[8:], hi)
|
||||
}
|
||||
|
||||
// OwnsGlobals implements the same method as documented on wasm.ModuleEngine.
|
||||
func (m *moduleEngine) OwnsGlobals() bool { return true }
|
||||
|
||||
// ResolveImportedFunction implements wasm.ModuleEngine.
|
||||
func (m *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {
|
||||
executableOffset, moduleCtxOffset, typeIDOffset := m.parent.offsets.ImportedFunctionOffset(index)
|
||||
importedME := importedModuleEngine.(*moduleEngine)
|
||||
|
||||
if int(indexInImportedModule) >= len(importedME.importedFunctions) {
|
||||
indexInImportedModule -= wasm.Index(len(importedME.importedFunctions))
|
||||
} else {
|
||||
imported := &importedME.importedFunctions[indexInImportedModule]
|
||||
m.ResolveImportedFunction(index, imported.indexInModule, imported.me)
|
||||
return // Recursively resolve the imported function.
|
||||
}
|
||||
|
||||
offset := importedME.parent.functionOffsets[indexInImportedModule]
|
||||
typeID := getTypeIDOf(indexInImportedModule, importedME.module)
|
||||
executable := &importedME.parent.executable[offset]
|
||||
// Write functionInstance.
|
||||
binary.LittleEndian.PutUint64(m.opaque[executableOffset:], uint64(uintptr(unsafe.Pointer(executable))))
|
||||
binary.LittleEndian.PutUint64(m.opaque[moduleCtxOffset:], uint64(uintptr(unsafe.Pointer(importedME.opaquePtr))))
|
||||
binary.LittleEndian.PutUint64(m.opaque[typeIDOffset:], uint64(typeID))
|
||||
|
||||
// Write importedFunction so that it can be used by NewFunction.
|
||||
m.importedFunctions[index] = importedFunction{me: importedME, indexInModule: indexInImportedModule}
|
||||
}
|
||||
|
||||
func getTypeIDOf(funcIndex wasm.Index, m *wasm.ModuleInstance) wasm.FunctionTypeID {
|
||||
source := m.Source
|
||||
|
||||
var typeIndex wasm.Index
|
||||
if funcIndex >= source.ImportFunctionCount {
|
||||
funcIndex -= source.ImportFunctionCount
|
||||
typeIndex = source.FunctionSection[funcIndex]
|
||||
} else {
|
||||
var cnt wasm.Index
|
||||
for i := range source.ImportSection {
|
||||
if source.ImportSection[i].Type == wasm.ExternTypeFunc {
|
||||
if cnt == funcIndex {
|
||||
typeIndex = source.ImportSection[i].DescFunc
|
||||
break
|
||||
}
|
||||
cnt++
|
||||
}
|
||||
}
|
||||
}
|
||||
return m.TypeIDs[typeIndex]
|
||||
}
|
||||
|
||||
// ResolveImportedMemory implements wasm.ModuleEngine.
|
||||
func (m *moduleEngine) ResolveImportedMemory(importedModuleEngine wasm.ModuleEngine) {
|
||||
importedME := importedModuleEngine.(*moduleEngine)
|
||||
inst := importedME.module
|
||||
|
||||
var memInstPtr uint64
|
||||
var memOwnerOpaquePtr uint64
|
||||
if offs := importedME.parent.offsets; offs.ImportedMemoryBegin >= 0 {
|
||||
offset := offs.ImportedMemoryBegin
|
||||
memInstPtr = binary.LittleEndian.Uint64(importedME.opaque[offset:])
|
||||
memOwnerOpaquePtr = binary.LittleEndian.Uint64(importedME.opaque[offset+8:])
|
||||
} else {
|
||||
memInstPtr = uint64(uintptr(unsafe.Pointer(inst.MemoryInstance)))
|
||||
memOwnerOpaquePtr = uint64(uintptr(unsafe.Pointer(importedME.opaquePtr)))
|
||||
}
|
||||
offset := m.parent.offsets.ImportedMemoryBegin
|
||||
binary.LittleEndian.PutUint64(m.opaque[offset:], memInstPtr)
|
||||
binary.LittleEndian.PutUint64(m.opaque[offset+8:], memOwnerOpaquePtr)
|
||||
}
|
||||
|
||||
// DoneInstantiation implements wasm.ModuleEngine.
|
||||
func (m *moduleEngine) DoneInstantiation() {
|
||||
if !m.module.Source.IsHostModule {
|
||||
m.setupOpaque()
|
||||
}
|
||||
}
|
||||
|
||||
// FunctionInstanceReference implements wasm.ModuleEngine.
|
||||
func (m *moduleEngine) FunctionInstanceReference(funcIndex wasm.Index) wasm.Reference {
|
||||
if funcIndex < m.module.Source.ImportFunctionCount {
|
||||
begin, _, _ := m.parent.offsets.ImportedFunctionOffset(funcIndex)
|
||||
return uintptr(unsafe.Pointer(&m.opaque[begin]))
|
||||
}
|
||||
localIndex := funcIndex - m.module.Source.ImportFunctionCount
|
||||
p := m.parent
|
||||
executable := &p.executable[p.functionOffsets[localIndex]]
|
||||
typeID := m.module.TypeIDs[m.module.Source.FunctionSection[localIndex]]
|
||||
|
||||
lf := &functionInstance{
|
||||
executable: executable,
|
||||
moduleContextOpaquePtr: m.opaquePtr,
|
||||
typeID: typeID,
|
||||
indexInModule: funcIndex,
|
||||
}
|
||||
m.localFunctionInstances = append(m.localFunctionInstances, lf)
|
||||
return uintptr(unsafe.Pointer(lf))
|
||||
}
|
||||
|
||||
// LookupFunction implements wasm.ModuleEngine.
|
||||
func (m *moduleEngine) LookupFunction(t *wasm.TableInstance, typeId wasm.FunctionTypeID, tableOffset wasm.Index) (*wasm.ModuleInstance, wasm.Index) {
|
||||
if tableOffset >= uint32(len(t.References)) || t.Type != wasm.RefTypeFuncref {
|
||||
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
|
||||
}
|
||||
rawPtr := t.References[tableOffset]
|
||||
if rawPtr == 0 {
|
||||
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
|
||||
}
|
||||
|
||||
tf := wazevoapi.PtrFromUintptr[functionInstance](rawPtr)
|
||||
if tf.typeID != typeId {
|
||||
panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
|
||||
}
|
||||
return moduleInstanceFromOpaquePtr(tf.moduleContextOpaquePtr), tf.indexInModule
|
||||
}
|
||||
|
||||
func moduleInstanceFromOpaquePtr(ptr *byte) *wasm.ModuleInstance {
|
||||
return *(**wasm.ModuleInstance)(unsafe.Pointer(ptr))
|
||||
}
|
||||
11
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect.go
generated
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
//go:build !tinygo
|
||||
|
||||
package wazevo
|
||||
|
||||
import "reflect"
|
||||
|
||||
// setSliceLimits sets both Cap and Len for the given reflected slice.
|
||||
func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) {
|
||||
s.Len = int(l)
|
||||
s.Cap = int(c)
|
||||
}
|
||||
11
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/reflect_tinygo.go
generated
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
//go:build tinygo
|
||||
|
||||
package wazevo
|
||||
|
||||
import "reflect"
|
||||
|
||||
// setSliceLimits sets both Cap and Len for the given reflected slice.
|
||||
func setSliceLimits(s *reflect.SliceHeader, l, c uintptr) {
|
||||
s.Len = l
|
||||
s.Cap = c
|
||||
}
|
||||
407
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go
generated
vendored
Normal file
|
|
@ -0,0 +1,407 @@
|
|||
package ssa
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
// BasicBlock represents the Basic Block of an SSA function.
|
||||
// Each BasicBlock always ends with branching instructions (e.g. Branch, Return, etc.),
|
||||
// and at most two branches are allowed. If there are two branches, they are placed together at the end of the block.
|
||||
// In other words, there's no branching instruction in the middle of the block.
|
||||
//
|
||||
// Note: we use the "block argument" variant of SSA, instead of PHI functions. See the package level doc comments.
|
||||
//
|
||||
// Note: we use "parameter/param" as a placeholder which represents a variant of PHI, and "argument/arg" as an actual
|
||||
// Value passed to that "parameter/param".
|
||||
type BasicBlock interface {
|
||||
// ID returns the unique ID of this block.
|
||||
ID() BasicBlockID
|
||||
|
||||
// Name returns the unique string ID of this block. e.g. blk0, blk1, ...
|
||||
Name() string
|
||||
|
||||
// AddParam adds a parameter to the block whose type is specified by `t`.
|
||||
AddParam(b Builder, t Type) Value
|
||||
|
||||
// Params returns the number of parameters to this block.
|
||||
Params() int
|
||||
|
||||
// Param returns the Value which corresponds to the i-th parameter of this block.
|
||||
// The returned Value is the definition of the param in this block.
|
||||
Param(i int) Value
|
||||
|
||||
// InsertInstruction inserts an instruction that implements Value into the tail of this block.
|
||||
InsertInstruction(raw *Instruction)
|
||||
|
||||
// Root returns the root instruction of this block.
|
||||
Root() *Instruction
|
||||
|
||||
// Tail returns the tail instruction of this block.
|
||||
Tail() *Instruction
|
||||
|
||||
// EntryBlock returns true if this block represents the function entry.
|
||||
EntryBlock() bool
|
||||
|
||||
// ReturnBlock returns true if this block represents the function return.
|
||||
ReturnBlock() bool
|
||||
|
||||
// FormatHeader returns the debug string of this block, not including instruction.
|
||||
FormatHeader(b Builder) string
|
||||
|
||||
// Valid is true if this block is still valid even after optimizations.
|
||||
Valid() bool
|
||||
|
||||
// Sealed is true if this block has been sealed.
|
||||
Sealed() bool
|
||||
|
||||
// BeginPredIterator returns the first predecessor of this block.
|
||||
BeginPredIterator() BasicBlock
|
||||
|
||||
// NextPredIterator returns the next predecessor of this block.
|
||||
NextPredIterator() BasicBlock
|
||||
|
||||
// Preds returns the number of predecessors of this block.
|
||||
Preds() int
|
||||
|
||||
// Pred returns the i-th predecessor of this block.
|
||||
Pred(i int) BasicBlock
|
||||
|
||||
// Succs returns the number of successors of this block.
|
||||
Succs() int
|
||||
|
||||
// Succ returns the i-th successor of this block.
|
||||
Succ(i int) BasicBlock
|
||||
|
||||
// LoopHeader returns true if this block is a loop header.
|
||||
LoopHeader() bool
|
||||
|
||||
// LoopNestingForestChildren returns the children of this block in the loop nesting forest.
|
||||
LoopNestingForestChildren() []BasicBlock
|
||||
}
|
||||
|
||||
type (
|
||||
// basicBlock is a basic block in a SSA-transformed function.
|
||||
basicBlock struct {
|
||||
id BasicBlockID
|
||||
rootInstr, currentInstr *Instruction
|
||||
params []blockParam
|
||||
predIter int
|
||||
preds []basicBlockPredecessorInfo
|
||||
success []*basicBlock
|
||||
// singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called.
|
||||
singlePred *basicBlock
|
||||
// lastDefinitions maps Variable to its last definition in this block.
|
||||
lastDefinitions map[Variable]Value
|
||||
// unknownValues are used in builder.findValue. The usage is well-described in the paper.
|
||||
unknownValues []unknownValue
|
||||
// invalid is true if this block is made invalid during optimizations.
|
||||
invalid bool
|
||||
// sealed is true if this is sealed (all the predecessors are known).
|
||||
sealed bool
|
||||
// loopHeader is true if this block is a loop header:
|
||||
//
|
||||
// > A loop header (sometimes called the entry point of the loop) is a dominator that is the target
|
||||
// > of a loop-forming back edge. The loop header dominates all blocks in the loop body.
|
||||
// > A block may be a loop header for more than one loop. A loop may have multiple entry points,
|
||||
// > in which case it has no "loop header".
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Control-flow_graph for more details.
|
||||
//
|
||||
// This is modified during the subPassLoopDetection pass.
|
||||
loopHeader bool
|
||||
|
||||
// loopNestingForestChildren holds the children of this block in the loop nesting forest.
|
||||
// Non-empty if and only if this block is a loop header (i.e. loopHeader=true)
|
||||
loopNestingForestChildren []BasicBlock
|
||||
|
||||
// reversePostOrder is used to sort all the blocks in the function in reverse post order.
|
||||
// This is used in builder.LayoutBlocks.
|
||||
reversePostOrder int
|
||||
|
||||
// child and sibling are the ones in the dominator tree.
|
||||
child, sibling *basicBlock
|
||||
}
|
||||
// BasicBlockID is the unique ID of a basicBlock.
|
||||
BasicBlockID uint32
|
||||
|
||||
// blockParam implements Value and represents a parameter to a basicBlock.
|
||||
blockParam struct {
|
||||
// value is the Value that corresponds to the parameter in this block,
|
||||
// and can be considered as an output of PHI instruction in traditional SSA.
|
||||
value Value
|
||||
// typ is the type of the parameter.
|
||||
typ Type
|
||||
}
|
||||
|
||||
unknownValue struct {
|
||||
// variable is the variable that this unknownValue represents.
|
||||
variable Variable
|
||||
// value is the value that this unknownValue represents.
|
||||
value Value
|
||||
}
|
||||
)
|
||||
|
||||
const basicBlockIDReturnBlock = 0xffffffff
|
||||
|
||||
// Name implements BasicBlock.Name.
|
||||
func (bb *basicBlock) Name() string {
|
||||
if bb.id == basicBlockIDReturnBlock {
|
||||
return "blk_ret"
|
||||
} else {
|
||||
return fmt.Sprintf("blk%d", bb.id)
|
||||
}
|
||||
}
|
||||
|
||||
// String implements fmt.Stringer for debugging.
|
||||
func (bid BasicBlockID) String() string {
|
||||
if bid == basicBlockIDReturnBlock {
|
||||
return "blk_ret"
|
||||
} else {
|
||||
return fmt.Sprintf("blk%d", bid)
|
||||
}
|
||||
}
|
||||
|
||||
// ID implements BasicBlock.ID.
|
||||
func (bb *basicBlock) ID() BasicBlockID {
|
||||
return bb.id
|
||||
}
|
||||
|
||||
// basicBlockPredecessorInfo is the information of a predecessor of a basicBlock.
|
||||
// predecessor is determined by a pair of block and the branch instruction used to jump to the successor.
|
||||
type basicBlockPredecessorInfo struct {
|
||||
blk *basicBlock
|
||||
branch *Instruction
|
||||
}
|
||||
|
||||
// EntryBlock implements BasicBlock.EntryBlock.
|
||||
func (bb *basicBlock) EntryBlock() bool {
|
||||
return bb.id == 0
|
||||
}
|
||||
|
||||
// ReturnBlock implements BasicBlock.ReturnBlock.
|
||||
func (bb *basicBlock) ReturnBlock() bool {
|
||||
return bb.id == basicBlockIDReturnBlock
|
||||
}
|
||||
|
||||
// AddParam implements BasicBlock.AddParam.
|
||||
func (bb *basicBlock) AddParam(b Builder, typ Type) Value {
|
||||
paramValue := b.allocateValue(typ)
|
||||
bb.params = append(bb.params, blockParam{typ: typ, value: paramValue})
|
||||
return paramValue
|
||||
}
|
||||
|
||||
// addParamOn adds a parameter to this block whose value is already allocated.
|
||||
func (bb *basicBlock) addParamOn(typ Type, value Value) {
|
||||
bb.params = append(bb.params, blockParam{typ: typ, value: value})
|
||||
}
|
||||
|
||||
// Params implements BasicBlock.Params.
|
||||
func (bb *basicBlock) Params() int {
|
||||
return len(bb.params)
|
||||
}
|
||||
|
||||
// Param implements BasicBlock.Param.
|
||||
func (bb *basicBlock) Param(i int) Value {
|
||||
p := &bb.params[i]
|
||||
return p.value
|
||||
}
|
||||
|
||||
// Valid implements BasicBlock.Valid.
|
||||
func (bb *basicBlock) Valid() bool {
|
||||
return !bb.invalid
|
||||
}
|
||||
|
||||
// Sealed implements BasicBlock.Sealed.
|
||||
func (bb *basicBlock) Sealed() bool {
|
||||
return bb.sealed
|
||||
}
|
||||
|
||||
// InsertInstruction implements BasicBlock.InsertInstruction.
|
||||
func (bb *basicBlock) InsertInstruction(next *Instruction) {
|
||||
current := bb.currentInstr
|
||||
if current != nil {
|
||||
current.next = next
|
||||
next.prev = current
|
||||
} else {
|
||||
bb.rootInstr = next
|
||||
}
|
||||
bb.currentInstr = next
|
||||
|
||||
switch next.opcode {
|
||||
case OpcodeJump, OpcodeBrz, OpcodeBrnz:
|
||||
target := next.blk.(*basicBlock)
|
||||
target.addPred(bb, next)
|
||||
case OpcodeBrTable:
|
||||
for _, _target := range next.targets {
|
||||
target := _target.(*basicBlock)
|
||||
target.addPred(bb, next)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NumPreds implements BasicBlock.NumPreds.
|
||||
func (bb *basicBlock) NumPreds() int {
|
||||
return len(bb.preds)
|
||||
}
|
||||
|
||||
// BeginPredIterator implements BasicBlock.BeginPredIterator.
|
||||
func (bb *basicBlock) BeginPredIterator() BasicBlock {
|
||||
bb.predIter = 0
|
||||
return bb.NextPredIterator()
|
||||
}
|
||||
|
||||
// NextPredIterator implements BasicBlock.NextPredIterator.
|
||||
func (bb *basicBlock) NextPredIterator() BasicBlock {
|
||||
if bb.predIter >= len(bb.preds) {
|
||||
return nil
|
||||
}
|
||||
pred := bb.preds[bb.predIter].blk
|
||||
bb.predIter++
|
||||
return pred
|
||||
}
|
||||
|
||||
// Preds implements BasicBlock.Preds.
|
||||
func (bb *basicBlock) Preds() int {
|
||||
return len(bb.preds)
|
||||
}
|
||||
|
||||
// Pred implements BasicBlock.Pred.
|
||||
func (bb *basicBlock) Pred(i int) BasicBlock {
|
||||
return bb.preds[i].blk
|
||||
}
|
||||
|
||||
// Succs implements BasicBlock.Succs.
|
||||
func (bb *basicBlock) Succs() int {
|
||||
return len(bb.success)
|
||||
}
|
||||
|
||||
// Succ implements BasicBlock.Succ.
|
||||
func (bb *basicBlock) Succ(i int) BasicBlock {
|
||||
return bb.success[i]
|
||||
}
|
||||
|
||||
// Root implements BasicBlock.Root.
|
||||
func (bb *basicBlock) Root() *Instruction {
|
||||
return bb.rootInstr
|
||||
}
|
||||
|
||||
// Tail implements BasicBlock.Tail.
|
||||
func (bb *basicBlock) Tail() *Instruction {
|
||||
return bb.currentInstr
|
||||
}
|
||||
|
||||
// resetBasicBlock resets the given basicBlock to its initial state so that it can be reused for another function.
|
||||
func resetBasicBlock(bb *basicBlock) {
|
||||
bb.params = bb.params[:0]
|
||||
bb.rootInstr, bb.currentInstr = nil, nil
|
||||
bb.preds = bb.preds[:0]
|
||||
bb.success = bb.success[:0]
|
||||
bb.invalid, bb.sealed = false, false
|
||||
bb.singlePred = nil
|
||||
bb.unknownValues = bb.unknownValues[:0]
|
||||
bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions)
|
||||
bb.reversePostOrder = -1
|
||||
bb.loopNestingForestChildren = bb.loopNestingForestChildren[:0]
|
||||
bb.loopHeader = false
|
||||
bb.sibling = nil
|
||||
bb.child = nil
|
||||
}
|
||||
|
||||
// addPred adds a predecessor to this block specified by the branch instruction.
|
||||
func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) {
|
||||
if bb.sealed {
|
||||
panic("BUG: trying to add predecessor to a sealed block: " + bb.Name())
|
||||
}
|
||||
|
||||
pred := blk.(*basicBlock)
|
||||
for i := range bb.preds {
|
||||
existingPred := &bb.preds[i]
|
||||
if existingPred.blk == pred && existingPred.branch != branch {
// If the target is already added, then this must come from the same BrTable;
// otherwise such a redundant branch should have been eliminated by the frontend (where doing so is simpler).
panic(fmt.Sprintf("BUG: redundant non BrTable jumps in %s whose targets are the same", bb.Name()))
|
||||
}
|
||||
}
|
||||
|
||||
bb.preds = append(bb.preds, basicBlockPredecessorInfo{
|
||||
blk: pred,
|
||||
branch: branch,
|
||||
})
|
||||
|
||||
pred.success = append(pred.success, bb)
|
||||
}
|
||||
|
||||
// FormatHeader implements BasicBlock.FormatHeader.
|
||||
func (bb *basicBlock) FormatHeader(b Builder) string {
|
||||
ps := make([]string, len(bb.params))
|
||||
for i, p := range bb.params {
|
||||
ps[i] = p.value.formatWithType(b)
|
||||
}
|
||||
|
||||
if len(bb.preds) > 0 {
|
||||
preds := make([]string, 0, len(bb.preds))
|
||||
for _, pred := range bb.preds {
|
||||
if pred.blk.invalid {
|
||||
continue
|
||||
}
|
||||
preds = append(preds, fmt.Sprintf("blk%d", pred.blk.id))
|
||||
|
||||
}
|
||||
return fmt.Sprintf("blk%d: (%s) <-- (%s)",
|
||||
bb.id, strings.Join(ps, ","), strings.Join(preds, ","))
|
||||
} else {
|
||||
return fmt.Sprintf("blk%d: (%s)", bb.id, strings.Join(ps, ", "))
|
||||
}
|
||||
}
|
||||
|
||||
// validate validates the basicBlock for debugging purposes.
|
||||
func (bb *basicBlock) validate(b *builder) {
|
||||
if bb.invalid {
|
||||
panic("BUG: trying to validate an invalid block: " + bb.Name())
|
||||
}
|
||||
if len(bb.preds) > 0 {
|
||||
for _, pred := range bb.preds {
|
||||
if pred.branch.opcode != OpcodeBrTable {
|
||||
if target := pred.branch.blk; target != bb {
|
||||
panic(fmt.Sprintf("BUG: '%s' is not branch to %s, but to %s",
|
||||
pred.branch.Format(b), bb.Name(), target.Name()))
|
||||
}
|
||||
}
|
||||
|
||||
var exp int
|
||||
if bb.ReturnBlock() {
|
||||
exp = len(b.currentSignature.Results)
|
||||
} else {
|
||||
exp = len(bb.params)
|
||||
}
|
||||
|
||||
if len(pred.branch.vs.View()) != exp {
|
||||
panic(fmt.Sprintf(
|
||||
"BUG: len(argument at %s) != len(params at %s): %d != %d: %s",
|
||||
pred.blk.Name(), bb.Name(),
|
||||
len(pred.branch.vs.View()), len(bb.params), pred.branch.Format(b),
|
||||
))
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// String implements fmt.Stringer for debugging purpose only.
|
||||
func (bb *basicBlock) String() string {
|
||||
return strconv.Itoa(int(bb.id))
|
||||
}
|
||||
|
||||
// LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren.
|
||||
func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock {
|
||||
return bb.loopNestingForestChildren
|
||||
}
|
||||
|
||||
// LoopHeader implements BasicBlock.LoopHeader.
|
||||
func (bb *basicBlock) LoopHeader() bool {
|
||||
return bb.loopHeader
|
||||
}
|
||||
34
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go
generated
vendored
Normal file
34
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort.go
generated
vendored
Normal file
|
|
@@ -0,0 +1,34 @@
|
|||
//go:build go1.21

package ssa

import (
"slices"
)

func sortBlocks(blocks []*basicBlock) {
slices.SortFunc(blocks, func(i, j *basicBlock) int {
jIsReturn := j.ReturnBlock()
iIsReturn := i.ReturnBlock()
if iIsReturn && jIsReturn {
return 0
}
if jIsReturn {
return 1
}
if iIsReturn {
return -1
}
iRoot, jRoot := i.rootInstr, j.rootInstr
if iRoot == nil && jRoot == nil { // For testing.
return 0
}
if jRoot == nil {
return 1
}
if iRoot == nil {
return -1
}
return i.rootInstr.id - j.rootInstr.id
})
}
|
||||
24
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go
generated
vendored
Normal file
24
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block_sort_old.go
generated
vendored
Normal file
|
|
@@ -0,0 +1,24 @@
|
|||
//go:build !go1.21

// TODO: delete after the floor Go version is 1.21

package ssa

import "sort"

func sortBlocks(blocks []*basicBlock) {
sort.SliceStable(blocks, func(i, j int) bool {
iBlk, jBlk := blocks[i], blocks[j]
if jBlk.ReturnBlock() {
return true
}
if iBlk.ReturnBlock() {
return false
}
iRoot, jRoot := iBlk.rootInstr, jBlk.rootInstr
if iRoot == nil || jRoot == nil { // For testing.
return true
}
return iBlk.rootInstr.id < jBlk.rootInstr.id
})
}
|
||||
731
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go
generated
vendored
Normal file
731
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go
generated
vendored
Normal file
|
|
@@ -0,0 +1,731 @@
|
|||
package ssa
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
// Builder is used to build SSA consisting of Basic Blocks per function.
|
||||
type Builder interface {
|
||||
// Init must be called to reuse this builder for the next function.
|
||||
Init(typ *Signature)
|
||||
|
||||
// Signature returns the Signature of the currently-compiled function.
|
||||
Signature() *Signature
|
||||
|
||||
// BlockIDMax returns the maximum value of BasicBlockID existing in the currently-compiled function.
|
||||
BlockIDMax() BasicBlockID
|
||||
|
||||
// AllocateBasicBlock creates a basic block in SSA function.
|
||||
AllocateBasicBlock() BasicBlock
|
||||
|
||||
// CurrentBlock returns the currently handled BasicBlock which is set by the latest call to SetCurrentBlock.
|
||||
CurrentBlock() BasicBlock
|
||||
|
||||
// EntryBlock returns the entry BasicBlock of the currently-compiled function.
|
||||
EntryBlock() BasicBlock
|
||||
|
||||
// SetCurrentBlock sets the instruction insertion target to the BasicBlock `b`.
|
||||
SetCurrentBlock(b BasicBlock)
|
||||
|
||||
// DeclareVariable declares a Variable of the given Type.
|
||||
DeclareVariable(Type) Variable
|
||||
|
||||
// DefineVariable defines a variable in the `block` with value.
|
||||
// The defining instruction will be inserted into the `block`.
|
||||
DefineVariable(variable Variable, value Value, block BasicBlock)
|
||||
|
||||
// DefineVariableInCurrentBB is the same as DefineVariable except the definition is
|
||||
// inserted into the current BasicBlock. Alias to DefineVariable(x, y, CurrentBlock()).
|
||||
DefineVariableInCurrentBB(variable Variable, value Value)
|
||||
|
||||
// AllocateInstruction returns a new Instruction.
|
||||
AllocateInstruction() *Instruction
|
||||
|
||||
// InsertInstruction executes BasicBlock.InsertInstruction for the currently handled basic block.
|
||||
InsertInstruction(raw *Instruction)
|
||||
|
||||
// allocateValue allocates an unused Value.
|
||||
allocateValue(typ Type) Value
|
||||
|
||||
// MustFindValue searches the latest definition of the given Variable and returns the result.
|
||||
MustFindValue(variable Variable) Value
|
||||
|
||||
// MustFindValueInBlk is the same as MustFindValue except it searches the latest definition from the given BasicBlock.
|
||||
MustFindValueInBlk(variable Variable, blk BasicBlock) Value
|
||||
|
||||
// FindValueInLinearPath tries to find the latest definition of the given Variable in the linear path to the current BasicBlock.
|
||||
// If it cannot find the definition, or it's not sealed yet, it returns ValueInvalid.
|
||||
FindValueInLinearPath(variable Variable) Value
|
||||
|
||||
// Seal declares that all the predecessors of this block are known and have been added via AddPred.
// After calling this, AddPred will be forbidden.
|
||||
Seal(blk BasicBlock)
|
||||
|
||||
// AnnotateValue is for debugging purpose.
|
||||
AnnotateValue(value Value, annotation string)
|
||||
|
||||
// DeclareSignature appends the *Signature to be referenced by various instructions (e.g. OpcodeCall).
|
||||
DeclareSignature(signature *Signature)
|
||||
|
||||
// Signatures returns the slice of declared Signatures.
|
||||
Signatures() []*Signature
|
||||
|
||||
// ResolveSignature returns the Signature which corresponds to SignatureID.
|
||||
ResolveSignature(id SignatureID) *Signature
|
||||
|
||||
// RunPasses runs various passes on the constructed SSA function.
|
||||
RunPasses()
|
||||
|
||||
// Format returns the debugging string of the SSA function.
|
||||
Format() string
|
||||
|
||||
// BlockIteratorBegin initializes the state to iterate over all the valid BasicBlock(s) compiled.
|
||||
// Combined with BlockIteratorNext, we can use this like:
|
||||
//
|
||||
// for blk := builder.BlockIteratorBegin(); blk != nil; blk = builder.BlockIteratorNext() {
|
||||
// // ...
|
||||
// }
|
||||
//
|
||||
// The returned blocks are ordered in the order of AllocateBasicBlock being called.
|
||||
BlockIteratorBegin() BasicBlock
|
||||
|
||||
// BlockIteratorNext advances the state for iteration initialized by BlockIteratorBegin.
|
||||
// Returns nil if there's no unseen BasicBlock.
|
||||
BlockIteratorNext() BasicBlock
|
||||
|
||||
// ValueRefCounts returns the reference count of each Value, indexed by ValueID.
|
||||
// The returned slice must not be modified.
|
||||
ValueRefCounts() []int
|
||||
|
||||
// BlockIteratorReversePostOrderBegin is almost the same as BlockIteratorBegin except it returns the BasicBlock in the reverse post-order.
|
||||
// This is available after RunPasses is run.
|
||||
BlockIteratorReversePostOrderBegin() BasicBlock
|
||||
|
||||
// BlockIteratorReversePostOrderNext is almost the same as BlockIteratorNext except it returns the BasicBlock in the reverse post-order.
|
||||
// This is available after RunPasses is run.
|
||||
BlockIteratorReversePostOrderNext() BasicBlock
|
||||
|
||||
// ReturnBlock returns the BasicBlock which is used to return from the function.
|
||||
ReturnBlock() BasicBlock
|
||||
|
||||
// InsertUndefined inserts an undefined instruction at the current position.
|
||||
InsertUndefined()
|
||||
|
||||
// SetCurrentSourceOffset sets the current source offset. The incoming instruction will be annotated with this offset.
|
||||
SetCurrentSourceOffset(line SourceOffset)
|
||||
|
||||
// LoopNestingForestRoots returns the roots of the loop nesting forest.
|
||||
LoopNestingForestRoots() []BasicBlock
|
||||
|
||||
// LowestCommonAncestor returns the lowest common ancestor in the dominator tree of the given BasicBlock(s).
|
||||
LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock
|
||||
|
||||
// Idom returns the immediate dominator of the given BasicBlock.
|
||||
Idom(blk BasicBlock) BasicBlock
|
||||
|
||||
VarLengthPool() *wazevoapi.VarLengthPool[Value]
|
||||
}
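
// A minimal sketch of the intended call sequence for the Builder above. The Signature and the
// variable's Type are assumed to be supplied by the frontend, and the instruction bodies are
// elided because they rely on the opcode-specific helpers defined on Instruction elsewhere in
// this package.
func buildFunctionSketch(b Builder, sig *Signature, varType Type) {
	b.Init(sig)                     // reset the builder for this function
	entry := b.AllocateBasicBlock() // the first allocated block becomes the entry block
	b.SetCurrentBlock(entry)
	v := b.DeclareVariable(varType) // declare an SSA variable of the given type
	_ = v
	// Allocate instructions via AllocateInstruction and insert them with InsertInstruction,
	// defining v with DefineVariableInCurrentBB. Once all predecessors of a block have been
	// added, call Seal(blk); after every block is terminated, RunPasses optimizes and lays out
	// the blocks and Format dumps the result for debugging.
}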
|
||||
|
||||
// NewBuilder returns a new Builder implementation.
|
||||
func NewBuilder() Builder {
|
||||
return &builder{
|
||||
instructionsPool: wazevoapi.NewPool[Instruction](resetInstruction),
|
||||
basicBlocksPool: wazevoapi.NewPool[basicBlock](resetBasicBlock),
|
||||
varLengthPool: wazevoapi.NewVarLengthPool[Value](),
|
||||
valueAnnotations: make(map[ValueID]string),
|
||||
signatures: make(map[SignatureID]*Signature),
|
||||
blkVisited: make(map[*basicBlock]int),
|
||||
valueIDAliases: make(map[ValueID]Value),
|
||||
redundantParameterIndexToValue: make(map[int]Value),
|
||||
returnBlk: &basicBlock{id: basicBlockIDReturnBlock},
|
||||
}
|
||||
}
|
||||
|
||||
// builder implements Builder interface.
|
||||
type builder struct {
|
||||
basicBlocksPool wazevoapi.Pool[basicBlock]
|
||||
instructionsPool wazevoapi.Pool[Instruction]
|
||||
varLengthPool wazevoapi.VarLengthPool[Value]
|
||||
signatures map[SignatureID]*Signature
|
||||
currentSignature *Signature
|
||||
|
||||
// reversePostOrderedBasicBlocks are the BasicBlock(s) ordered in the reverse post-order after passCalculateImmediateDominators.
|
||||
reversePostOrderedBasicBlocks []*basicBlock
|
||||
currentBB *basicBlock
|
||||
returnBlk *basicBlock
|
||||
|
||||
// variables tracks the Type of each Variable, indexed by the Variable itself.
|
||||
variables []Type
|
||||
// nextValueID is used by builder.AllocateValue.
|
||||
nextValueID ValueID
|
||||
// nextVariable is used by builder.AllocateVariable.
|
||||
nextVariable Variable
|
||||
|
||||
valueIDAliases map[ValueID]Value
|
||||
valueAnnotations map[ValueID]string
|
||||
|
||||
// valueRefCounts is used to lower the SSA in backend, and will be calculated
|
||||
// by the last SSA-level optimization pass.
|
||||
valueRefCounts []int
|
||||
|
||||
// dominators stores the immediate dominator of each BasicBlock.
|
||||
// The index is blockID of the BasicBlock.
|
||||
dominators []*basicBlock
|
||||
sparseTree dominatorSparseTree
|
||||
|
||||
// loopNestingForestRoots are the roots of the loop nesting forest.
|
||||
loopNestingForestRoots []BasicBlock
|
||||
|
||||
// The following fields are used for optimization passes/deterministic compilation.
|
||||
instStack []*Instruction
|
||||
blkVisited map[*basicBlock]int
|
||||
valueIDToInstruction []*Instruction
|
||||
blkStack []*basicBlock
|
||||
blkStack2 []*basicBlock
|
||||
ints []int
|
||||
redundantParameterIndexToValue map[int]Value
|
||||
|
||||
// blockIterCur is used to implement blockIteratorBegin and blockIteratorNext.
|
||||
blockIterCur int
|
||||
|
||||
// donePreBlockLayoutPasses is true if all the passes before LayoutBlocks are called.
|
||||
donePreBlockLayoutPasses bool
|
||||
// doneBlockLayout is true if LayoutBlocks is called.
|
||||
doneBlockLayout bool
|
||||
// donePostBlockLayoutPasses is true if all the passes after LayoutBlocks are called.
|
||||
donePostBlockLayoutPasses bool
|
||||
|
||||
currentSourceOffset SourceOffset
|
||||
}
|
||||
|
||||
func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] {
|
||||
return &b.varLengthPool
|
||||
}
|
||||
|
||||
// ReturnBlock implements Builder.ReturnBlock.
|
||||
func (b *builder) ReturnBlock() BasicBlock {
|
||||
return b.returnBlk
|
||||
}
|
||||
|
||||
// Init implements Builder.Init.
|
||||
func (b *builder) Init(s *Signature) {
|
||||
b.nextVariable = 0
|
||||
b.currentSignature = s
|
||||
resetBasicBlock(b.returnBlk)
|
||||
b.instructionsPool.Reset()
|
||||
b.basicBlocksPool.Reset()
|
||||
b.varLengthPool.Reset()
|
||||
b.donePreBlockLayoutPasses = false
|
||||
b.doneBlockLayout = false
|
||||
b.donePostBlockLayoutPasses = false
|
||||
for _, sig := range b.signatures {
|
||||
sig.used = false
|
||||
}
|
||||
|
||||
b.ints = b.ints[:0]
|
||||
b.blkStack = b.blkStack[:0]
|
||||
b.blkStack2 = b.blkStack2[:0]
|
||||
b.dominators = b.dominators[:0]
|
||||
b.loopNestingForestRoots = b.loopNestingForestRoots[:0]
|
||||
|
||||
for i := 0; i < b.basicBlocksPool.Allocated(); i++ {
|
||||
blk := b.basicBlocksPool.View(i)
|
||||
delete(b.blkVisited, blk)
|
||||
}
|
||||
b.basicBlocksPool.Reset()
|
||||
|
||||
for v := ValueID(0); v < b.nextValueID; v++ {
|
||||
delete(b.valueAnnotations, v)
|
||||
delete(b.valueIDAliases, v)
|
||||
b.valueRefCounts[v] = 0
|
||||
b.valueIDToInstruction[v] = nil
|
||||
}
|
||||
b.nextValueID = 0
|
||||
b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0]
|
||||
b.doneBlockLayout = false
|
||||
for i := range b.valueRefCounts {
|
||||
b.valueRefCounts[i] = 0
|
||||
}
|
||||
|
||||
b.currentSourceOffset = sourceOffsetUnknown
|
||||
}
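
// A short sketch of the reuse pattern that Init enables: a single Builder is allocated up front
// and re-initialized per function, so the pools and maps above are recycled instead of being
// reallocated. The sigs slice and compileFn callback are assumptions for illustration only.
func reuseSketch(sigs []*Signature, compileFn func(Builder)) {
	b := NewBuilder()
	for _, sig := range sigs {
		b.Init(sig) // resets pools, visited maps, and value IDs for the next function
		compileFn(b)
	}
}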
|
||||
|
||||
// Signature implements Builder.Signature.
|
||||
func (b *builder) Signature() *Signature {
|
||||
return b.currentSignature
|
||||
}
|
||||
|
||||
// AnnotateValue implements Builder.AnnotateValue.
|
||||
func (b *builder) AnnotateValue(value Value, a string) {
|
||||
b.valueAnnotations[value.ID()] = a
|
||||
}
|
||||
|
||||
// AllocateInstruction implements Builder.AllocateInstruction.
|
||||
func (b *builder) AllocateInstruction() *Instruction {
|
||||
instr := b.instructionsPool.Allocate()
|
||||
instr.id = b.instructionsPool.Allocated()
|
||||
return instr
|
||||
}
|
||||
|
||||
// DeclareSignature implements Builder.DeclareSignature.
|
||||
func (b *builder) DeclareSignature(s *Signature) {
|
||||
b.signatures[s.ID] = s
|
||||
s.used = false
|
||||
}
|
||||
|
||||
// Signatures implements Builder.Signatures.
|
||||
func (b *builder) Signatures() (ret []*Signature) {
|
||||
for _, sig := range b.signatures {
|
||||
ret = append(ret, sig)
|
||||
}
|
||||
sort.Slice(ret, func(i, j int) bool {
|
||||
return ret[i].ID < ret[j].ID
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// SetCurrentSourceOffset implements Builder.SetCurrentSourceOffset.
|
||||
func (b *builder) SetCurrentSourceOffset(l SourceOffset) {
|
||||
b.currentSourceOffset = l
|
||||
}
|
||||
|
||||
func (b *builder) usedSignatures() (ret []*Signature) {
|
||||
for _, sig := range b.signatures {
|
||||
if sig.used {
|
||||
ret = append(ret, sig)
|
||||
}
|
||||
}
|
||||
sort.Slice(ret, func(i, j int) bool {
|
||||
return ret[i].ID < ret[j].ID
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// ResolveSignature implements Builder.ResolveSignature.
|
||||
func (b *builder) ResolveSignature(id SignatureID) *Signature {
|
||||
return b.signatures[id]
|
||||
}
|
||||
|
||||
// AllocateBasicBlock implements Builder.AllocateBasicBlock.
|
||||
func (b *builder) AllocateBasicBlock() BasicBlock {
|
||||
return b.allocateBasicBlock()
|
||||
}
|
||||
|
||||
// allocateBasicBlock allocates a new basicBlock.
|
||||
func (b *builder) allocateBasicBlock() *basicBlock {
|
||||
id := BasicBlockID(b.basicBlocksPool.Allocated())
|
||||
blk := b.basicBlocksPool.Allocate()
|
||||
blk.id = id
|
||||
return blk
|
||||
}
|
||||
|
||||
// Idom implements Builder.Idom.
|
||||
func (b *builder) Idom(blk BasicBlock) BasicBlock {
|
||||
return b.dominators[blk.ID()]
|
||||
}
|
||||
|
||||
// InsertInstruction implements Builder.InsertInstruction.
|
||||
func (b *builder) InsertInstruction(instr *Instruction) {
|
||||
b.currentBB.InsertInstruction(instr)
|
||||
|
||||
if l := b.currentSourceOffset; l.Valid() {
|
||||
// Emit the source offset info only when the instruction has a side effect, because
// these are the only instructions that are accessed by stack unwinding.
// This significantly reduces the amount of offset info in the binary.
|
||||
if instr.sideEffect() != sideEffectNone {
|
||||
instr.annotateSourceOffset(l)
|
||||
}
|
||||
}
|
||||
|
||||
resultTypesFn := instructionReturnTypes[instr.opcode]
|
||||
if resultTypesFn == nil {
|
||||
panic("TODO: " + instr.Format(b))
|
||||
}
|
||||
|
||||
t1, ts := resultTypesFn(b, instr)
|
||||
if t1.invalid() {
|
||||
return
|
||||
}
|
||||
|
||||
r1 := b.allocateValue(t1)
|
||||
instr.rValue = r1
|
||||
|
||||
tsl := len(ts)
|
||||
if tsl == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
rValues := b.varLengthPool.Allocate(tsl)
|
||||
for i := 0; i < tsl; i++ {
|
||||
rValues = rValues.Append(&b.varLengthPool, b.allocateValue(ts[i]))
|
||||
}
|
||||
instr.rValues = rValues
|
||||
}
|
||||
|
||||
// DefineVariable implements Builder.DefineVariable.
|
||||
func (b *builder) DefineVariable(variable Variable, value Value, block BasicBlock) {
|
||||
if b.variables[variable].invalid() {
|
||||
panic("BUG: trying to define variable " + variable.String() + " but is not declared yet")
|
||||
}
|
||||
|
||||
if b.variables[variable] != value.Type() {
|
||||
panic(fmt.Sprintf("BUG: inconsistent type for variable %d: expected %s but got %s", variable, b.variables[variable], value.Type()))
|
||||
}
|
||||
bb := block.(*basicBlock)
|
||||
bb.lastDefinitions[variable] = value
|
||||
}
|
||||
|
||||
// DefineVariableInCurrentBB implements Builder.DefineVariableInCurrentBB.
|
||||
func (b *builder) DefineVariableInCurrentBB(variable Variable, value Value) {
|
||||
b.DefineVariable(variable, value, b.currentBB)
|
||||
}
|
||||
|
||||
// SetCurrentBlock implements Builder.SetCurrentBlock.
|
||||
func (b *builder) SetCurrentBlock(bb BasicBlock) {
|
||||
b.currentBB = bb.(*basicBlock)
|
||||
}
|
||||
|
||||
// CurrentBlock implements Builder.CurrentBlock.
|
||||
func (b *builder) CurrentBlock() BasicBlock {
|
||||
return b.currentBB
|
||||
}
|
||||
|
||||
// EntryBlock implements Builder.EntryBlock.
|
||||
func (b *builder) EntryBlock() BasicBlock {
|
||||
return b.entryBlk()
|
||||
}
|
||||
|
||||
// DeclareVariable implements Builder.DeclareVariable.
|
||||
func (b *builder) DeclareVariable(typ Type) Variable {
|
||||
v := b.allocateVariable()
|
||||
iv := int(v)
|
||||
if l := len(b.variables); l <= iv {
|
||||
b.variables = append(b.variables, make([]Type, 2*(l+1))...)
|
||||
}
|
||||
b.variables[v] = typ
|
||||
return v
|
||||
}
|
||||
|
||||
// allocateVariable allocates a new variable.
|
||||
func (b *builder) allocateVariable() (ret Variable) {
|
||||
ret = b.nextVariable
|
||||
b.nextVariable++
|
||||
return
|
||||
}
|
||||
|
||||
// allocateValue implements Builder.AllocateValue.
|
||||
func (b *builder) allocateValue(typ Type) (v Value) {
|
||||
v = Value(b.nextValueID)
|
||||
v = v.setType(typ)
|
||||
b.nextValueID++
|
||||
return
|
||||
}
|
||||
|
||||
// FindValueInLinearPath implements Builder.FindValueInLinearPath.
|
||||
func (b *builder) FindValueInLinearPath(variable Variable) Value {
|
||||
return b.findValueInLinearPath(variable, b.currentBB)
|
||||
}
|
||||
|
||||
func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Value {
|
||||
if val, ok := blk.lastDefinitions[variable]; ok {
|
||||
return val
|
||||
} else if !blk.sealed {
|
||||
return ValueInvalid
|
||||
}
|
||||
|
||||
if pred := blk.singlePred; pred != nil {
|
||||
// If this block is sealed and has only one predecessor,
// we can use the value in that block without ambiguity about its definition.
|
||||
return b.findValueInLinearPath(variable, pred)
|
||||
}
|
||||
if len(blk.preds) == 1 {
|
||||
panic("BUG")
|
||||
}
|
||||
return ValueInvalid
|
||||
}
|
||||
|
||||
func (b *builder) MustFindValueInBlk(variable Variable, blk BasicBlock) Value {
|
||||
typ := b.definedVariableType(variable)
|
||||
return b.findValue(typ, variable, blk.(*basicBlock))
|
||||
}
|
||||
|
||||
// MustFindValue implements Builder.MustFindValue.
|
||||
func (b *builder) MustFindValue(variable Variable) Value {
|
||||
typ := b.definedVariableType(variable)
|
||||
return b.findValue(typ, variable, b.currentBB)
|
||||
}
|
||||
|
||||
// findValue recursively tries to find the latest definition of a `variable`. The algorithm is described in
|
||||
// the section 2 of the paper https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf.
|
||||
//
|
||||
// TODO: reimplement this iteratively, not recursively, to avoid stack overflow.
|
||||
func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value {
|
||||
if val, ok := blk.lastDefinitions[variable]; ok {
|
||||
// The value is already defined in this block!
|
||||
return val
|
||||
} else if !blk.sealed { // Incomplete CFG as in the paper.
|
||||
// If this block is not sealed yet, it might gain additional unknown predecessors later on.
// So we temporarily define a placeholder value here (not yet added as a parameter),
// and record it as unknown.
// The unknown values are resolved when this block is sealed via Seal().
|
||||
value := b.allocateValue(typ)
|
||||
if wazevoapi.SSALoggingEnabled {
|
||||
fmt.Printf("adding unknown value placeholder for %s at %d\n", variable, blk.id)
|
||||
}
|
||||
blk.lastDefinitions[variable] = value
|
||||
blk.unknownValues = append(blk.unknownValues, unknownValue{
|
||||
variable: variable,
|
||||
value: value,
|
||||
})
|
||||
return value
|
||||
}
|
||||
|
||||
if pred := blk.singlePred; pred != nil {
|
||||
// If this block is sealed and has only one predecessor,
// we can use the value in that block without ambiguity about its definition.
|
||||
return b.findValue(typ, variable, pred)
|
||||
} else if len(blk.preds) == 0 {
|
||||
panic("BUG: value is not defined for " + variable.String())
|
||||
}
|
||||
|
||||
// If this block has multiple predecessors, we have to gather the definitions,
|
||||
// and treat them as an argument to this block.
|
||||
//
|
||||
// The first thing is to define a new parameter to this block which may or may not be redundant, but
|
||||
// later we eliminate trivial params in an optimization pass. This must be done before finding the
|
||||
// definitions in the predecessors so that we can break the cycle.
|
||||
paramValue := blk.AddParam(b, typ)
|
||||
b.DefineVariable(variable, paramValue, blk)
|
||||
|
||||
// After the new param is added, we have to manipulate the original branching instructions
|
||||
// in predecessors so that they would pass the definition of `variable` as the argument to
|
||||
// the newly added PHI.
|
||||
for i := range blk.preds {
|
||||
pred := &blk.preds[i]
|
||||
value := b.findValue(typ, variable, pred.blk)
|
||||
pred.branch.addArgumentBranchInst(b, value)
|
||||
}
|
||||
return paramValue
|
||||
}
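
// An illustrative sketch of the multi-predecessor case above. Assumptions: x has been defined
// with a different Value in each of join's predecessors, both predecessors branch to join, and
// join is already sealed. Looking x up at the join then makes findValue add a block parameter
// (the PHI) and pass each predecessor's definition as a branch argument, as described in the
// paper referenced above.
func joinLookupSketch(b Builder, x Variable, join BasicBlock) Value {
	b.SetCurrentBlock(join)
	return b.MustFindValue(x) // returns join's newly added parameter Value
}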
|
||||
|
||||
// Seal implements Builder.Seal.
|
||||
func (b *builder) Seal(raw BasicBlock) {
|
||||
blk := raw.(*basicBlock)
|
||||
if len(blk.preds) == 1 {
|
||||
blk.singlePred = blk.preds[0].blk
|
||||
}
|
||||
blk.sealed = true
|
||||
|
||||
for _, v := range blk.unknownValues {
|
||||
variable, phiValue := v.variable, v.value
|
||||
typ := b.definedVariableType(variable)
|
||||
blk.addParamOn(typ, phiValue)
|
||||
for i := range blk.preds {
|
||||
pred := &blk.preds[i]
|
||||
predValue := b.findValue(typ, variable, pred.blk)
|
||||
if !predValue.Valid() {
|
||||
panic("BUG: value is not defined anywhere in the predecessors in the CFG")
|
||||
}
|
||||
pred.branch.addArgumentBranchInst(b, predValue)
|
||||
}
|
||||
}
|
||||
}
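
// For example (a sketch of the sealing discipline, not a statement about any particular frontend):
// a loop header cannot be sealed while its back-edge predecessor is still being built. Looking a
// variable up in the unsealed header creates a placeholder recorded in unknownValues (see findValue
// above); once the back edge has been added and Seal is called, each placeholder becomes a real
// block parameter and every predecessor's branch receives the matching argument.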
|
||||
|
||||
// definedVariableType returns the type of the given variable. If the variable is not defined yet, it panics.
|
||||
func (b *builder) definedVariableType(variable Variable) Type {
|
||||
typ := b.variables[variable]
|
||||
if typ.invalid() {
|
||||
panic(fmt.Sprintf("%s is not defined yet", variable))
|
||||
}
|
||||
return typ
|
||||
}
|
||||
|
||||
// Format implements Builder.Format.
|
||||
func (b *builder) Format() string {
|
||||
str := strings.Builder{}
|
||||
usedSigs := b.usedSignatures()
|
||||
if len(usedSigs) > 0 {
|
||||
str.WriteByte('\n')
|
||||
str.WriteString("signatures:\n")
|
||||
for _, sig := range usedSigs {
|
||||
str.WriteByte('\t')
|
||||
str.WriteString(sig.String())
|
||||
str.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
|
||||
var iterBegin, iterNext func() *basicBlock
|
||||
if b.doneBlockLayout {
|
||||
iterBegin, iterNext = b.blockIteratorReversePostOrderBegin, b.blockIteratorReversePostOrderNext
|
||||
} else {
|
||||
iterBegin, iterNext = b.blockIteratorBegin, b.blockIteratorNext
|
||||
}
|
||||
for bb := iterBegin(); bb != nil; bb = iterNext() {
|
||||
str.WriteByte('\n')
|
||||
str.WriteString(bb.FormatHeader(b))
|
||||
str.WriteByte('\n')
|
||||
|
||||
for cur := bb.Root(); cur != nil; cur = cur.Next() {
|
||||
str.WriteByte('\t')
|
||||
str.WriteString(cur.Format(b))
|
||||
str.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
return str.String()
|
||||
}
|
||||
|
||||
// BlockIteratorNext implements Builder.BlockIteratorNext.
|
||||
func (b *builder) BlockIteratorNext() BasicBlock {
|
||||
if blk := b.blockIteratorNext(); blk == nil {
|
||||
return nil // BasicBlock((*basicBlock)(nil)) != BasicBlock(nil)
|
||||
} else {
|
||||
return blk
|
||||
}
|
||||
}
|
||||
|
||||
// BlockIteratorNext implements Builder.BlockIteratorNext.
|
||||
func (b *builder) blockIteratorNext() *basicBlock {
|
||||
index := b.blockIterCur
|
||||
for {
|
||||
if index == b.basicBlocksPool.Allocated() {
|
||||
return nil
|
||||
}
|
||||
ret := b.basicBlocksPool.View(index)
|
||||
index++
|
||||
if !ret.invalid {
|
||||
b.blockIterCur = index
|
||||
return ret
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BlockIteratorBegin implements Builder.BlockIteratorBegin.
|
||||
func (b *builder) BlockIteratorBegin() BasicBlock {
|
||||
return b.blockIteratorBegin()
|
||||
}
|
||||
|
||||
// BlockIteratorBegin implements Builder.BlockIteratorBegin.
|
||||
func (b *builder) blockIteratorBegin() *basicBlock {
|
||||
b.blockIterCur = 0
|
||||
return b.blockIteratorNext()
|
||||
}
|
||||
|
||||
// BlockIteratorReversePostOrderBegin implements Builder.BlockIteratorReversePostOrderBegin.
|
||||
func (b *builder) BlockIteratorReversePostOrderBegin() BasicBlock {
|
||||
return b.blockIteratorReversePostOrderBegin()
|
||||
}
|
||||
|
||||
// blockIteratorReversePostOrderBegin implements Builder.BlockIteratorReversePostOrderBegin.
|
||||
func (b *builder) blockIteratorReversePostOrderBegin() *basicBlock {
|
||||
b.blockIterCur = 0
|
||||
return b.blockIteratorReversePostOrderNext()
|
||||
}
|
||||
|
||||
// BlockIteratorReversePostOrderNext implements Builder.BlockIteratorReversePostOrderNext.
|
||||
func (b *builder) BlockIteratorReversePostOrderNext() BasicBlock {
|
||||
if blk := b.blockIteratorReversePostOrderNext(); blk == nil {
|
||||
return nil // BasicBlock((*basicBlock)(nil)) != BasicBlock(nil)
|
||||
} else {
|
||||
return blk
|
||||
}
|
||||
}
|
||||
|
||||
// blockIteratorReversePostOrderNext implements Builder.BlockIteratorReversePostOrderNext.
|
||||
func (b *builder) blockIteratorReversePostOrderNext() *basicBlock {
|
||||
if b.blockIterCur >= len(b.reversePostOrderedBasicBlocks) {
|
||||
return nil
|
||||
} else {
|
||||
ret := b.reversePostOrderedBasicBlocks[b.blockIterCur]
|
||||
b.blockIterCur++
|
||||
return ret
|
||||
}
|
||||
}
|
||||
|
||||
// ValueRefCounts implements Builder.ValueRefCounts.
|
||||
func (b *builder) ValueRefCounts() []int {
|
||||
return b.valueRefCounts
|
||||
}
|
||||
|
||||
// alias records that dst is an alias of src. The alias(es) will be
// eliminated in the optimization pass via resolveArgumentAlias.
|
||||
func (b *builder) alias(dst, src Value) {
|
||||
b.valueIDAliases[dst.ID()] = src
|
||||
}
|
||||
|
||||
// resolveArgumentAlias resolves the alias of the arguments of the given instruction.
|
||||
func (b *builder) resolveArgumentAlias(instr *Instruction) {
|
||||
if instr.v.Valid() {
|
||||
instr.v = b.resolveAlias(instr.v)
|
||||
}
|
||||
|
||||
if instr.v2.Valid() {
|
||||
instr.v2 = b.resolveAlias(instr.v2)
|
||||
}
|
||||
|
||||
if instr.v3.Valid() {
|
||||
instr.v3 = b.resolveAlias(instr.v3)
|
||||
}
|
||||
|
||||
view := instr.vs.View()
|
||||
for i, v := range view {
|
||||
view[i] = b.resolveAlias(v)
|
||||
}
|
||||
}
|
||||
|
||||
// resolveAlias resolves the alias of the given value.
|
||||
func (b *builder) resolveAlias(v Value) Value {
|
||||
// Some aliases are chained, so we need to follow the chain until it ends.
|
||||
for {
|
||||
if src, ok := b.valueIDAliases[v.ID()]; ok {
|
||||
v = src
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// entryBlk returns the entry block of the function.
|
||||
func (b *builder) entryBlk() *basicBlock {
|
||||
return b.basicBlocksPool.View(0)
|
||||
}
|
||||
|
||||
// isDominatedBy returns true if the given block `n` is dominated by the given block `d`.
|
||||
// Before calling this, the builder must have run passCalculateImmediateDominators.
|
||||
func (b *builder) isDominatedBy(n *basicBlock, d *basicBlock) bool {
|
||||
if len(b.dominators) == 0 {
|
||||
panic("BUG: passCalculateImmediateDominators must be called before calling isDominatedBy")
|
||||
}
|
||||
ent := b.entryBlk()
|
||||
doms := b.dominators
|
||||
for n != d && n != ent {
|
||||
n = doms[n.id]
|
||||
}
|
||||
return n == d
|
||||
}
|
||||
|
||||
// BlockIDMax implements Builder.BlockIDMax.
|
||||
func (b *builder) BlockIDMax() BasicBlockID {
|
||||
return BasicBlockID(b.basicBlocksPool.Allocated())
|
||||
}
|
||||
|
||||
// InsertUndefined implements Builder.InsertUndefined.
|
||||
func (b *builder) InsertUndefined() {
|
||||
instr := b.AllocateInstruction()
|
||||
instr.opcode = OpcodeUndefined
|
||||
b.InsertInstruction(instr)
|
||||
}
|
||||
|
||||
// LoopNestingForestRoots implements Builder.LoopNestingForestRoots.
|
||||
func (b *builder) LoopNestingForestRoots() []BasicBlock {
|
||||
return b.loopNestingForestRoots
|
||||
}
|
||||
|
||||
// LowestCommonAncestor implements Builder.LowestCommonAncestor.
|
||||
func (b *builder) LowestCommonAncestor(blk1, blk2 BasicBlock) BasicBlock {
|
||||
return b.sparseTree.findLCA(blk1.ID(), blk2.ID())
|
||||
}
|
||||
107
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/cmp.go
generated
vendored
Normal file
107
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/cmp.go
generated
vendored
Normal file
|
|
@@ -0,0 +1,107 @@
|
|||
package ssa
|
||||
|
||||
// IntegerCmpCond represents a condition for integer comparison.
|
||||
type IntegerCmpCond byte
|
||||
|
||||
const (
|
||||
// IntegerCmpCondInvalid represents an invalid condition.
|
||||
IntegerCmpCondInvalid IntegerCmpCond = iota
|
||||
// IntegerCmpCondEqual represents "==".
|
||||
IntegerCmpCondEqual
|
||||
// IntegerCmpCondNotEqual represents "!=".
|
||||
IntegerCmpCondNotEqual
|
||||
// IntegerCmpCondSignedLessThan represents Signed "<".
|
||||
IntegerCmpCondSignedLessThan
|
||||
// IntegerCmpCondSignedGreaterThanOrEqual represents Signed ">=".
|
||||
IntegerCmpCondSignedGreaterThanOrEqual
|
||||
// IntegerCmpCondSignedGreaterThan represents Signed ">".
|
||||
IntegerCmpCondSignedGreaterThan
|
||||
// IntegerCmpCondSignedLessThanOrEqual represents Signed "<=".
|
||||
IntegerCmpCondSignedLessThanOrEqual
|
||||
// IntegerCmpCondUnsignedLessThan represents Unsigned "<".
|
||||
IntegerCmpCondUnsignedLessThan
|
||||
// IntegerCmpCondUnsignedGreaterThanOrEqual represents Unsigned ">=".
|
||||
IntegerCmpCondUnsignedGreaterThanOrEqual
|
||||
// IntegerCmpCondUnsignedGreaterThan represents Unsigned ">".
|
||||
IntegerCmpCondUnsignedGreaterThan
|
||||
// IntegerCmpCondUnsignedLessThanOrEqual represents Unsigned "<=".
|
||||
IntegerCmpCondUnsignedLessThanOrEqual
|
||||
)
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
func (i IntegerCmpCond) String() string {
|
||||
switch i {
|
||||
case IntegerCmpCondEqual:
|
||||
return "eq"
|
||||
case IntegerCmpCondNotEqual:
|
||||
return "neq"
|
||||
case IntegerCmpCondSignedLessThan:
|
||||
return "lt_s"
|
||||
case IntegerCmpCondSignedGreaterThanOrEqual:
|
||||
return "ge_s"
|
||||
case IntegerCmpCondSignedGreaterThan:
|
||||
return "gt_s"
|
||||
case IntegerCmpCondSignedLessThanOrEqual:
|
||||
return "le_s"
|
||||
case IntegerCmpCondUnsignedLessThan:
|
||||
return "lt_u"
|
||||
case IntegerCmpCondUnsignedGreaterThanOrEqual:
|
||||
return "ge_u"
|
||||
case IntegerCmpCondUnsignedGreaterThan:
|
||||
return "gt_u"
|
||||
case IntegerCmpCondUnsignedLessThanOrEqual:
|
||||
return "le_u"
|
||||
default:
|
||||
panic("invalid integer comparison condition")
|
||||
}
|
||||
}
|
||||
|
||||
// Signed returns true if the condition is signed integer comparison.
|
||||
func (i IntegerCmpCond) Signed() bool {
|
||||
switch i {
|
||||
case IntegerCmpCondSignedLessThan, IntegerCmpCondSignedGreaterThanOrEqual,
|
||||
IntegerCmpCondSignedGreaterThan, IntegerCmpCondSignedLessThanOrEqual:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
type FloatCmpCond byte
|
||||
|
||||
const (
|
||||
// FloatCmpCondInvalid represents an invalid condition.
|
||||
FloatCmpCondInvalid FloatCmpCond = iota
|
||||
// FloatCmpCondEqual represents "==".
|
||||
FloatCmpCondEqual
|
||||
// FloatCmpCondNotEqual represents "!=".
|
||||
FloatCmpCondNotEqual
|
||||
// FloatCmpCondLessThan represents "<".
|
||||
FloatCmpCondLessThan
|
||||
// FloatCmpCondLessThanOrEqual represents "<=".
|
||||
FloatCmpCondLessThanOrEqual
|
||||
// FloatCmpCondGreaterThan represents ">".
|
||||
FloatCmpCondGreaterThan
|
||||
// FloatCmpCondGreaterThanOrEqual represents ">=".
|
||||
FloatCmpCondGreaterThanOrEqual
|
||||
)
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
func (f FloatCmpCond) String() string {
|
||||
switch f {
|
||||
case FloatCmpCondEqual:
|
||||
return "eq"
|
||||
case FloatCmpCondNotEqual:
|
||||
return "neq"
|
||||
case FloatCmpCondLessThan:
|
||||
return "lt"
|
||||
case FloatCmpCondLessThanOrEqual:
|
||||
return "le"
|
||||
case FloatCmpCondGreaterThan:
|
||||
return "gt"
|
||||
case FloatCmpCondGreaterThanOrEqual:
|
||||
return "ge"
|
||||
default:
|
||||
panic("invalid float comparison condition")
|
||||
}
|
||||
}
|
||||
12
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/funcref.go
generated
vendored
Normal file
12
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/funcref.go
generated
vendored
Normal file
|
|
@@ -0,0 +1,12 @@
|
|||
package ssa

import "fmt"

// FuncRef is a unique identifier for a function of the frontend,
// and is used to reference the function in a function call.
type FuncRef uint32

// String implements fmt.Stringer.
func (r FuncRef) String() string {
return fmt.Sprintf("f%d", r)
}
|
||||
2967
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go
generated
vendored
Normal file
2967
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/instructions.go
generated
vendored
Normal file
File diff suppressed because it is too large
417
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go
generated
vendored
Normal file
417
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go
generated
vendored
Normal file
|
|
@@ -0,0 +1,417 @@
|
|||
package ssa
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
// RunPasses implements Builder.RunPasses.
|
||||
//
|
||||
// The order here matters; some passes depend on the previous ones.
|
||||
//
|
||||
// Note that passes suffixed with "Opt" are the optimization passes, meaning that they edit the instructions and blocks,
// while the other passes do not; e.g. passEstimateBranchProbabilities does not edit them, but only calculates additional information.
|
||||
func (b *builder) RunPasses() {
|
||||
b.runPreBlockLayoutPasses()
|
||||
b.runBlockLayoutPass()
|
||||
b.runPostBlockLayoutPasses()
|
||||
b.runFinalizingPasses()
|
||||
}
|
||||
|
||||
func (b *builder) runPreBlockLayoutPasses() {
|
||||
passSortSuccessors(b)
|
||||
passDeadBlockEliminationOpt(b)
|
||||
passRedundantPhiEliminationOpt(b)
|
||||
// The result of passCalculateImmediateDominators will be used by various passes below.
|
||||
passCalculateImmediateDominators(b)
|
||||
passNopInstElimination(b)
|
||||
|
||||
// TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic.
|
||||
// A WebAssembly program shouldn't result in an irreducible CFG, but we should handle it properly just in case.
|
||||
// See FixIrreducible pass in LLVM: https://llvm.org/doxygen/FixIrreducible_8cpp_source.html
|
||||
|
||||
// TODO: implement more optimization passes like:
|
||||
// block coalescing.
|
||||
// Copy-propagation.
|
||||
// Constant folding.
|
||||
// Common subexpression elimination.
|
||||
// Arithmetic simplifications.
|
||||
// and more!
|
||||
|
||||
// passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations.
|
||||
passDeadCodeEliminationOpt(b)
|
||||
b.donePreBlockLayoutPasses = true
|
||||
}
|
||||
|
||||
func (b *builder) runBlockLayoutPass() {
|
||||
if !b.donePreBlockLayoutPasses {
|
||||
panic("runBlockLayoutPass must be called after all pre passes are done")
|
||||
}
|
||||
passLayoutBlocks(b)
|
||||
b.doneBlockLayout = true
|
||||
}
|
||||
|
||||
// runPostBlockLayoutPasses runs the post block layout passes. After this point, CFG is somewhat stable,
|
||||
// but still can be modified before finalizing passes. At this point, critical edges are split by passLayoutBlocks.
|
||||
func (b *builder) runPostBlockLayoutPasses() {
|
||||
if !b.doneBlockLayout {
|
||||
panic("runPostBlockLayoutPasses must be called after block layout pass is done")
|
||||
}
|
||||
// TODO: Do more. e.g. tail duplication, loop unrolling, etc.
|
||||
|
||||
b.donePostBlockLayoutPasses = true
|
||||
}
|
||||
|
||||
// runFinalizingPasses runs the finalizing passes. After this point, CFG should not be modified.
|
||||
func (b *builder) runFinalizingPasses() {
|
||||
if !b.donePostBlockLayoutPasses {
|
||||
panic("runFinalizingPasses must be called after post block layout passes are done")
|
||||
}
|
||||
// Critical edges are split, so we fix the loop nesting forest.
|
||||
passBuildLoopNestingForest(b)
|
||||
passBuildDominatorTree(b)
|
||||
// Now that we know the final placement of the blocks, we can explicitly mark the fallthrough jumps.
|
||||
b.markFallthroughJumps()
|
||||
}
|
||||
|
||||
// passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so.
|
||||
func passDeadBlockEliminationOpt(b *builder) {
|
||||
entryBlk := b.entryBlk()
|
||||
b.clearBlkVisited()
|
||||
b.blkStack = append(b.blkStack, entryBlk)
|
||||
for len(b.blkStack) > 0 {
|
||||
reachableBlk := b.blkStack[len(b.blkStack)-1]
|
||||
b.blkStack = b.blkStack[:len(b.blkStack)-1]
|
||||
b.blkVisited[reachableBlk] = 0 // the value won't be used in this pass.
|
||||
|
||||
if !reachableBlk.sealed && !reachableBlk.ReturnBlock() {
|
||||
panic(fmt.Sprintf("%s is not sealed", reachableBlk))
|
||||
}
|
||||
|
||||
if wazevoapi.SSAValidationEnabled {
|
||||
reachableBlk.validate(b)
|
||||
}
|
||||
|
||||
for _, succ := range reachableBlk.success {
|
||||
if _, ok := b.blkVisited[succ]; ok {
|
||||
continue
|
||||
}
|
||||
b.blkStack = append(b.blkStack, succ)
|
||||
}
|
||||
}
|
||||
|
||||
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
|
||||
if _, ok := b.blkVisited[blk]; !ok {
|
||||
blk.invalid = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block).
|
||||
func passRedundantPhiEliminationOpt(b *builder) {
|
||||
redundantParameterIndexes := b.ints[:0] // reuse the slice from previous iterations.
|
||||
|
||||
// TODO: this might be costly for large programs, but at least in the experiments so far, it is almost the
// same as the single-iteration version in terms of overall compilation time. That *might be* mostly because
// removing many PHIs reduces the total number of instructions, rather than because the number of iterations
// is small. For example, the sqlite speedtest binary produces a large number of redundant PHIs, and the
// maximum number of iterations observed was 22, which seems acceptable but not that small either, since the
// complexity here is O(BlockNum * Iterations) in the worst case, where BlockNum can be on the order of thousands.
|
||||
for {
|
||||
changed := false
|
||||
_ = b.blockIteratorBegin() // skip entry block!
|
||||
// Below, we intentionally name the iteration variables, as this comes with inevitably nested for loops.
|
||||
for blk := b.blockIteratorNext(); blk != nil; blk = b.blockIteratorNext() {
|
||||
paramNum := len(blk.params)
|
||||
|
||||
for paramIndex := 0; paramIndex < paramNum; paramIndex++ {
|
||||
phiValue := blk.params[paramIndex].value
|
||||
redundant := true
|
||||
|
||||
nonSelfReferencingValue := ValueInvalid
|
||||
for predIndex := range blk.preds {
|
||||
br := blk.preds[predIndex].branch
|
||||
// Resolve the alias in the arguments so that we could use the previous iteration's result.
|
||||
b.resolveArgumentAlias(br)
|
||||
pred := br.vs.View()[paramIndex]
|
||||
if pred == phiValue {
|
||||
// This is self-referencing: PHI from the same PHI.
|
||||
continue
|
||||
}
|
||||
|
||||
if !nonSelfReferencingValue.Valid() {
|
||||
nonSelfReferencingValue = pred
|
||||
continue
|
||||
}
|
||||
|
||||
if nonSelfReferencingValue != pred {
|
||||
redundant = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !nonSelfReferencingValue.Valid() {
|
||||
// This shouldn't happen, and must be a bug in builder.go.
|
||||
panic("BUG: params added but only self-referencing")
|
||||
}
|
||||
|
||||
if redundant {
|
||||
b.redundantParameterIndexToValue[paramIndex] = nonSelfReferencingValue
|
||||
redundantParameterIndexes = append(redundantParameterIndexes, paramIndex)
|
||||
}
|
||||
}
|
||||
|
||||
if len(b.redundantParameterIndexToValue) == 0 {
|
||||
continue
|
||||
}
|
||||
changed = true
|
||||
|
||||
// Remove the redundant PHIs from the argument list of branching instructions.
|
||||
for predIndex := range blk.preds {
|
||||
var cur int
|
||||
predBlk := blk.preds[predIndex]
|
||||
branchInst := predBlk.branch
|
||||
view := branchInst.vs.View()
|
||||
for argIndex, value := range view {
|
||||
if _, ok := b.redundantParameterIndexToValue[argIndex]; !ok {
|
||||
view[cur] = value
|
||||
cur++
|
||||
}
|
||||
}
|
||||
branchInst.vs.Cut(cur)
|
||||
}
|
||||
|
||||
// We still need a definition of the PHI value (previously the parameter).
|
||||
for _, redundantParamIndex := range redundantParameterIndexes {
|
||||
phiValue := blk.params[redundantParamIndex].value
|
||||
onlyValue := b.redundantParameterIndexToValue[redundantParamIndex]
|
||||
// Create an alias in this block from the only phi argument to the phi value.
|
||||
b.alias(phiValue, onlyValue)
|
||||
}
|
||||
|
||||
// Finally, remove the param from the blk.
|
||||
var cur int
|
||||
for paramIndex := 0; paramIndex < paramNum; paramIndex++ {
|
||||
param := blk.params[paramIndex]
|
||||
if _, ok := b.redundantParameterIndexToValue[paramIndex]; !ok {
|
||||
blk.params[cur] = param
|
||||
cur++
|
||||
}
|
||||
}
|
||||
blk.params = blk.params[:cur]
|
||||
|
||||
// Clears the map for the next iteration.
|
||||
for _, paramIndex := range redundantParameterIndexes {
|
||||
delete(b.redundantParameterIndexToValue, paramIndex)
|
||||
}
|
||||
redundantParameterIndexes = redundantParameterIndexes[:0]
|
||||
}
|
||||
|
||||
if !changed {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Reuse the slice for the future passes.
|
||||
b.ints = redundantParameterIndexes
|
||||
}
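
// A small worked example (sketch): if a block has a parameter whose incoming arguments are the
// same value v from every predecessor (or v plus the parameter itself on a back edge), that
// parameter is redundant. The pass drops the corresponding argument from each predecessor's
// branch, removes the parameter from the block, and records alias(param, v), so later passes
// see v directly.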
|
||||
|
||||
// passDeadCodeEliminationOpt traverses all the instructions, calculates the reference count of each Value,
// and eliminates all the unnecessary instructions whose ref count is zero.
// The results are stored in builder.valueRefCounts. This also assigns an InstructionGroupID to each Instruction
// during the process. This is the last SSA-level optimization pass; after this,
// the SSA function is ready to be used by backends.
|
||||
//
|
||||
// TODO: the algorithm here might not be efficient. Get back to this later.
|
||||
func passDeadCodeEliminationOpt(b *builder) {
|
||||
nvid := int(b.nextValueID)
|
||||
if nvid >= len(b.valueRefCounts) {
|
||||
b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...)
|
||||
}
|
||||
if nvid >= len(b.valueIDToInstruction) {
|
||||
b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...)
|
||||
}
|
||||
|
||||
// First, we gather all the instructions with side effects.
|
||||
liveInstructions := b.instStack[:0]
|
||||
// During the process, we will assign InstructionGroupID to each instruction, which is not
|
||||
// relevant to dead code elimination, but we need in the backend.
|
||||
var gid InstructionGroupID
|
||||
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
|
||||
for cur := blk.rootInstr; cur != nil; cur = cur.next {
|
||||
cur.gid = gid
|
||||
switch cur.sideEffect() {
|
||||
case sideEffectTraps:
|
||||
// Trappable instructions should always be alive.
|
||||
liveInstructions = append(liveInstructions, cur)
|
||||
case sideEffectStrict:
|
||||
liveInstructions = append(liveInstructions, cur)
|
||||
// The strict side effect should create different instruction groups.
|
||||
gid++
|
||||
}
|
||||
|
||||
r1, rs := cur.Returns()
|
||||
if r1.Valid() {
|
||||
b.valueIDToInstruction[r1.ID()] = cur
|
||||
}
|
||||
for _, r := range rs {
|
||||
b.valueIDToInstruction[r.ID()] = cur
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find all the instructions referenced by live instructions transitively.
|
||||
for len(liveInstructions) > 0 {
|
||||
tail := len(liveInstructions) - 1
|
||||
live := liveInstructions[tail]
|
||||
liveInstructions = liveInstructions[:tail]
|
||||
if live.live {
|
||||
// If it's already marked alive, this is referenced multiple times,
|
||||
// so we can skip it.
|
||||
continue
|
||||
}
|
||||
live.live = true
|
||||
|
||||
// Before we walk, we need to resolve the alias first.
|
||||
b.resolveArgumentAlias(live)
|
||||
|
||||
v1, v2, v3, vs := live.Args()
|
||||
if v1.Valid() {
|
||||
producingInst := b.valueIDToInstruction[v1.ID()]
|
||||
if producingInst != nil {
|
||||
liveInstructions = append(liveInstructions, producingInst)
|
||||
}
|
||||
}
|
||||
|
||||
if v2.Valid() {
|
||||
producingInst := b.valueIDToInstruction[v2.ID()]
|
||||
if producingInst != nil {
|
||||
liveInstructions = append(liveInstructions, producingInst)
|
||||
}
|
||||
}
|
||||
|
||||
if v3.Valid() {
|
||||
producingInst := b.valueIDToInstruction[v3.ID()]
|
||||
if producingInst != nil {
|
||||
liveInstructions = append(liveInstructions, producingInst)
|
||||
}
|
||||
}
|
||||
|
||||
for _, v := range vs {
|
||||
producingInst := b.valueIDToInstruction[v.ID()]
|
||||
if producingInst != nil {
|
||||
liveInstructions = append(liveInstructions, producingInst)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now that all the live instructions are flagged as live=true, we eliminate all dead instructions.
|
||||
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
|
||||
for cur := blk.rootInstr; cur != nil; cur = cur.next {
|
||||
if !cur.live {
|
||||
// Remove the instruction from the list.
|
||||
if prev := cur.prev; prev != nil {
|
||||
prev.next = cur.next
|
||||
} else {
|
||||
blk.rootInstr = cur.next
|
||||
}
|
||||
if next := cur.next; next != nil {
|
||||
next.prev = cur.prev
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// If the value is alive, we can be sure that its arguments are definitely used.
// Hence, we can increment the value reference counts.
|
||||
v1, v2, v3, vs := cur.Args()
|
||||
if v1.Valid() {
|
||||
b.incRefCount(v1.ID(), cur)
|
||||
}
|
||||
if v2.Valid() {
|
||||
b.incRefCount(v2.ID(), cur)
|
||||
}
|
||||
if v3.Valid() {
|
||||
b.incRefCount(v3.ID(), cur)
|
||||
}
|
||||
for _, v := range vs {
|
||||
b.incRefCount(v.ID(), cur)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
b.instStack = liveInstructions // we reuse the stack for the next iteration.
|
||||
}
|
||||
|
||||
func (b *builder) incRefCount(id ValueID, from *Instruction) {
|
||||
if wazevoapi.SSALoggingEnabled {
|
||||
fmt.Printf("v%d referenced from %v\n", id, from.Format(b))
|
||||
}
|
||||
b.valueRefCounts[id]++
|
||||
}
|
||||
|
||||
// clearBlkVisited clears the b.blkVisited map so that we can reuse it in multiple places.
|
||||
func (b *builder) clearBlkVisited() {
|
||||
b.blkStack2 = b.blkStack2[:0]
|
||||
for key := range b.blkVisited {
|
||||
b.blkStack2 = append(b.blkStack2, key)
|
||||
}
|
||||
for _, blk := range b.blkStack2 {
|
||||
delete(b.blkVisited, blk)
|
||||
}
|
||||
b.blkStack2 = b.blkStack2[:0]
|
||||
}
|
||||
|
||||
// passNopInstElimination eliminates the instructions which are essentially no-ops.
|
||||
func passNopInstElimination(b *builder) {
|
||||
if int(b.nextValueID) >= len(b.valueIDToInstruction) {
|
||||
b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...)
|
||||
}
|
||||
|
||||
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
|
||||
for cur := blk.rootInstr; cur != nil; cur = cur.next {
|
||||
r1, rs := cur.Returns()
|
||||
if r1.Valid() {
|
||||
b.valueIDToInstruction[r1.ID()] = cur
|
||||
}
|
||||
for _, r := range rs {
|
||||
b.valueIDToInstruction[r.ID()] = cur
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
|
||||
for cur := blk.rootInstr; cur != nil; cur = cur.next {
|
||||
switch cur.Opcode() {
|
||||
// TODO: add more logics here.
|
||||
case OpcodeIshl, OpcodeSshr, OpcodeUshr:
|
||||
x, amount := cur.Arg2()
|
||||
definingInst := b.valueIDToInstruction[amount.ID()]
|
||||
if definingInst == nil {
|
||||
// If there's no defining instruction, that means the amount is coming from the parameter.
|
||||
continue
|
||||
}
|
||||
if definingInst.Constant() {
|
||||
v := definingInst.ConstantVal()
|
||||
|
||||
if x.Type().Bits() == 64 {
|
||||
v = v % 64
|
||||
} else {
|
||||
v = v % 32
|
||||
}
|
||||
if v == 0 {
|
||||
b.alias(cur.Return(), x)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
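
// For example (sketch): given y = Ishl(x, c) where c is a constant that is 0 modulo the bit width
// of x, the shift is a no-op, so the pass records alias(y, x); uses of y then resolve to x and the
// now-unreferenced shift is removed later by passDeadCodeEliminationOpt.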
|
||||
|
||||
// passSortSuccessors sorts the successors of each block in the natural program order.
|
||||
func passSortSuccessors(b *builder) {
|
||||
for i := 0; i < b.basicBlocksPool.Allocated(); i++ {
|
||||
blk := b.basicBlocksPool.View(i)
|
||||
sortBlocks(blk.success)
|
||||
}
|
||||
}
|
||||
335
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go
generated
vendored
Normal file
335
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go
generated
vendored
Normal file
|
|
@@ -0,0 +1,335 @@
|
|||
package ssa
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
||||
)
|
||||
|
||||
// passLayoutBlocks implements Builder.LayoutBlocks. This re-organizes builder.reversePostOrderedBasicBlocks.
|
||||
//
|
||||
// TODO: there are tons of room for improvement here. e.g. LLVM has BlockPlacementPass using BlockFrequencyInfo,
|
||||
// BranchProbabilityInfo, and LoopInfo to do a much better job. Also, if we have the profiling instrumentation
|
||||
// like ball-larus algorithm, then we could do profile-guided optimization. Basically all of them are trying
|
||||
// to maximize the fall-through opportunities which is most efficient.
|
||||
//
|
||||
// Here, a fallthrough happens when a block ends with a jump instruction whose target is the very next block in
// builder.reversePostOrderedBasicBlocks.
|
||||
//
|
||||
// Currently, we just place blocks using the DFS reverse post-order of the dominator tree with the heuristics:
|
||||
// 1. a split edge trampoline towards a loop header will be placed as a fallthrough.
|
||||
// 2. we invert the brz and brnz if it makes the fallthrough more likely.
|
||||
//
|
||||
// This heuristic is implemented in the maybeInvertBranches function.
|
||||
func passLayoutBlocks(b *builder) {
|
||||
b.clearBlkVisited()
|
||||
|
||||
// We might end up splitting critical edges which adds more basic blocks,
|
||||
// so we store the currently existing basic blocks in nonSplitBlocks temporarily.
|
||||
// That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks.
|
||||
nonSplitBlocks := b.blkStack[:0]
|
||||
for i, blk := range b.reversePostOrderedBasicBlocks {
|
||||
if !blk.Valid() {
|
||||
continue
|
||||
}
|
||||
nonSplitBlocks = append(nonSplitBlocks, blk)
|
||||
if i != len(b.reversePostOrderedBasicBlocks)-1 {
|
||||
_ = maybeInvertBranches(blk, b.reversePostOrderedBasicBlocks[i+1])
|
||||
}
|
||||
}
|
||||
|
||||
var trampolines []*basicBlock
|
||||
|
||||
// Reset the order slice since we update on the fly by splitting critical edges.
|
||||
b.reversePostOrderedBasicBlocks = b.reversePostOrderedBasicBlocks[:0]
|
||||
uninsertedTrampolines := b.blkStack2[:0]
|
||||
for _, blk := range nonSplitBlocks {
|
||||
for i := range blk.preds {
|
||||
pred := blk.preds[i].blk
|
||||
if _, ok := b.blkVisited[pred]; ok || !pred.Valid() {
|
||||
continue
|
||||
} else if pred.reversePostOrder < blk.reversePostOrder {
|
||||
// This means the edge is critical, and this pred is the trampoline and yet to be inserted.
|
||||
// Split edge trampolines must come before the destination in reverse post-order.
|
||||
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred)
|
||||
b.blkVisited[pred] = 0 // mark as inserted, the value is not used.
|
||||
}
|
||||
}
|
||||
|
||||
// Now that we've already added all the potential trampoline blocks incoming to this block,
|
||||
// we can add this block itself.
|
||||
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk)
|
||||
b.blkVisited[blk] = 0 // mark as inserted, the value is not used.
|
||||
|
||||
if len(blk.success) < 2 {
|
||||
// There won't be critical edge originating from this block.
|
||||
continue
|
||||
} else if blk.currentInstr.opcode == OpcodeBrTable {
|
||||
// We don't split critical edges here, because at the construction site of BrTable, we already split the edges.
|
||||
continue
|
||||
}
|
||||
|
||||
for sidx, succ := range blk.success {
|
||||
if !succ.ReturnBlock() && // If the successor is a return block, we need to split the edge any way because we need "epilogue" to be inserted.
|
||||
// Plus if there's no multiple incoming edges to this successor, (pred, succ) is not critical.
|
||||
len(succ.preds) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Otherwise, we are sure this is a critical edge. To modify the CFG, we need to find the predecessor info
|
||||
// from the successor.
|
||||
var predInfo *basicBlockPredecessorInfo
|
||||
for i := range succ.preds { // This linear search should not be a problem since the number of predecessors should almost always small.
|
||||
pred := &succ.preds[i]
|
||||
if pred.blk == blk {
|
||||
predInfo = pred
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if predInfo == nil {
|
||||
// This must be a bug in somewhere around branch manipulation.
|
||||
panic("BUG: predecessor info not found while the successor exists in successors list")
|
||||
}
|
||||
|
||||
if wazevoapi.SSALoggingEnabled {
|
||||
fmt.Printf("trying to split edge from %d->%d at %s\n",
|
||||
blk.ID(), succ.ID(), predInfo.branch.Format(b))
|
||||
}
|
||||
|
||||
trampoline := b.splitCriticalEdge(blk, succ, predInfo)
|
||||
// Update the successors slice because the target is no longer the original `succ`.
|
||||
blk.success[sidx] = trampoline
|
||||
|
||||
if wazevoapi.SSAValidationEnabled {
|
||||
trampolines = append(trampolines, trampoline)
|
||||
}
|
||||
|
||||
if wazevoapi.SSALoggingEnabled {
|
||||
fmt.Printf("edge split from %d->%d at %s as %d->%d->%d \n",
|
||||
blk.ID(), succ.ID(), predInfo.branch.Format(b),
|
||||
blk.ID(), trampoline.ID(), succ.ID())
|
||||
}
|
||||
|
||||
fallthroughBranch := blk.currentInstr
|
||||
if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline {
|
||||
// This can be lowered as fallthrough at the end of the block.
|
||||
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
|
||||
b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
|
||||
} else {
|
||||
uninsertedTrampolines = append(uninsertedTrampolines, trampoline)
|
||||
}
|
||||
}
|
||||
|
||||
for _, trampoline := range uninsertedTrampolines {
|
||||
if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself.
|
||||
// This means the critical edge was backward, so we insert after the current block immediately.
|
||||
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
|
||||
b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
|
||||
} // If the target is forward, we can wait to insert until the target is inserted.
|
||||
}
|
||||
uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block.
|
||||
}
|
||||
|
||||
if wazevoapi.SSALoggingEnabled {
|
||||
var bs []string
|
||||
for _, blk := range b.reversePostOrderedBasicBlocks {
|
||||
bs = append(bs, blk.Name())
|
||||
}
|
||||
fmt.Println("ordered blocks: ", strings.Join(bs, ", "))
|
||||
}
|
||||
|
||||
if wazevoapi.SSAValidationEnabled {
|
||||
for _, trampoline := range trampolines {
|
||||
if _, ok := b.blkVisited[trampoline]; !ok {
|
||||
panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b))
|
||||
}
|
||||
trampoline.validate(b)
|
||||
}
|
||||
}
|
||||
|
||||
// Reuse the stack for the next iteration.
|
||||
b.blkStack2 = uninsertedTrampolines[:0]
|
||||
}
|
||||
|
||||
// markFallthroughJumps finds the fallthrough jumps and marks them as such.
|
||||
func (b *builder) markFallthroughJumps() {
|
||||
l := len(b.reversePostOrderedBasicBlocks) - 1
|
||||
for i, blk := range b.reversePostOrderedBasicBlocks {
|
||||
if i < l {
|
||||
cur := blk.currentInstr
|
||||
if cur.opcode == OpcodeJump && cur.blk == b.reversePostOrderedBasicBlocks[i+1] {
|
||||
cur.AsFallthroughJump()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// maybeInvertBranches inverts the branch instructions if it is likely possible to the fallthrough more likely with simple heuristics.
|
||||
// nextInRPO is the next block in the reverse post-order.
|
||||
//
|
||||
// Returns true if the branch is inverted for testing purpose.
|
||||
func maybeInvertBranches(now *basicBlock, nextInRPO *basicBlock) bool {
|
||||
fallthroughBranch := now.currentInstr
|
||||
if fallthroughBranch.opcode == OpcodeBrTable {
|
||||
return false
|
||||
}
|
||||
|
||||
condBranch := fallthroughBranch.prev
|
||||
if condBranch == nil || (condBranch.opcode != OpcodeBrnz && condBranch.opcode != OpcodeBrz) {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(fallthroughBranch.vs.View()) != 0 || len(condBranch.vs.View()) != 0 {
|
||||
// If either one of them has arguments, we don't invert the branches.
|
||||
return false
|
||||
}
|
||||
|
||||
// So this block has two branches (a conditional branch followed by an unconditional branch) at the end.
|
||||
// We can invert the condition of the branch if it makes the fallthrough more likely.
|
||||
|
||||
fallthroughTarget, condTarget := fallthroughBranch.blk.(*basicBlock), condBranch.blk.(*basicBlock)
|
||||
|
||||
if fallthroughTarget.loopHeader {
|
||||
// First, if the tail's target is loopHeader, we don't need to do anything here,
|
||||
// because the edge is likely to be critical edge for complex loops (e.g. loop with branches inside it).
|
||||
// That means, we will split the edge in the end of LayoutBlocks function, and insert the trampoline block
|
||||
// right after this block, which will be fallthrough in any way.
|
||||
return false
|
||||
} else if condTarget.loopHeader {
|
||||
// On the other hand, if the condBranch's target is loopHeader, we invert the condition of the branch
|
||||
// so that we could get the fallthrough to the trampoline block.
|
||||
goto invert
|
||||
}
|
||||
|
||||
if fallthroughTarget == nextInRPO {
|
||||
// Also, if the tail's target is the next block in the reverse post-order, we don't need to do anything here,
|
||||
// because if this is not critical edge, we would end up placing these two blocks adjacent to each other.
|
||||
// Even if it is the critical edge, we place the trampoline block right after this block, which will be fallthrough in any way.
|
||||
return false
|
||||
} else if condTarget == nextInRPO {
|
||||
// If the condBranch's target is the next block in the reverse post-order, we invert the condition of the branch
|
||||
// so that we could get the fallthrough to the block.
|
||||
goto invert
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
|
||||
invert:
|
||||
for i := range fallthroughTarget.preds {
|
||||
pred := &fallthroughTarget.preds[i]
|
||||
if pred.branch == fallthroughBranch {
|
||||
pred.branch = condBranch
|
||||
break
|
||||
}
|
||||
}
|
||||
for i := range condTarget.preds {
|
||||
pred := &condTarget.preds[i]
|
||||
if pred.branch == condBranch {
|
||||
pred.branch = fallthroughBranch
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
condBranch.InvertBrx()
|
||||
condBranch.blk = fallthroughTarget
|
||||
fallthroughBranch.blk = condTarget
|
||||
if wazevoapi.SSALoggingEnabled {
|
||||
fmt.Printf("inverting branches at %d->%d and %d->%d\n",
|
||||
now.ID(), fallthroughTarget.ID(), now.ID(), condTarget.ID())
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
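The inversion heuristic above only fires when the conditional target is the block that will be laid out next. A minimal standalone sketch of that decision on toy data (plain ints for block IDs and a bool for brz/brnz, not this package's types):

```go
package main

import "fmt"

// A block tail of the form "brz cond -> condTarget; jump -> fallthroughTarget".
// If condTarget is the next block in layout order, swapping targets and
// inverting the condition turns the unconditional jump into a fallthrough.
type tailBranches struct {
	condIsZero        bool // brz vs. brnz
	condTarget        int
	fallthroughTarget int
}

func maybeInvert(t *tailBranches, nextInLayout int) bool {
	if t.fallthroughTarget == nextInLayout {
		return false // already a fallthrough, nothing to do
	}
	if t.condTarget != nextInLayout {
		return false // inverting would not help either
	}
	t.condIsZero = !t.condIsZero // brz <-> brnz
	t.condTarget, t.fallthroughTarget = t.fallthroughTarget, t.condTarget
	return true
}

func main() {
	t := &tailBranches{condIsZero: true, condTarget: 7, fallthroughTarget: 3}
	fmt.Println(maybeInvert(t, 7), t.fallthroughTarget) // true 7: block 7 now falls through
}
```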
// splitCriticalEdge splits the critical edge between the given predecessor (`pred`) and successor (owning `predInfo`).
//
//   - `pred` is the source of the critical edge,
//   - `succ` is the destination of the critical edge,
//   - `predInfo` is the predecessor info in the succ.preds slice which represents the critical edge.
//
// Why is splitting critical edges important? See the following links:
//
//   - https://en.wikipedia.org/wiki/Control-flow_graph
//   - https://nickdesaulniers.github.io/blog/2023/01/27/critical-edge-splitting/
//
// The returned basic block is the trampoline block which is inserted to split the critical edge.
func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlockPredecessorInfo) *basicBlock {
	// In the following, we convert the following CFG:
	//
	//	pred --(originalBranch)--> succ
	//
	// to the following CFG:
	//
	//	pred --(newBranch)--> trampoline --(originalBranch)-> succ
	//
	// where trampoline is a new basic block which is created to split the critical edge.

	trampoline := b.allocateBasicBlock()
	if int(trampoline.id) >= len(b.dominators) {
		b.dominators = append(b.dominators, make([]*basicBlock, trampoline.id+1)...)
	}
	b.dominators[trampoline.id] = pred

	originalBranch := predInfo.branch

	// Replace originalBranch with the newBranch.
	newBranch := b.AllocateInstruction()
	newBranch.opcode = originalBranch.opcode
	newBranch.blk = trampoline
	switch originalBranch.opcode {
	case OpcodeJump:
	case OpcodeBrz, OpcodeBrnz:
		originalBranch.opcode = OpcodeJump // Trampoline consists of one unconditional branch.
		newBranch.v = originalBranch.v
		originalBranch.v = ValueInvalid
	default:
		panic("BUG: critical edge shouldn't originate from br_table")
	}
	swapInstruction(pred, originalBranch, newBranch)

	// Replace the original branch with the new branch.
	trampoline.rootInstr = originalBranch
	trampoline.currentInstr = originalBranch
	trampoline.success = append(trampoline.success, succ) // Do not use []*basicBlock{pred} because we might have already allocated the slice.
	trampoline.preds = append(trampoline.preds, // same as ^.
		basicBlockPredecessorInfo{blk: pred, branch: newBranch})
	b.Seal(trampoline)

	// Update the original branch to point to the trampoline.
	predInfo.blk = trampoline
	predInfo.branch = originalBranch

	if wazevoapi.SSAValidationEnabled {
		trampoline.validate(b)
	}

	if len(trampoline.params) > 0 {
		panic("trampoline should not have params")
	}

	// Assign the same order as the original block so that this will be placed before the actual destination.
	trampoline.reversePostOrder = pred.reversePostOrder
	return trampoline
}

// swapInstruction replaces `old` in the block `blk` with `New`.
func swapInstruction(blk *basicBlock, old, New *Instruction) {
	if blk.rootInstr == old {
		blk.rootInstr = New
		next := old.next
		New.next = next
		next.prev = New
	} else {
		if blk.currentInstr == old {
			blk.currentInstr = New
		}
		prev := old.prev
		prev.next, New.prev = New, prev
		if next := old.next; next != nil {
			New.next, next.prev = next, New
		}
	}
	old.prev, old.next = nil, nil
}
312
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go
generated
vendored
Normal file
@@ -0,0 +1,312 @@
package ssa

import (
	"fmt"
	"math"
	"strings"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)

// passCalculateImmediateDominators calculates immediate dominators for each basic block.
// The result is stored in b.dominators. This makes it possible for the following passes to
// use builder.isDominatedBy to check if a block is dominated by another block.
//
// At the end of the pass, this function also does the loop detection and sets the basicBlock.loop flag.
func passCalculateImmediateDominators(b *builder) {
	reversePostOrder := b.reversePostOrderedBasicBlocks[:0]
	exploreStack := b.blkStack[:0]
	b.clearBlkVisited()

	entryBlk := b.entryBlk()

	// Store the reverse postorder from the entrypoint into the reversePostOrder slice.
	// This calculation of reverse postorder is not described in the paper,
	// so we use a heuristic to calculate it so that we could potentially handle arbitrarily
	// complex CFGs under the assumption that success is sorted in the program's natural order.
	// That means blk.success[i] always appears before blk.success[i+1] in the source program,
	// which is a reasonable assumption as long as SSA Builder is properly used.
	//
	// First we push blocks in postorder by iteratively visiting successors of the entry block.
	exploreStack = append(exploreStack, entryBlk)
	const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2
	b.blkVisited[entryBlk] = visitStateSeen
	for len(exploreStack) > 0 {
		tail := len(exploreStack) - 1
		blk := exploreStack[tail]
		exploreStack = exploreStack[:tail]
		switch b.blkVisited[blk] {
		case visitStateUnseen:
			// This is likely a bug in the frontend.
			panic("BUG: unsupported CFG")
		case visitStateSeen:
			// This is the first time we pop this block, and we have to see the successors first.
			// So push this block again to the stack.
			exploreStack = append(exploreStack, blk)
			// And push the successors to the stack if necessary.
			for _, succ := range blk.success {
				if succ.ReturnBlock() || succ.invalid {
					continue
				}
				if b.blkVisited[succ] == visitStateUnseen {
					b.blkVisited[succ] = visitStateSeen
					exploreStack = append(exploreStack, succ)
				}
			}
			// Finally, we could pop this block once we pop all of its successors.
			b.blkVisited[blk] = visitStateDone
		case visitStateDone:
			// Note: at this point we push blk in postorder despite its name.
			reversePostOrder = append(reversePostOrder, blk)
		}
	}
	// At this point, reversePostOrder actually holds the postorder, so we reverse it.
	for i := len(reversePostOrder)/2 - 1; i >= 0; i-- {
		j := len(reversePostOrder) - 1 - i
		reversePostOrder[i], reversePostOrder[j] = reversePostOrder[j], reversePostOrder[i]
	}

	for i, blk := range reversePostOrder {
		blk.reversePostOrder = i
	}

	// Reuse the dominators slice if possible from the previous function's computation.
	b.dominators = b.dominators[:cap(b.dominators)]
	if len(b.dominators) < b.basicBlocksPool.Allocated() {
		// Generously reserve space in the slice because the slice will be reused for future allocations.
		b.dominators = append(b.dominators, make([]*basicBlock, b.basicBlocksPool.Allocated())...)
	}
	calculateDominators(reversePostOrder, b.dominators)

	// Reuse the slices for future use.
	b.blkStack = exploreStack

	// For the following passes.
	b.reversePostOrderedBasicBlocks = reversePostOrder

	// Ready to detect loops!
	subPassLoopDetection(b)
}

// calculateDominators calculates the immediate dominator of each node in the CFG, and stores the result in `doms`.
// The algorithm is based on the one described in the paper "A Simple, Fast Dominance Algorithm"
// https://www.cs.rice.edu/~keith/EMBED/dom.pdf which is a faster/simpler alternative to the well known Lengauer-Tarjan algorithm.
//
// The following code almost matches the pseudocode in the paper with one exception (see the code comment below).
//
// The result slice `doms` must be pre-allocated with a size larger than the size of dfsBlocks.
func calculateDominators(reversePostOrderedBlks []*basicBlock, doms []*basicBlock) {
	entry, reversePostOrderedBlks := reversePostOrderedBlks[0], reversePostOrderedBlks[1: /* skips entry point */]
	for _, blk := range reversePostOrderedBlks {
		doms[blk.id] = nil
	}
	doms[entry.id] = entry

	changed := true
	for changed {
		changed = false
		for _, blk := range reversePostOrderedBlks {
			var u *basicBlock
			for i := range blk.preds {
				pred := blk.preds[i].blk
				// Skip if this pred is not reachable yet. Note that this is not described in the paper,
				// but it is necessary to handle nested loops etc.
				if doms[pred.id] == nil {
					continue
				}

				if u == nil {
					u = pred
					continue
				} else {
					u = intersect(doms, u, pred)
				}
			}
			if doms[blk.id] != u {
				doms[blk.id] = u
				changed = true
			}
		}
	}
}

// intersect returns the common dominator of blk1 and blk2.
//
// This is the `intersect` function in the paper.
func intersect(doms []*basicBlock, blk1 *basicBlock, blk2 *basicBlock) *basicBlock {
	finger1, finger2 := blk1, blk2
	for finger1 != finger2 {
		// Move the 'finger1' upwards to its immediate dominator.
		for finger1.reversePostOrder > finger2.reversePostOrder {
			finger1 = doms[finger1.id]
		}
		// Move the 'finger2' upwards to its immediate dominator.
		for finger2.reversePostOrder > finger1.reversePostOrder {
			finger2 = doms[finger2.id]
		}
	}
	return finger1
}
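calculateDominators and intersect follow the Cooper, Harvey and Kennedy iterative scheme from the cited paper. A minimal standalone sketch of the same loop on a toy diamond-shaped CFG, using ints numbered in reverse post-order instead of this package's *basicBlock:

```go
package main

import "fmt"

// Toy CFG in reverse post-order: 0 is the entry, 1 and 2 are branch arms, 3 is the merge.
// preds[b] lists the predecessors of block b.
func main() {
	preds := [][]int{0: {}, 1: {0}, 2: {0}, 3: {1, 2}}
	idom := []int{0, -1, -1, -1} // -1 means "not computed yet"; the entry dominates itself.

	intersect := func(a, b int) int {
		for a != b {
			for a > b {
				a = idom[a]
			}
			for b > a {
				b = idom[b]
			}
		}
		return a
	}

	for changed := true; changed; {
		changed = false
		for b := 1; b < len(preds); b++ {
			u := -1
			for _, p := range preds[b] {
				if idom[p] == -1 {
					continue // unreachable so far, as in the pass above
				}
				if u == -1 {
					u = p
				} else {
					u = intersect(u, p)
				}
			}
			if idom[b] != u {
				idom[b], changed = u, true
			}
		}
	}
	fmt.Println(idom) // [0 0 0 0]: the entry immediately dominates every block.
}
```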
// subPassLoopDetection detects loops in the function using the immediate dominators.
//
// This is run at the end of passCalculateImmediateDominators.
func subPassLoopDetection(b *builder) {
	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
		for i := range blk.preds {
			pred := blk.preds[i].blk
			if pred.invalid {
				continue
			}
			if b.isDominatedBy(pred, blk) {
				blk.loopHeader = true
			}
		}
	}
}

// passBuildLoopNestingForest builds the loop nesting forest for the function.
// This must be called after branch splitting since it relies on the CFG.
func passBuildLoopNestingForest(b *builder) {
	ent := b.entryBlk()
	doms := b.dominators
	for _, blk := range b.reversePostOrderedBasicBlocks {
		n := doms[blk.id]
		for !n.loopHeader && n != ent {
			n = doms[n.id]
		}

		if n == ent && blk.loopHeader {
			b.loopNestingForestRoots = append(b.loopNestingForestRoots, blk)
		} else if n == ent {
		} else if n.loopHeader {
			n.loopNestingForestChildren = append(n.loopNestingForestChildren, blk)
		}
	}

	if wazevoapi.SSALoggingEnabled {
		for _, root := range b.loopNestingForestRoots {
			printLoopNestingForest(root.(*basicBlock), 0)
		}
	}
}

func printLoopNestingForest(root *basicBlock, depth int) {
	fmt.Println(strings.Repeat("\t", depth), "loop nesting forest root:", root.ID())
	for _, child := range root.loopNestingForestChildren {
		fmt.Println(strings.Repeat("\t", depth+1), "child:", child.ID())
		if child.LoopHeader() {
			printLoopNestingForest(child.(*basicBlock), depth+2)
		}
	}
}

type dominatorSparseTree struct {
	time         int
	euler        []*basicBlock
	first, depth []int
	table        [][]int
}

// passBuildDominatorTree builds the dominator tree for the function, and constructs builder.sparseTree.
func passBuildDominatorTree(b *builder) {
	// First we materialize the children of each node in the dominator tree.
	idoms := b.dominators
	for _, blk := range b.reversePostOrderedBasicBlocks {
		parent := idoms[blk.id]
		if parent == nil {
			panic("BUG")
		} else if parent == blk {
			// This is the entry block.
			continue
		}
		if prev := parent.child; prev == nil {
			parent.child = blk
		} else {
			parent.child = blk
			blk.sibling = prev
		}
	}

	// Reset the state from the previous computation.
	n := b.basicBlocksPool.Allocated()
	st := &b.sparseTree
	st.euler = append(st.euler[:0], make([]*basicBlock, 2*n-1)...)
	st.first = append(st.first[:0], make([]int, n)...)
	for i := range st.first {
		st.first[i] = -1
	}
	st.depth = append(st.depth[:0], make([]int, 2*n-1)...)
	st.time = 0

	// Start building the sparse tree.
	st.eulerTour(b.entryBlk(), 0)
	st.buildSparseTable()
}

func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int) {
	if wazevoapi.SSALoggingEnabled {
		fmt.Println(strings.Repeat("\t", height), "euler tour:", node.ID())
	}
	dt.euler[dt.time] = node
	dt.depth[dt.time] = height
	if dt.first[node.id] == -1 {
		dt.first[node.id] = dt.time
	}
	dt.time++

	for child := node.child; child != nil; child = child.sibling {
		dt.eulerTour(child, height+1)
		dt.euler[dt.time] = node // add the current node again after visiting a child
		dt.depth[dt.time] = height
		dt.time++
	}
}

// buildSparseTable builds a sparse table for RMQ queries.
func (dt *dominatorSparseTree) buildSparseTable() {
	n := len(dt.depth)
	k := int(math.Log2(float64(n))) + 1
	table := dt.table

	if n >= len(table) {
		table = append(table, make([][]int, n+1)...)
	}
	for i := range table {
		if len(table[i]) < k {
			table[i] = append(table[i], make([]int, k)...)
		}
		table[i][0] = i
	}

	for j := 1; 1<<j <= n; j++ {
		for i := 0; i+(1<<j)-1 < n; i++ {
			if dt.depth[table[i][j-1]] < dt.depth[table[i+(1<<(j-1))][j-1]] {
				table[i][j] = table[i][j-1]
			} else {
				table[i][j] = table[i+(1<<(j-1))][j-1]
			}
		}
	}
	dt.table = table
}

// rmq performs a range minimum query on the sparse table.
func (dt *dominatorSparseTree) rmq(l, r int) int {
	table := dt.table
	depth := dt.depth
	j := int(math.Log2(float64(r - l + 1)))
	if depth[table[l][j]] <= depth[table[r-(1<<j)+1][j]] {
		return table[l][j]
	}
	return table[r-(1<<j)+1][j]
}

// findLCA finds the LCA using the Euler tour and RMQ.
func (dt *dominatorSparseTree) findLCA(u, v BasicBlockID) *basicBlock {
	first := dt.first
	if first[u] > first[v] {
		u, v = v, u
	}
	return dt.euler[dt.rmq(first[u], first[v])]
}
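The sparse tree above answers lowest-common-ancestor queries by taking a range-minimum over the Euler tour of the dominator tree. A standalone sketch of the same idea on a toy tree with int node IDs; for brevity it scans the range linearly where the code above consults the precomputed sparse table:

```go
package main

import "fmt"

// Toy tree: 0 is the root, children[p] lists the children of p.
var children = [][]int{0: {1, 2}, 1: {3}, 2: {}, 3: {}}

var (
	euler []int // nodes in visit order, re-appending the parent after each child
	depth []int // depth of each euler entry
	first = map[int]int{}
)

func tour(n, h int) {
	if _, seen := first[n]; !seen {
		first[n] = len(euler)
	}
	euler, depth = append(euler, n), append(depth, h)
	for _, c := range children[n] {
		tour(c, h+1)
		euler, depth = append(euler, n), append(depth, h)
	}
}

// lca scans the euler range between the first occurrences of u and v and
// returns the shallowest node; a sparse table answers the same query in O(1).
func lca(u, v int) int {
	l, r := first[u], first[v]
	if l > r {
		l, r = r, l
	}
	best := l
	for i := l + 1; i <= r; i++ {
		if depth[i] < depth[best] {
			best = i
		}
	}
	return euler[best]
}

func main() {
	tour(0, 0)
	fmt.Println(lca(3, 2)) // 0: the root is the lowest common ancestor of 3 and 2.
}
```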
49
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/signature.go
generated
vendored
Normal file
@@ -0,0 +1,49 @@
package ssa

import (
	"fmt"
	"strings"
)

// Signature is a function prototype.
type Signature struct {
	// ID is a unique identifier for this signature used for lookup.
	ID SignatureID
	// Params and Results are the types of the parameters and results of the function.
	Params, Results []Type

	// used is true if this is used by the currently-compiled function.
	// Debugging only.
	used bool
}

// String implements fmt.Stringer.
func (s *Signature) String() string {
	str := strings.Builder{}
	str.WriteString(s.ID.String())
	str.WriteString(": ")
	if len(s.Params) > 0 {
		for _, typ := range s.Params {
			str.WriteString(typ.String())
		}
	} else {
		str.WriteByte('v')
	}
	str.WriteByte('_')
	if len(s.Results) > 0 {
		for _, typ := range s.Results {
			str.WriteString(typ.String())
		}
	} else {
		str.WriteByte('v')
	}
	return str.String()
}

// SignatureID is a unique identifier used for lookup.
type SignatureID int

// String implements fmt.Stringer.
func (s SignatureID) String() string {
	return fmt.Sprintf("sig%d", s)
}
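For reference, the String format above concatenates parameter and result types around an underscore, with 'v' standing in for an empty list. A hypothetical in-package example; the internal package cannot be imported from outside wazero, so this only sketches the expected output:

```go
package ssa

import "fmt"

// Sketch of an in-package testable example for Signature.String.
func ExampleSignature_String() {
	sig := &Signature{ID: 7, Params: []Type{TypeI32, TypeF64}, Results: []Type{TypeI64}}
	void := &Signature{ID: 8}
	fmt.Println(sig, void)
	// Output: sig7: i32f64_i64 sig8: v_v
}
```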
14
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/ssa.go
generated
vendored
Normal file
@@ -0,0 +1,14 @@
// Package ssa is used to construct SSA functions. By nature this is free of Wasm-specific things
// and ISA.
//
// We use the "block argument" variant of SSA: https://en.wikipedia.org/wiki/Static_single-assignment_form#Block_arguments
// which is equivalent to the traditional PHI function based one, but more convenient during optimizations.
// However, in this package's source code comments, we might use PHI whenever it seems necessary in order to be aligned with
// existing literature, e.g. SSA level optimization algorithms are often described using PHI nodes.
//
// The rationale doc for the choice of "block argument" by LLVM's MLIR is worth a read:
// https://mlir.llvm.org/docs/Rationale/Rationale/#block-arguments-vs-phi-nodes
//
// The algorithm to resolve variable definitions used here is based on the paper
// "Simple and Efficient Construction of Static Single Assignment Form": https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf.
package ssa
112
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go
generated
vendored
Normal file
@@ -0,0 +1,112 @@
package ssa

type Type byte

const (
	typeInvalid Type = iota

	// TODO: add 8, 16 bit types when it's needed for optimizations.

	// TypeI32 represents an integer type with 32 bits.
	TypeI32

	// TypeI64 represents an integer type with 64 bits.
	TypeI64

	// TypeF32 represents 32-bit floats in the IEEE 754.
	TypeF32

	// TypeF64 represents 64-bit floats in the IEEE 754.
	TypeF64

	// TypeV128 represents 128-bit SIMD vectors.
	TypeV128
)

// String implements fmt.Stringer.
func (t Type) String() (ret string) {
	switch t {
	case typeInvalid:
		return "invalid"
	case TypeI32:
		return "i32"
	case TypeI64:
		return "i64"
	case TypeF32:
		return "f32"
	case TypeF64:
		return "f64"
	case TypeV128:
		return "v128"
	default:
		panic(int(t))
	}
}

// IsInt returns true if the type is an integer type.
func (t Type) IsInt() bool {
	return t == TypeI32 || t == TypeI64
}

// IsFloat returns true if the type is a floating point type.
func (t Type) IsFloat() bool {
	return t == TypeF32 || t == TypeF64
}

// Bits returns the number of bits required to represent the type.
func (t Type) Bits() byte {
	switch t {
	case TypeI32, TypeF32:
		return 32
	case TypeI64, TypeF64:
		return 64
	case TypeV128:
		return 128
	default:
		panic(int(t))
	}
}

// Size returns the number of bytes required to represent the type.
func (t Type) Size() byte {
	return t.Bits() / 8
}

func (t Type) invalid() bool {
	return t == typeInvalid
}

// VecLane represents a lane in a SIMD vector.
type VecLane byte

const (
	VecLaneInvalid VecLane = 1 + iota
	VecLaneI8x16
	VecLaneI16x8
	VecLaneI32x4
	VecLaneI64x2
	VecLaneF32x4
	VecLaneF64x2
)

// String implements fmt.Stringer.
func (vl VecLane) String() (ret string) {
	switch vl {
	case VecLaneInvalid:
		return "invalid"
	case VecLaneI8x16:
		return "i8x16"
	case VecLaneI16x8:
		return "i16x8"
	case VecLaneI32x4:
		return "i32x4"
	case VecLaneI64x2:
		return "i64x2"
	case VecLaneF32x4:
		return "f32x4"
	case VecLaneF64x2:
		return "f64x2"
	default:
		panic(int(vl))
	}
}
87
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/vs.go
generated
vendored
Normal file
@@ -0,0 +1,87 @@
package ssa

import (
	"fmt"
	"math"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)

// Variable is a unique identifier for a source program's variable and will correspond to
// multiple ssa Value(s).
//
// For example, `Local 1` is a Variable in WebAssembly, and Value(s) will be created for it
// whenever it executes `local.set 1`.
//
// Variable is useful to track the SSA Values of a variable in the source program, and
// can be used to find the corresponding latest SSA Value via Builder.FindValue.
type Variable uint32

// String implements fmt.Stringer.
func (v Variable) String() string {
	return fmt.Sprintf("var%d", v)
}

// Value represents an SSA value with type information. The relationship with Variable is 1: N (including 0),
// which means there might be multiple Variable(s) for a Value.
//
// The higher 32 bits are used to store the Type for this value.
type Value uint64

// ValueID is the lower 32 bits of Value, which is the pure identifier of Value without type info.
type ValueID uint32

const (
	valueIDInvalid ValueID = math.MaxUint32
	ValueInvalid   Value   = Value(valueIDInvalid)
)

// Format creates a debug string for this Value using the data stored in Builder.
func (v Value) Format(b Builder) string {
	if annotation, ok := b.(*builder).valueAnnotations[v.ID()]; ok {
		return annotation
	}
	return fmt.Sprintf("v%d", v.ID())
}

func (v Value) formatWithType(b Builder) (ret string) {
	if annotation, ok := b.(*builder).valueAnnotations[v.ID()]; ok {
		ret = annotation + ":" + v.Type().String()
	} else {
		ret = fmt.Sprintf("v%d:%s", v.ID(), v.Type())
	}

	if wazevoapi.SSALoggingEnabled { // This is useful to check live value analysis bugs.
		if bd := b.(*builder); bd.donePostBlockLayoutPasses {
			id := v.ID()
			ret += fmt.Sprintf("(ref=%d)", bd.valueRefCounts[id])
		}
	}
	return ret
}

// Valid returns true if this value is valid.
func (v Value) Valid() bool {
	return v.ID() != valueIDInvalid
}

// Type returns the Type of this value.
func (v Value) Type() Type {
	return Type(v >> 32)
}

// ID returns the valueID of this value.
func (v Value) ID() ValueID {
	return ValueID(v)
}

// setType sets a type to this Value and returns the updated Value.
func (v Value) setType(typ Type) Value {
	return v | Value(typ)<<32
}

// Values is a slice of Value. Use this instead of []Value to reuse the underlying memory.
type Values = wazevoapi.VarLength[Value]

// ValuesNil is a nil Values.
var ValuesNil = wazevoapi.NewNilVarLength[Value]()
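Value packs a 32-bit ID and a type tag into one uint64, as setType, Type and ID above show. A standalone sketch of the same packing with a made-up type tag:

```go
package main

import "fmt"

// The low 32 bits carry the ID and the high 32 bits carry the type tag,
// mirroring setType/Type/ID above. typeI64 is a stand-in value; the real
// constants live in the ssa package.
func main() {
	const typeI64 = 3
	var v uint64 = 42 // the ValueID

	v |= uint64(typeI64) << 32 // setType
	fmt.Println(uint32(v))     // 42: ID() truncates to the low 32 bits
	fmt.Println(v >> 32)       // 3: Type() reads the high 32 bits
}
```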
196
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/debug_options.go
generated
vendored
Normal file
@@ -0,0 +1,196 @@
package wazevoapi

import (
	"context"
	"encoding/hex"
	"fmt"
	"math/rand"
	"os"
	"time"
)

// These consts are used in various places in the wazevo implementations.
// Instead of defining them in each file, we define them here so that we can quickly iterate on
// debugging without spending "where do we have debug logging?" time.

// ----- Debug logging -----
// These consts must be disabled by default. Enable them only when debugging.

const (
	FrontEndLoggingEnabled = false
	SSALoggingEnabled      = false
	RegAllocLoggingEnabled = false
)

// ----- Output prints -----
// These consts must be disabled by default. Enable them only when debugging.

const (
	PrintSSA                                 = false
	PrintOptimizedSSA                        = false
	PrintSSAToBackendIRLowering              = false
	PrintRegisterAllocated                   = false
	PrintFinalizedMachineCode                = false
	PrintMachineCodeHexPerFunction           = printMachineCodeHexPerFunctionUnmodified || PrintMachineCodeHexPerFunctionDisassemblable //nolint
	printMachineCodeHexPerFunctionUnmodified = false
	// PrintMachineCodeHexPerFunctionDisassemblable prints the machine code while modifying the actual result
	// to make it disassemblable. This is useful when debugging the final machine code. See the places where this is used for detail.
	// When this is enabled, functions must not be called.
	PrintMachineCodeHexPerFunctionDisassemblable = false
)

// printTarget is the function index to print the machine code. This is used for debugging to print the machine code
// of a specific function.
const printTarget = -1

// PrintEnabledIndex returns true if the current function index is the print target.
func PrintEnabledIndex(ctx context.Context) bool {
	if printTarget == -1 {
		return true
	}
	return GetCurrentFunctionIndex(ctx) == printTarget
}

// ----- Validations -----
const (
	// SSAValidationEnabled enables the SSA validation. This is disabled by default since the operation is expensive.
	SSAValidationEnabled = false
)

// ----- Stack Guard Check -----
const (
	// StackGuardCheckEnabled enables the stack guard check to ensure that our stack bounds check works correctly.
	StackGuardCheckEnabled       = false
	StackGuardCheckGuardPageSize = 8096
)

// CheckStackGuardPage checks the given stack guard page is not corrupted.
func CheckStackGuardPage(s []byte) {
	for i := 0; i < StackGuardCheckGuardPageSize; i++ {
		if s[i] != 0 {
			panic(
				fmt.Sprintf("BUG: stack guard page is corrupted:\n\tguard_page=%s\n\tstack=%s",
					hex.EncodeToString(s[:StackGuardCheckGuardPageSize]),
					hex.EncodeToString(s[StackGuardCheckGuardPageSize:]),
				))
		}
	}
}
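CheckStackGuardPage relies on a simple invariant: a zero-filled guard region must stay zero, so any non-zero byte means the bounds check failed to stop an overflow. A standalone sketch of that idea with a deliberately corrupted guard byte:

```go
package main

import "fmt"

// Keep a zeroed guard region below the area a function may legitimately
// write, then verify it is still all zero afterwards.
func main() {
	const guardSize = 16
	buf := make([]byte, guardSize+64) // [0,guardSize) is the guard page, the rest is usable stack.

	// Simulate a buggy write that runs past the usable area into the guard page.
	buf[guardSize-1] = 0xAA

	for i := 0; i < guardSize; i++ {
		if buf[i] != 0 {
			fmt.Printf("guard page corrupted at byte %d\n", i)
			return
		}
	}
	fmt.Println("guard page intact")
}
```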
// ----- Deterministic compilation verifier -----

const (
	// DeterministicCompilationVerifierEnabled enables the deterministic compilation verifier. This is disabled by default
	// since the operation is expensive. But when in doubt, enable this to make sure the compilation is deterministic.
	DeterministicCompilationVerifierEnabled = false
	DeterministicCompilationVerifyingIter   = 5
)

type (
	verifierState struct {
		initialCompilationDone bool
		maybeRandomizedIndexes []int
		r                      *rand.Rand
		values                 map[string]string
	}
	verifierStateContextKey struct{}
	currentFunctionNameKey  struct{}
	currentFunctionIndexKey struct{}
)

// NewDeterministicCompilationVerifierContext creates a new context with the deterministic compilation verifier used per wasm.Module.
func NewDeterministicCompilationVerifierContext(ctx context.Context, localFunctions int) context.Context {
	maybeRandomizedIndexes := make([]int, localFunctions)
	for i := range maybeRandomizedIndexes {
		maybeRandomizedIndexes[i] = i
	}
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	return context.WithValue(ctx, verifierStateContextKey{}, &verifierState{
		r: r, maybeRandomizedIndexes: maybeRandomizedIndexes, values: map[string]string{},
	})
}

// DeterministicCompilationVerifierRandomizeIndexes randomizes the indexes for the deterministic compilation verifier.
// To get the randomized index, use DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex.
func DeterministicCompilationVerifierRandomizeIndexes(ctx context.Context) {
	state := ctx.Value(verifierStateContextKey{}).(*verifierState)
	if !state.initialCompilationDone {
		// If this is the first attempt, we use the indexes in their original order.
		state.initialCompilationDone = true
		return
	}
	r := state.r
	r.Shuffle(len(state.maybeRandomizedIndexes), func(i, j int) {
		state.maybeRandomizedIndexes[i], state.maybeRandomizedIndexes[j] = state.maybeRandomizedIndexes[j], state.maybeRandomizedIndexes[i]
	})
}

// DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex returns the randomized index for the given `index`
// which is assigned by DeterministicCompilationVerifierRandomizeIndexes.
func DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx context.Context, index int) int {
	state := ctx.Value(verifierStateContextKey{}).(*verifierState)
	ret := state.maybeRandomizedIndexes[index]
	return ret
}

// VerifyOrSetDeterministicCompilationContextValue verifies that the `newValue` is the same as the previous value for the given `scope`
// and the current function name. If the previous value doesn't exist, it sets the value to the given `newValue`.
//
// If the verification fails, this prints the diff and exits the process.
func VerifyOrSetDeterministicCompilationContextValue(ctx context.Context, scope string, newValue string) {
	fn := ctx.Value(currentFunctionNameKey{}).(string)
	key := fn + ": " + scope
	verifierCtx := ctx.Value(verifierStateContextKey{}).(*verifierState)
	oldValue, ok := verifierCtx.values[key]
	if !ok {
		verifierCtx.values[key] = newValue
		return
	}
	if oldValue != newValue {
		fmt.Printf(
			`BUG: Deterministic compilation failed for function%s at scope="%s".

This is mostly due to (but might not be limited to):
	* Resetting ssa.Builder, backend.Compiler or frontend.Compiler, etc doesn't work as expected, and the compilation has been affected by the previous iterations.
	* Using a map with non-deterministic iteration order.

---------- [old] ----------
%s

---------- [new] ----------
%s
`,
			fn, scope, oldValue, newValue,
		)
		os.Exit(1)
	}
}

// nolint
const NeedFunctionNameInContext = PrintSSA ||
	PrintOptimizedSSA ||
	PrintSSAToBackendIRLowering ||
	PrintRegisterAllocated ||
	PrintFinalizedMachineCode ||
	PrintMachineCodeHexPerFunction ||
	DeterministicCompilationVerifierEnabled ||
	PerfMapEnabled

// SetCurrentFunctionName sets the current function name to the given `functionName`.
func SetCurrentFunctionName(ctx context.Context, index int, functionName string) context.Context {
	ctx = context.WithValue(ctx, currentFunctionNameKey{}, functionName)
	ctx = context.WithValue(ctx, currentFunctionIndexKey{}, index)
	return ctx
}

// GetCurrentFunctionName returns the current function name.
func GetCurrentFunctionName(ctx context.Context) string {
	ret, _ := ctx.Value(currentFunctionNameKey{}).(string)
	return ret
}

// GetCurrentFunctionIndex returns the current function index.
func GetCurrentFunctionIndex(ctx context.Context) int {
	ret, _ := ctx.Value(currentFunctionIndexKey{}).(int)
	return ret
}
109
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/exitcode.go
generated
vendored
Normal file
@@ -0,0 +1,109 @@
package wazevoapi

// ExitCode is an exit code of an execution of a function.
type ExitCode uint32

const (
	ExitCodeOK ExitCode = iota
	ExitCodeGrowStack
	ExitCodeGrowMemory
	ExitCodeUnreachable
	ExitCodeMemoryOutOfBounds
	// ExitCodeCallGoModuleFunction is an exit code for a call to an api.GoModuleFunction.
	ExitCodeCallGoModuleFunction
	// ExitCodeCallGoFunction is an exit code for a call to an api.GoFunction.
	ExitCodeCallGoFunction
	ExitCodeTableOutOfBounds
	ExitCodeIndirectCallNullPointer
	ExitCodeIndirectCallTypeMismatch
	ExitCodeIntegerDivisionByZero
	ExitCodeIntegerOverflow
	ExitCodeInvalidConversionToInteger
	ExitCodeCheckModuleExitCode
	ExitCodeCallListenerBefore
	ExitCodeCallListenerAfter
	ExitCodeCallGoModuleFunctionWithListener
	ExitCodeCallGoFunctionWithListener
	ExitCodeTableGrow
	ExitCodeRefFunc
	ExitCodeMemoryWait32
	ExitCodeMemoryWait64
	ExitCodeMemoryNotify
	ExitCodeUnalignedAtomic
	exitCodeMax
)

const ExitCodeMask = 0xff

// String implements fmt.Stringer.
func (e ExitCode) String() string {
	switch e {
	case ExitCodeOK:
		return "ok"
	case ExitCodeGrowStack:
		return "grow_stack"
	case ExitCodeCallGoModuleFunction:
		return "call_go_module_function"
	case ExitCodeCallGoFunction:
		return "call_go_function"
	case ExitCodeUnreachable:
		return "unreachable"
	case ExitCodeMemoryOutOfBounds:
		return "memory_out_of_bounds"
	case ExitCodeUnalignedAtomic:
		return "unaligned_atomic"
	case ExitCodeTableOutOfBounds:
		return "table_out_of_bounds"
	case ExitCodeIndirectCallNullPointer:
		return "indirect_call_null_pointer"
	case ExitCodeIndirectCallTypeMismatch:
		return "indirect_call_type_mismatch"
	case ExitCodeIntegerDivisionByZero:
		return "integer_division_by_zero"
	case ExitCodeIntegerOverflow:
		return "integer_overflow"
	case ExitCodeInvalidConversionToInteger:
		return "invalid_conversion_to_integer"
	case ExitCodeCheckModuleExitCode:
		return "check_module_exit_code"
	case ExitCodeCallListenerBefore:
		return "call_listener_before"
	case ExitCodeCallListenerAfter:
		return "call_listener_after"
	case ExitCodeCallGoModuleFunctionWithListener:
		return "call_go_module_function_with_listener"
	case ExitCodeCallGoFunctionWithListener:
		return "call_go_function_with_listener"
	case ExitCodeGrowMemory:
		return "grow_memory"
	case ExitCodeTableGrow:
		return "table_grow"
	case ExitCodeRefFunc:
		return "ref_func"
	case ExitCodeMemoryWait32:
		return "memory_wait32"
	case ExitCodeMemoryWait64:
		return "memory_wait64"
	case ExitCodeMemoryNotify:
		return "memory_notify"
	}
	panic("TODO")
}

func ExitCodeCallGoModuleFunctionWithIndex(index int, withListener bool) ExitCode {
	if withListener {
		return ExitCodeCallGoModuleFunctionWithListener | ExitCode(index<<8)
	}
	return ExitCodeCallGoModuleFunction | ExitCode(index<<8)
}

func ExitCodeCallGoFunctionWithIndex(index int, withListener bool) ExitCode {
	if withListener {
		return ExitCodeCallGoFunctionWithListener | ExitCode(index<<8)
	}
	return ExitCodeCallGoFunction | ExitCode(index<<8)
}

func GoFunctionIndexFromExitCode(exitCode ExitCode) int {
	return int(exitCode >> 8)
}
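The WithIndex helpers above multiplex a function index into the bits above ExitCodeMask, and GoFunctionIndexFromExitCode recovers it by shifting. A standalone sketch of that encoding with stand-in constant values:

```go
package main

import "fmt"

// The low 8 bits hold the code kind (compare against a 0xff mask) and the
// bits above bit 8 carry a function index. The constants here are stand-ins
// for ExitCodeCallGoFunction and ExitCodeMask.
func main() {
	const (
		callGoFunction = 6
		mask           = 0xff
	)
	index := 42

	packed := uint32(callGoFunction) | uint32(index)<<8
	fmt.Println(packed&mask == callGoFunction) // true: the kind survives in the low byte
	fmt.Println(packed >> 8)                   // 42: the index is recovered by shifting
}
```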
216
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/offsetdata.go
generated
vendored
Normal file
@@ -0,0 +1,216 @@
package wazevoapi

import (
	"github.com/tetratelabs/wazero/internal/wasm"
)

const (
	// FunctionInstanceSize is the size of wazevo.functionInstance.
	FunctionInstanceSize = 24
	// FunctionInstanceExecutableOffset is an offset of `executable` field in wazevo.functionInstance
	FunctionInstanceExecutableOffset = 0
	// FunctionInstanceModuleContextOpaquePtrOffset is an offset of `moduleContextOpaquePtr` field in wazevo.functionInstance
	FunctionInstanceModuleContextOpaquePtrOffset = 8
	// FunctionInstanceTypeIDOffset is an offset of `typeID` field in wazevo.functionInstance
	FunctionInstanceTypeIDOffset = 16
)

const (
	// ExecutionContextOffsetExitCodeOffset is an offset of `exitCode` field in wazevo.executionContext
	ExecutionContextOffsetExitCodeOffset Offset = 0
	// ExecutionContextOffsetCallerModuleContextPtr is an offset of `callerModuleContextPtr` field in wazevo.executionContext
	ExecutionContextOffsetCallerModuleContextPtr Offset = 8
	// ExecutionContextOffsetOriginalFramePointer is an offset of `originalFramePointer` field in wazevo.executionContext
	ExecutionContextOffsetOriginalFramePointer Offset = 16
	// ExecutionContextOffsetOriginalStackPointer is an offset of `originalStackPointer` field in wazevo.executionContext
	ExecutionContextOffsetOriginalStackPointer Offset = 24
	// ExecutionContextOffsetGoReturnAddress is an offset of `goReturnAddress` field in wazevo.executionContext
	ExecutionContextOffsetGoReturnAddress Offset = 32
	// ExecutionContextOffsetStackBottomPtr is an offset of `stackBottomPtr` field in wazevo.executionContext
	ExecutionContextOffsetStackBottomPtr Offset = 40
	// ExecutionContextOffsetGoCallReturnAddress is an offset of `goCallReturnAddress` field in wazevo.executionContext
	ExecutionContextOffsetGoCallReturnAddress Offset = 48
	// ExecutionContextOffsetStackPointerBeforeGoCall is an offset of `StackPointerBeforeGoCall` field in wazevo.executionContext
	ExecutionContextOffsetStackPointerBeforeGoCall Offset = 56
	// ExecutionContextOffsetStackGrowRequiredSize is an offset of `stackGrowRequiredSize` field in wazevo.executionContext
	ExecutionContextOffsetStackGrowRequiredSize Offset = 64
	// ExecutionContextOffsetMemoryGrowTrampolineAddress is an offset of `memoryGrowTrampolineAddress` field in wazevo.executionContext
	ExecutionContextOffsetMemoryGrowTrampolineAddress Offset = 72
	// ExecutionContextOffsetStackGrowCallTrampolineAddress is an offset of `stackGrowCallTrampolineAddress` field in wazevo.executionContext.
	ExecutionContextOffsetStackGrowCallTrampolineAddress Offset = 80
	// ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress is an offset of `checkModuleExitCodeTrampolineAddress` field in wazevo.executionContext.
	ExecutionContextOffsetCheckModuleExitCodeTrampolineAddress Offset = 88
	// ExecutionContextOffsetSavedRegistersBegin is an offset of the first element of `savedRegisters` field in wazevo.executionContext
	ExecutionContextOffsetSavedRegistersBegin Offset = 96
	// ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque is an offset of `goFunctionCallCalleeModuleContextOpaque` field in wazevo.executionContext
	ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque Offset = 1120
	// ExecutionContextOffsetTableGrowTrampolineAddress is an offset of `tableGrowTrampolineAddress` field in wazevo.executionContext
	ExecutionContextOffsetTableGrowTrampolineAddress Offset = 1128
	// ExecutionContextOffsetRefFuncTrampolineAddress is an offset of `refFuncTrampolineAddress` field in wazevo.executionContext
	ExecutionContextOffsetRefFuncTrampolineAddress      Offset = 1136
	ExecutionContextOffsetMemmoveAddress                Offset = 1144
	ExecutionContextOffsetFramePointerBeforeGoCall      Offset = 1152
	ExecutionContextOffsetMemoryWait32TrampolineAddress Offset = 1160
	ExecutionContextOffsetMemoryWait64TrampolineAddress Offset = 1168
	ExecutionContextOffsetMemoryNotifyTrampolineAddress Offset = 1176
)

// ModuleContextOffsetData allows the compilers to get the information about offsets to the fields of wazevo.moduleContextOpaque.
// This is unique per module.
type ModuleContextOffsetData struct {
	TotalSize int
	ModuleInstanceOffset,
	LocalMemoryBegin,
	ImportedMemoryBegin,
	ImportedFunctionsBegin,
	GlobalsBegin,
	TypeIDs1stElement,
	TablesBegin,
	BeforeListenerTrampolines1stElement,
	AfterListenerTrampolines1stElement,
	DataInstances1stElement,
	ElementInstances1stElement Offset
}

// ImportedFunctionOffset returns an offset of the i-th imported function.
// Each item is stored as wazevo.functionInstance whose size matches FunctionInstanceSize.
func (m *ModuleContextOffsetData) ImportedFunctionOffset(i wasm.Index) (
	executableOffset, moduleCtxOffset, typeIDOffset Offset,
) {
	base := m.ImportedFunctionsBegin + Offset(i)*FunctionInstanceSize
	return base, base + 8, base + 16
}

// GlobalInstanceOffset returns an offset of the i-th global instance.
func (m *ModuleContextOffsetData) GlobalInstanceOffset(i wasm.Index) Offset {
	return m.GlobalsBegin + Offset(i)*16
}

// Offset represents an offset of a field of a struct.
type Offset int32

// U32 encodes an Offset as uint32 for convenience.
func (o Offset) U32() uint32 {
	return uint32(o)
}

// I64 encodes an Offset as int64 for convenience.
func (o Offset) I64() int64 {
	return int64(o)
}

// U64 encodes an Offset as uint64 for convenience.
func (o Offset) U64() uint64 {
	return uint64(o)
}

// LocalMemoryBase returns an offset of the first byte of the local memory.
func (m *ModuleContextOffsetData) LocalMemoryBase() Offset {
	return m.LocalMemoryBegin
}

// LocalMemoryLen returns an offset of the length of the local memory buffer.
func (m *ModuleContextOffsetData) LocalMemoryLen() Offset {
	if l := m.LocalMemoryBegin; l >= 0 {
		return l + 8
	}
	return -1
}

// TableOffset returns an offset of the i-th table instance.
func (m *ModuleContextOffsetData) TableOffset(tableIndex int) Offset {
	return m.TablesBegin + Offset(tableIndex)*8
}

// NewModuleContextOffsetData creates a ModuleContextOffsetData determining the structure of moduleContextOpaque for the given Module.
// The structure is described in the comment of wazevo.moduleContextOpaque.
func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContextOffsetData {
	ret := ModuleContextOffsetData{}
	var offset Offset

	ret.ModuleInstanceOffset = 0
	offset += 8

	if m.MemorySection != nil {
		ret.LocalMemoryBegin = offset
		// buffer base + memory size.
		const localMemorySizeInOpaqueModuleContext = 16
		offset += localMemorySizeInOpaqueModuleContext
	} else {
		// Indicates that there's no local memory
		ret.LocalMemoryBegin = -1
	}

	if m.ImportMemoryCount > 0 {
		offset = align8(offset)
		// *wasm.MemoryInstance + imported memory's owner (moduleContextOpaque)
		const importedMemorySizeInOpaqueModuleContext = 16
		ret.ImportedMemoryBegin = offset
		offset += importedMemorySizeInOpaqueModuleContext
	} else {
		// Indicates that there's no imported memory
		ret.ImportedMemoryBegin = -1
	}

	if m.ImportFunctionCount > 0 {
		offset = align8(offset)
		ret.ImportedFunctionsBegin = offset
		// Each function is stored as a wazevo.functionInstance.
		size := int(m.ImportFunctionCount) * FunctionInstanceSize
		offset += Offset(size)
	} else {
		ret.ImportedFunctionsBegin = -1
	}

	if globals := int(m.ImportGlobalCount) + len(m.GlobalSection); globals > 0 {
		// Align to 16 bytes for globals, as f32/f64/v128 might be loaded via SIMD instructions.
		offset = align16(offset)
		ret.GlobalsBegin = offset
		// Pointers to *wasm.GlobalInstance.
		offset += Offset(globals) * 16
	} else {
		ret.GlobalsBegin = -1
	}

	if tables := len(m.TableSection) + int(m.ImportTableCount); tables > 0 {
		offset = align8(offset)
		ret.TypeIDs1stElement = offset
		offset += 8 // First element of TypeIDs.

		ret.TablesBegin = offset
		// Pointers to *wasm.TableInstance.
		offset += Offset(tables) * 8
	} else {
		ret.TypeIDs1stElement = -1
		ret.TablesBegin = -1
	}

	if withListener {
		offset = align8(offset)
		ret.BeforeListenerTrampolines1stElement = offset
		offset += 8 // First element of BeforeListenerTrampolines.

		ret.AfterListenerTrampolines1stElement = offset
		offset += 8 // First element of AfterListenerTrampolines.
	} else {
		ret.BeforeListenerTrampolines1stElement = -1
		ret.AfterListenerTrampolines1stElement = -1
	}

	ret.DataInstances1stElement = offset
	offset += 8 // First element of DataInstances.

	ret.ElementInstances1stElement = offset
	offset += 8 // First element of ElementInstances.

	ret.TotalSize = int(align16(offset))
	return ret
}

func align16(o Offset) Offset {
	return (o + 15) &^ 15
}

func align8(o Offset) Offset {
	return (o + 7) &^ 7
}
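align8 and align16 round an offset up to the next multiple of 8 or 16 with an add-and-mask trick. A standalone sketch of the general power-of-two form:

```go
package main

import "fmt"

// Adding (n-1) and then clearing the low bits with &^ rounds up to the next
// multiple of n, for any power-of-two n, which is exactly what align8 and
// align16 above do with fixed constants.
func main() {
	alignUp := func(o, n int32) int32 { return (o + n - 1) &^ (n - 1) }

	fmt.Println(alignUp(21, 8))  // 24
	fmt.Println(alignUp(24, 8))  // 24: already-aligned values are unchanged
	fmt.Println(alignUp(33, 16)) // 48
}
```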
96
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap.go
generated
vendored
Normal file
@@ -0,0 +1,96 @@
package wazevoapi

import (
	"fmt"
	"os"
	"strconv"
	"sync"
)

var PerfMap *Perfmap

func init() {
	if PerfMapEnabled {
		pid := os.Getpid()
		filename := "/tmp/perf-" + strconv.Itoa(pid) + ".map"

		fh, err := os.OpenFile(filename, os.O_APPEND|os.O_RDWR|os.O_CREATE, 0o644)
		if err != nil {
			panic(err)
		}

		PerfMap = &Perfmap{fh: fh}
	}
}

// Perfmap holds perfmap entries to be flushed into a perfmap file.
type Perfmap struct {
	entries []entry
	mux     sync.Mutex
	fh      *os.File
}

type entry struct {
	index  int
	offset int64
	size   uint64
	name   string
}

func (f *Perfmap) Lock() {
	f.mux.Lock()
}

func (f *Perfmap) Unlock() {
	f.mux.Unlock()
}

// AddModuleEntry buffers a perfmap entry for one function: index is the index of the
// function in the module, offset is the offset of the function in the module,
// size is the size of the function, and name is the name of the function.
//
// Note that the entries are not flushed into the perfmap file until Flush is called,
// and the entries are module-scoped; Perfmap must be locked until Flush is called.
func (f *Perfmap) AddModuleEntry(index int, offset int64, size uint64, name string) {
	e := entry{index: index, offset: offset, size: size, name: name}
	if f.entries == nil {
		f.entries = []entry{e}
		return
	}
	f.entries = append(f.entries, e)
}

// Flush writes the buffered perfmap entries into the perfmap file, adjusting each
// entry's address by the given `addr` and `functionOffsets`.
func (f *Perfmap) Flush(addr uintptr, functionOffsets []int) {
	defer func() {
		_ = f.fh.Sync()
	}()

	for _, e := range f.entries {
		if _, err := f.fh.WriteString(fmt.Sprintf("%x %s %s\n",
			uintptr(e.offset)+addr+uintptr(functionOffsets[e.index]),
			strconv.FormatUint(e.size, 16),
			e.name,
		)); err != nil {
			panic(err)
		}
	}
	f.entries = f.entries[:0]
}

// Clear clears the perfmap entries not yet flushed.
func (f *Perfmap) Clear() {
	f.entries = f.entries[:0]
}

// AddEntry writes a perfmap entry directly into the perfmap file, not using the entries.
func (f *Perfmap) AddEntry(addr uintptr, size uint64, name string) {
	_, err := f.fh.WriteString(fmt.Sprintf("%x %s %s\n",
		addr,
		strconv.FormatUint(size, 16),
		name,
	))
	if err != nil {
		panic(err)
	}
}
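Perfmap emits lines in the Linux perf map format (`ADDR SIZE NAME`, with address and size in hex). The following is a rough usage sketch, assuming a build with the `perfmap` tag so PerfMap is non-nil; `recordModuleForPerf` and its parameters are hypothetical names, not part of wazero.

```go
package wazevoapi

// recordModuleForPerf sketches how a caller might publish one compiled module's
// functions: buffer an entry per function under the lock, then Flush once with
// the executable's base address and the per-function offsets.
// sizes, names and functionOffsets are assumed to have the same length.
func recordModuleForPerf(executableAddr uintptr, functionOffsets []int, sizes []uint64, names []string) {
	if !PerfMapEnabled || PerfMap == nil {
		return
	}
	PerfMap.Lock()
	defer PerfMap.Unlock()
	for i, name := range names {
		// The per-function displacement is supplied via functionOffsets at Flush time.
		PerfMap.AddModuleEntry(i, 0, sizes[i], name)
	}
	PerfMap.Flush(executableAddr, functionOffsets)
}
```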
5
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_disabled.go
generated
vendored
Normal file

@@ -0,0 +1,5 @@
//go:build !perfmap

package wazevoapi

const PerfMapEnabled = false
5
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/perfmap_enabled.go
generated
vendored
Normal file

@@ -0,0 +1,5 @@
//go:build perfmap

package wazevoapi

const PerfMapEnabled = true
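These two files select the value of PerfMapEnabled at compile time, so perfmap support is off unless the binary is built with `go build -tags perfmap`. Because the flag is an untyped constant, guarded branches compile away entirely in the default build. A small sketch of a caller-side guard follows; `maybeRecord` is a hypothetical helper, not part of wazero.

```go
package wazevoapi

// maybeRecord writes a single perfmap line when perfmap support is compiled in;
// in a default build the whole body is dead code and is eliminated.
func maybeRecord(addr uintptr, size uint64, name string) {
	if PerfMapEnabled {
		PerfMap.AddEntry(addr, size, name)
	}
}
```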
215
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go
generated
vendored
Normal file

@@ -0,0 +1,215 @@
package wazevoapi

const poolPageSize = 128

// Pool is a pool of T that can be allocated and reset.
// This is useful to avoid unnecessary allocations.
type Pool[T any] struct {
	pages            []*[poolPageSize]T
	resetFn          func(*T)
	allocated, index int
}

// NewPool returns a new Pool.
// resetFn is called when a new T is allocated in Pool.Allocate.
func NewPool[T any](resetFn func(*T)) Pool[T] {
	var ret Pool[T]
	ret.resetFn = resetFn
	ret.Reset()
	return ret
}

// Allocated returns the number of allocated T currently in the pool.
func (p *Pool[T]) Allocated() int {
	return p.allocated
}

// Allocate allocates a new T from the pool.
func (p *Pool[T]) Allocate() *T {
	if p.index == poolPageSize {
		if len(p.pages) == cap(p.pages) {
			p.pages = append(p.pages, new([poolPageSize]T))
		} else {
			i := len(p.pages)
			p.pages = p.pages[:i+1]
			if p.pages[i] == nil {
				p.pages[i] = new([poolPageSize]T)
			}
		}
		p.index = 0
	}
	ret := &p.pages[len(p.pages)-1][p.index]
	if p.resetFn != nil {
		p.resetFn(ret)
	}
	p.index++
	p.allocated++
	return ret
}

// View returns the pointer to i-th item from the pool.
func (p *Pool[T]) View(i int) *T {
	page, index := i/poolPageSize, i%poolPageSize
	return &p.pages[page][index]
}

// Reset resets the pool.
func (p *Pool[T]) Reset() {
	p.pages = p.pages[:0]
	p.index = poolPageSize
	p.allocated = 0
}
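A rough usage sketch for Pool, assuming a hypothetical `instruction` element type: allocate heavily inside a hot path, then Reset once per compilation so the 128-element pages are reused.

```go
package wazevoapi

// instruction is a hypothetical element type used only for illustration.
type instruction struct {
	opcode int
	args   []int
}

func buildInstructions(n int) {
	pool := NewPool[instruction](func(i *instruction) {
		i.opcode = 0
		i.args = i.args[:0] // clear contents but keep capacity
	})
	for k := 0; k < n; k++ {
		ins := pool.Allocate()
		ins.opcode = k
		ins.args = append(ins.args, k)
	}
	_ = pool.Allocated() // == n
	pool.Reset()         // previously returned pointers may now be handed out again
}
```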
// IDedPool is a pool of T that can be allocated and reset, with a way to get T by an ID.
type IDedPool[T any] struct {
	pool             Pool[T]
	idToItems        []*T
	maxIDEncountered int
}

// NewIDedPool returns a new IDedPool.
func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] {
	return IDedPool[T]{pool: NewPool[T](resetFn)}
}

// GetOrAllocate returns the T with the given id.
func (p *IDedPool[T]) GetOrAllocate(id int) *T {
	if p.maxIDEncountered < id {
		p.maxIDEncountered = id
	}
	if id >= len(p.idToItems) {
		p.idToItems = append(p.idToItems, make([]*T, id-len(p.idToItems)+1)...)
	}
	if p.idToItems[id] == nil {
		p.idToItems[id] = p.pool.Allocate()
	}
	return p.idToItems[id]
}

// Get returns the T with the given id, or nil if it's not allocated.
func (p *IDedPool[T]) Get(id int) *T {
	if id >= len(p.idToItems) {
		return nil
	}
	return p.idToItems[id]
}

// Reset resets the pool.
func (p *IDedPool[T]) Reset() {
	p.pool.Reset()
	for i := range p.idToItems {
		p.idToItems[i] = nil
	}
	p.maxIDEncountered = -1
}

// MaxIDEncountered returns the maximum id encountered so far.
func (p *IDedPool[T]) MaxIDEncountered() int {
	return p.maxIDEncountered
}
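IDedPool layers a stable id-to-item index on top of Pool: repeated lookups of the same id return the same pointer, allocating lazily on first use. A sketch of that pattern follows, with a hypothetical `labelInfo` record and `countLabelRefs` helper.

```go
package wazevoapi

// labelInfo is a hypothetical per-ID record used only for illustration.
type labelInfo struct{ refCount int }

func countLabelRefs(labelIDs []int) {
	pool := NewIDedPool[labelInfo](func(l *labelInfo) { l.refCount = 0 })
	for _, id := range labelIDs {
		pool.GetOrAllocate(id).refCount++ // same id -> same *labelInfo
	}
	for id := 0; id <= pool.MaxIDEncountered(); id++ {
		if info := pool.Get(id); info != nil { // ids never seen stay nil
			_ = info.refCount
		}
	}
	pool.Reset()
}
```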
// arraySize is the size of the array used in VarLengthPool's arrayPool.
// This is chosen to be 8, which is empirically a good number among 8, 12, 16 and 20.
const arraySize = 8

// VarLengthPool is a pool of VarLength[T] that can be allocated and reset.
type (
	VarLengthPool[T any] struct {
		arrayPool Pool[varLengthPoolArray[T]]
		slicePool Pool[[]T]
	}
	// varLengthPoolArray wraps an array and keeps track of the next index to be used to avoid the heap allocation.
	varLengthPoolArray[T any] struct {
		arr  [arraySize]T
		next int
	}
)

// VarLength is a variable length array that can be reused via a pool.
type VarLength[T any] struct {
	arr *varLengthPoolArray[T]
	slc *[]T
}

// NewVarLengthPool returns a new VarLengthPool.
func NewVarLengthPool[T any]() VarLengthPool[T] {
	return VarLengthPool[T]{
		arrayPool: NewPool[varLengthPoolArray[T]](func(v *varLengthPoolArray[T]) {
			v.next = 0
		}),
		slicePool: NewPool[[]T](func(i *[]T) {
			*i = (*i)[:0]
		}),
	}
}

// NewNilVarLength returns a new VarLength[T] with a nil backing.
func NewNilVarLength[T any]() VarLength[T] {
	return VarLength[T]{}
}

// Allocate allocates a new VarLength[T] from the pool.
func (p *VarLengthPool[T]) Allocate(knownMin int) VarLength[T] {
	if knownMin <= arraySize {
		arr := p.arrayPool.Allocate()
		return VarLength[T]{arr: arr}
	}
	slc := p.slicePool.Allocate()
	return VarLength[T]{slc: slc}
}

// Reset resets the pool.
func (p *VarLengthPool[T]) Reset() {
	p.arrayPool.Reset()
	p.slicePool.Reset()
}

// Append appends items to the backing slice just like the `append` builtin function in Go.
func (i VarLength[T]) Append(p *VarLengthPool[T], items ...T) VarLength[T] {
	if i.slc != nil {
		*i.slc = append(*i.slc, items...)
		return i
	}

	if i.arr == nil {
		i.arr = p.arrayPool.Allocate()
	}

	arr := i.arr
	if arr.next+len(items) <= arraySize {
		for _, item := range items {
			arr.arr[arr.next] = item
			arr.next++
		}
	} else {
		slc := p.slicePool.Allocate()
		// Copy the array to the slice.
		for ptr := 0; ptr < arr.next; ptr++ {
			*slc = append(*slc, arr.arr[ptr])
		}
		i.slc = slc
		*i.slc = append(*i.slc, items...)
	}
	return i
}

// View returns the backing slice.
func (i VarLength[T]) View() []T {
	if i.slc != nil {
		return *i.slc
	} else if i.arr != nil {
		arr := i.arr
		return arr.arr[:arr.next]
	}
	return nil
}

// Cut cuts the backing slice to the given length.
// Precondition: n <= len(i.backing).
func (i VarLength[T]) Cut(n int) {
	if i.slc != nil {
		*i.slc = (*i.slc)[:n]
	} else if i.arr != nil {
		i.arr.next = n
	}
}
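VarLength keeps short lists (up to arraySize elements) in a pooled fixed array and only spills into a pooled slice once they grow past that, so typical short operand lists avoid individual heap allocations. A usage sketch follows; `collectOperands` is a hypothetical helper, not part of wazero.

```go
package wazevoapi

// collectOperands shows the intended pattern: start from a nil-backed value,
// Append through the pool, and read the result back with View. The pool owns
// the storage, so a later pool.Reset() reclaims everything at once.
func collectOperands(pool *VarLengthPool[int], values []int) []int {
	operands := NewNilVarLength[int]()
	for _, v := range values {
		operands = operands.Append(pool, v)
	}
	return operands.View()
}
```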
15
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/ptr.go
generated
vendored
Normal file

@@ -0,0 +1,15 @@
package wazevoapi

import "unsafe"

// PtrFromUintptr resurrects the original *T from the given uintptr.
// The caller of this function MUST be sure that ptr is valid.
func PtrFromUintptr[T any](ptr uintptr) *T {
	// Wrap ptr in a double pointer in order to avoid the unsafe access being flagged by the race detector.
	//
	// For example, if we have (*function)(unsafe.Pointer(ptr)) instead, then the race detector's "checkptr"
	// subroutine warns as "checkptr: pointer arithmetic result points to invalid allocation"
	// https://github.com/golang/go/blob/1ce7fcf139417d618c2730010ede2afb41664211/src/runtime/checkptr.go#L69
	var wrapped *uintptr = &ptr
	return *(**T)(unsafe.Pointer(wrapped))
}
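PtrFromUintptr is only safe when something else keeps the target allocation alive; the uintptr itself is invisible to the garbage collector. A round-trip sketch follows, with a hypothetical `moduleContext` type and `roundTrip` helper used only for illustration.

```go
package wazevoapi

import "unsafe"

// moduleContext is a hypothetical type used only for illustration.
type moduleContext struct{ id int }

func roundTrip(m *moduleContext) *moduleContext {
	// Valid only because the caller still holds m; the uintptr does not keep it alive.
	raw := uintptr(unsafe.Pointer(m))
	return PtrFromUintptr[moduleContext](raw)
}
```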
26
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/queue.go
generated
vendored
Normal file

@@ -0,0 +1,26 @@
package wazevoapi

// Queue is the resettable queue where the underlying slice is reused.
type Queue[T any] struct {
	index int
	Data  []T
}

func (q *Queue[T]) Enqueue(v T) {
	q.Data = append(q.Data, v)
}

func (q *Queue[T]) Dequeue() (ret T) {
	ret = q.Data[q.index]
	q.index++
	return
}

func (q *Queue[T]) Empty() bool {
	return q.index >= len(q.Data)
}

func (q *Queue[T]) Reset() {
	q.index = 0
	q.Data = q.Data[:0]
}
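Queue avoids reallocating its backing slice by advancing an index on Dequeue and truncating on Reset. A sketch of the typical worklist pattern follows; `drain` is a hypothetical helper, not part of wazero.

```go
package wazevoapi

// drain enqueues the inputs, dequeues until the queue reports Empty, then
// resets so the same backing slice can be reused for the next pass.
func drain(q *Queue[int], inputs []int) (sum int) {
	for _, v := range inputs {
		q.Enqueue(v)
	}
	for !q.Empty() {
		sum += q.Dequeue()
	}
	q.Reset()
	return sum
}
```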
13
vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/resetmap.go
generated
vendored
Normal file

@@ -0,0 +1,13 @@
package wazevoapi

// ResetMap resets the map to an empty state, or creates a new map if it is nil.
func ResetMap[K comparable, V any](m map[K]V) map[K]V {
	if m == nil {
		m = make(map[K]V)
	} else {
		for v := range m {
			delete(m, v)
		}
	}
	return m
}
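ResetMap lets a caller keep a map field across runs without reallocating it each time. A brief sketch follows, with a hypothetical `analyzer` type used only for illustration.

```go
package wazevoapi

// analyzer is a hypothetical type holding a per-run cache keyed by block ID.
type analyzer struct {
	blockState map[int]string
}

// beginPass clears the cache in place, allocating it only on first use.
func (a *analyzer) beginPass() {
	a.blockState = ResetMap(a.blockState)
}
```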