[experiment] add alternative wasm sqlite3 implementation available via build-tag (#2863)

This allows for building GoToSocial with [SQLite transpiled to WASM](https://github.com/ncruces/go-sqlite3) and accessed through [Wazero](https://wazero.io/).
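For context, a minimal sketch of how the ncruces driver is typically wired up through `database/sql` (package paths follow the go-sqlite3 README; GoToSocial gates this path behind its own build tag, which is not reproduced here):

```go
package main

import (
	"database/sql"
	"log"

	_ "github.com/ncruces/go-sqlite3/driver" // registers the "sqlite3" database/sql driver
	_ "github.com/ncruces/go-sqlite3/embed"  // embeds the WASM-compiled SQLite, executed via wazero
)

func main() {
	// No CGO involved: SQLite runs as a WASM module inside the wazero runtime.
	db, err := sql.Open("sqlite3", "file:example.db")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if _, err := db.Exec(`CREATE TABLE IF NOT EXISTS t (id INTEGER PRIMARY KEY)`); err != nil {
		log.Fatal(err)
	}
}
```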
kim 2024-05-27 15:46:15 +00:00 committed by GitHub
commit 1e7b32490d
398 changed files with 86174 additions and 684 deletions

7
vendor/github.com/tetratelabs/wazero/.editorconfig generated vendored Normal file

@@ -0,0 +1,7 @@
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true

2
vendor/github.com/tetratelabs/wazero/.gitattributes generated vendored Normal file

@@ -0,0 +1,2 @@
# Improves experience of commands like `make format` on Windows
* text=auto eol=lf

45
vendor/github.com/tetratelabs/wazero/.gitignore generated vendored Normal file

@@ -0,0 +1,45 @@
# If you prefer the allow list template instead of the deny list, see community template:
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
#
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
/wazero
build
dist
# Test binary, built with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Dependency directories (remove the comment below to include it)
# vendor/
# Go workspace file
go.work
# Goland
.idea
# AssemblyScript
node_modules
package-lock.json
# codecov.io
/coverage.txt
.vagrant
zig-cache/
zig-out/
.DS_Store
# Ignore compiled stdlib test cases.
/internal/integration_test/stdlibs/testdata
/internal/integration_test/libsodium/testdata

3
vendor/github.com/tetratelabs/wazero/.gitmodules generated vendored Normal file

@@ -0,0 +1,3 @@
[submodule "site/themes/hello-friend"]
path = site/themes/hello-friend
url = https://github.com/panr/hugo-theme-hello-friend.git

75
vendor/github.com/tetratelabs/wazero/CONTRIBUTING.md generated vendored Normal file

@@ -0,0 +1,75 @@
# Contributing
We welcome contributions from the community. Please read the following guidelines carefully to maximize the chances of your PR being merged.
## Coding Style
- To ensure your change passes format checks, run `make check`. To format your files, you can run `make format`.
- We follow standard Go table-driven tests and use an internal [testing library](./internal/testing/require) to assert correctness. To verify all tests pass, you can run `make test`.
## DCO
We require a DCO sign-off line in every commit to this repo.
The sign-off is a simple line at the end of the explanation for the
patch, which certifies that you wrote it or otherwise have the right to
pass it on as an open-source patch. The rules are pretty simple: if you
can certify the below (from
[developercertificate.org](https://developercertificate.org/)):
```
Developer Certificate of Origin
Version 1.1
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
```
then you just add a line to every git commit message:
Signed-off-by: Joe Smith <joe@gmail.com>
using your real name (sorry, no pseudonyms or anonymous contributions).
You can add the sign off when creating the git commit via `git commit -s`.
## Code Reviews
* The pull request title should describe what the change does and not embed issue numbers.
The pull request should only be blank when the change is minor. Any feature should include
a description of the change and what motivated it. If the change or design changes through
review, please keep the title and description updated accordingly.
* A single approval is sufficient to merge. If a reviewer asks for
changes in a PR they should be addressed before the PR is merged,
even if another reviewer has already approved the PR.
* During the review, address the comments and commit the changes
_without_ squashing the commits. This facilitates incremental reviews
since the reviewer does not go through all the code again to find out
what has changed since the last review. When a change goes out of sync with main,
please rebase and force push, keeping the original commits where practical.
* Commits are squashed prior to merging a pull request, using the title
as commit message by default. Maintainers may request contributors to
edit the pull request title to ensure that it remains descriptive as a
commit message. Alternatively, maintainers may change the commit message directly.

201
vendor/github.com/tetratelabs/wazero/LICENSE generated vendored Normal file

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2020-2023 wazero authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

381
vendor/github.com/tetratelabs/wazero/Makefile generated vendored Normal file

@@ -0,0 +1,381 @@
gofumpt := mvdan.cc/gofumpt@v0.5.0
gosimports := github.com/rinchsan/gosimports/cmd/gosimports@v0.3.8
golangci_lint := github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2
asmfmt := github.com/klauspost/asmfmt/cmd/asmfmt@v1.3.2
# sync this with netlify.toml!
hugo := github.com/gohugoio/hugo@v0.115.2
# Make 3.81 doesn't support '**' globbing: Set explicitly instead of recursion.
all_sources := $(wildcard *.go */*.go */*/*.go */*/*/*.go */*/*/*.go */*/*/*/*.go)
all_testdata := $(wildcard testdata/* */testdata/* */*/testdata/* */*/testdata/*/* */*/*/testdata/*)
all_testing := $(wildcard internal/testing/* internal/testing/*/* internal/testing/*/*/*)
all_examples := $(wildcard examples/* examples/*/* examples/*/*/* */*/example/* */*/example/*/* */*/example/*/*/*)
all_it := $(wildcard internal/integration_test/* internal/integration_test/*/* internal/integration_test/*/*/*)
# main_sources exclude any test or example related code
main_sources := $(wildcard $(filter-out %_test.go $(all_testdata) $(all_testing) $(all_examples) $(all_it), $(all_sources)))
# main_packages collect the unique main source directories (sort will dedupe).
# Paths need to all start with ./, so we do that manually vs foreach which strips it.
main_packages := $(sort $(foreach f,$(dir $(main_sources)),$(if $(findstring ./,$(f)),./,./$(f))))
go_test_options ?= -timeout 300s
ensureCompilerFastest := -ldflags '-X github.com/tetratelabs/wazero/internal/integration_test/vs.ensureCompilerFastest=true'
.PHONY: bench
bench:
@go build ./internal/integration_test/bench/...
@# Don't use -test.benchmem as it isn't accurate when comparing against CGO libs
@for d in vs/time vs/wasmedge vs/wasmtime ; do \
cd ./internal/integration_test/$$d ; \
go test -bench=. . -tags='wasmedge' $(ensureCompilerFastest) ; \
cd - ;\
done
bench_testdata_dir := internal/integration_test/bench/testdata
.PHONY: build.bench
build.bench:
@tinygo build -o $(bench_testdata_dir)/case.wasm -scheduler=none --no-debug -target=wasi $(bench_testdata_dir)/case.go
.PHONY: test.examples
test.examples:
@go test $(go_test_options) ./examples/... ./imports/assemblyscript/example/... ./imports/emscripten/... ./imports/wasi_snapshot_preview1/example/...
.PHONY: build.examples.as
build.examples.as:
@cd ./imports/assemblyscript/example/testdata && npm install && npm run build
%.wasm: %.zig
@(cd $(@D); zig build -Doptimize=ReleaseSmall)
@mv $(@D)/zig-out/*/$(@F) $(@D)
.PHONY: build.examples.zig
build.examples.zig: examples/allocation/zig/testdata/greet.wasm imports/wasi_snapshot_preview1/example/testdata/zig/cat.wasm imports/wasi_snapshot_preview1/testdata/zig/wasi.wasm
@cd internal/testing/dwarftestdata/testdata/zig; zig build; mv zig-out/*/main.wasm ./ # Need DWARF custom sections.
tinygo_sources := examples/basic/testdata/add.go examples/allocation/tinygo/testdata/greet.go examples/cli/testdata/cli.go imports/wasi_snapshot_preview1/example/testdata/tinygo/cat.go imports/wasi_snapshot_preview1/testdata/tinygo/wasi.go cmd/wazero/testdata/cat/cat.go
.PHONY: build.examples.tinygo
build.examples.tinygo: $(tinygo_sources)
@for f in $^; do \
tinygo build -o $$(echo $$f | sed -e 's/\.go/\.wasm/') -scheduler=none --no-debug --target=wasi $$f; \
done
@mv cmd/wazero/testdata/cat/cat.wasm cmd/wazero/testdata/cat/cat-tinygo.wasm
# We use zig to build C as it is easy to install and embeds a copy of zig-cc.
# Note: Don't use "-Oz" as that breaks our wasi sock example.
c_sources := imports/wasi_snapshot_preview1/example/testdata/zig-cc/cat.c imports/wasi_snapshot_preview1/testdata/zig-cc/wasi.c internal/testing/dwarftestdata/testdata/zig-cc/main.c
.PHONY: build.examples.zig-cc
build.examples.zig-cc: $(c_sources)
@for f in $^; do \
zig cc --target=wasm32-wasi -o $$(echo $$f | sed -e 's/\.c/\.wasm/') $$f; \
done
# Here are the emcc args we use:
#
# * `-Oz` - most optimization for code size.
# * `--profiling` - adds the name section.
# * `-s STANDALONE_WASM` - ensures wasm is built for a non-js runtime.
# * `-s EXPORTED_FUNCTIONS=_malloc,_free` - export allocation functions so that
# they can be used externally as "malloc" and "free".
# * `-s WARN_ON_UNDEFINED_SYMBOLS=0` - imports not defined in JavaScript error
# otherwise. See https://github.com/emscripten-core/emscripten/issues/13641
# * `-s TOTAL_STACK=8KB -s TOTAL_MEMORY=64KB` - reduce memory default from 16MB
# to one page (64KB). To do this, we have to reduce the stack size.
# * `-s ALLOW_MEMORY_GROWTH` - allows "memory.grow" instructions to succeed, but
# requires a function import "emscripten_notify_memory_growth".
emscripten_sources := $(wildcard imports/emscripten/testdata/*.cc)
.PHONY: build.examples.emscripten
build.examples.emscripten: $(emscripten_sources)
@for f in $^; do \
em++ -Oz --profiling \
-s STANDALONE_WASM \
-s EXPORTED_FUNCTIONS=_malloc,_free \
-s WARN_ON_UNDEFINED_SYMBOLS=0 \
-s TOTAL_STACK=8KB -s TOTAL_MEMORY=64KB \
-s ALLOW_MEMORY_GROWTH \
--std=c++17 -o $$(echo $$f | sed -e 's/\.cc/\.wasm/') $$f; \
done
%/greet.wasm : cargo_target := wasm32-unknown-unknown
%/cat.wasm : cargo_target := wasm32-wasi
%/wasi.wasm : cargo_target := wasm32-wasi
.PHONY: build.examples.rust
build.examples.rust: examples/allocation/rust/testdata/greet.wasm imports/wasi_snapshot_preview1/example/testdata/cargo-wasi/cat.wasm imports/wasi_snapshot_preview1/testdata/cargo-wasi/wasi.wasm internal/testing/dwarftestdata/testdata/rust/main.wasm.xz
# Normally, we build release because it is smaller. Testing dwarf requires the debug build.
internal/testing/dwarftestdata/testdata/rust/main.wasm.xz:
cd $(@D) && cargo wasi build
mv $(@D)/target/wasm32-wasi/debug/main.wasm $(@D)
cd $(@D) && xz -k -f ./main.wasm # Rust's DWARF section is huge, so compress it.
# Builds rust using cargo normally, or cargo-wasi.
%.wasm: %.rs
@(cd $(@D); cargo $(if $(findstring wasi,$(cargo_target)),wasi build,build --target $(cargo_target)) --release)
@mv $(@D)/target/$(cargo_target)/release/$(@F) $(@D)
spectest_base_dir := internal/integration_test/spectest
spectest_v1_dir := $(spectest_base_dir)/v1
spectest_v1_testdata_dir := $(spectest_v1_dir)/testdata
spec_version_v1 := wg-1.0
spectest_v2_dir := $(spectest_base_dir)/v2
spectest_v2_testdata_dir := $(spectest_v2_dir)/testdata
# Latest draft state as of March 12, 2024.
spec_version_v2 := 1c5e5d178bd75c79b7a12881c529098beaee2a05
spectest_threads_dir := $(spectest_base_dir)/threads
spectest_threads_testdata_dir := $(spectest_threads_dir)/testdata
# From https://github.com/WebAssembly/threads/tree/upstream-rebuild which has not been merged to main yet.
# It will likely be renamed to main in the future - https://github.com/WebAssembly/threads/issues/216.
spec_version_threads := 3635ca51a17e57e106988846c5b0e0cc48ac04fc
.PHONY: build.spectest
build.spectest:
@$(MAKE) build.spectest.v1
@$(MAKE) build.spectest.v2
.PHONY: build.spectest.v1
build.spectest.v1: # Note: wabt by default uses >1.0 features, so wast2json flags might drift as they include more. See WebAssembly/wabt#1878
@rm -rf $(spectest_v1_testdata_dir)
@mkdir -p $(spectest_v1_testdata_dir)
@cd $(spectest_v1_testdata_dir) \
&& curl -sSL 'https://api.github.com/repos/WebAssembly/spec/contents/test/core?ref=$(spec_version_v1)' | jq -r '.[]| .download_url' | grep -E ".wast" | xargs -Iurl curl -sJL url -O
@cd $(spectest_v1_testdata_dir) && for f in `find . -name '*.wast'`; do \
perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"f32.demote_f64"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \(f32.const nan:canonical\)\)/g' $$f; \
perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"f32.demote_f64"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \(f32.const nan:arithmetic\)\)/g' $$f; \
perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"f64\.promote_f32"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \(f64.const nan:canonical\)\)/g' $$f; \
perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"f64\.promote_f32"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \(f64.const nan:arithmetic\)\)/g' $$f; \
perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \($$2.const nan:canonical\)\)/g' $$f; \
perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \($$2.const nan:arithmetic\)\)/g' $$f; \
perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\s\([a-z0-9.\s+-:]+\)\))\)/\(assert_return $$1 \($$2.const nan:canonical\)\)/g' $$f; \
perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\s\([a-z0-9.\s+-:]+\)\))\)/\(assert_return $$1 \($$2.const nan:arithmetic\)\)/g' $$f; \
perl -pi -e 's/\(assert_return_canonical_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \($$2.const nan:canonical\)\)/g' $$f; \
perl -pi -e 's/\(assert_return_arithmetic_nan\s(\(invoke\s"[a-z._0-9]+"\s\((f[0-9]{2})\.const\s[a-z0-9.+:-]+\)\))\)/\(assert_return $$1 \($$2.const nan:arithmetic\)\)/g' $$f; \
wast2json \
--disable-saturating-float-to-int \
--disable-sign-extension \
--disable-simd \
--disable-multi-value \
--disable-bulk-memory \
--disable-reference-types \
--debug-names $$f; \
done
.PHONY: build.spectest.v2
build.spectest.v2: # Note: SIMD cases are placed in the "simd" subdirectory.
@mkdir -p $(spectest_v2_testdata_dir)
@cd $(spectest_v2_testdata_dir) \
&& curl -sSL 'https://api.github.com/repos/WebAssembly/spec/contents/test/core?ref=$(spec_version_v2)' | jq -r '.[]| .download_url' | grep -E ".wast" | xargs -Iurl curl -sJL url -O
@cd $(spectest_v2_testdata_dir) \
&& curl -sSL 'https://api.github.com/repos/WebAssembly/spec/contents/test/core/simd?ref=$(spec_version_v2)' | jq -r '.[]| .download_url' | grep -E ".wast" | xargs -Iurl curl -sJL url -O
@cd $(spectest_v2_testdata_dir) && for f in `find . -name '*.wast'`; do \
wast2json --debug-names --no-check $$f || true; \
done # Ignore the error here as some tests (e.g. comments.wast right now) are not supported by wast2json yet.
# Note: We currently cannot build the "threads" subdirectory that spawns threads due to missing support in wast2json.
# https://github.com/WebAssembly/wabt/issues/2348#issuecomment-1878003959
.PHONY: build.spectest.threads
build.spectest.threads:
@mkdir -p $(spectest_threads_testdata_dir)
@cd $(spectest_threads_testdata_dir) \
&& curl -sSL 'https://api.github.com/repos/WebAssembly/threads/contents/test/core?ref=$(spec_version_threads)' | jq -r '.[]| .download_url' | grep -E "atomic.wast" | xargs -Iurl curl -sJL url -O
@cd $(spectest_threads_testdata_dir) && for f in `find . -name '*.wast'`; do \
wast2json --enable-threads --debug-names $$f; \
done
.PHONY: test
test:
@go test $(go_test_options) $$(go list ./... | grep -vE '$(spectest_v1_dir)|$(spectest_v2_dir)')
@cd internal/version/testdata && go test $(go_test_options) ./...
@cd internal/integration_test/fuzz/wazerolib && CGO_ENABLED=0 WASM_BINARY_PATH=testdata/test.wasm go test ./...
.PHONY: coverage
# replace spaces with commas
coverpkg = $(shell echo $(main_packages) | tr ' ' ',')
coverage: ## Generate test coverage
@go test -coverprofile=coverage.txt -covermode=atomic --coverpkg=$(coverpkg) $(main_packages)
@go tool cover -func coverage.txt
.PHONY: spectest
spectest:
@$(MAKE) spectest.v1
@$(MAKE) spectest.v2
spectest.v1:
@go test $(go_test_options) $$(go list ./... | grep $(spectest_v1_dir))
spectest.v2:
@go test $(go_test_options) $$(go list ./... | grep $(spectest_v2_dir))
golangci_lint_path := $(shell go env GOPATH)/bin/golangci-lint
$(golangci_lint_path):
@go install $(golangci_lint)
golangci_lint_goarch ?= $(shell go env GOARCH)
.PHONY: lint
lint: $(golangci_lint_path)
@GOARCH=$(golangci_lint_goarch) CGO_ENABLED=0 $(golangci_lint_path) run --timeout 5m
.PHONY: format
format:
@go run $(gofumpt) -l -w .
@go run $(gosimports) -local github.com/tetratelabs/ -w $(shell find . -name '*.go' -type f)
@go run $(asmfmt) -w $(shell find . -name '*.s' -type f)
.PHONY: check # Pre-flight check for pull requests
check:
# The following checks help ensure our platform-specific code used for system
# calls safely falls back on a platform unsupported by the compiler engine.
# This makes sure the interpreter can be used. Most often the package that can
# drift here is "platform" or "sysfs":
#
# Ensure we build on plan9. See #1578
@GOARCH=amd64 GOOS=plan9 go build ./...
# Ensure we build on gojs. See #1526.
@GOARCH=wasm GOOS=js go build ./...
# Ensure we build on wasip1. See #1526.
@GOARCH=wasm GOOS=wasip1 go build ./...
# Ensure we build on aix. See #1723
@GOARCH=ppc64 GOOS=aix go build ./...
# Ensure we build on windows:
@GOARCH=amd64 GOOS=windows go build ./...
# Ensure we build on an arbitrary operating system:
@GOARCH=amd64 GOOS=dragonfly go build ./...
# Ensure we build on solaris/illumos:
@GOARCH=amd64 GOOS=illumos go build ./...
@GOARCH=amd64 GOOS=solaris go build ./...
# Ensure we build on linux arm for Dapr:
# gh release view -R dapr/dapr --json assets --jq 'first(.assets[] | select(.name = "daprd_linux_arm.tar.gz") | {url, downloadCount})'
@GOARCH=arm GOOS=linux go build ./...
# Ensure we build on linux 386 for Trivy:
# gh release view -R aquasecurity/trivy --json assets --jq 'first(.assets[] | select(.name| test("Linux-32bit.*tar.gz")) | {url, downloadCount})'
@GOARCH=386 GOOS=linux go build ./...
# Ensure we build on FreeBSD amd64 for Trivy:
# gh release view -R aquasecurity/trivy --json assets --jq 'first(.assets[] | select(.name| test("FreeBSD-64bit.*tar.gz")) | {url, downloadCount})'
@GOARCH=amd64 GOOS=freebsd go build ./...
@$(MAKE) lint golangci_lint_goarch=arm64
@$(MAKE) lint golangci_lint_goarch=amd64
@$(MAKE) format
@go mod tidy
@if [ ! -z "`git status -s`" ]; then \
echo "The following differences will fail CI until committed:"; \
git diff --exit-code; \
fi
.PHONY: site
site: ## Serve website content
@git submodule update --init
@cd site && go run $(hugo) server --minify --disableFastRender --baseURL localhost:1313 --cleanDestinationDir -D
.PHONY: clean
clean: ## Ensure a clean build
@rm -rf dist build coverage.txt
@go clean -testcache
fuzz_default_flags := --no-trace-compares --sanitizer=none -- -rss_limit_mb=8192
fuzz_timeout_seconds ?= 10
.PHONY: fuzz
fuzz:
@cd internal/integration_test/fuzz && cargo test
@cd internal/integration_test/fuzz && cargo fuzz run logging_no_diff $(fuzz_default_flags) -max_total_time=$(fuzz_timeout_seconds)
@cd internal/integration_test/fuzz && cargo fuzz run no_diff $(fuzz_default_flags) -max_total_time=$(fuzz_timeout_seconds)
@cd internal/integration_test/fuzz && cargo fuzz run memory_no_diff $(fuzz_default_flags) -max_total_time=$(fuzz_timeout_seconds)
@cd internal/integration_test/fuzz && cargo fuzz run validation $(fuzz_default_flags) -max_total_time=$(fuzz_timeout_seconds)
libsodium:
cd ./internal/integration_test/libsodium/testdata && \
curl -s "https://api.github.com/repos/jedisct1/webassembly-benchmarks/contents/2022-12/wasm?ref=7e86d68e99e60130899fbe3b3ab6e9dce9187a7c" \
| jq -r '.[] | .download_url' | xargs -n 1 curl -LO
#### CLI release related ####
VERSION ?= dev
# Default to a dummy version 0.0.1.1, which is always lower than a real release.
# Legal version values should look like 'x.x.x.x' where x is an integer from 0 to 65534.
# https://learn.microsoft.com/en-us/windows/win32/msi/productversion?redirectedfrom=MSDN
# https://stackoverflow.com/questions/9312221/msi-version-numbers
MSI_VERSION ?= 0.0.1.1
non_windows_platforms := darwin_amd64 darwin_arm64 linux_amd64 linux_arm64
non_windows_archives := $(non_windows_platforms:%=dist/wazero_$(VERSION)_%.tar.gz)
windows_platforms := windows_amd64 # TODO: add arm64 windows once we start testing on it.
windows_archives := $(windows_platforms:%=dist/wazero_$(VERSION)_%.zip) $(windows_platforms:%=dist/wazero_$(VERSION)_%.msi)
checksum_txt := dist/wazero_$(VERSION)_checksums.txt
# define macros for multi-platform builds. these parse the filename being built
go-arch = $(if $(findstring amd64,$1),amd64,arm64)
go-os = $(if $(findstring .exe,$1),windows,$(if $(findstring linux,$1),linux,darwin))
# msi-arch is a macro so we can detect it based on the file naming convention
msi-arch = $(if $(findstring amd64,$1),x64,arm64)
build/wazero_%/wazero:
$(call go-build,$@,$<)
build/wazero_%/wazero.exe:
$(call go-build,$@,$<)
dist/wazero_$(VERSION)_%.tar.gz: build/wazero_%/wazero
@echo tar.gz "tarring $@"
@mkdir -p $(@D)
# On Windows, we pass the special flag `--mode='+rx'` to ensure that we set the executable flag.
# This is only supported by GNU Tar, so we set it conditionally.
@tar -C $(<D) -cpzf $@ $(if $(findstring Windows_NT,$(OS)),--mode='+rx',) $(<F)
@echo tar.gz "ok"
define go-build
@echo "building $1"
@# $(go:go=) removes the trailing 'go', so we can insert cross-build variables
@$(go:go=) CGO_ENABLED=0 GOOS=$(call go-os,$1) GOARCH=$(call go-arch,$1) go build \
-ldflags "-s -w -X github.com/tetratelabs/wazero/internal/version.version=$(VERSION)" \
-o $1 $2 ./cmd/wazero
@echo build "ok"
endef
# this makes a marker file ending in .signed to avoid repeatedly calling codesign
%.signed: %
$(call codesign,$<)
@touch $@
# This requires osslsigncode package (apt or brew) or latest windows release from mtrojnar/osslsigncode
#
# Default is self-signed while production should be a Digicert signing key
#
# Ex.
# ```bash
# keytool -genkey -alias wazero -storetype PKCS12 -keyalg RSA -keysize 2048 -storepass wazero-bunch \
# -keystore wazero.p12 -dname "O=wazero,CN=wazero.io" -validity 3650
# ```
WINDOWS_CODESIGN_P12 ?= packaging/msi/wazero.p12
WINDOWS_CODESIGN_PASSWORD ?= wazero-bunch
define codesign
@printf "$(ansi_format_dark)" codesign "signing $1"
@osslsigncode sign -h sha256 -pkcs12 ${WINDOWS_CODESIGN_P12} -pass "${WINDOWS_CODESIGN_PASSWORD}" \
-n "wazero is the zero dependency WebAssembly runtime for Go developers" -i https://wazero.io -t http://timestamp.digicert.com \
$(if $(findstring msi,$(1)),-add-msi-dse) -in $1 -out $1-signed
@mv $1-signed $1
@printf "$(ansi_format_bright)" codesign "ok"
endef
# This task is only supported on Windows, where we use candle.exe (compile wxs to wixobj) and light.exe (link to msi)
dist/wazero_$(VERSION)_%.msi: build/wazero_%/wazero.exe.signed
ifeq ($(OS),Windows_NT)
@echo msi "building $@"
@mkdir -p $(@D)
@candle -nologo -arch $(call msi-arch,$@) -dVersion=$(MSI_VERSION) -dBin=$(<:.signed=) -o build/wazero.wixobj packaging/msi/wazero.wxs
@light -nologo -o $@ build/wazero.wixobj -spdb
$(call codesign,$@)
@echo msi "ok"
endif
dist/wazero_$(VERSION)_%.zip: build/wazero_%/wazero.exe.signed
@echo zip "zipping $@"
@mkdir -p $(@D)
@zip -qj $@ $(<:.signed=)
@echo zip "ok"
# Darwin doesn't have sha256sum. See https://github.com/actions/virtual-environments/issues/90
sha256sum := $(if $(findstring darwin,$(shell go env GOOS)),shasum -a 256,sha256sum)
$(checksum_txt):
@cd $(@D); touch $(@F); $(sha256sum) * >> $(@F)
dist: $(non_windows_archives) $(if $(findstring Windows_NT,$(OS)),$(windows_archives),) $(checksum_txt)

2
vendor/github.com/tetratelabs/wazero/NOTICE generated vendored Normal file

@@ -0,0 +1,2 @@
wazero
Copyright 2020-2023 wazero authors

1587
vendor/github.com/tetratelabs/wazero/RATIONALE.md generated vendored Normal file

File diff suppressed because it is too large

132
vendor/github.com/tetratelabs/wazero/README.md generated vendored Normal file

@@ -0,0 +1,132 @@
# wazero: the zero dependency WebAssembly runtime for Go developers
[![WebAssembly Core Specification Test](https://github.com/tetratelabs/wazero/actions/workflows/spectest.yaml/badge.svg)](https://github.com/tetratelabs/wazero/actions/workflows/spectest.yaml) [![Go Reference](https://pkg.go.dev/badge/github.com/tetratelabs/wazero.svg)](https://pkg.go.dev/github.com/tetratelabs/wazero) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
WebAssembly is a way to safely run code compiled in other languages. Runtimes
execute WebAssembly Modules (Wasm), which are most often binaries with a `.wasm`
extension.
wazero is a WebAssembly Core Specification [1.0][1] and [2.0][2] compliant
runtime written in Go. It has *zero dependencies*, and doesn't rely on CGO.
This means you can run applications in other languages and still keep cross
compilation.
Import wazero and extend your Go application with code written in any language!
## Example
The best way to learn wazero is by trying one of our [examples](examples/README.md). The
most [basic example](examples/basic) extends a Go application with an addition
function defined in WebAssembly.
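A rough sketch of the pattern those examples follow (the `add.wasm` module and its exported `add` function are placeholders, not files from this repo):
```go
package main

import (
	"context"
	_ "embed"
	"log"

	"github.com/tetratelabs/wazero"
)

// add.wasm is assumed to export an "add" function taking two i32 parameters.
//
//go:embed add.wasm
var addWasm []byte

func main() {
	ctx := context.Background()
	r := wazero.NewRuntime(ctx) // Compiler where supported, Interpreter otherwise.
	defer r.Close(ctx)

	mod, err := r.Instantiate(ctx, addWasm)
	if err != nil {
		log.Fatal(err)
	}
	results, err := mod.ExportedFunction("add").Call(ctx, 1, 2)
	if err != nil {
		log.Fatal(err)
	}
	log.Println(results[0]) // 3
}
```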
## Runtime
There are two runtime configurations supported in wazero, and _Compiler_ is the
default: e.g. with `wazero.NewRuntime(ctx)`, the Compiler is used if supported. You
can also force the interpreter like so:
```go
r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigInterpreter())
```
### Interpreter
Interpreter is a naive interpreter-based implementation of the Wasm virtual
machine. Its implementation doesn't have any platform (GOARCH, GOOS) specific
code, therefore the _interpreter_ can be used for any compilation target available
for Go (such as `riscv64`).
### Compiler
Compiler compiles WebAssembly modules into machine code ahead of time (AOT),
during `Runtime.CompileModule`. This means your WebAssembly functions execute
natively at runtime. Compiler is faster than Interpreter, often by an order of
magnitude (10x) or more. This is done without host-specific dependencies.
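Conversely, to opt into the Compiler explicitly (a sketch, meaningful only on supported platforms such as amd64 and arm64):
```go
r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigCompiler())
```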
### Conformance
Both runtimes pass WebAssembly Core [1.0][7] and [2.0][14] specification tests
on supported platforms:
| Runtime | Usage | amd64 | arm64 | others |
|:-----------:|:--------------------------------------:|:-----:|:-----:|:------:|
| Interpreter | `wazero.NewRuntimeConfigInterpreter()` | ✅ | ✅ | ✅ |
| Compiler | `wazero.NewRuntimeConfigCompiler()` | ✅ | ✅ | ❌ |
## Support Policy
The below support policy focuses on compatibility concerns of those embedding
wazero into their Go applications.
### wazero
wazero's [1.0 release][15] happened in March 2023, and is [in use][16] by many
projects and production sites.
We offer an API stability promise with semantic versioning. In other words, we
promise to not break any exported function signature without incrementing the
major version. This does not mean no innovation: New features and behaviors
happen with a minor version increment, e.g. 1.0.11 to 1.2.0. We also fix bugs
or change internal details with a patch version, e.g. 1.0.0 to 1.0.1.
You can get the latest version of wazero like this.
```bash
go get github.com/tetratelabs/wazero@latest
```
Please give us a [star][17] if you end up using wazero!
### Go
wazero has no dependencies except Go, so the only source of conflict in your
project's use of wazero is the Go version.
wazero follows the same version policy as Go's [Release Policy][10]: two
versions. wazero will ensure these versions work, and bug reports are valid only
if there's an issue with a currently supported Go version.
Additionally, wazero intentionally delays usage of language or standard library
features one additional version. For example, when Go 1.29 is released, wazero
can use language features or standard libraries added in 1.27. This is a
convenience for embedders who have a slower version policy than Go. However,
only supported Go versions may be used to raise support issues.
### Platform
wazero has two runtime modes: Interpreter and Compiler. The only supported operating
systems are ones we test, but that doesn't necessarily mean other operating
system versions won't work.
We currently test Linux (Ubuntu and scratch), MacOS and Windows as packaged by
[GitHub Actions][11], as well as compilation of 32-bit Linux and 64-bit FreeBSD.
* Interpreter
* Linux is tested on amd64 (native) as well as arm64 and riscv64 via emulation.
* MacOS and Windows are only tested on amd64.
* Compiler
* Linux is tested on amd64 (native) as well as arm64 via emulation.
* MacOS and Windows are only tested on amd64.
wazero has no dependencies and doesn't require CGO. This means it can also be
embedded in an application that doesn't use an operating system. This is a main
differentiator between wazero and alternatives.
We verify zero dependencies by running tests in Docker's [scratch image][12].
This approach ensures compatibility with any parent image.
-----
wazero is a registered trademark of Tetrate.io, Inc. in the United States and/or other countries.
[1]: https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/
[2]: https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/
[4]: https://github.com/WebAssembly/meetings/blob/main/process/subgroups.md
[5]: https://github.com/WebAssembly/WASI
[6]: https://pkg.go.dev/golang.org/x/sys/unix
[7]: https://github.com/WebAssembly/spec/tree/wg-1.0/test/core
[9]: https://github.com/tetratelabs/wazero/issues/506
[10]: https://go.dev/doc/devel/release
[11]: https://github.com/actions/virtual-environments
[12]: https://docs.docker.com/develop/develop-images/baseimages/#create-a-simple-parent-image-using-scratch
[13]: https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md
[14]: https://github.com/WebAssembly/spec/tree/d39195773112a22b245ffbe864bab6d1182ccb06/test/core
[15]: https://tetrate.io/blog/introducing-wazero-from-tetrate/
[16]: https://wazero.io/community/users/
[17]: https://github.com/tetratelabs/wazero/stargazers

214
vendor/github.com/tetratelabs/wazero/api/features.go generated vendored Normal file

@@ -0,0 +1,214 @@
package api
import (
"fmt"
"strings"
)
// CoreFeatures is a bit flag of WebAssembly Core specification features. See
// https://github.com/WebAssembly/proposals for proposals and their status.
//
// Constants define individual features, such as CoreFeatureMultiValue, or
// groups of "finished" features, assigned to a WebAssembly Core Specification
// version, e.g. CoreFeaturesV1 or CoreFeaturesV2.
//
// Note: Numeric values are not intended to be interpreted except as bit flags.
type CoreFeatures uint64
// CoreFeaturesV1 are features included in the WebAssembly Core Specification
// 1.0. As of late 2022, this is the only version that is a Web Standard (W3C
// Recommendation).
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/
const CoreFeaturesV1 = CoreFeatureMutableGlobal
// CoreFeaturesV2 are features included in the WebAssembly Core Specification
// 2.0 (20220419). As of late 2022, version 2.0 is a W3C working draft, not yet
// a Web Standard (W3C Recommendation).
//
// See https://www.w3.org/TR/2022/WD-wasm-core-2-20220419/appendix/changes.html#release-1-1
const CoreFeaturesV2 = CoreFeaturesV1 |
CoreFeatureBulkMemoryOperations |
CoreFeatureMultiValue |
CoreFeatureNonTrappingFloatToIntConversion |
CoreFeatureReferenceTypes |
CoreFeatureSignExtensionOps |
CoreFeatureSIMD
const (
// CoreFeatureBulkMemoryOperations adds instructions that modify ranges of
// memory or table entries ("bulk-memory-operations"). This is included in
// CoreFeaturesV2, but not CoreFeaturesV1.
//
// Here are the notable effects:
// - Adds `memory.fill`, `memory.init`, `memory.copy` and `data.drop`
// instructions.
// - Adds `table.init`, `table.copy` and `elem.drop` instructions.
// - Introduces a "passive" form of element and data segments.
// - Stops checking "active" element and data segment boundaries at
// compile-time, meaning they can error at runtime.
//
// Note: "bulk-memory-operations" is mixed with the "reference-types"
// proposal due to the WebAssembly Working Group merging them
// "mutually dependent". Therefore, enabling this feature requires enabling
// CoreFeatureReferenceTypes, and vice-versa.
//
// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md
// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md and
// https://github.com/WebAssembly/spec/pull/1287
CoreFeatureBulkMemoryOperations CoreFeatures = 1 << iota
// CoreFeatureMultiValue enables multiple values ("multi-value"). This is
// included in CoreFeaturesV2, but not CoreFeaturesV1.
//
// Here are the notable effects:
// - Function (`func`) types allow more than one result.
// - Block types (`block`, `loop` and `if`) can be arbitrary function
// types.
//
// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/multi-value/Overview.md
CoreFeatureMultiValue
// CoreFeatureMutableGlobal allows globals to be mutable. This is included
// in both CoreFeaturesV1 and CoreFeaturesV2.
//
// When false, an api.Global can never be cast to an api.MutableGlobal, and
// any wasm that includes global vars will fail to parse.
CoreFeatureMutableGlobal
// CoreFeatureNonTrappingFloatToIntConversion enables non-trapping
// float-to-int conversions ("nontrapping-float-to-int-conversion"). This
// is included in CoreFeaturesV2, but not CoreFeaturesV1.
//
// The only effect of enabling is allowing the following instructions,
// which return 0 on NaN instead of panicking.
// - `i32.trunc_sat_f32_s`
// - `i32.trunc_sat_f32_u`
// - `i32.trunc_sat_f64_s`
// - `i32.trunc_sat_f64_u`
// - `i64.trunc_sat_f32_s`
// - `i64.trunc_sat_f32_u`
// - `i64.trunc_sat_f64_s`
// - `i64.trunc_sat_f64_u`
//
// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/nontrapping-float-to-int-conversion/Overview.md
CoreFeatureNonTrappingFloatToIntConversion
// CoreFeatureReferenceTypes enables various instructions and features
// related to table and new reference types. This is included in
// CoreFeaturesV2, but not CoreFeaturesV1.
//
// - Introduction of new value types: `funcref` and `externref`.
// - Support for the following new instructions:
// - `ref.null`
// - `ref.func`
// - `ref.is_null`
// - `table.fill`
// - `table.get`
// - `table.grow`
// - `table.set`
// - `table.size`
// - Support for multiple tables per module:
// - `call_indirect`, `table.init`, `table.copy` and `elem.drop`
// - Support for instructions that can take a non-zero table index.
// - Element segments can take a non-zero table index.
//
// Note: "reference-types" is mixed with the "bulk-memory-operations"
// proposal due to the WebAssembly Working Group merging them
// "mutually dependent". Therefore, enabling this feature requires enabling
// CoreFeatureBulkMemoryOperations, and vice-versa.
//
// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md
// https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md and
// https://github.com/WebAssembly/spec/pull/1287
CoreFeatureReferenceTypes
// CoreFeatureSignExtensionOps enables sign extension instructions
// ("sign-extension-ops"). This is included in CoreFeaturesV2, but not
// CoreFeaturesV1.
//
// Adds instructions:
// - `i32.extend8_s`
// - `i32.extend16_s`
// - `i64.extend8_s`
// - `i64.extend16_s`
// - `i64.extend32_s`
//
// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/sign-extension-ops/Overview.md
CoreFeatureSignExtensionOps
// CoreFeatureSIMD enables the vector value type and vector instructions
// (aka SIMD). This is included in CoreFeaturesV2, but not CoreFeaturesV1.
//
// Note: The instruction list is too long to enumerate in godoc.
// See https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md
CoreFeatureSIMD
// Update experimental/features.go when adding elements here.
)
// SetEnabled enables or disables the feature or group of features.
func (f CoreFeatures) SetEnabled(feature CoreFeatures, val bool) CoreFeatures {
if val {
return f | feature
}
return f &^ feature
}
// IsEnabled returns true if the feature (or group of features) is enabled.
func (f CoreFeatures) IsEnabled(feature CoreFeatures) bool {
return f&feature != 0
}
// RequireEnabled returns an error if the feature (or group of features) is not
// enabled.
func (f CoreFeatures) RequireEnabled(feature CoreFeatures) error {
if f&feature == 0 {
return fmt.Errorf("feature %q is disabled", feature)
}
return nil
}
// String implements fmt.Stringer by returning each enabled feature.
func (f CoreFeatures) String() string {
var builder strings.Builder
for i := 0; i <= 63; i++ { // cycle through all bits to reduce code and maintenance
target := CoreFeatures(1 << i)
if f.IsEnabled(target) {
if name := featureName(target); name != "" {
if builder.Len() > 0 {
builder.WriteByte('|')
}
builder.WriteString(name)
}
}
}
return builder.String()
}
func featureName(f CoreFeatures) string {
switch f {
case CoreFeatureMutableGlobal:
// match https://github.com/WebAssembly/mutable-global
return "mutable-global"
case CoreFeatureSignExtensionOps:
// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/sign-extension-ops/Overview.md
return "sign-extension-ops"
case CoreFeatureMultiValue:
// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/multi-value/Overview.md
return "multi-value"
case CoreFeatureNonTrappingFloatToIntConversion:
// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/nontrapping-float-to-int-conversion/Overview.md
return "nontrapping-float-to-int-conversion"
case CoreFeatureBulkMemoryOperations:
// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/bulk-memory-operations/Overview.md
return "bulk-memory-operations"
case CoreFeatureReferenceTypes:
// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/reference-types/Overview.md
return "reference-types"
case CoreFeatureSIMD:
// match https://github.com/WebAssembly/spec/blob/wg-2.0.draft1/proposals/simd/SIMD.md
return "simd"
}
return ""
}
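As a usage note (not part of the vendored file), a minimal sketch of how these flags are commonly combined with wazero's runtime configuration, assuming the wazero and api packages:
```go
package main

import (
	"context"
	"fmt"

	"github.com/tetratelabs/wazero"
	"github.com/tetratelabs/wazero/api"
)

func main() {
	// Start from the 2.0 feature set, then inspect individual proposals.
	features := api.CoreFeaturesV2
	fmt.Println(features.IsEnabled(api.CoreFeatureSIMD)) // true
	fmt.Println(features.String())                       // pipe-separated proposal names

	ctx := context.Background()
	r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfig().WithCoreFeatures(features))
	defer r.Close(ctx)
}
```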

762
vendor/github.com/tetratelabs/wazero/api/wasm.go generated vendored Normal file

@@ -0,0 +1,762 @@
// Package api includes constants and interfaces used by both end-users and internal implementations.
package api
import (
"context"
"fmt"
"math"
"github.com/tetratelabs/wazero/internal/internalapi"
)
// ExternType classifies imports and exports with their respective types.
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#external-types%E2%91%A0
type ExternType = byte
const (
ExternTypeFunc ExternType = 0x00
ExternTypeTable ExternType = 0x01
ExternTypeMemory ExternType = 0x02
ExternTypeGlobal ExternType = 0x03
)
// The below are exported to consolidate parsing behavior for external types.
const (
// ExternTypeFuncName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeFunc.
ExternTypeFuncName = "func"
// ExternTypeTableName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeTable.
ExternTypeTableName = "table"
// ExternTypeMemoryName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeMemory.
ExternTypeMemoryName = "memory"
// ExternTypeGlobalName is the name of the WebAssembly 1.0 (20191205) Text Format field for ExternTypeGlobal.
ExternTypeGlobalName = "global"
)
// ExternTypeName returns the name of the WebAssembly 1.0 (20191205) Text Format field of the given type.
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A4
func ExternTypeName(et ExternType) string {
switch et {
case ExternTypeFunc:
return ExternTypeFuncName
case ExternTypeTable:
return ExternTypeTableName
case ExternTypeMemory:
return ExternTypeMemoryName
case ExternTypeGlobal:
return ExternTypeGlobalName
}
return fmt.Sprintf("%#x", et)
}
// ValueType describes a parameter or result type mapped to a WebAssembly
// function signature.
//
// The following describes how to convert between Wasm and Golang types:
//
// - ValueTypeI32 - EncodeU32 DecodeU32 for uint32 / EncodeI32 DecodeI32 for int32
// - ValueTypeI64 - uint64(int64)
// - ValueTypeF32 - EncodeF32 DecodeF32 from float32
// - ValueTypeF64 - EncodeF64 DecodeF64 from float64
// - ValueTypeExternref - uintptr(unsafe.Pointer(p)) where p is any pointer
// type in Go (e.g. *string)
//
// e.g. Given a Text Format type use (param i64) (result i64), no conversion is
// necessary.
//
// results, _ := fn(ctx, input)
// result := results[0]
//
// e.g. Given a Text Format type use (param f64) (result f64), conversion is
// necessary.
//
// results, _ := fn(ctx, api.EncodeF64(input))
// result := api.DecodeF64(results[0])
//
// Note: This is a type alias as it is easier to encode and decode in the
// binary format.
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#binary-valtype
type ValueType = byte
const (
// ValueTypeI32 is a 32-bit integer.
ValueTypeI32 ValueType = 0x7f
// ValueTypeI64 is a 64-bit integer.
ValueTypeI64 ValueType = 0x7e
// ValueTypeF32 is a 32-bit floating point number.
ValueTypeF32 ValueType = 0x7d
// ValueTypeF64 is a 64-bit floating point number.
ValueTypeF64 ValueType = 0x7c
// ValueTypeExternref is a externref type.
//
// Note: in wazero, externref type value are opaque raw 64-bit pointers,
// and the ValueTypeExternref type in the signature will be translated as
// uintptr in wazero's API level.
//
// For example, given the import function:
// (func (import "env" "f") (param externref) (result externref))
//
// This can be defined in Go as:
// r.NewHostModuleBuilder("env").
// NewFunctionBuilder().
// WithFunc(func(context.Context, _ uintptr) (_ uintptr) { return }).
// Export("f")
//
// Note: The usage of this type is toggled with api.CoreFeatureBulkMemoryOperations.
ValueTypeExternref ValueType = 0x6f
)
// ValueTypeName returns the type name of the given ValueType as a string.
// These type names match the names used in the WebAssembly text format.
//
// Note: This returns "unknown" if an undefined ValueType value is passed.
func ValueTypeName(t ValueType) string {
switch t {
case ValueTypeI32:
return "i32"
case ValueTypeI64:
return "i64"
case ValueTypeF32:
return "f32"
case ValueTypeF64:
return "f64"
case ValueTypeExternref:
return "externref"
}
return "unknown"
}
// Module is a sandboxed, ready to execute Wasm module. This can be used to get exported functions, etc.
//
// In WebAssembly terminology, this corresponds to a "Module Instance", but wazero calls the pre-instantiation module a
// "Compiled Module", as in wazero.CompiledModule; therefore we call this post-instantiation module simply "Module".
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#module-instances%E2%91%A0
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
// - Closing the wazero.Runtime closes any Module it instantiated.
type Module interface {
fmt.Stringer
// Name is the name this module was instantiated with. Exported functions can be imported with this name.
Name() string
// Memory returns a memory defined in this module or nil if there are none.
Memory() Memory
// ExportedFunction returns a function exported from this module or nil if it wasn't.
//
// Note: The default wazero.ModuleConfig attempts to invoke `_start`, which
// in rare cases can close the module. When in doubt, check IsClosed prior
// to invoking a function export after instantiation.
ExportedFunction(name string) Function
// ExportedFunctionDefinitions returns all the exported function
// definitions in this module, keyed on export name.
ExportedFunctionDefinitions() map[string]FunctionDefinition
// TODO: Table
// ExportedMemory returns a memory exported from this module or nil if it wasn't.
//
// WASI modules require exporting a Memory named "memory". This means that a module successfully initialized
// as a WASI Command or Reactor will never return nil for this name.
//
// See https://github.com/WebAssembly/WASI/blob/snapshot-01/design/application-abi.md#current-unstable-abi
ExportedMemory(name string) Memory
// ExportedMemoryDefinitions returns all the exported memory definitions
// in this module, keyed on export name.
//
// Note: As of WebAssembly Core Specification 2.0, there can be at most one
// memory.
ExportedMemoryDefinitions() map[string]MemoryDefinition
// ExportedGlobal returns a global exported from this module or nil if it wasn't.
ExportedGlobal(name string) Global
// CloseWithExitCode releases resources allocated for this Module. Use a non-zero exitCode parameter to indicate a
// failure to ExportedFunction callers.
//
// The error returned here, if present, is about resource de-allocation (such as I/O errors). Only the last error is
// returned, so a non-nil return means at least one error happened. Regardless of error, this Module will
// be removed, making its name available again.
//
// Calling this inside a host function is safe, and may cause ExportedFunction callers to receive a sys.ExitError
// with the exitCode.
CloseWithExitCode(ctx context.Context, exitCode uint32) error
// Closer closes this module by delegating to CloseWithExitCode with an exit code of zero.
Closer
// IsClosed returns true if the module is closed, so no longer usable.
//
// This can happen for the following reasons:
// - Closer was called directly.
// - A guest function called Closer indirectly, such as `_start` calling
// `proc_exit`, which internally closed the module.
// - wazero.RuntimeConfig `WithCloseOnContextDone` was enabled and a
// context completion closed the module.
//
// Where any of the above are possible, check this value before calling an
// ExportedFunction, even if you didn't previously receive a sys.ExitError.
// sys.ExitError is only returned on a non-zero exit code; a module that
// closes successfully will not result in one. A usage sketch follows this
// interface.
IsClosed() bool
internalapi.WazeroOnly
}
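// A minimal usage sketch, assuming `ctx` is a context.Context and `mod` is a
// Module returned by wazero.Runtime InstantiateModule. The export name
// "process" is illustrative only:
//
//	if mod.IsClosed() {
//		return // e.g. `_start` exited during instantiation
//	}
//	if fn := mod.ExportedFunction("process"); fn != nil {
//		if _, err := fn.Call(ctx); err != nil {
//			// handle err, possibly a sys.ExitError
//		}
//	}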
// Closer closes a resource.
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
type Closer interface {
// Close closes the resource.
//
// Note: The context parameter is used for value lookup, such as for
// logging. A canceled or otherwise done context will not prevent Close
// from succeeding.
Close(context.Context) error
}
// ExportDefinition is a WebAssembly type exported in a module
// (wazero.CompiledModule).
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
type ExportDefinition interface {
// ModuleName is the possibly empty name of the module defining this
// export.
//
// Note: This may be different from Module.Name, because a compiled module
// can be instantiated multiple times under different names.
ModuleName() string
// Index is the position in the module's index, imports first.
Index() uint32
// Import returns true with the module and name when this was imported.
// Otherwise, it returns false.
//
// Note: Empty string is valid for both names in the WebAssembly Core
// Specification, so "" "" is possible.
Import() (moduleName, name string, isImport bool)
// ExportNames include all exported names.
//
// Note: The empty name is allowed in the WebAssembly Core Specification,
// so "" is possible.
ExportNames() []string
internalapi.WazeroOnly
}
// MemoryDefinition is a WebAssembly memory exported in a module
// (wazero.CompiledModule). Units are in pages (64KB).
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
type MemoryDefinition interface {
ExportDefinition
// Min returns the possibly zero initial count of 64KB pages.
Min() uint32
// Max returns the possibly zero max count of 64KB pages, or false if
// unbounded.
Max() (uint32, bool)
internalapi.WazeroOnly
}
// FunctionDefinition is a WebAssembly function exported in a module
// (wazero.CompiledModule).
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#exports%E2%91%A0
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
type FunctionDefinition interface {
ExportDefinition
// Name is the module-defined name of the function, which is not necessarily
// the same as its export name.
Name() string
// DebugName identifies this function based on its Index or Name in the
// module. This is used for errors and stack traces. e.g. "env.abort".
//
// When the function name is empty, a substitute name is generated by
// prefixing '$' to its position in the index. Ex ".$0" is the
// first function (possibly imported) in an unnamed module.
//
// The format is dot-delimited module and function name, but there are no
// restrictions on the module and function name. This means either can be
// empty or include dots. e.g. "x.x.x" could mean module "x" and name "x.x",
// or it could mean module "x.x" and name "x".
//
// Note: This name is stable regardless of import or export. For example,
// if Import returns true, the value is still based on the Name or Index
// and not the imported function name.
DebugName() string
// GoFunction is non-nil when implemented by the embedder instead of a wasm
// binary, e.g. via wazero.HostModuleBuilder
//
// The expected results are nil, GoFunction or GoModuleFunction.
GoFunction() interface{}
// ParamTypes are the possibly empty sequence of value types accepted by a
// function with this signature.
//
// See ValueType documentation for encoding rules.
ParamTypes() []ValueType
// ParamNames are index-correlated with ParamTypes or nil if not available
// for one or more parameters.
ParamNames() []string
// ResultTypes are the results of the function.
//
// In WebAssembly 1.0 (20191205), there can be at most one result.
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#result-types%E2%91%A0
//
// See ValueType documentation for encoding rules.
ResultTypes() []ValueType
// ResultNames are index-correlated with ResultTypes or nil if not
// available for one or more results.
ResultNames() []string
internalapi.WazeroOnly
}
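// As a sketch only, a FunctionDefinition can be combined with ValueTypeName
// to render a readable signature. Here, `def` is assumed to be obtained from
// Function.Definition or Module.ExportedFunctionDefinitions:
//
//	names := make([]string, 0, len(def.ParamTypes()))
//	for _, t := range def.ParamTypes() {
//		names = append(names, api.ValueTypeName(t))
//	}
//	fmt.Printf("%s params: %v\n", def.DebugName(), names)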
// Function is a WebAssembly function exported from an instantiated module
// (wazero.Runtime InstantiateModule).
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#syntax-func
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
type Function interface {
// Definition is metadata about this function from its defining module.
Definition() FunctionDefinition
// Call invokes the function with the given parameters and returns any
// results or an error for any failure looking up or invoking the function.
//
// Encoding is described in Definition, and supplying an incorrect count of
// parameters vs FunctionDefinition.ParamTypes is an error.
//
// If the exporting Module was closed during this call, the error returned
// may be a sys.ExitError. See Module.CloseWithExitCode for details.
//
// Call is not goroutine-safe, therefore it is recommended to create
// another Function if you want to invoke the same function concurrently.
// On the other hand, sequential invocations of Call are allowed.
// However, Call should not be invoked again until the previous Call returns.
//
// To safely encode/decode params/results expressed as uint64, users are encouraged to
// use api.EncodeXXX or DecodeXXX functions. See the docs on api.ValueType.
//
// When RuntimeConfig.WithCloseOnContextDone is enabled, the invocation of this Call method is guaranteed to be terminated
// whenever one of the three conditions described there is met. In the event of such a termination, sys.ExitError will be returned and
// the api.Module from which this api.Function is derived will be closed. See the documentation of
// WithCloseOnContextDone on wazero.RuntimeConfig for detail, and the examples in context_done_example_test.go for
// end-to-end demonstrations of how these terminations can be performed.
Call(ctx context.Context, params ...uint64) ([]uint64, error)
// CallWithStack is an optimized variation of Call that saves memory
// allocations when the stack slice is reused across calls.
//
// Stack length must be at least the max of parameter or result length.
// The caller adds parameters in order to the stack, and reads any results
// in order from the stack, except in the error case.
//
// For example, the following reuses the same stack slice to call searchFn
// repeatedly saving one allocation per iteration:
//
// stack := make([]uint64, 4)
// for i, search := range searchParams {
// // copy the next params to the stack
// copy(stack, search)
// if err := searchFn.CallWithStack(ctx, stack); err != nil {
// return err
// } else if stack[0] == 1 { // found
// return i // searchParams[i] matched!
// }
// }
//
// # Notes
//
// - This is similar to GoModuleFunction, except it is used for calling functions
// instead of implementing them. Moreover, this is used regardless of
// whether the callee is a host or wasm defined function.
CallWithStack(ctx context.Context, stack []uint64) error
internalapi.WazeroOnly
}
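// A sketch of encoding parameters and decoding results for Call, assuming
// `ctx` is a context.Context and `mod` exports a function "mul" that takes
// and returns f64 values (the name and signature are illustrative):
//
//	mul := mod.ExportedFunction("mul")
//	results, err := mul.Call(ctx, api.EncodeF64(1.5), api.EncodeF64(2.0))
//	if err != nil {
//		return err
//	}
//	product := api.DecodeF64(results[0]) // 3.0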
// GoModuleFunction is a Function implemented in Go instead of a wasm binary.
// The Module parameter is the calling module, used to access memory or
// exported functions. See GoModuleFunc for an example.
//
// The stack includes any parameters encoded according to their ValueType.
// Its length is the max of parameter or result length. When there are results,
// write them in order beginning at index zero. Do not use the stack after the
// function returns.
//
// Here's a typical way to read two parameters and write back one result.
//
// // read parameters off the stack in index order
// argv, argvBuf := api.DecodeU32(stack[0]), api.DecodeU32(stack[1])
//
// // write results back to the stack in index order
// stack[0] = api.EncodeU32(ErrnoSuccess)
//
// This function can be non-deterministic or cause side effects. It also
// has special properties not defined in the WebAssembly Core specification.
// Notably, this uses the caller's memory (via Module.Memory). See
// https://www.w3.org/TR/wasm-core-1/#host-functions%E2%91%A0
//
// Most end users will not define functions directly with this, as they will
// use reflection or code generators instead. These approaches are more
// idiomatic as they can map go types to ValueType. This type is exposed for
// those willing to trade usability and safety for performance.
//
// To safely decode/encode values from/to the uint64 stack, users are encouraged to use
// api.EncodeXXX or api.DecodeXXX functions. See the docs on api.ValueType.
type GoModuleFunction interface {
Call(ctx context.Context, mod Module, stack []uint64)
}
// GoModuleFunc is a convenience for defining an inlined function.
//
// For example, the following returns an uint32 value read from parameter zero:
//
// api.GoModuleFunc(func(ctx context.Context, mod api.Module, stack []uint64) {
// offset := api.DecodeU32(stack[0]) // read the parameter from the stack
//
// ret, ok := mod.Memory().ReadUint32Le(offset)
// if !ok {
// panic("out of memory")
// }
//
// stack[0] = api.EncodeU32(ret) // add the result back to the stack.
// })
type GoModuleFunc func(ctx context.Context, mod Module, stack []uint64)
// Call implements GoModuleFunction.Call.
func (f GoModuleFunc) Call(ctx context.Context, mod Module, stack []uint64) {
f(ctx, mod, stack)
}
// GoFunction is an optimized form of GoModuleFunction which doesn't require
// the Module parameter. See GoFunc for an example.
//
// For example, this function does not need to use the importing module's
// memory or exported functions.
type GoFunction interface {
Call(ctx context.Context, stack []uint64)
}
// GoFunc is a convenience for defining an inlined function.
//
// For example, the following returns the sum of two uint32 parameters:
//
// api.GoFunc(func(ctx context.Context, stack []uint64) {
// x, y := api.DecodeU32(stack[0]), api.DecodeU32(stack[1])
// stack[0] = api.EncodeU32(x + y)
// })
type GoFunc func(ctx context.Context, stack []uint64)
// Call implements GoFunction.Call.
func (f GoFunc) Call(ctx context.Context, stack []uint64) {
f(ctx, stack)
}
// Global is a WebAssembly 1.0 (20191205) global exported from an instantiated module (wazero.Runtime InstantiateModule).
//
// For example, if the value is not mutable, you can read it once:
//
// offset := module.ExportedGlobal("memory.offset").Get()
//
// Globals are allowed by specification to be mutable. However, this can be disabled by configuration. When in doubt,
// safe cast to find out if the value can change. Here's an example:
//
// offset := module.ExportedGlobal("memory.offset")
// if _, ok := offset.(api.MutableGlobal); ok {
// // value can change
// } else {
// // value is constant
// }
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#globals%E2%91%A0
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
type Global interface {
fmt.Stringer
// Type describes the numeric type of the global.
Type() ValueType
// Get returns the last known value of this global.
//
// See Type for how to decode this value to a Go type.
Get() uint64
}
// MutableGlobal is a Global whose value can be updated at runtime (variable).
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
type MutableGlobal interface {
Global
// Set updates the value of this global.
//
// See Global.Type for how to encode this value from a Go type.
Set(v uint64)
internalapi.WazeroOnly
}
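// A sketch of reading and, when mutable, updating a global, assuming `mod`
// exports a global named "counter" of type i32 (both are illustrative):
//
//	if g := mod.ExportedGlobal("counter"); g != nil {
//		current := api.DecodeI32(g.Get())
//		if mg, ok := g.(api.MutableGlobal); ok {
//			mg.Set(api.EncodeI32(current + 1))
//		}
//	}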
// Memory allows restricted access to a module's memory. Notably, this does not allow growing.
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#storage%E2%91%A0
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
// - This includes all value types available in WebAssembly 1.0 (20191205) and all are encoded little-endian.
type Memory interface {
// Definition is metadata about this memory from its defining module.
Definition() MemoryDefinition
// Size returns the memory size in bytes available.
// e.g. If the underlying memory has 1 page: 65536
//
// # Notes
//
// - This overflows (returns zero) if the memory has the maximum 65536 pages.
// As a workaround until the return type is fixed in wazero v2, use Grow(0) to obtain the current pages and
// multiply by 65536.
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#-hrefsyntax-instr-memorymathsfmemorysize%E2%91%A0
Size() uint32
// Grow increases memory by the delta in pages (65536 bytes per page).
// The return value is the previous memory size in pages, or false if the
// delta was ignored as it exceeds MemoryDefinition.Max.
//
// # Notes
//
// - This is the same as the "memory.grow" instruction defined in the
// WebAssembly Core Specification, except returns false instead of -1.
// - When this returns true, any shared views via Read must be refreshed.
//
// See MemorySizer Read and https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem
Grow(deltaPages uint32) (previousPages uint32, ok bool)
// ReadByte reads a single byte from the underlying buffer at the offset or returns false if out of range.
ReadByte(offset uint32) (byte, bool)
// ReadUint16Le reads a uint16 in little-endian encoding from the underlying buffer at the offset or returns
// false if out of range.
ReadUint16Le(offset uint32) (uint16, bool)
// ReadUint32Le reads a uint32 in little-endian encoding from the underlying buffer at the offset or returns
// false if out of range.
ReadUint32Le(offset uint32) (uint32, bool)
// ReadFloat32Le reads a float32 from 32 IEEE 754 little-endian encoded bits in the underlying buffer at the offset
// or returns false if out of range.
// See math.Float32bits
ReadFloat32Le(offset uint32) (float32, bool)
// ReadUint64Le reads a uint64 in little-endian encoding from the underlying buffer at the offset or returns false
// if out of range.
ReadUint64Le(offset uint32) (uint64, bool)
// ReadFloat64Le reads a float64 from 64 IEEE 754 little-endian encoded bits in the underlying buffer at the offset
// or returns false if out of range.
//
// See math.Float64bits
ReadFloat64Le(offset uint32) (float64, bool)
// Read reads byteCount bytes from the underlying buffer at the offset or
// returns false if out of range.
//
// For example, to search for a NUL-terminated string:
// buf, _ = memory.Read(offset, byteCount)
// n := bytes.IndexByte(buf, 0)
// if n < 0 {
// // Not found!
// }
//
// # Write-through
//
// This returns a view of the underlying memory, not a copy. This means any
// writes to the slice returned are visible to Wasm, and any updates from
// Wasm are visible reading the returned slice.
//
// For example:
// buf, _ = memory.Read(offset, byteCount)
// buf[1] = 'a' // writes through to memory, meaning Wasm code sees 'a'.
//
// If you don't intend to write through, make a copy of the returned slice.
//
// # When to refresh Read
//
// The returned slice disconnects on any capacity change. For example,
// `buf = append(buf, 'a')` might result in a slice that is no longer
// shared. The same holds on the Wasm side. For example, if Wasm changes its
// memory capacity (e.g. via "memory.grow"), the host slice is no longer
// shared. Those who need a stable view must set Wasm memory min=max, or
// use wazero.RuntimeConfig WithMemoryCapacityFromMax to ensure the max is
// always allocated.
Read(offset, byteCount uint32) ([]byte, bool)
// WriteByte writes a single byte to the underlying buffer at the offset or returns false if out of range.
WriteByte(offset uint32, v byte) bool
// WriteUint16Le writes the value in little-endian encoding to the underlying buffer at the offset or returns
// false if out of range.
WriteUint16Le(offset uint32, v uint16) bool
// WriteUint32Le writes the value in little-endian encoding to the underlying buffer at the offset or returns
// false if out of range.
WriteUint32Le(offset, v uint32) bool
// WriteFloat32Le writes the value in 32 IEEE 754 little-endian encoded bits to the underlying buffer at the offset
// or returns false if out of range.
//
// See math.Float32bits
WriteFloat32Le(offset uint32, v float32) bool
// WriteUint64Le writes the value in little-endian encoding to the underlying buffer at the offset or returns
// false if out of range.
WriteUint64Le(offset uint32, v uint64) bool
// WriteFloat64Le writes the value in 64 IEEE 754 little-endian encoded bits to the underlying buffer at the offset
// or returns false if out of range.
//
// See math.Float64bits
WriteFloat64Le(offset uint32, v float64) bool
// Write writes the slice to the underlying buffer at the offset or returns false if out of range.
Write(offset uint32, v []byte) bool
// WriteString writes the string to the underlying buffer at the offset or returns false if out of range.
WriteString(offset uint32, v string) bool
internalapi.WazeroOnly
}
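// A sketch of the write-through behavior and the Size overflow workaround
// noted above, assuming `mem` is a Memory with at least one page:
//
//	// WriteString and Read operate on the same underlying buffer.
//	_ = mem.WriteString(0, "hi")
//	if buf, ok := mem.Read(0, 2); ok {
//		buf[0] = 'H' // visible to the guest as well
//	}
//
//	// Grow(0) returns the current page count even when Size would overflow.
//	if pages, ok := mem.Grow(0); ok {
//		sizeInBytes := uint64(pages) * 65536
//		_ = sizeInBytes
//	}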
// CustomSection contains the name and raw data of a custom section.
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
type CustomSection interface {
// Name is the name of the custom section
Name() string
// Data is the raw data of the custom section
Data() []byte
internalapi.WazeroOnly
}
// EncodeExternref encodes the input as a ValueTypeExternref.
//
// See DecodeExternref
func EncodeExternref(input uintptr) uint64 {
return uint64(input)
}
// DecodeExternref decodes the input as a ValueTypeExternref.
//
// See EncodeExternref
func DecodeExternref(input uint64) uintptr {
return uintptr(input)
}
// EncodeI32 encodes the input as a ValueTypeI32.
func EncodeI32(input int32) uint64 {
return uint64(uint32(input))
}
// DecodeI32 decodes the input as a ValueTypeI32.
func DecodeI32(input uint64) int32 {
return int32(input)
}
// EncodeU32 encodes the input as a ValueTypeI32.
func EncodeU32(input uint32) uint64 {
return uint64(input)
}
// DecodeU32 decodes the input as a ValueTypeI32.
func DecodeU32(input uint64) uint32 {
return uint32(input)
}
// EncodeI64 encodes the input as a ValueTypeI64.
func EncodeI64(input int64) uint64 {
return uint64(input)
}
// EncodeF32 encodes the input as a ValueTypeF32.
//
// See DecodeF32
func EncodeF32(input float32) uint64 {
return uint64(math.Float32bits(input))
}
// DecodeF32 decodes the input as a ValueTypeF32.
//
// See EncodeF32
func DecodeF32(input uint64) float32 {
return math.Float32frombits(uint32(input))
}
// EncodeF64 encodes the input as a ValueTypeF64.
//
// See EncodeF32
func EncodeF64(input float64) uint64 {
return math.Float64bits(input)
}
// DecodeF64 decodes the input as a ValueTypeF64.
//
// See EncodeF64
func DecodeF64(input uint64) float64 {
return math.Float64frombits(input)
}

352
vendor/github.com/tetratelabs/wazero/builder.go generated vendored Normal file
View file

@ -0,0 +1,352 @@
package wazero
import (
"context"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/internal/wasm"
)
// HostFunctionBuilder defines a host function (in Go), so that a
// WebAssembly binary (e.g. %.wasm file) can import and use it.
//
// Here's an example of an addition function:
//
// hostModuleBuilder.NewFunctionBuilder().
// WithFunc(func(ctx context.Context, x, y uint32) uint32 {
// return x + y
// }).
// Export("add")
//
// # Memory
//
// All host functions act on the importing api.Module, including any memory
// exported in its binary (%.wasm file). If you are reading or writing memory,
// it is sand-boxed Wasm memory defined by the guest.
//
// Below, `m` is the importing module, defined in Wasm. `fn` is a host function
// added via Export. This means that `x` was read from memory defined in Wasm,
// not arbitrary memory in the process.
//
// fn := func(ctx context.Context, m api.Module, offset uint32) uint32 {
// x, _ := m.Memory().ReadUint32Le(offset)
// return x
// }
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
type HostFunctionBuilder interface {
// WithGoFunction is an advanced feature for those who need higher
// performance than WithFunc at the cost of more complexity.
//
// Here's an example addition function:
//
// builder.WithGoFunction(api.GoFunc(func(ctx context.Context, stack []uint64) {
// x, y := api.DecodeI32(stack[0]), api.DecodeI32(stack[1])
// sum := x + y
// stack[0] = api.EncodeI32(sum)
// }), []api.ValueType{api.ValueTypeI32, api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32})
//
// As you can see above, defining in this way implies knowledge of which
// WebAssembly api.ValueType is appropriate for each parameter and result.
//
// See WithGoModuleFunction if you also need to access the calling module.
WithGoFunction(fn api.GoFunction, params, results []api.ValueType) HostFunctionBuilder
// WithGoModuleFunction is an advanced feature for those who need higher
// performance than WithFunc at the cost of more complexity.
//
// Here's an example addition function that loads operands from memory:
//
// builder.WithGoModuleFunction(api.GoModuleFunc(func(ctx context.Context, m api.Module, stack []uint64) {
// mem := m.Memory()
// offset := api.DecodeU32(stack[0])
//
// x, _ := mem.ReadUint32Le(offset)
// y, _ := mem.ReadUint32Le(offset + 4) // 32 bits == 4 bytes!
// sum := x + y
//
// stack[0] = api.EncodeU32(sum)
// }), []api.ValueType{api.ValueTypeI32}, []api.ValueType{api.ValueTypeI32})
//
// As you can see above, defining in this way implies knowledge of which
// WebAssembly api.ValueType is appropriate for each parameter and result.
//
// See WithGoFunction if you don't need access to the calling module.
WithGoModuleFunction(fn api.GoModuleFunction, params, results []api.ValueType) HostFunctionBuilder
// WithFunc uses reflect.Value to map a go `func` to a WebAssembly
// compatible Signature. An input that isn't a `func` will fail to
// instantiate.
//
// Here's an example of an addition function:
//
// builder.WithFunc(func(ctx context.Context, x, y uint32) uint32 {
// return x + y
// })
//
// # Defining a function
//
// Except for the context.Context and optional api.Module, all parameters
// or result types must map to WebAssembly numeric value types. This means
// uint32, int32, uint64, int64, float32 or float64.
//
// api.Module may be specified as the second parameter, usually to access
// memory. This is important because there are only numeric types in Wasm.
// The only way to share other data is via writing memory and sharing
// offsets.
//
// builder.WithFunc(func(ctx context.Context, m api.Module, offset uint32) uint32 {
// mem := m.Memory()
// x, _ := mem.ReadUint32Le(offset)
// y, _ := mem.ReadUint32Le(offset + 4) // 32 bits == 4 bytes!
// return x + y
// })
//
// This example propagates context properly when calling other functions
// exported in the api.Module:
//
// builder.WithFunc(func(ctx context.Context, m api.Module, offset, byteCount uint32) uint32 {
// fn := m.ExportedFunction("__read")
// results, err := fn.Call(ctx, uint64(offset), uint64(byteCount))
// --snip--
WithFunc(interface{}) HostFunctionBuilder
// WithName defines the optional module-local name of this function, e.g.
// "random_get"
//
// Note: This is not required to match the Export name.
WithName(name string) HostFunctionBuilder
// WithParameterNames defines optional parameter names of the function
// signature, e.g. "buf", "buf_len"
//
// Note: When defined, names must be provided for all parameters.
WithParameterNames(names ...string) HostFunctionBuilder
// WithResultNames defines optional result names of the function
// signature, e.g. "errno"
//
// Note: When defined, names must be provided for all results.
WithResultNames(names ...string) HostFunctionBuilder
// Export exports this to the HostModuleBuilder as the given name, e.g.
// "random_get"
Export(name string) HostModuleBuilder
}
// HostModuleBuilder is a way to define host functions (in Go), so that a
// WebAssembly binary (e.g. %.wasm file) can import and use them.
//
// Specifically, this implements the host side of an Application Binary
// Interface (ABI) like WASI or AssemblyScript.
//
// For example, this defines and instantiates a module named "env" with one
// function:
//
// ctx := context.Background()
// r := wazero.NewRuntime(ctx)
// defer r.Close(ctx) // This closes everything this Runtime created.
//
// hello := func() {
// println("hello!")
// }
// env, _ := r.NewHostModuleBuilder("env").
// NewFunctionBuilder().WithFunc(hello).Export("hello").
// Instantiate(ctx)
//
// If the same module may be instantiated multiple times, it is more efficient
// to separate steps. Here's an example:
//
// compiled, _ := r.NewHostModuleBuilder("env").
// NewFunctionBuilder().WithFunc(getRandomString).Export("get_random_string").
// Compile(ctx)
//
// env1, _ := r.InstantiateModule(ctx, compiled, wazero.NewModuleConfig().WithName("env.1"))
// env2, _ := r.InstantiateModule(ctx, compiled, wazero.NewModuleConfig().WithName("env.2"))
//
// See HostFunctionBuilder for valid host function signatures and other details.
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
// - HostModuleBuilder is mutable: each method returns the same instance for
// chaining.
// - methods do not return errors, to allow chaining. Any validation errors
// are deferred until Compile.
// - Functions are indexed in order of calls to NewFunctionBuilder as
// insertion ordering is needed by ABI such as Emscripten (invoke_*).
type HostModuleBuilder interface {
// Note: until golang/go#5860, we can't use example tests to embed code in interface godocs.
// NewFunctionBuilder begins the definition of a host function.
NewFunctionBuilder() HostFunctionBuilder
// Compile returns a CompiledModule that can be instantiated by Runtime.
Compile(context.Context) (CompiledModule, error)
// Instantiate is a convenience that calls Compile, then Runtime.InstantiateModule.
// This can fail for reasons documented on Runtime.InstantiateModule.
//
// Here's an example:
//
// ctx := context.Background()
// r := wazero.NewRuntime(ctx)
// defer r.Close(ctx) // This closes everything this Runtime created.
//
// hello := func() {
// println("hello!")
// }
// env, _ := r.NewHostModuleBuilder("env").
// NewFunctionBuilder().WithFunc(hello).Export("hello").
// Instantiate(ctx)
//
// # Notes
//
// - Closing the Runtime has the same effect as closing the result.
// - Fields in the builder are copied during instantiation: Later changes do not affect the instantiated result.
// - To avoid using configuration defaults, use Compile instead.
Instantiate(context.Context) (api.Module, error)
}
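// A sketch combining the builder methods above, assuming `ctx` is a
// context.Context and `r` is a wazero.Runtime. The module name "env", the
// export name "log_i32" and its behavior are illustrative only:
//
//	if _, err := r.NewHostModuleBuilder("env").
//		NewFunctionBuilder().
//		WithGoModuleFunction(api.GoModuleFunc(func(ctx context.Context, m api.Module, stack []uint64) {
//			println(api.DecodeI32(stack[0])) // log the single i32 parameter
//		}), []api.ValueType{api.ValueTypeI32}, nil).
//		WithParameterNames("value").
//		Export("log_i32").
//		Instantiate(ctx); err != nil {
//		// handle err
//	}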
// hostModuleBuilder implements HostModuleBuilder
type hostModuleBuilder struct {
r *runtime
moduleName string
exportNames []string
nameToHostFunc map[string]*wasm.HostFunc
}
// NewHostModuleBuilder implements Runtime.NewHostModuleBuilder
func (r *runtime) NewHostModuleBuilder(moduleName string) HostModuleBuilder {
return &hostModuleBuilder{
r: r,
moduleName: moduleName,
nameToHostFunc: map[string]*wasm.HostFunc{},
}
}
// hostFunctionBuilder implements HostFunctionBuilder
type hostFunctionBuilder struct {
b *hostModuleBuilder
fn interface{}
name string
paramNames []string
resultNames []string
}
// WithGoFunction implements HostFunctionBuilder.WithGoFunction
func (h *hostFunctionBuilder) WithGoFunction(fn api.GoFunction, params, results []api.ValueType) HostFunctionBuilder {
h.fn = &wasm.HostFunc{ParamTypes: params, ResultTypes: results, Code: wasm.Code{GoFunc: fn}}
return h
}
// WithGoModuleFunction implements HostFunctionBuilder.WithGoModuleFunction
func (h *hostFunctionBuilder) WithGoModuleFunction(fn api.GoModuleFunction, params, results []api.ValueType) HostFunctionBuilder {
h.fn = &wasm.HostFunc{ParamTypes: params, ResultTypes: results, Code: wasm.Code{GoFunc: fn}}
return h
}
// WithFunc implements HostFunctionBuilder.WithFunc
func (h *hostFunctionBuilder) WithFunc(fn interface{}) HostFunctionBuilder {
h.fn = fn
return h
}
// WithName implements HostFunctionBuilder.WithName
func (h *hostFunctionBuilder) WithName(name string) HostFunctionBuilder {
h.name = name
return h
}
// WithParameterNames implements HostFunctionBuilder.WithParameterNames
func (h *hostFunctionBuilder) WithParameterNames(names ...string) HostFunctionBuilder {
h.paramNames = names
return h
}
// WithResultNames implements HostFunctionBuilder.WithResultNames
func (h *hostFunctionBuilder) WithResultNames(names ...string) HostFunctionBuilder {
h.resultNames = names
return h
}
// Export implements HostFunctionBuilder.Export
func (h *hostFunctionBuilder) Export(exportName string) HostModuleBuilder {
var hostFn *wasm.HostFunc
if fn, ok := h.fn.(*wasm.HostFunc); ok {
hostFn = fn
} else {
hostFn = &wasm.HostFunc{Code: wasm.Code{GoFunc: h.fn}}
}
// Assign any names from the builder
hostFn.ExportName = exportName
if h.name != "" {
hostFn.Name = h.name
}
if len(h.paramNames) != 0 {
hostFn.ParamNames = h.paramNames
}
if len(h.resultNames) != 0 {
hostFn.ResultNames = h.resultNames
}
h.b.ExportHostFunc(hostFn)
return h.b
}
// ExportHostFunc implements wasm.HostFuncExporter
func (b *hostModuleBuilder) ExportHostFunc(fn *wasm.HostFunc) {
if _, ok := b.nameToHostFunc[fn.ExportName]; !ok { // add a new name
b.exportNames = append(b.exportNames, fn.ExportName)
}
b.nameToHostFunc[fn.ExportName] = fn
}
// NewFunctionBuilder implements HostModuleBuilder.NewFunctionBuilder
func (b *hostModuleBuilder) NewFunctionBuilder() HostFunctionBuilder {
return &hostFunctionBuilder{b: b}
}
// Compile implements HostModuleBuilder.Compile
func (b *hostModuleBuilder) Compile(ctx context.Context) (CompiledModule, error) {
module, err := wasm.NewHostModule(b.moduleName, b.exportNames, b.nameToHostFunc, b.r.enabledFeatures)
if err != nil {
return nil, err
} else if err = module.Validate(b.r.enabledFeatures); err != nil {
return nil, err
}
c := &compiledModule{module: module, compiledEngine: b.r.store.Engine}
listeners, err := buildFunctionListeners(ctx, module)
if err != nil {
return nil, err
}
if err = b.r.store.Engine.CompileModule(ctx, module, listeners, false); err != nil {
return nil, err
}
// typeIDs are static and compile-time known.
typeIDs, err := b.r.store.GetFunctionTypeIDs(module.TypeSection)
if err != nil {
return nil, err
}
c.typeIDs = typeIDs
return c, nil
}
// Instantiate implements HostModuleBuilder.Instantiate
func (b *hostModuleBuilder) Instantiate(ctx context.Context) (api.Module, error) {
if compiled, err := b.Compile(ctx); err != nil {
return nil, err
} else {
compiled.(*compiledModule).closeWithModule = true
return b.r.InstantiateModule(ctx, compiled, NewModuleConfig())
}
}

116
vendor/github.com/tetratelabs/wazero/cache.go generated vendored Normal file
View file

@ -0,0 +1,116 @@
package wazero
import (
"context"
"errors"
"fmt"
"os"
"path"
"path/filepath"
goruntime "runtime"
"sync"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/internal/filecache"
"github.com/tetratelabs/wazero/internal/version"
"github.com/tetratelabs/wazero/internal/wasm"
)
// CompilationCache reduces time spent compiling (Runtime.CompileModule) the same wasm module.
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
// - Instances of this can be reused across multiple runtimes, if configured
// via RuntimeConfig.
type CompilationCache interface{ api.Closer }
// NewCompilationCache returns a new CompilationCache to be passed to RuntimeConfig.
// This configures only an in-memory cache that doesn't persist to the file system. See wazero.NewCompilationCacheWithDir for detail.
//
// The returned CompilationCache can be used to share the in-memory compilation results across multiple instances of wazero.Runtime.
func NewCompilationCache() CompilationCache {
return &cache{}
}
// NewCompilationCacheWithDir is like wazero.NewCompilationCache except the result also writes
// state into the directory specified by `dirname` parameter.
//
// If the dirname doesn't exist, this creates it or returns an error.
//
// Those running wazero as a CLI or frequently restarting a process using the same wasm should
// use this feature to reduce time waiting to compile the same module a second time.
//
// The contents written into dirname are wazero-version specific, meaning different versions of
// wazero will duplicate entries for the same input wasm.
//
// Note: The embedder must safeguard this directory from external changes.
func NewCompilationCacheWithDir(dirname string) (CompilationCache, error) {
c := &cache{}
err := c.ensuresFileCache(dirname, version.GetWazeroVersion())
return c, err
}
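// A sketch of persisting compilation results across processes, assuming
// `ctx` is a context.Context and that the directory (illustrative here) is
// writable by the embedder:
//
//	cache, err := wazero.NewCompilationCacheWithDir("/tmp/wazero-cache")
//	if err != nil {
//		// handle err
//	}
//	defer cache.Close(ctx)
//
//	config := wazero.NewRuntimeConfig().WithCompilationCache(cache)
//	r := wazero.NewRuntimeWithConfig(ctx, config)
//	defer r.Close(ctx)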
// cache implements Cache interface.
type cache struct {
// engs holds the engines for this cache, one per engineKind. If the cache is configured, an engine is shared across multiple instances of
// Runtime, and its lifetime is not bound to them. Instead, the engine is alive until Cache.Close is called.
engs [engineKindCount]wasm.Engine
fileCache filecache.Cache
initOnces [engineKindCount]sync.Once
}
func (c *cache) initEngine(ek engineKind, ne newEngine, ctx context.Context, features api.CoreFeatures) wasm.Engine {
c.initOnces[ek].Do(func() { c.engs[ek] = ne(ctx, features, c.fileCache) })
return c.engs[ek]
}
// Close implements the same method on the Cache interface.
func (c *cache) Close(_ context.Context) (err error) {
for _, eng := range c.engs {
if eng != nil {
if err = eng.Close(); err != nil {
return
}
}
}
return
}
func (c *cache) ensuresFileCache(dir string, wazeroVersion string) error {
// Resolve a potentially relative directory into an absolute one.
var err error
dir, err = filepath.Abs(dir)
if err != nil {
return err
}
// Ensure the user-supplied directory.
if err = mkdir(dir); err != nil {
return err
}
// Create a version-specific directory to avoid conflicts.
dirname := path.Join(dir, "wazero-"+wazeroVersion+"-"+goruntime.GOARCH+"-"+goruntime.GOOS)
if err = mkdir(dirname); err != nil {
return err
}
c.fileCache = filecache.New(dirname)
return nil
}
func mkdir(dirname string) error {
if st, err := os.Stat(dirname); errors.Is(err, os.ErrNotExist) {
// If the directory is not found, create the cache dir.
if err = os.MkdirAll(dirname, 0o700); err != nil {
return fmt.Errorf("create directory %s: %v", dirname, err)
}
} else if err != nil {
return err
} else if !st.IsDir() {
return fmt.Errorf("%s is not dir", dirname)
}
return nil
}

9
vendor/github.com/tetratelabs/wazero/codecov.yml generated vendored Normal file
View file

@ -0,0 +1,9 @@
# Codecov for main is visible here https://app.codecov.io/gh/tetratelabs/wazero
# We use codecov only as a UI, so we disable PR comments and commit status.
# See https://docs.codecov.com/docs/pull-request-comments
comment: false
coverage:
status:
project: off
patch: off

876
vendor/github.com/tetratelabs/wazero/config.go generated vendored Normal file
View file

@ -0,0 +1,876 @@
package wazero
import (
"context"
"errors"
"fmt"
"io"
"io/fs"
"math"
"net"
"time"
"github.com/tetratelabs/wazero/api"
experimentalsys "github.com/tetratelabs/wazero/experimental/sys"
"github.com/tetratelabs/wazero/internal/engine/interpreter"
"github.com/tetratelabs/wazero/internal/engine/wazevo"
"github.com/tetratelabs/wazero/internal/filecache"
"github.com/tetratelabs/wazero/internal/internalapi"
"github.com/tetratelabs/wazero/internal/platform"
internalsock "github.com/tetratelabs/wazero/internal/sock"
internalsys "github.com/tetratelabs/wazero/internal/sys"
"github.com/tetratelabs/wazero/internal/wasm"
"github.com/tetratelabs/wazero/sys"
)
// RuntimeConfig controls runtime behavior, with the default implementation as
// NewRuntimeConfig
//
// The example below explicitly limits to Wasm Core 1.0 features as opposed to
// relying on defaults:
//
// rConfig = wazero.NewRuntimeConfig().WithCoreFeatures(api.CoreFeaturesV1)
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
// - RuntimeConfig is immutable. Each WithXXX function returns a new instance
// including the corresponding change.
type RuntimeConfig interface {
// WithCoreFeatures sets the WebAssembly Core specification features this
// runtime supports. Defaults to api.CoreFeaturesV2.
//
// Example of disabling a specific feature:
// features := api.CoreFeaturesV2.SetEnabled(api.CoreFeatureMutableGlobal, false)
// rConfig = wazero.NewRuntimeConfig().WithCoreFeatures(features)
//
// # Why default to version 2.0?
//
// Many compilers that target WebAssembly require features after
// api.CoreFeaturesV1 by default. For example, TinyGo v0.24+ requires
// api.CoreFeatureBulkMemoryOperations. To avoid runtime errors, wazero
// defaults to api.CoreFeaturesV2, even though it is not yet a Web
// Standard (REC).
WithCoreFeatures(api.CoreFeatures) RuntimeConfig
// WithMemoryLimitPages overrides the maximum pages allowed per memory. The
// default is 65536, allowing 4GB total memory per instance if the maximum is
// not encoded in a Wasm binary. Setting a value larger than default will panic.
//
// This example reduces the largest possible memory size from 4GB to 128KB:
// rConfig = wazero.NewRuntimeConfig().WithMemoryLimitPages(2)
//
// Note: Wasm has 32-bit memory and each page is 65536 (2^16) bytes. This
// implies a max of 65536 (2^16) addressable pages.
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem
WithMemoryLimitPages(memoryLimitPages uint32) RuntimeConfig
// WithMemoryCapacityFromMax eagerly allocates max memory, unless max is
// not defined. The default is false, which means minimum memory is
// allocated and any call to grow memory results in re-allocations.
//
// This example ensures any memory.grow instruction will never re-allocate:
// rConfig = wazero.NewRuntimeConfig().WithMemoryCapacityFromMax(true)
//
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#grow-mem
//
// Note: if the memory maximum is not encoded in a Wasm binary, this
// results in allocating 4GB. See the doc on WithMemoryLimitPages for detail.
WithMemoryCapacityFromMax(memoryCapacityFromMax bool) RuntimeConfig
// WithDebugInfoEnabled toggles DWARF based stack traces in the face of
// runtime errors. Defaults to true.
//
// Those who wish to disable this can do so like this:
//
//	r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfig().WithDebugInfoEnabled(false))
//
// When disabled, a stack trace message looks like:
//
// wasm stack trace:
// .runtime._panic(i32)
// .myFunc()
// .main.main()
// .runtime.run()
// ._start()
//
// When enabled, the stack trace includes source code information:
//
// wasm stack trace:
// .runtime._panic(i32)
// 0x16e2: /opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/runtime_tinygowasm.go:73:6
// .myFunc()
// 0x190b: /Users/XXXXX/wazero/internal/testing/dwarftestdata/testdata/main.go:19:7
// .main.main()
// 0x18ed: /Users/XXXXX/wazero/internal/testing/dwarftestdata/testdata/main.go:4:3
// .runtime.run()
// 0x18cc: /opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/scheduler_none.go:26:10
// ._start()
// 0x18b6: /opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/runtime_wasm_wasi.go:22:5
//
// Note: This only takes into effect when the original Wasm binary has the
// DWARF "custom sections" that are often stripped, depending on
// optimization flags passed to the compiler.
WithDebugInfoEnabled(bool) RuntimeConfig
// WithCompilationCache configures how the runtime caches compiled modules. In the default configuration, compilation results are
// held only in memory until the Runtime is closed, and are not shareable across multiple Runtimes.
//
// Below defines the shared cache across multiple instances of Runtime:
//
// // Creates the new Cache and the runtime configuration with it.
// cache := wazero.NewCompilationCache()
// defer cache.Close(context.Background())
// config := wazero.NewRuntimeConfig().WithCompilationCache(cache)
//
// // Creates two runtimes while sharing compilation caches.
// foo := wazero.NewRuntimeWithConfig(context.Background(), config)
// bar := wazero.NewRuntimeWithConfig(context.Background(), config)
//
// # Cache Key
//
// Cached files are keyed on the version of wazero. This is obtained from go.mod of your application,
// and we use it to verify the compatibility of caches against the currently-running wazero.
// However, if you use this in tests of a package not named as `main`, then wazero cannot obtain the correct
// version of wazero due to the known issue of debug.BuildInfo function: https://github.com/golang/go/issues/33976.
// As a consequence, your cache won't contain the correct version information and will always be treated as the `dev` version.
// To avoid this issue, you can pass -ldflags "-X github.com/tetratelabs/wazero/internal/version.version=foo" when running tests.
WithCompilationCache(CompilationCache) RuntimeConfig
// WithCustomSections toggles parsing of "custom sections". Defaults to false.
//
// When enabled, it is possible to retrieve custom sections from a CompiledModule:
//
// config := wazero.NewRuntimeConfig().WithCustomSections(true)
// r := wazero.NewRuntimeWithConfig(ctx, config)
// c, err := r.CompileModule(ctx, wasm)
// customSections := c.CustomSections()
WithCustomSections(bool) RuntimeConfig
// WithCloseOnContextDone ensures that executions of functions are terminated under one of the following circumstances:
//
// - context.Context passed to the Call method of api.Function is canceled during execution. (i.e. ctx by context.WithCancel)
// - context.Context passed to the Call method of api.Function reaches timeout during execution. (i.e. ctx by context.WithTimeout or context.WithDeadline)
// - Close or CloseWithExitCode of api.Module is explicitly called during execution.
//
// This is especially useful when one wants to run untrusted Wasm binaries since otherwise, any invocation of
// api.Function can potentially block the corresponding Goroutine forever. Moreover, it might block the
// entire underlying OS thread which runs the api.Function call. See "Why it's safe to execute runtime-generated
// machine codes against async Goroutine preemption" section in RATIONALE.md for detail.
//
// Note that this comes with a bit of extra cost when enabled. The reason is that internally this forces
// interpreter and compiler runtimes to insert the periodical checks on the conditions above. For that reason,
// this is disabled by default.
//
// See examples in context_done_example_test.go for the end-to-end demonstrations.
//
// When an invocation of api.Function is terminated due to this, sys.ExitError is raised to the caller and
// the api.Module from which the function is derived is closed. A minimal sketch follows this interface.
WithCloseOnContextDone(bool) RuntimeConfig
}
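// A minimal sketch of WithCloseOnContextDone, assuming `ctx` is a
// context.Context and `fn` is an api.Function exported by a module
// instantiated on the runtime below:
//
//	r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfig().WithCloseOnContextDone(true))
//	defer r.Close(ctx)
//
//	callCtx, cancel := context.WithTimeout(ctx, time.Second)
//	defer cancel()
//	if _, err := fn.Call(callCtx); err != nil {
//		// on timeout, err is a sys.ExitError and the module is closed
//	}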
// NewRuntimeConfig returns a RuntimeConfig using the compiler if it is supported in this environment,
// or the interpreter otherwise.
func NewRuntimeConfig() RuntimeConfig {
return newRuntimeConfig()
}
type newEngine func(context.Context, api.CoreFeatures, filecache.Cache) wasm.Engine
type runtimeConfig struct {
enabledFeatures api.CoreFeatures
memoryLimitPages uint32
memoryCapacityFromMax bool
engineKind engineKind
dwarfDisabled bool // stored negated because the default is enabled
newEngine newEngine
cache CompilationCache
storeCustomSections bool
ensureTermination bool
}
// engineLessConfig helps avoid copy/pasting the wrong defaults.
var engineLessConfig = &runtimeConfig{
enabledFeatures: api.CoreFeaturesV2,
memoryLimitPages: wasm.MemoryLimitPages,
memoryCapacityFromMax: false,
dwarfDisabled: false,
}
type engineKind int
const (
engineKindCompiler engineKind = iota
engineKindInterpreter
engineKindCount
)
// NewRuntimeConfigCompiler compiles WebAssembly modules into
// runtime.GOARCH-specific assembly for optimal performance.
//
// The default implementation is AOT (Ahead of Time) compilation, applied at
// Runtime.CompileModule. This allows consistent runtime performance, as well
// as the ability to reduce any first request penalty.
//
// Note: While this is technically AOT, this does not imply any action on your
// part. wazero automatically performs ahead-of-time compilation as needed when
// Runtime.CompileModule is invoked.
//
// Warning: This panics at runtime if the runtime.GOOS or runtime.GOARCH does not
// support compiler. Use NewRuntimeConfig to safely detect and fallback to
// NewRuntimeConfigInterpreter if needed.
func NewRuntimeConfigCompiler() RuntimeConfig {
ret := engineLessConfig.clone()
ret.engineKind = engineKindCompiler
ret.newEngine = wazevo.NewEngine
return ret
}
// NewRuntimeConfigInterpreter interprets WebAssembly modules instead of compiling them into assembly.
func NewRuntimeConfigInterpreter() RuntimeConfig {
ret := engineLessConfig.clone()
ret.engineKind = engineKindInterpreter
ret.newEngine = interpreter.NewEngine
return ret
}
// clone makes a deep copy of this runtime config.
func (c *runtimeConfig) clone() *runtimeConfig {
ret := *c // copy except maps which share a ref
return &ret
}
// WithCoreFeatures implements RuntimeConfig.WithCoreFeatures
func (c *runtimeConfig) WithCoreFeatures(features api.CoreFeatures) RuntimeConfig {
ret := c.clone()
ret.enabledFeatures = features
return ret
}
// WithCloseOnContextDone implements RuntimeConfig.WithCloseOnContextDone
func (c *runtimeConfig) WithCloseOnContextDone(ensure bool) RuntimeConfig {
ret := c.clone()
ret.ensureTermination = ensure
return ret
}
// WithMemoryLimitPages implements RuntimeConfig.WithMemoryLimitPages
func (c *runtimeConfig) WithMemoryLimitPages(memoryLimitPages uint32) RuntimeConfig {
ret := c.clone()
// This panics instead of returning an error as it is unlikely.
if memoryLimitPages > wasm.MemoryLimitPages {
panic(fmt.Errorf("memoryLimitPages invalid: %d > %d", memoryLimitPages, wasm.MemoryLimitPages))
}
ret.memoryLimitPages = memoryLimitPages
return ret
}
// WithCompilationCache implements RuntimeConfig.WithCompilationCache
func (c *runtimeConfig) WithCompilationCache(ca CompilationCache) RuntimeConfig {
ret := c.clone()
ret.cache = ca
return ret
}
// WithMemoryCapacityFromMax implements RuntimeConfig.WithMemoryCapacityFromMax
func (c *runtimeConfig) WithMemoryCapacityFromMax(memoryCapacityFromMax bool) RuntimeConfig {
ret := c.clone()
ret.memoryCapacityFromMax = memoryCapacityFromMax
return ret
}
// WithDebugInfoEnabled implements RuntimeConfig.WithDebugInfoEnabled
func (c *runtimeConfig) WithDebugInfoEnabled(dwarfEnabled bool) RuntimeConfig {
ret := c.clone()
ret.dwarfDisabled = !dwarfEnabled
return ret
}
// WithCustomSections implements RuntimeConfig.WithCustomSections
func (c *runtimeConfig) WithCustomSections(storeCustomSections bool) RuntimeConfig {
ret := c.clone()
ret.storeCustomSections = storeCustomSections
return ret
}
// CompiledModule is a WebAssembly module ready to be instantiated (Runtime.InstantiateModule) as an api.Module.
//
// In WebAssembly terminology, this is a decoded, validated, and possibly also compiled module. wazero avoids using
// the name "Module" for both before and after instantiation as the name conflation has caused confusion.
// See https://www.w3.org/TR/2019/REC-wasm-core-1-20191205/#semantic-phases%E2%91%A0
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
// - Closing the wazero.Runtime closes any CompiledModule it compiled.
type CompiledModule interface {
// Name returns the module name encoded into the binary or empty if not.
Name() string
// ImportedFunctions returns all the imported functions
// (api.FunctionDefinition) in this module or nil if there are none.
//
// Note: Unlike ExportedFunctions, there is no unique constraint on
// imports.
ImportedFunctions() []api.FunctionDefinition
// ExportedFunctions returns all the exported functions
// (api.FunctionDefinition) in this module keyed on export name.
ExportedFunctions() map[string]api.FunctionDefinition
// ImportedMemories returns all the imported memories
// (api.MemoryDefinition) in this module or nil if there are none.
//
// ## Notes
// - As of WebAssembly Core Specification 2.0, there can be at most one
// memory.
// - Unlike ExportedMemories, there is no unique constraint on imports.
ImportedMemories() []api.MemoryDefinition
// ExportedMemories returns all the exported memories
// (api.MemoryDefinition) in this module keyed on export name.
//
// Note: As of WebAssembly Core Specification 2.0, there can be at most one
// memory.
ExportedMemories() map[string]api.MemoryDefinition
// CustomSections returns all the custom sections
// (api.CustomSection) in this module keyed on the section name.
CustomSections() []api.CustomSection
// Close releases all the allocated resources for this CompiledModule.
//
// Note: It is safe to call Close while having outstanding calls from an
// api.Module instantiated from this.
Close(context.Context) error
}
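// A sketch of compiling once and instantiating multiple times, assuming
// `ctx` is a context.Context, `r` is a wazero.Runtime and `wasmBytes` holds
// a %.wasm binary (all names are illustrative):
//
//	compiled, err := r.CompileModule(ctx, wasmBytes)
//	if err != nil {
//		// handle err
//	}
//	defer compiled.Close(ctx)
//
//	m1, _ := r.InstantiateModule(ctx, compiled, wazero.NewModuleConfig().WithName("m1"))
//	m2, _ := r.InstantiateModule(ctx, compiled, wazero.NewModuleConfig().WithName("m2"))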
// compile-time check to ensure compiledModule implements CompiledModule
var _ CompiledModule = &compiledModule{}
type compiledModule struct {
module *wasm.Module
// compiledEngine holds an engine on which `module` is compiled.
compiledEngine wasm.Engine
// closeWithModule prevents leaking compiled code when a module is compiled implicitly.
closeWithModule bool
typeIDs []wasm.FunctionTypeID
}
// Name implements CompiledModule.Name
func (c *compiledModule) Name() (moduleName string) {
if ns := c.module.NameSection; ns != nil {
moduleName = ns.ModuleName
}
return
}
// Close implements CompiledModule.Close
func (c *compiledModule) Close(context.Context) error {
c.compiledEngine.DeleteCompiledModule(c.module)
// It is possible the underlying may need to return an error later, but in any case this matches api.Module.Close.
return nil
}
// ImportedFunctions implements CompiledModule.ImportedFunctions
func (c *compiledModule) ImportedFunctions() []api.FunctionDefinition {
return c.module.ImportedFunctions()
}
// ExportedFunctions implements CompiledModule.ExportedFunctions
func (c *compiledModule) ExportedFunctions() map[string]api.FunctionDefinition {
return c.module.ExportedFunctions()
}
// ImportedMemories implements CompiledModule.ImportedMemories
func (c *compiledModule) ImportedMemories() []api.MemoryDefinition {
return c.module.ImportedMemories()
}
// ExportedMemories implements CompiledModule.ExportedMemories
func (c *compiledModule) ExportedMemories() map[string]api.MemoryDefinition {
return c.module.ExportedMemories()
}
// CustomSections implements CompiledModule.CustomSections
func (c *compiledModule) CustomSections() []api.CustomSection {
ret := make([]api.CustomSection, len(c.module.CustomSections))
for i, d := range c.module.CustomSections {
ret[i] = &customSection{data: d.Data, name: d.Name}
}
return ret
}
// customSection implements wasm.CustomSection
type customSection struct {
internalapi.WazeroOnlyType
name string
data []byte
}
// Name implements wasm.CustomSection.Name
func (c *customSection) Name() string {
return c.name
}
// Data implements wasm.CustomSection.Data
func (c *customSection) Data() []byte {
return c.data
}
// ModuleConfig configures resources needed by functions that have low-level interactions with the host operating
// system. Using this, resources such as STDIN can be isolated, so that the same module can be safely instantiated
// multiple times.
//
// Here's an example:
//
// // Initialize base configuration:
// config := wazero.NewModuleConfig().WithStdout(buf).WithSysNanotime()
//
// // Assign different configuration on each instantiation
// mod, _ := r.InstantiateModule(ctx, compiled, config.WithName("rotate").WithArgs("rotate", "angle=90", "dir=cw"))
//
// While wazero supports Windows as a platform, host functions using ModuleConfig follow a UNIX dialect.
// See RATIONALE.md for design background and relationship to WebAssembly System Interfaces (WASI).
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
// - ModuleConfig is immutable. Each WithXXX function returns a new instance
// including the corresponding change.
type ModuleConfig interface {
// WithArgs assigns command-line arguments visible to an imported function that reads an arg vector (argv). Defaults to
// none. Runtime.InstantiateModule errs if any arg is empty.
//
// These values are commonly read by the functions like "args_get" in "wasi_snapshot_preview1" although they could be
// read by functions imported from other modules.
//
// Similar to os.Args and exec.Cmd Env, many implementations would expect a program name to be argv[0]. However, neither
// WebAssembly nor WebAssembly System Interfaces (WASI) define this. Regardless, you may choose to set the first
// argument to the same value set via WithName.
//
// Note: This does not default to os.Args as that violates sandboxing.
//
// See https://linux.die.net/man/3/argv and https://en.wikipedia.org/wiki/Null-terminated_string
WithArgs(...string) ModuleConfig
// WithEnv sets an environment variable visible to a Module that imports functions. Defaults to none.
// Runtime.InstantiateModule errs if the key is empty or contains a NULL(0) or equals("=") character.
//
// Validation is the same as os.Setenv on Linux and replaces any existing value. Unlike exec.Cmd Env, this does not
// default to the current process environment as that would violate sandboxing. This also does not preserve order.
//
// Environment variables are commonly read by the functions like "environ_get" in "wasi_snapshot_preview1" although
// they could be read by functions imported from other modules.
//
// While similar to process configuration, there are no assumptions that can be made about anything OS-specific. For
// example, neither WebAssembly nor WebAssembly System Interfaces (WASI) define concerns processes have, such as
// case-sensitivity on environment keys. For portability, define entries with case-insensitively unique keys.
//
// See https://linux.die.net/man/3/environ and https://en.wikipedia.org/wiki/Null-terminated_string
WithEnv(key, value string) ModuleConfig
// WithFS is a convenience that calls WithFSConfig with an FSConfig of the
// input for the root ("/") guest path.
WithFS(fs.FS) ModuleConfig
// WithFSConfig configures the filesystem available to each guest
// instantiated with this configuration. By default, no file access is
// allowed, so functions like `path_open` result in unsupported errors
// (e.g. syscall.ENOSYS).
WithFSConfig(FSConfig) ModuleConfig
// WithName configures the module name. Defaults to what was decoded from
// the name section. Empty string ("") clears any name.
WithName(string) ModuleConfig
// WithStartFunctions configures the functions to call after the module is
// instantiated. Defaults to "_start".
//
// Clearing the default is supported, via `WithStartFunctions()`.
//
// # Notes
//
// - If a start function doesn't exist, it is skipped. However, any that
// do exist are called in order.
// - Start functions are not intended to be called multiple times.
// Functions that should be called multiple times should be invoked
// manually via api.Module's `ExportedFunction` method.
// - Start functions commonly exit the module during instantiation,
// preventing use of any functions later. This is the case in "wasip1",
// which defines the default value "_start".
// - See /RATIONALE.md for motivation of this feature.
WithStartFunctions(...string) ModuleConfig
// WithStderr configures where standard error (file descriptor 2) is written. Defaults to io.Discard.
//
// This writer is most commonly used by the functions like "fd_write" in "wasi_snapshot_preview1" although it could
// be used by functions imported from other modules.
//
// # Notes
//
// - The caller is responsible to close any io.Writer they supply: It is not closed on api.Module Close.
// - This does not default to os.Stderr as that both violates sandboxing and prevents concurrent modules.
//
// See https://linux.die.net/man/3/stderr
WithStderr(io.Writer) ModuleConfig
// WithStdin configures where standard input (file descriptor 0) is read. Defaults to return io.EOF.
//
// This reader is most commonly used by the functions like "fd_read" in "wasi_snapshot_preview1" although it could
// be used by functions imported from other modules.
//
// # Notes
//
// - The caller is responsible to close any io.Reader they supply: It is not closed on api.Module Close.
// - This does not default to os.Stdin as that both violates sandboxing and prevents concurrent modules.
//
// See https://linux.die.net/man/3/stdin
WithStdin(io.Reader) ModuleConfig
// WithStdout configures where standard output (file descriptor 1) is written. Defaults to io.Discard.
//
// This writer is most commonly used by the functions like "fd_write" in "wasi_snapshot_preview1" although it could
// be used by functions imported from other modules.
//
// # Notes
//
// - The caller is responsible to close any io.Writer they supply: It is not closed on api.Module Close.
// - This does not default to os.Stdout as that both violates sandboxing and prevents concurrent modules.
//
// See https://linux.die.net/man/3/stdout
WithStdout(io.Writer) ModuleConfig
// WithWalltime configures the wall clock, sometimes referred to as the
// real time clock. sys.Walltime returns the current unix/epoch time,
// seconds since midnight UTC 1 January 1970, with a nanosecond fraction.
// This defaults to a fake result that increases by 1ms on each reading.
//
// Here's an example that uses a custom clock:
// moduleConfig = moduleConfig.
// WithWalltime(func(context.Context) (sec int64, nsec int32) {
// return clock.walltime()
// }, sys.ClockResolution(time.Microsecond.Nanoseconds()))
//
// # Notes:
// - This does not default to time.Now as that violates sandboxing.
// - This is used to implement host functions such as WASI
// `clock_time_get` with the `realtime` clock ID.
// - Use WithSysWalltime for a usable implementation.
WithWalltime(sys.Walltime, sys.ClockResolution) ModuleConfig
// WithSysWalltime uses time.Now for sys.Walltime with a resolution of 1us
// (1000ns).
//
// See WithWalltime
WithSysWalltime() ModuleConfig
// WithNanotime configures the monotonic clock, used to measure elapsed
// time in nanoseconds. Defaults to a fake result that increases by 1ms
// on each reading.
//
// Here's an example that uses a custom clock:
// moduleConfig = moduleConfig.
// WithNanotime(func(context.Context) int64 {
// return clock.nanotime()
// }, sys.ClockResolution(time.Microsecond.Nanoseconds()))
//
// # Notes:
// - This does not default to time.Since as that violates sandboxing.
// - This is used to implement host functions such as WASI
// `clock_time_get` with the `monotonic` clock ID.
// - Some compilers implement sleep by looping on sys.Nanotime (e.g. Go).
// - If you set this, you should probably set WithNanosleep also.
// - Use WithSysNanotime for a usable implementation.
WithNanotime(sys.Nanotime, sys.ClockResolution) ModuleConfig
// WithSysNanotime uses time.Now for sys.Nanotime with a resolution of 1us.
//
// See WithNanotime
WithSysNanotime() ModuleConfig
// WithNanosleep configures how to pause the current goroutine for at
// least the configured nanoseconds. Defaults to return immediately.
//
// This example uses a custom sleep function:
// moduleConfig = moduleConfig.
// WithNanosleep(func(ns int64) {
// rel := unix.NsecToTimespec(ns)
// remain := unix.Timespec{}
// for { // loop until no more time remaining
// err := unix.ClockNanosleep(unix.CLOCK_MONOTONIC, 0, &rel, &remain)
// --snip--
//
// # Notes:
// - This does not default to time.Sleep as that violates sandboxing.
// - This is used to implement host functions such as WASI `poll_oneoff`.
// - Some compilers implement sleep by looping on sys.Nanotime (e.g. Go).
// - If you set this, you should probably set WithNanotime also.
// - Use WithSysNanosleep for a usable implementation.
WithNanosleep(sys.Nanosleep) ModuleConfig
// WithOsyield yields the processor, typically to implement spin-wait
// loops. Defaults to return immediately.
//
// # Notes:
// - This primarily supports `sched_yield` in WASI
// - This does not default to runtime.osyield as that violates sandboxing.
WithOsyield(sys.Osyield) ModuleConfig
// WithSysNanosleep uses time.Sleep for sys.Nanosleep.
//
// See WithNanosleep
WithSysNanosleep() ModuleConfig
// WithRandSource configures a source of random bytes. Defaults to return a
// deterministic source. You might override this with crypto/rand.Reader
//
// This reader is most commonly used by the functions like "random_get" in
// "wasi_snapshot_preview1", "seed" in AssemblyScript standard "env", and
// "getRandomData" when runtime.GOOS is "js".
//
// Note: The caller is responsible to close any io.Reader they supply: It
// is not closed on api.Module Close.
WithRandSource(io.Reader) ModuleConfig
}
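// exampleModuleConfig is an illustrative sketch added for documentation: it
// shows one way a host could assemble a ModuleConfig using the methods above.
// It is not called by wazero; the name, args and environment values are
// assumptions for the example.
func exampleModuleConfig(stdout io.Writer) ModuleConfig {
	return NewModuleConfig().
		WithName("app").
		WithArgs("app", "-verbose"). // first arg mirrors WithName by convention
		WithEnv("HOME", "/").
		WithStdout(stdout). // stderr/stdin keep their sandboxed defaults
		WithSysWalltime().  // real clocks are opt-in
		WithSysNanotime()
}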
type moduleConfig struct {
name string
nameSet bool
startFunctions []string
stdin io.Reader
stdout io.Writer
stderr io.Writer
randSource io.Reader
walltime sys.Walltime
walltimeResolution sys.ClockResolution
nanotime sys.Nanotime
nanotimeResolution sys.ClockResolution
nanosleep sys.Nanosleep
osyield sys.Osyield
args [][]byte
// environ is pair-indexed to retain order similar to os.Environ.
environ [][]byte
// environKeys allow overwriting of existing values.
environKeys map[string]int
// fsConfig is the file system configuration for ABI like WASI.
fsConfig FSConfig
// sockConfig is the network listener configuration for ABI like WASI.
sockConfig *internalsock.Config
}
// NewModuleConfig returns a ModuleConfig that can be used for configuring module instantiation.
func NewModuleConfig() ModuleConfig {
return &moduleConfig{
startFunctions: []string{"_start"},
environKeys: map[string]int{},
}
}
// clone makes a deep copy of this module config.
func (c *moduleConfig) clone() *moduleConfig {
ret := *c // copy except maps which share a ref
ret.environKeys = make(map[string]int, len(c.environKeys))
for key, value := range c.environKeys {
ret.environKeys[key] = value
}
return &ret
}
// WithArgs implements ModuleConfig.WithArgs
func (c *moduleConfig) WithArgs(args ...string) ModuleConfig {
ret := c.clone()
ret.args = toByteSlices(args)
return ret
}
func toByteSlices(strings []string) (result [][]byte) {
if len(strings) == 0 {
return
}
result = make([][]byte, len(strings))
for i, a := range strings {
result[i] = []byte(a)
}
return
}
// WithEnv implements ModuleConfig.WithEnv
func (c *moduleConfig) WithEnv(key, value string) ModuleConfig {
ret := c.clone()
// Check to see if this key already exists and update it.
if i, ok := ret.environKeys[key]; ok {
ret.environ[i+1] = []byte(value) // environ is pair-indexed, so the value is 1 after the key.
} else {
ret.environKeys[key] = len(ret.environ)
ret.environ = append(ret.environ, []byte(key), []byte(value))
}
return ret
}
// WithFS implements ModuleConfig.WithFS
func (c *moduleConfig) WithFS(fs fs.FS) ModuleConfig {
var config FSConfig
if fs != nil {
config = NewFSConfig().WithFSMount(fs, "")
}
return c.WithFSConfig(config)
}
// WithFSConfig implements ModuleConfig.WithFSConfig
func (c *moduleConfig) WithFSConfig(config FSConfig) ModuleConfig {
ret := c.clone()
ret.fsConfig = config
return ret
}
// WithName implements ModuleConfig.WithName
func (c *moduleConfig) WithName(name string) ModuleConfig {
ret := c.clone()
ret.nameSet = true
ret.name = name
return ret
}
// WithStartFunctions implements ModuleConfig.WithStartFunctions
func (c *moduleConfig) WithStartFunctions(startFunctions ...string) ModuleConfig {
ret := c.clone()
ret.startFunctions = startFunctions
return ret
}
// WithStderr implements ModuleConfig.WithStderr
func (c *moduleConfig) WithStderr(stderr io.Writer) ModuleConfig {
ret := c.clone()
ret.stderr = stderr
return ret
}
// WithStdin implements ModuleConfig.WithStdin
func (c *moduleConfig) WithStdin(stdin io.Reader) ModuleConfig {
ret := c.clone()
ret.stdin = stdin
return ret
}
// WithStdout implements ModuleConfig.WithStdout
func (c *moduleConfig) WithStdout(stdout io.Writer) ModuleConfig {
ret := c.clone()
ret.stdout = stdout
return ret
}
// WithWalltime implements ModuleConfig.WithWalltime
func (c *moduleConfig) WithWalltime(walltime sys.Walltime, resolution sys.ClockResolution) ModuleConfig {
ret := c.clone()
ret.walltime = walltime
ret.walltimeResolution = resolution
return ret
}
// We choose arbitrary resolutions here because there's no perfect alternative. For example, according to the
// source in time.go, windows monotonic resolution can be 15ms. This chooses arbitrarily 1us for wall time and
// 1ns for monotonic. See RATIONALE.md for more context.
// WithSysWalltime implements ModuleConfig.WithSysWalltime
func (c *moduleConfig) WithSysWalltime() ModuleConfig {
return c.WithWalltime(platform.Walltime, sys.ClockResolution(time.Microsecond.Nanoseconds()))
}
// WithNanotime implements ModuleConfig.WithNanotime
func (c *moduleConfig) WithNanotime(nanotime sys.Nanotime, resolution sys.ClockResolution) ModuleConfig {
ret := c.clone()
ret.nanotime = nanotime
ret.nanotimeResolution = resolution
return ret
}
// WithSysNanotime implements ModuleConfig.WithSysNanotime
func (c *moduleConfig) WithSysNanotime() ModuleConfig {
return c.WithNanotime(platform.Nanotime, sys.ClockResolution(1))
}
// WithNanosleep implements ModuleConfig.WithNanosleep
func (c *moduleConfig) WithNanosleep(nanosleep sys.Nanosleep) ModuleConfig {
ret := *c // copy
ret.nanosleep = nanosleep
return &ret
}
// WithOsyield implements ModuleConfig.WithOsyield
func (c *moduleConfig) WithOsyield(osyield sys.Osyield) ModuleConfig {
ret := *c // copy
ret.osyield = osyield
return &ret
}
// WithSysNanosleep implements ModuleConfig.WithSysNanosleep
func (c *moduleConfig) WithSysNanosleep() ModuleConfig {
return c.WithNanosleep(platform.Nanosleep)
}
// WithRandSource implements ModuleConfig.WithRandSource
func (c *moduleConfig) WithRandSource(source io.Reader) ModuleConfig {
ret := c.clone()
ret.randSource = source
return ret
}
// toSysContext creates a baseline wasm.Context configured by ModuleConfig.
func (c *moduleConfig) toSysContext() (sysCtx *internalsys.Context, err error) {
var environ [][]byte // Intentionally doesn't pre-allocate to reduce logic to default to nil.
// Same validation as syscall.Setenv for Linux
for i := 0; i < len(c.environ); i += 2 {
key, value := c.environ[i], c.environ[i+1]
keyLen := len(key)
if keyLen == 0 {
err = errors.New("environ invalid: empty key")
return
}
valueLen := len(value)
result := make([]byte, keyLen+valueLen+1)
j := 0
for ; j < keyLen; j++ {
if k := key[j]; k == '=' { // NUL enforced in NewContext
err = errors.New("environ invalid: key contains '=' character")
return
} else {
result[j] = k
}
}
result[j] = '='
copy(result[j+1:], value)
environ = append(environ, result)
}
var fs []experimentalsys.FS
var guestPaths []string
if f, ok := c.fsConfig.(*fsConfig); ok {
fs, guestPaths = f.preopens()
}
var listeners []*net.TCPListener
if n := c.sockConfig; n != nil {
if listeners, err = n.BuildTCPListeners(); err != nil {
return
}
}
return internalsys.NewContext(
math.MaxUint32,
c.args,
environ,
c.stdin,
c.stdout,
c.stderr,
c.randSource,
c.walltime, c.walltimeResolution,
c.nanotime, c.nanotimeResolution,
c.nanosleep, c.osyield,
fs, guestPaths,
listeners,
)
}

View file

@ -0,0 +1,14 @@
// Note: The build constraints here are about the compiler, which is more
// narrow than the architectures supported by the assembler.
//
// Constraints here must match platform.CompilerSupported.
//
// Meanwhile, users who know their runtime.GOOS can operate with the compiler
// may choose to use NewRuntimeConfigCompiler explicitly.
//go:build (amd64 || arm64) && (darwin || linux || freebsd || windows)
package wazero
func newRuntimeConfig() RuntimeConfig {
return NewRuntimeConfigCompiler()
}

View file

@ -0,0 +1,8 @@
// This is the opposite constraint of config_supported.go
//go:build !(amd64 || arm64) || !(darwin || linux || freebsd || windows)
package wazero
func newRuntimeConfig() RuntimeConfig {
return NewRuntimeConfigInterpreter()
}

View file

@ -0,0 +1,48 @@
package experimental
import (
"context"
"github.com/tetratelabs/wazero/internal/expctxkeys"
)
// Snapshot holds the execution state at the time of a Snapshotter.Snapshot call.
type Snapshot interface {
// Restore sets the Wasm execution state to the capture. Because a host function
// calling this is resetting the pointer to the execution stack, the host function
// will not be able to return values in the normal way. ret is a slice of values the
// host function intends to return from the restored function.
Restore(ret []uint64)
}
// Snapshotter allows host functions to snapshot the WebAssembly execution environment.
type Snapshotter interface {
// Snapshot captures the current execution state.
Snapshot() Snapshot
}
// EnableSnapshotterKey is a context key to indicate that snapshotting should be enabled.
// The context.Context passed to an exported function invocation should have this key set
// to a non-nil value, and host functions will be able to retrieve it using SnapshotterKey.
//
// Deprecated: use WithSnapshotter to enable snapshots.
type EnableSnapshotterKey = expctxkeys.EnableSnapshotterKey
// WithSnapshotter enables snapshots.
// Passing the returned context to an exported function invocation enables snapshots,
// and allows host functions to retrieve the Snapshotter using GetSnapshotter.
func WithSnapshotter(ctx context.Context) context.Context {
return context.WithValue(ctx, expctxkeys.EnableSnapshotterKey{}, struct{}{})
}
// SnapshotterKey is a context key to access a Snapshotter from a host function.
// It is only present if EnableSnapshotter was set in the function invocation context.
//
// Deprecated: use GetSnapshotter to get the snapshotter.
type SnapshotterKey = expctxkeys.SnapshotterKey
// GetSnapshotter gets the Snapshotter from a host function.
// It is only present if WithSnapshotter was called with the function invocation context.
func GetSnapshotter(ctx context.Context) Snapshotter {
return ctx.Value(expctxkeys.SnapshotterKey{}).(Snapshotter)
}
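// exampleCheckpoint is an illustrative sketch added for documentation (not
// part of wazero): a host function could capture a Snapshot like this and
// later call Restore on it to unwind the guest back to this point. It assumes
// the exported function was invoked with a context from WithSnapshotter;
// otherwise GetSnapshotter panics on the failed type assertion.
func exampleCheckpoint(ctx context.Context) Snapshot {
	return GetSnapshotter(ctx).Snapshot()
}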

View file

@ -0,0 +1,63 @@
package experimental
import (
"context"
"github.com/tetratelabs/wazero/internal/expctxkeys"
)
// CloseNotifier is a notification hook, invoked when a module is closed.
//
// Note: This is experimental progress towards #1197, and likely to change. Do
// not expose this in shared libraries as it can cause version locks.
type CloseNotifier interface {
// CloseNotify is a notification that occurs *before* an api.Module is
// closed. `exitCode` is zero on success or in the case there was no exit
// code.
//
// Notes:
// - This does not return an error because the module will be closed
// unconditionally.
// - Do not panic from this function as doing so could cause resource
// leaks.
// - While this is only called once per module, if configured for
// multiple modules, it will be called for each, e.g. on runtime close.
CloseNotify(ctx context.Context, exitCode uint32)
}
// ^-- Note: This might need to be a part of the listener or become a part of
// host state implementation. For example, if this is used to implement state
// cleanup for host modules, possibly something like below would be better, as
// it could be implemented in a way that allows concurrent module use.
//
// // key is like a context key, stateFactory is invoked per instantiate and
// // is associated with the key (exposed as `Module.State` similar to go
// // context). Using a key is better than the module name because we can
// // de-dupe it for host modules that can be instantiated into different
// // names. Also, you can make the key package private.
// HostModuleBuilder.WithState(key any, stateFactory func() Cleanup)`
//
// Such a design could work to isolate state only needed for wasip1, for
// example the dirent cache. However, if end users use this for different
// things, we may need separate designs.
//
// In summary, the purpose of this iteration is to identify projects that
// would use something like this, and then we can figure out which way it
// should go.
// CloseNotifyFunc is a convenience for defining a CloseNotifier inline.
type CloseNotifyFunc func(ctx context.Context, exitCode uint32)
// CloseNotify implements CloseNotifier.CloseNotify.
func (f CloseNotifyFunc) CloseNotify(ctx context.Context, exitCode uint32) {
f(ctx, exitCode)
}
// WithCloseNotifier registers the given CloseNotifier into the given
// context.Context.
func WithCloseNotifier(ctx context.Context, notifier CloseNotifier) context.Context {
if notifier != nil {
return context.WithValue(ctx, expctxkeys.CloseNotifierKey{}, notifier)
}
return ctx
}
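// exampleRecordExit is an illustrative sketch added for documentation (not
// part of wazero): it registers an inline CloseNotifyFunc that hands the
// module's exit code to a caller-supplied callback. The record parameter is
// an assumption for the example.
func exampleRecordExit(ctx context.Context, record func(uint32)) context.Context {
	return WithCloseNotifier(ctx, CloseNotifyFunc(func(_ context.Context, exitCode uint32) {
		record(exitCode) // invoked once, before the module is closed
	}))
}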

View file

@ -0,0 +1,41 @@
// Package experimental includes features we aren't yet sure about. These are enabled with context.Context keys.
//
// Note: All features here may be changed or deleted at any time, so use with caution!
package experimental
import (
"github.com/tetratelabs/wazero/api"
)
// InternalModule is an api.Module that exposes additional
// information.
type InternalModule interface {
api.Module
// NumGlobal returns the count of all globals in the module.
NumGlobal() int
// Global provides a read-only view for a given global index.
//
// This method panics if i is out of bounds.
Global(i int) api.Global
}
// ProgramCounter is an opaque value representing a specific execution point in
// a module. It is meant to be used with Function.SourceOffsetForPC and
// StackIterator.
type ProgramCounter uint64
// InternalFunction exposes some information about a function instance.
type InternalFunction interface {
// Definition provides introspection into the function's names and
// signature.
Definition() api.FunctionDefinition
// SourceOffsetForPC resolves a program counter into its corresponding
// offset in the Code section of the module this function belongs to.
// The source offset is meant to help map the function calls to their
// location in the original source files. Returns 0 if the offset cannot
// be calculated.
SourceOffsetForPC(pc ProgramCounter) uint64
}

View file

@ -0,0 +1,15 @@
package experimental
import "github.com/tetratelabs/wazero/api"
// CoreFeaturesThreads enables threads instructions ("threads").
//
// # Notes
//
// - The instruction list is too long to enumerate in godoc.
// See https://github.com/WebAssembly/threads/blob/main/proposals/threads/Overview.md
// - Atomic operations are guest-only until api.Memory or another API exposes them to host functions.
// - On systems without mmap available, the memory will pre-allocate to the maximum size. Many
// binaries will use a theoretical maximum like 4GB, so if using such a binary on a system
// without mmap, consider editing the binary to reduce the max size setting of memory.
const CoreFeaturesThreads = api.CoreFeatureSIMD << 1
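// An illustrative sketch (assuming the host builds its runtime configuration
// in the root wazero package): threads support could be enabled by adding
// this feature to the core features of a RuntimeConfig, e.g.
//
//	cfg := wazero.NewRuntimeConfigCompiler().
//		WithCoreFeatures(api.CoreFeaturesV2 | experimental.CoreFeaturesThreads)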

View file

@ -0,0 +1,330 @@
package experimental
import (
"context"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/internal/expctxkeys"
)
// StackIterator allows iterating on each function of the call stack, starting
// from the top. At least one call to Next() is required to start the iteration.
//
// Note: The iterator provides a view of the call stack at the time of
// iteration. As a result, parameter values may be different than the ones their
// function was called with.
type StackIterator interface {
// Next moves the iterator to the next function in the stack. Returns
// false if it reached the bottom of the stack.
Next() bool
// Function describes the function called by the current frame.
Function() InternalFunction
// ProgramCounter returns the program counter associated with the
// function call.
ProgramCounter() ProgramCounter
}
// FunctionListenerFactoryKey is a context.Context Value key.
// Its associated value should be a FunctionListenerFactory.
//
// Deprecated: use WithFunctionListenerFactory to register a FunctionListenerFactory.
type FunctionListenerFactoryKey = expctxkeys.FunctionListenerFactoryKey
// WithFunctionListenerFactory registers a FunctionListenerFactory
// with the context.
func WithFunctionListenerFactory(ctx context.Context, factory FunctionListenerFactory) context.Context {
return context.WithValue(ctx, expctxkeys.FunctionListenerFactoryKey{}, factory)
}
// FunctionListenerFactory returns FunctionListeners to be notified when a
// function is called.
type FunctionListenerFactory interface {
// NewFunctionListener returns a FunctionListener for a defined function.
// If nil is returned, no listener will be notified.
NewFunctionListener(api.FunctionDefinition) FunctionListener
// ^^ A single instance can be returned to avoid instantiating a listener
// per function, especially as there may be thousands of functions. Shared
// listeners use their FunctionDefinition parameter to clarify.
}
// FunctionListener can be registered for any function via
// FunctionListenerFactory to be notified when the function is called.
type FunctionListener interface {
// Before is invoked before a function is called.
//
// There is always one corresponding call to After or Abort for each call to
// Before. This guarantee allows the listener to maintain an internal stack
// to perform correlations between the entry and exit of functions.
//
// # Params
//
// - ctx: the context of the caller function which must be the same
// instance or parent of the result.
// - mod: the calling module.
// - def: the function definition.
// - params: api.ValueType encoded parameters.
// - stackIterator: iterator on the call stack. At least one entry is
// guaranteed (the called function), whose Args() will be equal to
// params. The iterator will be reused between calls to Before.
//
// Note: api.Memory is meant for inspection, not modification.
// mod can be cast to InternalModule to read non-exported globals.
Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, params []uint64, stackIterator StackIterator)
// After is invoked after a function is called.
//
// # Params
//
// - ctx: the context of the caller function.
// - mod: the calling module.
// - def: the function definition.
// - results: api.ValueType encoded results.
//
// # Notes
//
// - api.Memory is meant for inspection, not modification.
// - This is not called when a host function panics, or a guest function traps.
// See Abort for more details.
After(ctx context.Context, mod api.Module, def api.FunctionDefinition, results []uint64)
// Abort is invoked when a function does not return due to a trap or panic.
//
// # Params
//
// - ctx: the context of the caller function.
// - mod: the calling module.
// - def: the function definition.
// - err: the error value representing the reason why the function aborted.
//
// # Notes
//
// - api.Memory is meant for inspection, not modification.
Abort(ctx context.Context, mod api.Module, def api.FunctionDefinition, err error)
}
// FunctionListenerFunc is a function type implementing the FunctionListener
// interface, making it possible to use regular functions and methods as
// listeners of function invocation.
//
// The FunctionListener interface declares three methods (Before, After and
// Abort), but this type invokes its value only when Before is called. It is
// best suited for cases where the host does not need to perform correlation
// between the start and end of the function call.
type FunctionListenerFunc func(context.Context, api.Module, api.FunctionDefinition, []uint64, StackIterator)
// Before satisfies the FunctionListener interface, calls f.
func (f FunctionListenerFunc) Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, params []uint64, stackIterator StackIterator) {
f(ctx, mod, def, params, stackIterator)
}
// After is declared to satisfy the FunctionListener interface, but it does
// nothing.
func (f FunctionListenerFunc) After(context.Context, api.Module, api.FunctionDefinition, []uint64) {
}
// Abort is declared to satisfy the FunctionListener interface, but it does
// nothing.
func (f FunctionListenerFunc) Abort(context.Context, api.Module, api.FunctionDefinition, error) {
}
// FunctionListenerFactoryFunc is a function type implementing the
// FunctionListenerFactory interface, making it possible to use regular
// functions and methods as factory of function listeners.
type FunctionListenerFactoryFunc func(api.FunctionDefinition) FunctionListener
// NewFunctionListener satisfies the FunctionListenerFactory interface, calls f.
func (f FunctionListenerFactoryFunc) NewFunctionListener(def api.FunctionDefinition) FunctionListener {
return f(def)
}
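// exampleWithCallLogging is an illustrative sketch added for documentation
// (not part of wazero): it wires a factory and a listener built from the
// function types above into a context used for module instantiation. The
// print callback is an assumption for the example.
func exampleWithCallLogging(ctx context.Context, print func(string)) context.Context {
	factory := FunctionListenerFactoryFunc(func(api.FunctionDefinition) FunctionListener {
		// One FunctionListenerFunc is shared by every function; it only
		// observes Before, which is enough for simple call logging.
		return FunctionListenerFunc(func(_ context.Context, _ api.Module, def api.FunctionDefinition, _ []uint64, _ StackIterator) {
			print(def.DebugName())
		})
	})
	return WithFunctionListenerFactory(ctx, factory)
}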
// MultiFunctionListenerFactory constructs a FunctionListenerFactory which
// combines the listeners created by each of the factories passed as arguments.
//
// This function is useful when multiple listeners need to be hooked to a module
// because the propagation mechanism based on installing a listener factory in
// the context.Context used when instantiating modules allows for a single
// listener to be installed.
//
// The stack iterator passed to the Before method is reset so that each listener
// can iterate the call stack independently without impacting the ability of
// other listeners to do so.
func MultiFunctionListenerFactory(factories ...FunctionListenerFactory) FunctionListenerFactory {
multi := make(multiFunctionListenerFactory, len(factories))
copy(multi, factories)
return multi
}
type multiFunctionListenerFactory []FunctionListenerFactory
func (multi multiFunctionListenerFactory) NewFunctionListener(def api.FunctionDefinition) FunctionListener {
var lstns []FunctionListener
for _, factory := range multi {
if lstn := factory.NewFunctionListener(def); lstn != nil {
lstns = append(lstns, lstn)
}
}
switch len(lstns) {
case 0:
return nil
case 1:
return lstns[0]
default:
return &multiFunctionListener{lstns: lstns}
}
}
type multiFunctionListener struct {
lstns []FunctionListener
stack stackIterator
}
func (multi *multiFunctionListener) Before(ctx context.Context, mod api.Module, def api.FunctionDefinition, params []uint64, si StackIterator) {
multi.stack.base = si
for _, lstn := range multi.lstns {
multi.stack.index = -1
lstn.Before(ctx, mod, def, params, &multi.stack)
}
}
func (multi *multiFunctionListener) After(ctx context.Context, mod api.Module, def api.FunctionDefinition, results []uint64) {
for _, lstn := range multi.lstns {
lstn.After(ctx, mod, def, results)
}
}
func (multi *multiFunctionListener) Abort(ctx context.Context, mod api.Module, def api.FunctionDefinition, err error) {
for _, lstn := range multi.lstns {
lstn.Abort(ctx, mod, def, err)
}
}
type stackIterator struct {
base StackIterator
index int
pcs []uint64
fns []InternalFunction
}
func (si *stackIterator) Next() bool {
if si.base != nil {
si.pcs = si.pcs[:0]
si.fns = si.fns[:0]
for si.base.Next() {
si.pcs = append(si.pcs, uint64(si.base.ProgramCounter()))
si.fns = append(si.fns, si.base.Function())
}
si.base = nil
}
si.index++
return si.index < len(si.pcs)
}
func (si *stackIterator) ProgramCounter() ProgramCounter {
return ProgramCounter(si.pcs[si.index])
}
func (si *stackIterator) Function() InternalFunction {
return si.fns[si.index]
}
// StackFrame represents a frame on the call stack.
type StackFrame struct {
Function api.Function
Params []uint64
Results []uint64
PC uint64
SourceOffset uint64
}
type internalFunction struct {
definition api.FunctionDefinition
sourceOffset uint64
}
func (f internalFunction) Definition() api.FunctionDefinition {
return f.definition
}
func (f internalFunction) SourceOffsetForPC(pc ProgramCounter) uint64 {
return f.sourceOffset
}
// stackFrameIterator is an implementation of the experimental.StackIterator
// interface.
type stackFrameIterator struct {
index int
stack []StackFrame
fndef []api.FunctionDefinition
}
func (si *stackFrameIterator) Next() bool {
si.index++
return si.index < len(si.stack)
}
func (si *stackFrameIterator) Function() InternalFunction {
return internalFunction{
definition: si.fndef[si.index],
sourceOffset: si.stack[si.index].SourceOffset,
}
}
func (si *stackFrameIterator) ProgramCounter() ProgramCounter {
return ProgramCounter(si.stack[si.index].PC)
}
// NewStackIterator constructs a stack iterator from a list of stack frames.
// The top most frame is the last one.
func NewStackIterator(stack ...StackFrame) StackIterator {
si := &stackFrameIterator{
index: -1,
stack: make([]StackFrame, len(stack)),
fndef: make([]api.FunctionDefinition, len(stack)),
}
for i := range stack {
si.stack[i] = stack[len(stack)-(i+1)]
}
// The size of function definition is only one pointer which should allow
// the compiler to optimize the conversion to api.FunctionDefinition; but
// the presence of internal.WazeroOnlyType, despite being defined as an
// empty struct, forces a heap allocation that we amortize by caching the
// result.
for i, frame := range stack {
si.fndef[i] = frame.Function.Definition()
}
return si
}
// BenchmarkFunctionListener implements a benchmark for function listeners.
//
// The benchmark calls Before and After methods repeatedly using the provided
// module and stack frames to invoke the methods.
//
// The stack frame is a representation of the call stack that the Before method
// will be invoked with. The top of the stack is stored at index zero. The stack
// must contain at least one frame or the benchmark will fail.
func BenchmarkFunctionListener(n int, module api.Module, stack []StackFrame, listener FunctionListener) {
if len(stack) == 0 {
panic("cannot benchmark function listener with an empty stack")
}
ctx := context.Background()
def := stack[0].Function.Definition()
params := stack[0].Params
results := stack[0].Results
stackIterator := &stackIterator{base: NewStackIterator(stack...)}
for i := 0; i < n; i++ {
stackIterator.index = -1
listener.Before(ctx, module, def, params, stackIterator)
listener.After(ctx, module, def, results)
}
}
// TODO: the calls to Abort are not yet tested in internal/testing/enginetest,
// but they are validated indirectly in tests which exercise host logging,
// like Test_procExit in imports/wasi_snapshot_preview1. Eventually we should
// add dedicated tests to validate the behavior of the interpreter and compiler
// engines independently.

View file

@ -0,0 +1,50 @@
package experimental
import (
"context"
"github.com/tetratelabs/wazero/internal/expctxkeys"
)
// MemoryAllocator is a memory allocation hook,
// invoked to create a LinearMemory.
type MemoryAllocator interface {
// Allocate should create a new LinearMemory with the given specification:
// cap is the suggested initial capacity for the backing []byte,
// and max the maximum length that will ever be requested.
//
// Notes:
// - To back a shared memory, the address of the backing []byte cannot
// change. This is checked at runtime. Implementations should document
// if the returned LinearMemory meets this requirement.
Allocate(cap, max uint64) LinearMemory
}
// MemoryAllocatorFunc is a convenience for defining a MemoryAllocator inline.
type MemoryAllocatorFunc func(cap, max uint64) LinearMemory
// Allocate implements MemoryAllocator.Allocate.
func (f MemoryAllocatorFunc) Allocate(cap, max uint64) LinearMemory {
return f(cap, max)
}
// LinearMemory is an expandable []byte that backs a Wasm linear memory.
type LinearMemory interface {
// Reallocates the linear memory to size bytes in length.
//
// Notes:
// - To back a shared memory, Reallocate can't change the address of the
// backing []byte (only its length/capacity may change).
Reallocate(size uint64) []byte
// Free the backing memory buffer.
Free()
}
// WithMemoryAllocator registers the given MemoryAllocator into the given
// context.Context.
func WithMemoryAllocator(ctx context.Context, allocator MemoryAllocator) context.Context {
if allocator != nil {
return context.WithValue(ctx, expctxkeys.MemoryAllocatorKey{}, allocator)
}
return ctx
}
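// sliceMemory is an illustrative sketch added for documentation (not part of
// wazero): a LinearMemory backed by an ordinary Go slice. Because Reallocate
// may move the backing array, it does not meet the stable-address requirement
// for shared memories noted on MemoryAllocator.Allocate.
type sliceMemory struct{ buf []byte }

func (m *sliceMemory) Reallocate(size uint64) []byte {
	if uint64(cap(m.buf)) < size {
		grown := make([]byte, size)
		copy(grown, m.buf)
		m.buf = grown
	} else {
		m.buf = m.buf[:size]
	}
	return m.buf
}

func (m *sliceMemory) Free() { m.buf = nil }

// exampleWithSliceAllocator registers the sketch above via WithMemoryAllocator,
// pre-allocating the suggested capacity and ignoring the maximum.
func exampleWithSliceAllocator(ctx context.Context) context.Context {
	return WithMemoryAllocator(ctx, MemoryAllocatorFunc(func(cap, max uint64) LinearMemory {
		return &sliceMemory{buf: make([]byte, 0, cap)}
	}))
}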

View file

@ -0,0 +1,92 @@
package sys
import (
"fmt"
"io/fs"
"github.com/tetratelabs/wazero/sys"
)
// FileType is fs.FileMode masked on fs.ModeType. For example, zero is a
// regular file, fs.ModeDir is a directory and fs.ModeIrregular is unknown.
//
// Note: This is defined by Linux, not POSIX.
type FileType = fs.FileMode
// Dirent is an entry read from a directory via File.Readdir.
//
// # Notes
//
// - This extends `dirent` defined in POSIX with some fields defined by
// Linux. See https://man7.org/linux/man-pages/man3/readdir.3.html and
// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html
// - This has a subset of fields defined in sys.Stat_t. Notably, there is no
// field corresponding to Stat_t.Dev because that value will be constant
// for all files in a directory. To get the Dev value, call File.Stat on
// the directory File.Readdir was called on.
type Dirent struct {
// Ino is the file serial number, or zero if not available. See Ino for
// more details including impact returning a zero value.
Ino sys.Inode
// Name is the base name of the directory entry. Empty is invalid.
Name string
// Type is fs.FileMode masked on fs.ModeType. For example, zero is a
// regular file, fs.ModeDir is a directory and fs.ModeIrregular is unknown.
//
// Note: This is defined by Linux, not POSIX.
Type fs.FileMode
}
func (d *Dirent) String() string {
return fmt.Sprintf("name=%s, type=%v, ino=%d", d.Name, d.Type, d.Ino)
}
// IsDir returns true if the Type is fs.ModeDir.
func (d *Dirent) IsDir() bool {
return d.Type == fs.ModeDir
}
// DirFile is embeddable to reduce the amount of functions to implement a file.
type DirFile struct{}
// IsAppend implements File.IsAppend
func (DirFile) IsAppend() bool {
return false
}
// SetAppend implements File.SetAppend
func (DirFile) SetAppend(bool) Errno {
return EISDIR
}
// IsDir implements File.IsDir
func (DirFile) IsDir() (bool, Errno) {
return true, 0
}
// Read implements File.Read
func (DirFile) Read([]byte) (int, Errno) {
return 0, EISDIR
}
// Pread implements File.Pread
func (DirFile) Pread([]byte, int64) (int, Errno) {
return 0, EISDIR
}
// Write implements File.Write
func (DirFile) Write([]byte) (int, Errno) {
return 0, EISDIR
}
// Pwrite implements File.Pwrite
func (DirFile) Pwrite([]byte, int64) (int, Errno) {
return 0, EISDIR
}
// Truncate implements File.Truncate
func (DirFile) Truncate(int64) Errno {
return EISDIR
}

View file

@ -0,0 +1,98 @@
package sys
import "strconv"
// Errno is a subset of POSIX errno used by wazero interfaces. Zero is not an
// error. Other values should not be interpreted numerically, rather by constants
// prefixed with 'E'.
//
// See https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
type Errno uint16
// ^-- Note: This will eventually move to the public /sys package. It is
// experimental until we audit the socket related APIs to ensure we have all
// the Errno it returns, and we export fs.FS. This is not in /internal/sys as
// that would introduce a package cycle.
// This is a subset of errors to reduce implementation burden. `wasip1` defines
// almost all POSIX error numbers, but not all are used in practice. wazero
// will add ones needed in POSIX order, as needed by functions that explicitly
// document returning them.
//
// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-errno-enumu16
const (
EACCES Errno = iota + 1
EAGAIN
EBADF
EEXIST
EFAULT
EINTR
EINVAL
EIO
EISDIR
ELOOP
ENAMETOOLONG
ENOENT
ENOSYS
ENOTDIR
ERANGE
ENOTEMPTY
ENOTSOCK
ENOTSUP
EPERM
EROFS
// NOTE ENOTCAPABLE is defined in wasip1, but not in POSIX. wasi-libc
// converts it to EBADF, ESPIPE or EINVAL depending on the call site.
// It isn't known if compilers that don't use ENOTCAPABLE would crash on it.
)
// Error implements error
func (e Errno) Error() string {
switch e {
case 0: // not an error
return "success"
case EACCES:
return "permission denied"
case EAGAIN:
return "resource unavailable, try again"
case EBADF:
return "bad file descriptor"
case EEXIST:
return "file exists"
case EFAULT:
return "bad address"
case EINTR:
return "interrupted function"
case EINVAL:
return "invalid argument"
case EIO:
return "input/output error"
case EISDIR:
return "is a directory"
case ELOOP:
return "too many levels of symbolic links"
case ENAMETOOLONG:
return "filename too long"
case ENOENT:
return "no such file or directory"
case ENOSYS:
return "functionality not supported"
case ENOTDIR:
return "not a directory or a symbolic link to a directory"
case ERANGE:
return "result too large"
case ENOTEMPTY:
return "directory not empty"
case ENOTSOCK:
return "not a socket"
case ENOTSUP:
return "not supported (may be the same value as [EOPNOTSUPP])"
case EPERM:
return "operation not permitted"
case EROFS:
return "read-only file system"
default:
return "Errno(" + strconv.Itoa(int(e)) + ")"
}
}

View file

@ -0,0 +1,45 @@
package sys
import (
"io"
"io/fs"
"os"
)
// UnwrapOSError returns an Errno or zero if the input is nil.
func UnwrapOSError(err error) Errno {
if err == nil {
return 0
}
err = underlyingError(err)
switch err {
case nil, io.EOF:
return 0 // EOF is not an Errno
case fs.ErrInvalid:
return EINVAL
case fs.ErrPermission:
return EPERM
case fs.ErrExist:
return EEXIST
case fs.ErrNotExist:
return ENOENT
case fs.ErrClosed:
return EBADF
}
return errorToErrno(err)
}
// underlyingError returns the underlying error if err is a well-known OS error type.
//
// This impl is basically the same as os.underlyingError in os/error.go
func underlyingError(err error) error {
switch err := err.(type) {
case *os.PathError:
return err.Err
case *os.LinkError:
return err.Err
case *os.SyscallError:
return err.Err
}
return err
}
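// exampleOpenErrno is an illustrative sketch added for documentation (not
// part of wazero): it shows how an error from the os package can be collapsed
// to an Errno for a host function, e.g. ENOENT when the file is missing.
func exampleOpenErrno(path string) Errno {
	f, err := os.Open(path)
	if err != nil {
		return UnwrapOSError(err) // unwraps *os.PathError to the underlying Errno
	}
	_ = f.Close()
	return 0
}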

View file

@ -0,0 +1,316 @@
package sys
import "github.com/tetratelabs/wazero/sys"
// File is a writeable fs.File bridge backed by syscall functions needed for ABI
// including WASI.
//
// Implementations should embed UnimplementedFile for forward compatibility. Any
// unsupported method or parameter should return ENOSYS.
//
// # Errors
//
// All methods that can return an error return an Errno, which is zero
// on success.
//
// Restricting to Errno matches current WebAssembly host functions,
// which are constrained to well-known error codes. For example, WASI maps syscall
// errors to u32 numeric values.
//
// # Notes
//
// - You must call Close to avoid file resource conflicts. For example,
// Windows cannot delete the underlying directory while a handle to it
// remains open.
// - A writable filesystem abstraction is not yet implemented as of Go 1.20.
// See https://github.com/golang/go/issues/45757
type File interface {
// Dev returns the device ID (Stat_t.Dev) of this file, zero if unknown or
// an error retrieving it.
//
// # Errors
//
// Possible errors are those from Stat, except ENOSYS should not
// be returned. Zero should be returned if there is no implementation.
//
// # Notes
//
// - Implementations should cache this result.
// - This combined with Ino can implement os.SameFile.
Dev() (uint64, Errno)
// Ino returns the serial number (Stat_t.Ino) of this file, zero if unknown
// or an error retrieving it.
//
// # Errors
//
// Possible errors are those from Stat, except ENOSYS should not
// be returned. Zero should be returned if there is no implementation.
//
// # Notes
//
// - Implementations should cache this result.
// - This combined with Dev can implement os.SameFile.
Ino() (sys.Inode, Errno)
// IsDir returns true if this file is a directory, or an error if there was an
// error retrieving this information.
//
// # Errors
//
// Possible errors are those from Stat, except ENOSYS should not
// be returned. false should be returned if there is no implementation.
//
// # Notes
//
// - Implementations should cache this result.
IsDir() (bool, Errno)
// IsAppend returns true if the file was opened with O_APPEND, or
// SetAppend was successfully enabled on this file.
//
// # Notes
//
// - This might not match the underlying state of the file descriptor if
// the file was not opened via OpenFile.
IsAppend() bool
// SetAppend toggles the append mode (O_APPEND) of this file.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file or directory was closed.
//
// # Notes
//
// - There is no `O_APPEND` for `fcntl` in POSIX, so implementations may
// have to re-open the underlying file to apply this. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html
SetAppend(enable bool) Errno
// Stat is similar to syscall.Fstat.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file or directory was closed.
//
// # Notes
//
// - This is like syscall.Fstat and `fstatat` with `AT_FDCWD` in POSIX.
// See https://pubs.opengroup.org/onlinepubs/9699919799/functions/stat.html
// - A fs.FileInfo backed implementation sets atim, mtim and ctim to the
// same value.
// - Windows allows you to stat a closed directory.
Stat() (sys.Stat_t, Errno)
// Read attempts to read all bytes in the file into `buf`, and returns the
// count read even on error.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file or directory was closed or not readable.
// - EISDIR: the file was a directory.
//
// # Notes
//
// - This is like io.Reader and `read` in POSIX, preferring semantics of
// io.Reader. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/read.html
// - Unlike io.Reader, there is no io.EOF returned on end-of-file. To
// read the file completely, the caller must repeat until `n` is zero.
Read(buf []byte) (n int, errno Errno)
// Pread attempts to read all bytes in the file into `buf`, starting at the
// offset `off`, and returns the count read even on error.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file or directory was closed or not readable.
// - EINVAL: the offset was negative.
// - EISDIR: the file was a directory.
//
// # Notes
//
// - This is like io.ReaderAt and `pread` in POSIX, preferring semantics
// of io.ReaderAt. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/pread.html
// - Unlike io.ReaderAt, there is no io.EOF returned on end-of-file. To
// read the file completely, the caller must repeat until `n` is zero.
Pread(buf []byte, off int64) (n int, errno Errno)
// Seek attempts to set the next offset for Read or Write and returns the
// resulting absolute offset or an error.
//
// # Parameters
//
// The `offset` parameters is interpreted in terms of `whence`:
// - io.SeekStart: relative to the start of the file, e.g. offset=0 sets
// the next Read or Write to the beginning of the file.
// - io.SeekCurrent: relative to the current offset, e.g. offset=16 sets
// the next Read or Write 16 bytes past the prior.
// - io.SeekEnd: relative to the end of the file, e.g. offset=-1 sets the
// next Read or Write to the last byte in the file.
//
// # Behavior when a directory
//
// The only supported use case for a directory is seeking to `offset` zero
// (`whence` = io.SeekStart). This should have the same behavior as
// os.File, which resets any internal state used by Readdir.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file or directory was closed or not readable.
// - EINVAL: the offset was negative.
//
// # Notes
//
// - This is like io.Seeker and `fseek` in POSIX, preferring semantics
// of io.Seeker. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/fseek.html
Seek(offset int64, whence int) (newOffset int64, errno Errno)
// Readdir reads the contents of the directory associated with file and
// returns a slice of up to n Dirent values in an arbitrary order. This is
// a stateful function, so subsequent calls return any next values.
//
// If n > 0, Readdir returns at most n entries or an error.
// If n <= 0, Readdir returns all remaining entries or an error.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file was closed or not a directory.
// - ENOENT: the directory could not be read (e.g. deleted).
//
// # Notes
//
// - This is like `Readdir` on os.File, but unlike `readdir` in POSIX.
// See https://pubs.opengroup.org/onlinepubs/9699919799/functions/readdir.html
// - Unlike os.File, there is no io.EOF returned on end-of-directory. To
// read the directory completely, the caller must repeat until the
// count read (`len(dirents)`) is less than `n`.
// - See /RATIONALE.md for design notes.
Readdir(n int) (dirents []Dirent, errno Errno)
// Write attempts to write all bytes in `buf` to the file, and returns the
// count written even on error.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file was closed, not writeable, or a directory.
//
// # Notes
//
// - This is like io.Writer and `write` in POSIX, preferring semantics of
// io.Writer. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/write.html
Write(buf []byte) (n int, errno Errno)
// Pwrite attempts to write all bytes in `buf` to the file at the given
// offset `off`, and returns the count written even on error.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file or directory was closed or not writeable.
// - EINVAL: the offset was negative.
// - EISDIR: the file was a directory.
//
// # Notes
//
// - This is like io.WriterAt and `pwrite` in POSIX, preferring semantics
// of io.WriterAt. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/pwrite.html
Pwrite(buf []byte, off int64) (n int, errno Errno)
// Truncate truncates a file to a specified length.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file or directory was closed.
// - EINVAL: the `size` is negative.
// - EISDIR: the file was a directory.
//
// # Notes
//
// - This is like syscall.Ftruncate and `ftruncate` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/ftruncate.html
// - Windows does not error when calling Truncate on a closed file.
Truncate(size int64) Errno
// Sync synchronizes changes to the file.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - EBADF: the file or directory was closed.
//
// # Notes
//
// - This is like syscall.Fsync and `fsync` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/fsync.html
// - This returns with no error instead of ENOSYS when
// unimplemented. This prevents fake filesystems from erring.
// - Windows does not error when calling Sync on a closed file.
Sync() Errno
// Datasync synchronizes the data of a file.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - EBADF: the file or directory was closed.
//
// # Notes
//
// - This is like syscall.Fdatasync and `fdatasync` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/fdatasync.html
// - This returns with no error instead of ENOSYS when
// unimplemented. This prevents fake filesystems from erring.
// - As this is commonly missing, some implementations dispatch to Sync.
Datasync() Errno
// Utimens sets file access and modification times of this file, at
// nanosecond precision.
//
// # Parameters
//
// The `atim` and `mtim` parameters refer to access and modification time
// stamps as defined in sys.Stat_t. To retain one or the other, substitute
// it with the pseudo-timestamp UTIME_OMIT.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EBADF: the file or directory was closed.
//
// # Notes
//
// - This is like syscall.UtimesNano and `futimens` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/futimens.html
// - Windows requires files to be open with O_RDWR, which means you
// cannot use this to update timestamps on a directory (EPERM).
Utimens(atim, mtim int64) Errno
// Close closes the underlying file.
//
// A zero Errno is returned if unimplemented or success.
//
// # Notes
//
// - This is like syscall.Close and `close` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html
Close() Errno
}
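// exampleReadAll is an illustrative sketch added for documentation (not part
// of wazero): it drains a File using Read. As documented above, there is no
// io.EOF; the loop ends when the count read is zero or an Errno is returned.
func exampleReadAll(f File) ([]byte, Errno) {
	var out []byte
	buf := make([]byte, 4096)
	for {
		n, errno := f.Read(buf)
		out = append(out, buf[:n]...)
		if errno != 0 {
			return out, errno
		}
		if n == 0 { // end-of-file
			return out, 0
		}
	}
}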

View file

@ -0,0 +1,292 @@
package sys
import (
"io/fs"
"github.com/tetratelabs/wazero/sys"
)
// FS is a writeable fs.FS bridge backed by syscall functions needed for ABI
// including WASI.
//
// Implementations should embed UnimplementedFS for forward compatibility. Any
// unsupported method or parameter should return ENOSYS.
//
// # Errors
//
// All methods that can return an error return an Errno, which is zero
// on success.
//
// Restricting to Errno matches current WebAssembly host functions,
// which are constrained to well-known error codes. For example, WASI maps syscall
// errors to u32 numeric values.
//
// # Notes
//
// A writable filesystem abstraction is not yet implemented as of Go 1.20. See
// https://github.com/golang/go/issues/45757
type FS interface {
// OpenFile opens a file. It should be closed via Close on File.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EINVAL: `path` or `flag` is invalid.
// - EISDIR: the path was a directory, but flag included O_RDWR or
// O_WRONLY
// - ENOENT: `path` doesn't exist and `flag` doesn't contain O_CREAT.
//
// # Constraints on the returned file
//
// Implementations that can read flags should enforce them regardless of
// the type returned. For example, while os.File implements io.Writer,
// attempts to write to a directory or a file opened with O_RDONLY fail
// with a EBADF.
//
// Some implementations choose whether to enforce read-only opens, namely
// fs.FS. While fs.FS is supported (Adapt), wazero cannot runtime enforce
// open flags. Instead, we encourage good behavior and test our built-in
// implementations.
//
// # Notes
//
// - This is like os.OpenFile, except the path is relative to this file
// system, and Errno is returned instead of os.PathError.
// - Implications of permissions when O_CREAT are described in Chmod notes.
// - This is like `open` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html
OpenFile(path string, flag Oflag, perm fs.FileMode) (File, Errno)
// Lstat gets file status without following symbolic links.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - ENOENT: `path` doesn't exist.
//
// # Notes
//
// - This is like syscall.Lstat, except the `path` is relative to this
// file system.
// - This is like `lstat` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/lstat.html
// - An fs.FileInfo backed implementation sets atim, mtim and ctim to the
// same value.
// - When the path is a symbolic link, the stat returned is for the link,
// not the file it refers to.
Lstat(path string) (sys.Stat_t, Errno)
// Stat gets file status.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - ENOENT: `path` doesn't exist.
//
// # Notes
//
// - This is like syscall.Stat, except the `path` is relative to this
// file system.
// - This is like `stat` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/stat.html
// - An fs.FileInfo backed implementation sets atim, mtim and ctim to the
// same value.
// - When the path is a symbolic link, the stat returned is for the file
// it refers to.
Stat(path string) (sys.Stat_t, Errno)
// Mkdir makes a directory.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EINVAL: `path` is invalid.
// - EEXIST: `path` exists and is a directory.
// - ENOTDIR: `path` exists and is a file.
//
// # Notes
//
// - This is like syscall.Mkdir, except the `path` is relative to this
// file system.
// - This is like `mkdir` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/mkdir.html
// - Implications of permissions are described in Chmod notes.
Mkdir(path string, perm fs.FileMode) Errno
// Chmod changes the mode of the file.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EINVAL: `path` is invalid.
// - ENOENT: `path` does not exist.
//
// # Notes
//
// - This is like syscall.Chmod, except the `path` is relative to this
// file system.
// - This is like `chmod` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/chmod.html
// - Windows ignores the execute bit, and any permissions come back as
// group and world. For example, chmod of 0400 reads back as 0444, and
// 0700 reads back as 0666. Also, permissions on directories aren't supported at all.
Chmod(path string, perm fs.FileMode) Errno
// Rename renames file or directory.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EINVAL: `from` or `to` is invalid.
// - ENOENT: `from` or `to` don't exist.
// - ENOTDIR: `from` is a directory and `to` exists as a file.
// - EISDIR: `from` is a file and `to` exists as a directory.
// - ENOTEMPTY: both `from` and `to` are existing directories, but
// `to` is not empty.
//
// # Notes
//
// - This is like syscall.Rename, except the paths are relative to this
// file system.
// - This is like `rename` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/rename.html
// - Windows doesn't let you overwrite an existing directory.
Rename(from, to string) Errno
// Rmdir removes a directory.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EINVAL: `path` is invalid.
// - ENOENT: `path` doesn't exist.
// - ENOTDIR: `path` exists, but isn't a directory.
// - ENOTEMPTY: `path` exists, but isn't empty.
//
// # Notes
//
// - This is like syscall.Rmdir, except the `path` is relative to this
// file system.
// - This is like `rmdir` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/rmdir.html
// - As of Go 1.19, Windows maps ENOTDIR to ENOENT.
Rmdir(path string) Errno
// Unlink removes a directory entry.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EINVAL: `path` is invalid.
// - ENOENT: `path` doesn't exist.
// - EISDIR: `path` exists, but is a directory.
//
// # Notes
//
// - This is like syscall.Unlink, except the `path` is relative to this
// file system.
// - This is like `unlink` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/unlink.html
// - On Windows, unlike other platforms, syscall.Unlink doesn't delete a symlink to a directory. Implementations might
// want to combine syscall.RemoveDirectory with syscall.Unlink in order to delete such links on Windows.
// See https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-removedirectorya
Unlink(path string) Errno
// Link creates a "hard" link from oldPath to newPath, in contrast to a
// soft link (via Symlink).
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EPERM: `oldPath` is invalid.
// - ENOENT: `oldPath` doesn't exist.
// - EISDIR: `newPath` exists, but is a directory.
//
// # Notes
//
// - This is like syscall.Link, except the `oldPath` is relative to this
// file system.
// - This is like `link` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/link.html
Link(oldPath, newPath string) Errno
// Symlink creates a "soft" link from oldPath to newPath, in contrast to a
// hard link (via Link).
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EPERM: `oldPath` or `newPath` is invalid.
// - EEXIST: `newPath` exists.
//
// # Notes
//
// - This is like syscall.Symlink, except the `oldPath` is relative to
// this file system.
// - This is like `symlink` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/symlink.html
// - Only `newPath` is relative to this file system and `oldPath` is kept
// as-is. That is because the link is only resolved relative to the
// directory when dereferencing it (e.g. ReadLink).
// See https://github.com/bytecodealliance/cap-std/blob/v1.0.4/cap-std/src/fs/dir.rs#L404-L409
// for how others implement this.
//   - Creating symlinks on Windows requires `SeCreateSymbolicLinkPrivilege`.
// Otherwise, EPERM results.
// See https://learn.microsoft.com/en-us/windows/security/threat-protection/security-policy-settings/create-symbolic-links
Symlink(oldPath, linkName string) Errno
// Readlink reads the contents of a symbolic link.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EINVAL: `path` is invalid.
//
// # Notes
//
// - This is like syscall.Readlink, except the path is relative to this
// filesystem.
// - This is like `readlink` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/readlink.html
// - On Windows, the path separator is different from other platforms,
// but to provide consistent results to Wasm, this normalizes to a "/"
// separator.
Readlink(path string) (string, Errno)
// Utimens set file access and modification times on a path relative to
// this file system, at nanosecond precision.
//
// # Parameters
//
// If the path is a symbolic link, the target of that link is updated.
//
// The `atim` and `mtim` parameters refer to access and modification time
// stamps as defined in sys.Stat_t. To retain one or the other, substitute
// it with the pseudo-timestamp UTIME_OMIT.
//
// # Errors
//
// A zero Errno is success. The below are expected otherwise:
// - ENOSYS: the implementation does not support this function.
// - EINVAL: `path` is invalid.
// - EEXIST: `path` exists and is a directory.
// - ENOTDIR: `path` exists and is a file.
//
// # Notes
//
// - This is like syscall.UtimesNano and `utimensat` with `AT_FDCWD` in
// POSIX. See https://pubs.opengroup.org/onlinepubs/9699919799/functions/futimens.html
Utimens(path string, atim, mtim int64) Errno
}
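To make the error contract above concrete, here is a minimal host-side sketch driving an arbitrary FS implementation; the function name, the `fsys` variable, and the paths are illustrative and not part of wazero.

package main

import (
	"fmt"

	"github.com/tetratelabs/wazero/experimental/sys"
)

// renameOrReport renames a guest-relative path and prints which documented
// failure mode, if any, was hit. A zero Errno means success.
func renameOrReport(fsys sys.FS, from, to string) {
	switch errno := fsys.Rename(from, to); errno {
	case 0:
		fmt.Printf("renamed %s to %s\n", from, to)
	case sys.ENOENT:
		fmt.Printf("%s or %s does not exist\n", from, to)
	case sys.ENOTEMPTY:
		fmt.Printf("%s is a non-empty directory\n", to)
	default:
		fmt.Printf("rename failed: %v\n", errno)
	}
}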

View file

@ -0,0 +1,70 @@
package sys
// Oflag is the flag type used for FS.OpenFile. Values, including zero, should
// not be interpreted numerically. Instead, use the constants prefixed with
// 'O_', with special casing noted below.
//
// # Notes
//
// - O_RDONLY, O_RDWR and O_WRONLY are mutually exclusive, while the other
// flags can coexist bitwise.
// - This is like `flag` in os.OpenFile and `oflag` in POSIX. See
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html
type Oflag uint32
// This is a subset of oflags to reduce implementation burden. `wasip1` splits
// these across `oflags` and `fdflags`. We can't rely on the Go `os` package,
// as it is missing some values. Any flags added will be defined in POSIX
// order, as needed by functions that explicitly document accepting them.
//
// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-oflags-flagsu16
// https://github.com/WebAssembly/WASI/blob/snapshot-01/phases/snapshot/docs.md#-fdflags-flagsu16
const (
// O_RDONLY is like os.O_RDONLY
O_RDONLY Oflag = iota
// O_RDWR is like os.O_RDWR
O_RDWR
// O_WRONLY is like os.O_WRONLY
O_WRONLY
// Define bitflags as they are in POSIX `open`: alphabetically
// O_APPEND is like os.O_APPEND
O_APPEND Oflag = 1 << iota
// O_CREAT is like os.O_CREATE
O_CREAT
// O_DIRECTORY is defined on some platforms as syscall.O_DIRECTORY.
//
// Note: This ensures that the opened file is a directory. Those emulating
// on platforms that don't support the O_DIRECTORY, can double-check the
// result with File.IsDir (or stat) and err if not a directory.
O_DIRECTORY
// O_DSYNC is defined on some platforms as syscall.O_DSYNC.
O_DSYNC
// O_EXCL is defined on some platforms as syscall.O_EXCL.
O_EXCL
// O_NOFOLLOW is defined on some platforms as syscall.O_NOFOLLOW.
//
// Note: This allows programs to ensure that if the opened file is a
// symbolic link, the link itself is opened instead of its target.
O_NOFOLLOW
// O_NONBLOCK is defined on some platforms as syscall.O_NONBLOCK.
O_NONBLOCK
// O_RSYNC is defined on some platforms as syscall.O_RSYNC.
O_RSYNC
// O_SYNC is defined on some platforms as syscall.O_SYNC.
O_SYNC
// O_TRUNC is defined on some platforms as syscall.O_TRUNC.
O_TRUNC
)
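As a rough illustration of how these flags compose (one access mode plus a bitwise OR of the remaining bits), the following hedged sketch opens a file for appending; `fsys`, `appendLine`, and the path are assumed names, not wazero API.

package main

import (
	"io/fs"

	"github.com/tetratelabs/wazero/experimental/sys"
)

// appendLine opens (or creates) path in append mode, mirroring POSIX
// open(path, O_WRONLY|O_CREAT|O_APPEND, 0644), then writes one line.
func appendLine(fsys sys.FS, path, line string) sys.Errno {
	f, errno := fsys.OpenFile(path, sys.O_WRONLY|sys.O_CREAT|sys.O_APPEND, fs.FileMode(0o644))
	if errno != 0 {
		return errno
	}
	defer f.Close()
	_, errno = f.Write([]byte(line + "\n"))
	return errno
}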

View file

@ -0,0 +1,106 @@
//go:build !plan9 && !aix
package sys
import "syscall"
func syscallToErrno(err error) (Errno, bool) {
errno, ok := err.(syscall.Errno)
if !ok {
return 0, false
}
switch errno {
case 0:
return 0, true
case syscall.EACCES:
return EACCES, true
case syscall.EAGAIN:
return EAGAIN, true
case syscall.EBADF:
return EBADF, true
case syscall.EEXIST:
return EEXIST, true
case syscall.EFAULT:
return EFAULT, true
case syscall.EINTR:
return EINTR, true
case syscall.EINVAL:
return EINVAL, true
case syscall.EIO:
return EIO, true
case syscall.EISDIR:
return EISDIR, true
case syscall.ELOOP:
return ELOOP, true
case syscall.ENAMETOOLONG:
return ENAMETOOLONG, true
case syscall.ENOENT:
return ENOENT, true
case syscall.ENOSYS:
return ENOSYS, true
case syscall.ENOTDIR:
return ENOTDIR, true
case syscall.ERANGE:
return ERANGE, true
case syscall.ENOTEMPTY:
return ENOTEMPTY, true
case syscall.ENOTSOCK:
return ENOTSOCK, true
case syscall.ENOTSUP:
return ENOTSUP, true
case syscall.EPERM:
return EPERM, true
case syscall.EROFS:
return EROFS, true
default:
return EIO, true
}
}
// Unwrap is a convenience for platforms (runtime.GOOS values) that define syscall.Errno.
func (e Errno) Unwrap() error {
switch e {
case 0:
return nil
case EACCES:
return syscall.EACCES
case EAGAIN:
return syscall.EAGAIN
case EBADF:
return syscall.EBADF
case EEXIST:
return syscall.EEXIST
case EFAULT:
return syscall.EFAULT
case EINTR:
return syscall.EINTR
case EINVAL:
return syscall.EINVAL
case EIO:
return syscall.EIO
case EISDIR:
return syscall.EISDIR
case ELOOP:
return syscall.ELOOP
case ENAMETOOLONG:
return syscall.ENAMETOOLONG
case ENOENT:
return syscall.ENOENT
case ENOSYS:
return syscall.ENOSYS
case ENOTDIR:
return syscall.ENOTDIR
case ENOTEMPTY:
return syscall.ENOTEMPTY
case ENOTSOCK:
return syscall.ENOTSOCK
case ENOTSUP:
return syscall.ENOTSUP
case EPERM:
return syscall.EPERM
case EROFS:
return syscall.EROFS
default:
return syscall.EIO
}
}
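Because Errno unwraps to the corresponding syscall.Errno on platforms covered by this build tag, errors.Is interoperates across the two error domains; a small hedged example:

package main

import (
	"errors"
	"fmt"
	"syscall"

	"github.com/tetratelabs/wazero/experimental/sys"
)

func main() {
	// sys.ENOENT unwraps to syscall.ENOENT, so errors.Is matches it.
	fmt.Println(errors.Is(sys.ENOENT, syscall.ENOENT)) // true
	// The zero Errno means success and unwraps to nil.
	var zero sys.Errno
	fmt.Println(zero.Unwrap() == nil) // true
}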

View file

@ -0,0 +1,13 @@
//go:build !windows
package sys
func errorToErrno(err error) Errno {
if errno, ok := err.(Errno); ok {
return errno
}
if errno, ok := syscallToErrno(err); ok {
return errno
}
return EIO
}

View file

@ -0,0 +1,7 @@
//go:build plan9 || aix
package sys
func syscallToErrno(err error) (Errno, bool) {
return 0, false
}

View file

@ -0,0 +1,62 @@
package sys
import "syscall"
// These are errors not defined in the syscall package. They are prefixed with
// underscore to avoid exporting them.
//
// See https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
const (
// _ERROR_INVALID_HANDLE is a Windows error returned by syscall.Write
// instead of syscall.EBADF
_ERROR_INVALID_HANDLE = syscall.Errno(6)
// _ERROR_INVALID_NAME is a Windows error returned by open when a file
// path has a trailing slash
_ERROR_INVALID_NAME = syscall.Errno(0x7B)
// _ERROR_NEGATIVE_SEEK is a Windows error returned by os.Truncate
// instead of syscall.EINVAL
_ERROR_NEGATIVE_SEEK = syscall.Errno(0x83)
// _ERROR_DIRECTORY is a Windows error returned by syscall.Rmdir
// instead of syscall.ENOTDIR
_ERROR_DIRECTORY = syscall.Errno(0x10B)
// _ERROR_INVALID_SOCKET is a Windows error returned by winsock_select
// when a given handle is not a socket.
_ERROR_INVALID_SOCKET = syscall.Errno(0x2736)
)
func errorToErrno(err error) Errno {
switch err := err.(type) {
case Errno:
return err
case syscall.Errno:
// Note: In windows, _ERROR_PATH_NOT_FOUND(0x3) maps to syscall.ENOTDIR
switch err {
case syscall.ERROR_ALREADY_EXISTS:
return EEXIST
case _ERROR_DIRECTORY:
return ENOTDIR
case syscall.ERROR_DIR_NOT_EMPTY:
return ENOTEMPTY
case syscall.ERROR_FILE_EXISTS:
return EEXIST
case _ERROR_INVALID_HANDLE, _ERROR_INVALID_SOCKET:
return EBADF
case syscall.ERROR_ACCESS_DENIED:
// POSIX read and write functions expect EBADF, not EACCES when not
// open for reading or writing.
return EBADF
case syscall.ERROR_PRIVILEGE_NOT_HELD:
return EPERM
case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME:
return EINVAL
}
errno, _ := syscallToErrno(err)
return errno
default:
return EIO
}
}

View file

@ -0,0 +1,10 @@
package sys
import "math"
// UTIME_OMIT is a special constant for use in updating times via FS.Utimens
// or File.Utimens. When used for atim or mtim, the value is retained.
//
// Note: This may be implemented via a stat when the underlying filesystem
// does not support this value.
const UTIME_OMIT int64 = math.MinInt64
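For instance, a caller that only wants to bump the modification time would pass UTIME_OMIT for the access time; a hedged sketch, where `fsys` and `touchMtime` are illustrative names:

package main

import (
	"time"

	"github.com/tetratelabs/wazero/experimental/sys"
)

// touchMtime updates only the modification time of path; the access time is
// retained by passing UTIME_OMIT as atim.
func touchMtime(fsys sys.FS, path string) sys.Errno {
	return fsys.Utimens(path, sys.UTIME_OMIT, time.Now().UnixNano())
}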

View file

@ -0,0 +1,160 @@
package sys
import (
"io/fs"
"github.com/tetratelabs/wazero/sys"
)
// UnimplementedFS is an FS that returns ENOSYS for all functions.
// This should be embedded to have forward compatible implementations.
type UnimplementedFS struct{}
// OpenFile implements FS.OpenFile
func (UnimplementedFS) OpenFile(path string, flag Oflag, perm fs.FileMode) (File, Errno) {
return nil, ENOSYS
}
// Lstat implements FS.Lstat
func (UnimplementedFS) Lstat(path string) (sys.Stat_t, Errno) {
return sys.Stat_t{}, ENOSYS
}
// Stat implements FS.Stat
func (UnimplementedFS) Stat(path string) (sys.Stat_t, Errno) {
return sys.Stat_t{}, ENOSYS
}
// Readlink implements FS.Readlink
func (UnimplementedFS) Readlink(path string) (string, Errno) {
return "", ENOSYS
}
// Mkdir implements FS.Mkdir
func (UnimplementedFS) Mkdir(path string, perm fs.FileMode) Errno {
return ENOSYS
}
// Chmod implements FS.Chmod
func (UnimplementedFS) Chmod(path string, perm fs.FileMode) Errno {
return ENOSYS
}
// Rename implements FS.Rename
func (UnimplementedFS) Rename(from, to string) Errno {
return ENOSYS
}
// Rmdir implements FS.Rmdir
func (UnimplementedFS) Rmdir(path string) Errno {
return ENOSYS
}
// Link implements FS.Link
func (UnimplementedFS) Link(_, _ string) Errno {
return ENOSYS
}
// Symlink implements FS.Symlink
func (UnimplementedFS) Symlink(_, _ string) Errno {
return ENOSYS
}
// Unlink implements FS.Unlink
func (UnimplementedFS) Unlink(path string) Errno {
return ENOSYS
}
// Utimens implements FS.Utimens
func (UnimplementedFS) Utimens(path string, atim, mtim int64) Errno {
return ENOSYS
}
// UnimplementedFile is a File that returns ENOSYS for all functions,
// except where no-ops are otherwise documented.
//
// This should be embedded to have forward compatible implementations.
type UnimplementedFile struct{}
// Dev implements File.Dev
func (UnimplementedFile) Dev() (uint64, Errno) {
return 0, 0
}
// Ino implements File.Ino
func (UnimplementedFile) Ino() (sys.Inode, Errno) {
return 0, 0
}
// IsDir implements File.IsDir
func (UnimplementedFile) IsDir() (bool, Errno) {
return false, 0
}
// IsAppend implements File.IsAppend
func (UnimplementedFile) IsAppend() bool {
return false
}
// SetAppend implements File.SetAppend
func (UnimplementedFile) SetAppend(bool) Errno {
return ENOSYS
}
// Stat implements File.Stat
func (UnimplementedFile) Stat() (sys.Stat_t, Errno) {
return sys.Stat_t{}, ENOSYS
}
// Read implements File.Read
func (UnimplementedFile) Read([]byte) (int, Errno) {
return 0, ENOSYS
}
// Pread implements File.Pread
func (UnimplementedFile) Pread([]byte, int64) (int, Errno) {
return 0, ENOSYS
}
// Seek implements File.Seek
func (UnimplementedFile) Seek(int64, int) (int64, Errno) {
return 0, ENOSYS
}
// Readdir implements File.Readdir
func (UnimplementedFile) Readdir(int) (dirents []Dirent, errno Errno) {
return nil, ENOSYS
}
// Write implements File.Write
func (UnimplementedFile) Write([]byte) (int, Errno) {
return 0, ENOSYS
}
// Pwrite implements File.Pwrite
func (UnimplementedFile) Pwrite([]byte, int64) (int, Errno) {
return 0, ENOSYS
}
// Truncate implements File.Truncate
func (UnimplementedFile) Truncate(int64) Errno {
return ENOSYS
}
// Sync implements File.Sync
func (UnimplementedFile) Sync() Errno {
return 0 // not ENOSYS
}
// Datasync implements File.Datasync
func (UnimplementedFile) Datasync() Errno {
return 0 // not ENOSYS
}
// Utimens implements File.Utimens
func (UnimplementedFile) Utimens(int64, int64) Errno {
return ENOSYS
}
// Close implements File.Close
func (UnimplementedFile) Close() (errno Errno) { return }
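A likely use of these embeddable types is building a partial filesystem that stays forward compatible: override only what you need and inherit ENOSYS for the rest. The sketch below, with illustrative names that are not part of wazero, serves a single read-only file this way.

package main

import (
	"io/fs"

	"github.com/tetratelabs/wazero/experimental/sys"
)

// helloFS serves exactly one read-only file and returns ENOSYS (via the
// embedded UnimplementedFS) for everything it does not override.
type helloFS struct {
	sys.UnimplementedFS
}

func (helloFS) OpenFile(path string, flag sys.Oflag, perm fs.FileMode) (sys.File, sys.Errno) {
	if path != "hello.txt" {
		return nil, sys.ENOENT
	}
	if flag&(sys.O_WRONLY|sys.O_RDWR) != 0 {
		return nil, sys.EROFS // read-only filesystem
	}
	return &helloFile{}, 0
}

// helloFile inherits no-op or ENOSYS behavior from UnimplementedFile.
type helloFile struct {
	sys.UnimplementedFile
}

func (*helloFile) Read(p []byte) (int, sys.Errno) {
	// A real implementation would track the read offset; this sketch returns
	// the same bytes on every call.
	return copy(p, "hello, wasm\n"), 0
}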

213
vendor/github.com/tetratelabs/wazero/fsconfig.go generated vendored Normal file
View file

@ -0,0 +1,213 @@
package wazero
import (
"io/fs"
experimentalsys "github.com/tetratelabs/wazero/experimental/sys"
"github.com/tetratelabs/wazero/internal/sys"
"github.com/tetratelabs/wazero/internal/sysfs"
)
// FSConfig configures filesystem paths the embedding host allows the wasm
// guest to access. Unconfigured paths are not allowed, so functions like
// `path_open` result in unsupported errors (e.g. syscall.ENOSYS).
//
// # Guest Path
//
// `guestPath` is the name of the path the guest should use a filesystem for, or
// empty for any files.
//
// All `guestPath` paths are normalized, specifically removing any leading or
// trailing slashes. This means "/", "./" or "." all coerce to empty "".
//
// Multiple `guestPath` values can be configured, but the last longest match
// wins. For example, if "tmp", then "" were added, a request to open
// "tmp/foo.txt" use the filesystem associated with "tmp" even though a wider
// path, "" (all files), was added later.
//
// A `guestPath` of "." coerces to the empty string "" because the current
// directory is handled by the guest. In other words, the guest resolves its
// current directory prior to requesting files.
//
// More notes on `guestPath`:
//   - Working directories are typically tracked in wasm, though it is possible some
// relative paths are requested. For example, TinyGo may attempt to resolve
// a path "../.." in unit tests.
// - Zig uses the first path name it sees as the initial working directory of
// the process.
//
// # Scope
//
// Configuration here is module instance scoped. This means you can use the
// same configuration for multiple calls to Runtime.InstantiateModule. Each
// module will have a different file descriptor table. Any errors accessing
// resources allowed here are deferred to instantiation time of each module.
//
// Any host resources present at the time of configuration, but deleted before
// Runtime.InstantiateModule will trap/panic when the guest wasm initializes or
// calls functions like `fd_read`.
//
// # Windows
//
// While wazero supports Windows as a platform, all known compilers use POSIX
// conventions at runtime. For example, even when running on Windows, paths
// used by wasm are separated by forward slash (/), not backslash (\).
//
// # Notes
//
// - This is an interface for decoupling, not third-party implementations.
// All implementations are in wazero.
// - FSConfig is immutable. Each WithXXX function returns a new instance
// including the corresponding change.
// - RATIONALE.md includes design background and relationship to WebAssembly
// System Interfaces (WASI).
type FSConfig interface {
// WithDirMount assigns a directory at `dir` to any paths beginning at
// `guestPath`.
//
// For example, a `dir` of / (or c:\ on Windows) makes the entire host
// volume writeable to the path on the guest. The `guestPath` is always a
// POSIX style path, slash (/) delimited, even if run on Windows.
//
// If the same `guestPath` was assigned before, this overrides its value,
// retaining the original precedence. See the documentation of FSConfig for
// more details on `guestPath`.
//
// # Isolation
//
// The guest will have full access to this directory including escaping it
// via relative path lookups like "../../". Full access includes operations
// such as creating or deleting files, limited to any host level access
// controls.
//
// # os.DirFS
//
// This configuration optimizes for WASI compatibility which is sometimes
// at odds with the behavior of os.DirFS. Hence, this will not behave
// exactly the same as os.DirFS. See /RATIONALE.md for more.
WithDirMount(dir, guestPath string) FSConfig
// WithReadOnlyDirMount assigns a directory at `dir` to any paths
// beginning at `guestPath`.
//
// This is the same as WithDirMount except only read operations are
// permitted. However, escaping the directory via relative path lookups
// like "../../" is still allowed.
WithReadOnlyDirMount(dir, guestPath string) FSConfig
// WithFSMount assigns a fs.FS file system for any paths beginning at
// `guestPath`.
//
// If the same `guestPath` was assigned before, this overrides its value,
// retaining the original precedence. See the documentation of FSConfig for
// more details on `guestPath`.
//
// # Isolation
//
// fs.FS does not restrict the ability to overwrite returned files via
// io.Writer. Moreover, os.DirFS documentation includes important notes
// about isolation, which also applies to fs.Sub. As of Go 1.19, the
// built-in file-systems are not jailed (chroot). See
// https://github.com/golang/go/issues/42322
//
// # os.DirFS
//
// Due to limited control and functionality available in os.DirFS, we
// advise using WithDirMount instead. There will be behavior differences
// between os.DirFS and WithDirMount, as the latter biases towards what's
// expected from WASI implementations.
//
// # Custom fs.FileInfo
//
// The underlying implementation supports data not usually in fs.FileInfo
// when `info.Sys` returns *sys.Stat_t. For example, a custom fs.FS can use
// this approach to generate or mask sys.Inode data. Such a filesystem
// needs to decorate any functions that can return fs.FileInfo:
//
// - `Stat` as defined on `fs.File` (always)
// - `Readdir` as defined on `os.File` (if defined)
//
// See sys.NewStat_t for examples.
WithFSMount(fs fs.FS, guestPath string) FSConfig
}
type fsConfig struct {
// fs are the currently configured filesystems.
fs []experimentalsys.FS
// guestPaths are the user-supplied names of the filesystems, retained for
// error messages and fmt.Stringer.
guestPaths []string
// guestPathToFS are the normalized paths to the currently configured
// filesystems, used for de-duplicating.
guestPathToFS map[string]int
}
// NewFSConfig returns a FSConfig that can be used for configuring module instantiation.
func NewFSConfig() FSConfig {
return &fsConfig{guestPathToFS: map[string]int{}}
}
// clone makes a deep copy of this module config.
func (c *fsConfig) clone() *fsConfig {
ret := *c // copy except slice and maps which share a ref
ret.fs = make([]experimentalsys.FS, 0, len(c.fs))
ret.fs = append(ret.fs, c.fs...)
ret.guestPaths = make([]string, 0, len(c.guestPaths))
ret.guestPaths = append(ret.guestPaths, c.guestPaths...)
ret.guestPathToFS = make(map[string]int, len(c.guestPathToFS))
for key, value := range c.guestPathToFS {
ret.guestPathToFS[key] = value
}
return &ret
}
// WithDirMount implements FSConfig.WithDirMount
func (c *fsConfig) WithDirMount(dir, guestPath string) FSConfig {
return c.WithSysFSMount(sysfs.DirFS(dir), guestPath)
}
// WithReadOnlyDirMount implements FSConfig.WithReadOnlyDirMount
func (c *fsConfig) WithReadOnlyDirMount(dir, guestPath string) FSConfig {
return c.WithSysFSMount(&sysfs.ReadFS{FS: sysfs.DirFS(dir)}, guestPath)
}
// WithFSMount implements FSConfig.WithFSMount
func (c *fsConfig) WithFSMount(fs fs.FS, guestPath string) FSConfig {
var adapted experimentalsys.FS
if fs != nil {
adapted = &sysfs.AdaptFS{FS: fs}
}
return c.WithSysFSMount(adapted, guestPath)
}
// WithSysFSMount implements sysfs.FSConfig
func (c *fsConfig) WithSysFSMount(fs experimentalsys.FS, guestPath string) FSConfig {
if _, ok := fs.(experimentalsys.UnimplementedFS); ok {
return c // don't add fake paths.
}
cleaned := sys.StripPrefixesAndTrailingSlash(guestPath)
ret := c.clone()
if i, ok := ret.guestPathToFS[cleaned]; ok {
ret.fs[i] = fs
ret.guestPaths[i] = guestPath
} else if fs != nil {
ret.guestPathToFS[cleaned] = len(ret.fs)
ret.fs = append(ret.fs, fs)
ret.guestPaths = append(ret.guestPaths, guestPath)
}
return ret
}
// preopens returns the possibly nil, index-correlated preopened filesystems
// with guest paths.
func (c *fsConfig) preopens() ([]experimentalsys.FS, []string) {
preopenCount := len(c.fs)
if preopenCount == 0 {
return nil, nil
}
fs := make([]experimentalsys.FS, len(c.fs))
copy(fs, c.fs)
guestPaths := make([]string, len(c.guestPaths))
copy(guestPaths, c.guestPaths)
return fs, guestPaths
}
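Putting the configuration to use at the public API level, the following hedged sketch mounts a host directory read-only before instantiating a module; the directory, module file name, and error handling are placeholders.

package main

import (
	"context"
	"log"
	"os"

	"github.com/tetratelabs/wazero"
)

func main() {
	ctx := context.Background()
	r := wazero.NewRuntime(ctx)
	defer r.Close(ctx)

	// Expose ./data on the host as the guest's root, read-only.
	fsConfig := wazero.NewFSConfig().WithReadOnlyDirMount("./data", "/")
	modConfig := wazero.NewModuleConfig().WithFSConfig(fsConfig)

	wasmBytes, err := os.ReadFile("app.wasm") // placeholder module
	if err != nil {
		log.Fatal(err)
	}
	// For WASI guests, wasi_snapshot_preview1 would also be instantiated here.
	if _, err := r.InstantiateWithConfig(ctx, wasmBytes, modConfig); err != nil {
		log.Fatal(err)
	}
}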

View file

@ -0,0 +1,164 @@
package descriptor
import "math/bits"
// Table is a data structure mapping 32-bit descriptors to items.
//
// # Negative keys are invalid.
//
// Negative keys (e.g. -1) are invalid inputs and will return a corresponding
// not-found value. This matches POSIX behavior of file descriptors.
// See https://pubs.opengroup.org/onlinepubs/9699919799/functions/dirfd.html#tag_16_90
//
// # Data structure design
//
// The data structure optimizes for memory density and lookup performance,
// trading off compute at insertion time. This is a useful compromise for the
// use cases we employ it with: items are usually accessed a lot more often
// than they are inserted, each operation requires a table lookup, so we are
// better off spending extra compute to insert items in the table in order to
// get cheaper lookups. Memory efficiency is also crucial to support scaling
// with programs that maintain thousands of items: having a high or non-linear
// memory-to-item ratio could otherwise be used as an attack vector by
// malicious applications attempting to damage performance of the host.
type Table[Key ~int32, Item any] struct {
masks []uint64
items []Item
}
// Len returns the number of items stored in the table.
func (t *Table[Key, Item]) Len() (n int) {
// We could make this a O(1) operation if we cached the number of items in
// the table. More state usually means more problems, so until we have a
// clear need for this, the simple implementation may be a better trade off.
for _, mask := range t.masks {
n += bits.OnesCount64(mask)
}
return n
}
// grow ensures that t has enough room for n items, potentially reallocating the
// internal buffers if their capacity was too small to hold this many items.
func (t *Table[Key, Item]) grow(n int) {
// Round up to a multiple of 64 since this is the smallest increment due to
// using 64 bits masks.
n = (n*64 + 63) / 64
if n > len(t.masks) {
masks := make([]uint64, n)
copy(masks, t.masks)
items := make([]Item, n*64)
copy(items, t.items)
t.masks = masks
t.items = items
}
}
// Insert inserts the given item into the table, returning the key that it is
// mapped to or false if the table was full.
//
// The method does not perform deduplication, it is possible for the same item
// to be inserted multiple times, each insertion will return a different key.
func (t *Table[Key, Item]) Insert(item Item) (key Key, ok bool) {
offset := 0
insert:
// Note: this loop could be made a lot more efficient using vectorized
// operations: 256 bits vector registers would yield a theoretical 4x
// speed up (e.g. using AVX2).
for index, mask := range t.masks[offset:] {
if ^mask != 0 { // not full?
shift := bits.TrailingZeros64(^mask)
index += offset
key = Key(index)*64 + Key(shift)
t.items[key] = item
t.masks[index] = mask | uint64(1<<shift)
return key, key >= 0
}
}
offset = len(t.masks)
n := 2 * len(t.masks)
if n == 0 {
n = 1
}
t.grow(n)
goto insert
}
// Lookup returns the item associated with the given key (may be nil).
func (t *Table[Key, Item]) Lookup(key Key) (item Item, found bool) {
if key < 0 { // invalid key
return
}
if i := int(key); i >= 0 && i < len(t.items) {
index := uint(key) / 64
shift := uint(key) % 64
if (t.masks[index] & (1 << shift)) != 0 {
item, found = t.items[i], true
}
}
return
}
// InsertAt inserts the given `item` at the item descriptor `key`. This returns
// false if the insert was impossible due to a negative key.
func (t *Table[Key, Item]) InsertAt(item Item, key Key) bool {
if key < 0 {
return false
}
if diff := int(key) - t.Len(); diff > 0 {
t.grow(diff)
}
index := uint(key) / 64
shift := uint(key) % 64
t.masks[index] |= 1 << shift
t.items[key] = item
return true
}
// Delete deletes the item stored at the given key from the table.
func (t *Table[Key, Item]) Delete(key Key) {
if key < 0 { // invalid key
return
}
if index, shift := key/64, key%64; int(index) < len(t.masks) {
mask := t.masks[index]
if (mask & (1 << shift)) != 0 {
var zero Item
t.items[key] = zero
t.masks[index] = mask & ^uint64(1<<shift)
}
}
}
// Range calls f for each item and its associated key in the table. The function
// f might return false to interrupt the iteration.
func (t *Table[Key, Item]) Range(f func(Key, Item) bool) {
for i, mask := range t.masks {
if mask == 0 {
continue
}
for j := Key(0); j < 64; j++ {
if (mask & (1 << j)) == 0 {
continue
}
if key := Key(i)*64 + j; !f(key, t.items[key]) {
return
}
}
}
}
// Reset clears the content of the table.
func (t *Table[Key, Item]) Reset() {
for i := range t.masks {
t.masks[i] = 0
}
var zero Item
for i := range t.items {
t.items[i] = zero
}
}
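Since the descriptor package is internal to wazero, the snippet below is illustrative only; it shows the intended insert/lookup/delete pattern, with the key standing in for a file descriptor, and would only compile inside wazero's source tree.

package descriptor_test

import (
	"fmt"

	"github.com/tetratelabs/wazero/internal/descriptor"
)

func ExampleTable() {
	var files descriptor.Table[int32, string]

	fd, ok := files.Insert("stdin") // ok is false only if the key space overflowed
	if !ok {
		panic("table full")
	}
	if name, found := files.Lookup(fd); found {
		fmt.Println(fd, "->", name) // 0 -> stdin
	}
	files.Range(func(key int32, name string) bool {
		fmt.Println("present:", key, name)
		return true // keep iterating
	})
	files.Delete(fd)
}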

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,22 @@
package interpreter
import (
"bytes"
)
func format(ops []unionOperation) string {
buf := bytes.NewBuffer(nil)
_, _ = buf.WriteString(".entrypoint\n")
for i := range ops {
op := &ops[i]
str := op.String()
isLabel := op.Kind == operationKindLabel
if !isLabel {
const indent = "\t"
str = indent + str
}
_, _ = buf.WriteString(str + "\n")
}
return buf.String()
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,767 @@
package interpreter
import (
"fmt"
"github.com/tetratelabs/wazero/internal/wasm"
)
// signature represents how a Wasm opcode
// manipulates the value stacks in terms of value types.
type signature struct {
in, out []unsignedType
}
var (
signature_None_None = &signature{}
signature_Unknown_None = &signature{
in: []unsignedType{unsignedTypeUnknown},
}
signature_None_I32 = &signature{
out: []unsignedType{unsignedTypeI32},
}
signature_None_I64 = &signature{
out: []unsignedType{unsignedTypeI64},
}
signature_None_V128 = &signature{
out: []unsignedType{unsignedTypeV128},
}
signature_None_F32 = &signature{
out: []unsignedType{unsignedTypeF32},
}
signature_None_F64 = &signature{
out: []unsignedType{unsignedTypeF64},
}
signature_I32_None = &signature{
in: []unsignedType{unsignedTypeI32},
}
signature_I64_None = &signature{
in: []unsignedType{unsignedTypeI64},
}
signature_F32_None = &signature{
in: []unsignedType{unsignedTypeF32},
}
signature_F64_None = &signature{
in: []unsignedType{unsignedTypeF64},
}
signature_V128_None = &signature{
in: []unsignedType{unsignedTypeV128},
}
signature_I32_I32 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeI32},
}
signature_I32_I64 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeI64},
}
signature_I64_I64 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeI64},
}
signature_I32_F32 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeF32},
}
signature_I32_F64 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeF64},
}
signature_I64_I32 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeI32},
}
signature_I64_F32 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeF32},
}
signature_I64_F64 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeF64},
}
signature_F32_I32 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeI32},
}
signature_F32_I64 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeI64},
}
signature_F32_F64 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeF64},
}
signature_F32_F32 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeF32},
}
signature_F64_I32 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeI32},
}
signature_F64_F32 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeF32},
}
signature_F64_I64 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeI64},
}
signature_F64_F64 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeF64},
}
signature_I32I32_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32},
}
signature_I32I32_I32 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32},
out: []unsignedType{unsignedTypeI32},
}
signature_I32I64_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64},
}
signature_I32F32_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeF32},
}
signature_I32F64_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeF64},
}
signature_I64I32_I32 = &signature{
in: []unsignedType{unsignedTypeI64, unsignedTypeI32},
out: []unsignedType{unsignedTypeI32},
}
signature_I64I64_I32 = &signature{
in: []unsignedType{unsignedTypeI64, unsignedTypeI64},
out: []unsignedType{unsignedTypeI32},
}
signature_I64I64_I64 = &signature{
in: []unsignedType{unsignedTypeI64, unsignedTypeI64},
out: []unsignedType{unsignedTypeI64},
}
signature_F32F32_I32 = &signature{
in: []unsignedType{unsignedTypeF32, unsignedTypeF32},
out: []unsignedType{unsignedTypeI32},
}
signature_F32F32_F32 = &signature{
in: []unsignedType{unsignedTypeF32, unsignedTypeF32},
out: []unsignedType{unsignedTypeF32},
}
signature_F64F64_I32 = &signature{
in: []unsignedType{unsignedTypeF64, unsignedTypeF64},
out: []unsignedType{unsignedTypeI32},
}
signature_F64F64_F64 = &signature{
in: []unsignedType{unsignedTypeF64, unsignedTypeF64},
out: []unsignedType{unsignedTypeF64},
}
signature_I32I32I32_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI32},
}
signature_I32I64I32_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI32},
}
signature_UnknownUnknownI32_Unknown = &signature{
in: []unsignedType{unsignedTypeUnknown, unsignedTypeUnknown, unsignedTypeI32},
out: []unsignedType{unsignedTypeUnknown},
}
signature_V128V128_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeV128},
out: []unsignedType{unsignedTypeV128},
}
signature_V128V128V128_V32 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeV128, unsignedTypeV128},
out: []unsignedType{unsignedTypeV128},
}
signature_I32_V128 = &signature{
in: []unsignedType{unsignedTypeI32},
out: []unsignedType{unsignedTypeV128},
}
signature_I32V128_None = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeV128},
}
signature_I32V128_V128 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeV128},
out: []unsignedType{unsignedTypeV128},
}
signature_V128I32_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeI32},
out: []unsignedType{unsignedTypeV128},
}
signature_V128I64_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeI64},
out: []unsignedType{unsignedTypeV128},
}
signature_V128F32_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeF32},
out: []unsignedType{unsignedTypeV128},
}
signature_V128F64_V128 = &signature{
in: []unsignedType{unsignedTypeV128, unsignedTypeF64},
out: []unsignedType{unsignedTypeV128},
}
signature_V128_I32 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeI32},
}
signature_V128_I64 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeI64},
}
signature_V128_F32 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeF32},
}
signature_V128_F64 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeF64},
}
signature_V128_V128 = &signature{
in: []unsignedType{unsignedTypeV128},
out: []unsignedType{unsignedTypeV128},
}
signature_I64_V128 = &signature{
in: []unsignedType{unsignedTypeI64},
out: []unsignedType{unsignedTypeV128},
}
signature_F32_V128 = &signature{
in: []unsignedType{unsignedTypeF32},
out: []unsignedType{unsignedTypeV128},
}
signature_F64_V128 = &signature{
in: []unsignedType{unsignedTypeF64},
out: []unsignedType{unsignedTypeV128},
}
signature_I32I64_I64 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64},
out: []unsignedType{unsignedTypeI64},
}
signature_I32I32I64_I32 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI64},
out: []unsignedType{unsignedTypeI32},
}
signature_I32I64I64_I32 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI64},
out: []unsignedType{unsignedTypeI32},
}
signature_I32I32I32_I32 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI32, unsignedTypeI32},
out: []unsignedType{unsignedTypeI32},
}
signature_I32I64I64_I64 = &signature{
in: []unsignedType{unsignedTypeI32, unsignedTypeI64, unsignedTypeI64},
out: []unsignedType{unsignedTypeI64},
}
)
// wasmOpcodeSignature returns the signature of the given Wasm opcode.
// Note that some opcodes' signatures vary depending on
// the function instance (for example, local types).
// The "index" parameter is not used by most opcodes.
// The returned signature is used for stack validation when lowering Wasm's opcodes to interpreterir.
func (c *compiler) wasmOpcodeSignature(op wasm.Opcode, index uint32) (*signature, error) {
switch op {
case wasm.OpcodeUnreachable, wasm.OpcodeNop, wasm.OpcodeBlock, wasm.OpcodeLoop:
return signature_None_None, nil
case wasm.OpcodeIf:
return signature_I32_None, nil
case wasm.OpcodeElse, wasm.OpcodeEnd, wasm.OpcodeBr:
return signature_None_None, nil
case wasm.OpcodeBrIf, wasm.OpcodeBrTable:
return signature_I32_None, nil
case wasm.OpcodeReturn:
return signature_None_None, nil
case wasm.OpcodeCall:
return c.funcTypeToSigs.get(c.funcs[index], false /* direct */), nil
case wasm.OpcodeCallIndirect:
return c.funcTypeToSigs.get(index, true /* call_indirect */), nil
case wasm.OpcodeDrop:
return signature_Unknown_None, nil
case wasm.OpcodeSelect, wasm.OpcodeTypedSelect:
return signature_UnknownUnknownI32_Unknown, nil
case wasm.OpcodeLocalGet:
inputLen := uint32(len(c.sig.Params))
if l := uint32(len(c.localTypes)) + inputLen; index >= l {
return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l)
}
var t wasm.ValueType
if index < inputLen {
t = c.sig.Params[index]
} else {
t = c.localTypes[index-inputLen]
}
return wasmValueTypeToUnsignedOutSignature(t), nil
case wasm.OpcodeLocalSet:
inputLen := uint32(len(c.sig.Params))
if l := uint32(len(c.localTypes)) + inputLen; index >= l {
return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l)
}
var t wasm.ValueType
if index < inputLen {
t = c.sig.Params[index]
} else {
t = c.localTypes[index-inputLen]
}
return wasmValueTypeToUnsignedInSignature(t), nil
case wasm.OpcodeLocalTee:
inputLen := uint32(len(c.sig.Params))
if l := uint32(len(c.localTypes)) + inputLen; index >= l {
return nil, fmt.Errorf("invalid local index for local.get %d >= %d", index, l)
}
var t wasm.ValueType
if index < inputLen {
t = c.sig.Params[index]
} else {
t = c.localTypes[index-inputLen]
}
return wasmValueTypeToUnsignedInOutSignature(t), nil
case wasm.OpcodeGlobalGet:
if len(c.globals) <= int(index) {
return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals))
}
return wasmValueTypeToUnsignedOutSignature(c.globals[index].ValType), nil
case wasm.OpcodeGlobalSet:
if len(c.globals) <= int(index) {
return nil, fmt.Errorf("invalid global index for global.get %d >= %d", index, len(c.globals))
}
return wasmValueTypeToUnsignedInSignature(c.globals[index].ValType), nil
case wasm.OpcodeI32Load:
return signature_I32_I32, nil
case wasm.OpcodeI64Load:
return signature_I32_I64, nil
case wasm.OpcodeF32Load:
return signature_I32_F32, nil
case wasm.OpcodeF64Load:
return signature_I32_F64, nil
case wasm.OpcodeI32Load8S, wasm.OpcodeI32Load8U, wasm.OpcodeI32Load16S, wasm.OpcodeI32Load16U:
return signature_I32_I32, nil
case wasm.OpcodeI64Load8S, wasm.OpcodeI64Load8U, wasm.OpcodeI64Load16S, wasm.OpcodeI64Load16U,
wasm.OpcodeI64Load32S, wasm.OpcodeI64Load32U:
return signature_I32_I64, nil
case wasm.OpcodeI32Store:
return signature_I32I32_None, nil
case wasm.OpcodeI64Store:
return signature_I32I64_None, nil
case wasm.OpcodeF32Store:
return signature_I32F32_None, nil
case wasm.OpcodeF64Store:
return signature_I32F64_None, nil
case wasm.OpcodeI32Store8:
return signature_I32I32_None, nil
case wasm.OpcodeI32Store16:
return signature_I32I32_None, nil
case wasm.OpcodeI64Store8:
return signature_I32I64_None, nil
case wasm.OpcodeI64Store16:
return signature_I32I64_None, nil
case wasm.OpcodeI64Store32:
return signature_I32I64_None, nil
case wasm.OpcodeMemorySize:
return signature_None_I32, nil
case wasm.OpcodeMemoryGrow:
return signature_I32_I32, nil
case wasm.OpcodeI32Const:
return signature_None_I32, nil
case wasm.OpcodeI64Const:
return signature_None_I64, nil
case wasm.OpcodeF32Const:
return signature_None_F32, nil
case wasm.OpcodeF64Const:
return signature_None_F64, nil
case wasm.OpcodeI32Eqz:
return signature_I32_I32, nil
case wasm.OpcodeI32Eq, wasm.OpcodeI32Ne, wasm.OpcodeI32LtS,
wasm.OpcodeI32LtU, wasm.OpcodeI32GtS, wasm.OpcodeI32GtU,
wasm.OpcodeI32LeS, wasm.OpcodeI32LeU, wasm.OpcodeI32GeS,
wasm.OpcodeI32GeU:
return signature_I32I32_I32, nil
case wasm.OpcodeI64Eqz:
return signature_I64_I32, nil
case wasm.OpcodeI64Eq, wasm.OpcodeI64Ne, wasm.OpcodeI64LtS,
wasm.OpcodeI64LtU, wasm.OpcodeI64GtS, wasm.OpcodeI64GtU,
wasm.OpcodeI64LeS, wasm.OpcodeI64LeU, wasm.OpcodeI64GeS,
wasm.OpcodeI64GeU:
return signature_I64I64_I32, nil
case wasm.OpcodeF32Eq, wasm.OpcodeF32Ne, wasm.OpcodeF32Lt,
wasm.OpcodeF32Gt, wasm.OpcodeF32Le, wasm.OpcodeF32Ge:
return signature_F32F32_I32, nil
case wasm.OpcodeF64Eq, wasm.OpcodeF64Ne, wasm.OpcodeF64Lt,
wasm.OpcodeF64Gt, wasm.OpcodeF64Le, wasm.OpcodeF64Ge:
return signature_F64F64_I32, nil
case wasm.OpcodeI32Clz, wasm.OpcodeI32Ctz, wasm.OpcodeI32Popcnt:
return signature_I32_I32, nil
case wasm.OpcodeI32Add, wasm.OpcodeI32Sub, wasm.OpcodeI32Mul,
wasm.OpcodeI32DivS, wasm.OpcodeI32DivU, wasm.OpcodeI32RemS,
wasm.OpcodeI32RemU, wasm.OpcodeI32And, wasm.OpcodeI32Or,
wasm.OpcodeI32Xor, wasm.OpcodeI32Shl, wasm.OpcodeI32ShrS,
wasm.OpcodeI32ShrU, wasm.OpcodeI32Rotl, wasm.OpcodeI32Rotr:
return signature_I32I32_I32, nil
case wasm.OpcodeI64Clz, wasm.OpcodeI64Ctz, wasm.OpcodeI64Popcnt:
return signature_I64_I64, nil
case wasm.OpcodeI64Add, wasm.OpcodeI64Sub, wasm.OpcodeI64Mul,
wasm.OpcodeI64DivS, wasm.OpcodeI64DivU, wasm.OpcodeI64RemS,
wasm.OpcodeI64RemU, wasm.OpcodeI64And, wasm.OpcodeI64Or,
wasm.OpcodeI64Xor, wasm.OpcodeI64Shl, wasm.OpcodeI64ShrS,
wasm.OpcodeI64ShrU, wasm.OpcodeI64Rotl, wasm.OpcodeI64Rotr:
return signature_I64I64_I64, nil
case wasm.OpcodeF32Abs, wasm.OpcodeF32Neg, wasm.OpcodeF32Ceil,
wasm.OpcodeF32Floor, wasm.OpcodeF32Trunc, wasm.OpcodeF32Nearest,
wasm.OpcodeF32Sqrt:
return signature_F32_F32, nil
case wasm.OpcodeF32Add, wasm.OpcodeF32Sub, wasm.OpcodeF32Mul,
wasm.OpcodeF32Div, wasm.OpcodeF32Min, wasm.OpcodeF32Max,
wasm.OpcodeF32Copysign:
return signature_F32F32_F32, nil
case wasm.OpcodeF64Abs, wasm.OpcodeF64Neg, wasm.OpcodeF64Ceil,
wasm.OpcodeF64Floor, wasm.OpcodeF64Trunc, wasm.OpcodeF64Nearest,
wasm.OpcodeF64Sqrt:
return signature_F64_F64, nil
case wasm.OpcodeF64Add, wasm.OpcodeF64Sub, wasm.OpcodeF64Mul,
wasm.OpcodeF64Div, wasm.OpcodeF64Min, wasm.OpcodeF64Max,
wasm.OpcodeF64Copysign:
return signature_F64F64_F64, nil
case wasm.OpcodeI32WrapI64:
return signature_I64_I32, nil
case wasm.OpcodeI32TruncF32S, wasm.OpcodeI32TruncF32U:
return signature_F32_I32, nil
case wasm.OpcodeI32TruncF64S, wasm.OpcodeI32TruncF64U:
return signature_F64_I32, nil
case wasm.OpcodeI64ExtendI32S, wasm.OpcodeI64ExtendI32U:
return signature_I32_I64, nil
case wasm.OpcodeI64TruncF32S, wasm.OpcodeI64TruncF32U:
return signature_F32_I64, nil
case wasm.OpcodeI64TruncF64S, wasm.OpcodeI64TruncF64U:
return signature_F64_I64, nil
case wasm.OpcodeF32ConvertI32S, wasm.OpcodeF32ConvertI32U:
return signature_I32_F32, nil
case wasm.OpcodeF32ConvertI64S, wasm.OpcodeF32ConvertI64U:
return signature_I64_F32, nil
case wasm.OpcodeF32DemoteF64:
return signature_F64_F32, nil
case wasm.OpcodeF64ConvertI32S, wasm.OpcodeF64ConvertI32U:
return signature_I32_F64, nil
case wasm.OpcodeF64ConvertI64S, wasm.OpcodeF64ConvertI64U:
return signature_I64_F64, nil
case wasm.OpcodeF64PromoteF32:
return signature_F32_F64, nil
case wasm.OpcodeI32ReinterpretF32:
return signature_F32_I32, nil
case wasm.OpcodeI64ReinterpretF64:
return signature_F64_I64, nil
case wasm.OpcodeF32ReinterpretI32:
return signature_I32_F32, nil
case wasm.OpcodeF64ReinterpretI64:
return signature_I64_F64, nil
case wasm.OpcodeI32Extend8S, wasm.OpcodeI32Extend16S:
return signature_I32_I32, nil
case wasm.OpcodeI64Extend8S, wasm.OpcodeI64Extend16S, wasm.OpcodeI64Extend32S:
return signature_I64_I64, nil
case wasm.OpcodeTableGet:
// table.get takes table's offset and pushes the ref type value of opaque pointer as i64 value onto the stack.
return signature_I32_I64, nil
case wasm.OpcodeTableSet:
// table.set takes table's offset and the ref type value of opaque pointer as i64 value.
return signature_I32I64_None, nil
case wasm.OpcodeRefFunc:
// ref.func is translated as pushing the compiled function's opaque pointer (uint64) at interpreterir layer.
return signature_None_I64, nil
case wasm.OpcodeRefIsNull:
// ref.is_null is translated as checking if the uint64 on the top of the stack (opaque pointer) is zero or not.
return signature_I64_I32, nil
case wasm.OpcodeRefNull:
// ref.null is translated as i64.const 0.
return signature_None_I64, nil
case wasm.OpcodeMiscPrefix:
switch miscOp := c.body[c.pc+1]; miscOp {
case wasm.OpcodeMiscI32TruncSatF32S, wasm.OpcodeMiscI32TruncSatF32U:
return signature_F32_I32, nil
case wasm.OpcodeMiscI32TruncSatF64S, wasm.OpcodeMiscI32TruncSatF64U:
return signature_F64_I32, nil
case wasm.OpcodeMiscI64TruncSatF32S, wasm.OpcodeMiscI64TruncSatF32U:
return signature_F32_I64, nil
case wasm.OpcodeMiscI64TruncSatF64S, wasm.OpcodeMiscI64TruncSatF64U:
return signature_F64_I64, nil
case wasm.OpcodeMiscMemoryInit, wasm.OpcodeMiscMemoryCopy, wasm.OpcodeMiscMemoryFill,
wasm.OpcodeMiscTableInit, wasm.OpcodeMiscTableCopy:
return signature_I32I32I32_None, nil
case wasm.OpcodeMiscDataDrop, wasm.OpcodeMiscElemDrop:
return signature_None_None, nil
case wasm.OpcodeMiscTableGrow:
return signature_I64I32_I32, nil
case wasm.OpcodeMiscTableSize:
return signature_None_I32, nil
case wasm.OpcodeMiscTableFill:
return signature_I32I64I32_None, nil
default:
return nil, fmt.Errorf("unsupported misc instruction in interpreterir: 0x%x", op)
}
case wasm.OpcodeVecPrefix:
switch vecOp := c.body[c.pc+1]; vecOp {
case wasm.OpcodeVecV128Const:
return signature_None_V128, nil
case wasm.OpcodeVecV128Load, wasm.OpcodeVecV128Load8x8s, wasm.OpcodeVecV128Load8x8u,
wasm.OpcodeVecV128Load16x4s, wasm.OpcodeVecV128Load16x4u, wasm.OpcodeVecV128Load32x2s,
wasm.OpcodeVecV128Load32x2u, wasm.OpcodeVecV128Load8Splat, wasm.OpcodeVecV128Load16Splat,
wasm.OpcodeVecV128Load32Splat, wasm.OpcodeVecV128Load64Splat, wasm.OpcodeVecV128Load32zero,
wasm.OpcodeVecV128Load64zero:
return signature_I32_V128, nil
case wasm.OpcodeVecV128Load8Lane, wasm.OpcodeVecV128Load16Lane,
wasm.OpcodeVecV128Load32Lane, wasm.OpcodeVecV128Load64Lane:
return signature_I32V128_V128, nil
case wasm.OpcodeVecV128Store,
wasm.OpcodeVecV128Store8Lane,
wasm.OpcodeVecV128Store16Lane,
wasm.OpcodeVecV128Store32Lane,
wasm.OpcodeVecV128Store64Lane:
return signature_I32V128_None, nil
case wasm.OpcodeVecI8x16ExtractLaneS,
wasm.OpcodeVecI8x16ExtractLaneU,
wasm.OpcodeVecI16x8ExtractLaneS,
wasm.OpcodeVecI16x8ExtractLaneU,
wasm.OpcodeVecI32x4ExtractLane:
return signature_V128_I32, nil
case wasm.OpcodeVecI64x2ExtractLane:
return signature_V128_I64, nil
case wasm.OpcodeVecF32x4ExtractLane:
return signature_V128_F32, nil
case wasm.OpcodeVecF64x2ExtractLane:
return signature_V128_F64, nil
case wasm.OpcodeVecI8x16ReplaceLane, wasm.OpcodeVecI16x8ReplaceLane, wasm.OpcodeVecI32x4ReplaceLane,
wasm.OpcodeVecI8x16Shl, wasm.OpcodeVecI8x16ShrS, wasm.OpcodeVecI8x16ShrU,
wasm.OpcodeVecI16x8Shl, wasm.OpcodeVecI16x8ShrS, wasm.OpcodeVecI16x8ShrU,
wasm.OpcodeVecI32x4Shl, wasm.OpcodeVecI32x4ShrS, wasm.OpcodeVecI32x4ShrU,
wasm.OpcodeVecI64x2Shl, wasm.OpcodeVecI64x2ShrS, wasm.OpcodeVecI64x2ShrU:
return signature_V128I32_V128, nil
case wasm.OpcodeVecI64x2ReplaceLane:
return signature_V128I64_V128, nil
case wasm.OpcodeVecF32x4ReplaceLane:
return signature_V128F32_V128, nil
case wasm.OpcodeVecF64x2ReplaceLane:
return signature_V128F64_V128, nil
case wasm.OpcodeVecI8x16Splat,
wasm.OpcodeVecI16x8Splat,
wasm.OpcodeVecI32x4Splat:
return signature_I32_V128, nil
case wasm.OpcodeVecI64x2Splat:
return signature_I64_V128, nil
case wasm.OpcodeVecF32x4Splat:
return signature_F32_V128, nil
case wasm.OpcodeVecF64x2Splat:
return signature_F64_V128, nil
case wasm.OpcodeVecV128i8x16Shuffle, wasm.OpcodeVecI8x16Swizzle, wasm.OpcodeVecV128And, wasm.OpcodeVecV128Or, wasm.OpcodeVecV128Xor, wasm.OpcodeVecV128AndNot:
return signature_V128V128_V128, nil
case wasm.OpcodeVecI8x16AllTrue, wasm.OpcodeVecI16x8AllTrue, wasm.OpcodeVecI32x4AllTrue, wasm.OpcodeVecI64x2AllTrue,
wasm.OpcodeVecV128AnyTrue,
wasm.OpcodeVecI8x16BitMask, wasm.OpcodeVecI16x8BitMask, wasm.OpcodeVecI32x4BitMask, wasm.OpcodeVecI64x2BitMask:
return signature_V128_I32, nil
case wasm.OpcodeVecV128Not, wasm.OpcodeVecI8x16Neg, wasm.OpcodeVecI16x8Neg, wasm.OpcodeVecI32x4Neg, wasm.OpcodeVecI64x2Neg,
wasm.OpcodeVecF32x4Neg, wasm.OpcodeVecF64x2Neg, wasm.OpcodeVecF32x4Sqrt, wasm.OpcodeVecF64x2Sqrt,
wasm.OpcodeVecI8x16Abs, wasm.OpcodeVecI8x16Popcnt, wasm.OpcodeVecI16x8Abs, wasm.OpcodeVecI32x4Abs, wasm.OpcodeVecI64x2Abs,
wasm.OpcodeVecF32x4Abs, wasm.OpcodeVecF64x2Abs,
wasm.OpcodeVecF32x4Ceil, wasm.OpcodeVecF32x4Floor, wasm.OpcodeVecF32x4Trunc, wasm.OpcodeVecF32x4Nearest,
wasm.OpcodeVecF64x2Ceil, wasm.OpcodeVecF64x2Floor, wasm.OpcodeVecF64x2Trunc, wasm.OpcodeVecF64x2Nearest,
wasm.OpcodeVecI16x8ExtendLowI8x16S, wasm.OpcodeVecI16x8ExtendHighI8x16S, wasm.OpcodeVecI16x8ExtendLowI8x16U, wasm.OpcodeVecI16x8ExtendHighI8x16U,
wasm.OpcodeVecI32x4ExtendLowI16x8S, wasm.OpcodeVecI32x4ExtendHighI16x8S, wasm.OpcodeVecI32x4ExtendLowI16x8U, wasm.OpcodeVecI32x4ExtendHighI16x8U,
wasm.OpcodeVecI64x2ExtendLowI32x4S, wasm.OpcodeVecI64x2ExtendHighI32x4S, wasm.OpcodeVecI64x2ExtendLowI32x4U, wasm.OpcodeVecI64x2ExtendHighI32x4U,
wasm.OpcodeVecI16x8ExtaddPairwiseI8x16S, wasm.OpcodeVecI16x8ExtaddPairwiseI8x16U, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8S, wasm.OpcodeVecI32x4ExtaddPairwiseI16x8U,
wasm.OpcodeVecF64x2PromoteLowF32x4Zero, wasm.OpcodeVecF32x4DemoteF64x2Zero,
wasm.OpcodeVecF32x4ConvertI32x4S, wasm.OpcodeVecF32x4ConvertI32x4U,
wasm.OpcodeVecF64x2ConvertLowI32x4S, wasm.OpcodeVecF64x2ConvertLowI32x4U,
wasm.OpcodeVecI32x4TruncSatF32x4S, wasm.OpcodeVecI32x4TruncSatF32x4U,
wasm.OpcodeVecI32x4TruncSatF64x2SZero, wasm.OpcodeVecI32x4TruncSatF64x2UZero:
return signature_V128_V128, nil
case wasm.OpcodeVecV128Bitselect:
return signature_V128V128V128_V32, nil
case wasm.OpcodeVecI8x16Eq, wasm.OpcodeVecI8x16Ne, wasm.OpcodeVecI8x16LtS, wasm.OpcodeVecI8x16LtU, wasm.OpcodeVecI8x16GtS,
wasm.OpcodeVecI8x16GtU, wasm.OpcodeVecI8x16LeS, wasm.OpcodeVecI8x16LeU, wasm.OpcodeVecI8x16GeS, wasm.OpcodeVecI8x16GeU,
wasm.OpcodeVecI16x8Eq, wasm.OpcodeVecI16x8Ne, wasm.OpcodeVecI16x8LtS, wasm.OpcodeVecI16x8LtU, wasm.OpcodeVecI16x8GtS,
wasm.OpcodeVecI16x8GtU, wasm.OpcodeVecI16x8LeS, wasm.OpcodeVecI16x8LeU, wasm.OpcodeVecI16x8GeS, wasm.OpcodeVecI16x8GeU,
wasm.OpcodeVecI32x4Eq, wasm.OpcodeVecI32x4Ne, wasm.OpcodeVecI32x4LtS, wasm.OpcodeVecI32x4LtU, wasm.OpcodeVecI32x4GtS,
wasm.OpcodeVecI32x4GtU, wasm.OpcodeVecI32x4LeS, wasm.OpcodeVecI32x4LeU, wasm.OpcodeVecI32x4GeS, wasm.OpcodeVecI32x4GeU,
wasm.OpcodeVecI64x2Eq, wasm.OpcodeVecI64x2Ne, wasm.OpcodeVecI64x2LtS, wasm.OpcodeVecI64x2GtS, wasm.OpcodeVecI64x2LeS,
wasm.OpcodeVecI64x2GeS, wasm.OpcodeVecF32x4Eq, wasm.OpcodeVecF32x4Ne, wasm.OpcodeVecF32x4Lt, wasm.OpcodeVecF32x4Gt,
wasm.OpcodeVecF32x4Le, wasm.OpcodeVecF32x4Ge, wasm.OpcodeVecF64x2Eq, wasm.OpcodeVecF64x2Ne, wasm.OpcodeVecF64x2Lt,
wasm.OpcodeVecF64x2Gt, wasm.OpcodeVecF64x2Le, wasm.OpcodeVecF64x2Ge,
wasm.OpcodeVecI8x16Add, wasm.OpcodeVecI8x16AddSatS, wasm.OpcodeVecI8x16AddSatU, wasm.OpcodeVecI8x16Sub,
wasm.OpcodeVecI8x16SubSatS, wasm.OpcodeVecI8x16SubSatU,
wasm.OpcodeVecI16x8Add, wasm.OpcodeVecI16x8AddSatS, wasm.OpcodeVecI16x8AddSatU, wasm.OpcodeVecI16x8Sub,
wasm.OpcodeVecI16x8SubSatS, wasm.OpcodeVecI16x8SubSatU, wasm.OpcodeVecI16x8Mul,
wasm.OpcodeVecI32x4Add, wasm.OpcodeVecI32x4Sub, wasm.OpcodeVecI32x4Mul,
wasm.OpcodeVecI64x2Add, wasm.OpcodeVecI64x2Sub, wasm.OpcodeVecI64x2Mul,
wasm.OpcodeVecF32x4Add, wasm.OpcodeVecF32x4Sub, wasm.OpcodeVecF32x4Mul, wasm.OpcodeVecF32x4Div,
wasm.OpcodeVecF64x2Add, wasm.OpcodeVecF64x2Sub, wasm.OpcodeVecF64x2Mul, wasm.OpcodeVecF64x2Div,
wasm.OpcodeVecI8x16MinS, wasm.OpcodeVecI8x16MinU, wasm.OpcodeVecI8x16MaxS, wasm.OpcodeVecI8x16MaxU, wasm.OpcodeVecI8x16AvgrU,
wasm.OpcodeVecI16x8MinS, wasm.OpcodeVecI16x8MinU, wasm.OpcodeVecI16x8MaxS, wasm.OpcodeVecI16x8MaxU, wasm.OpcodeVecI16x8AvgrU,
wasm.OpcodeVecI32x4MinS, wasm.OpcodeVecI32x4MinU, wasm.OpcodeVecI32x4MaxS, wasm.OpcodeVecI32x4MaxU,
wasm.OpcodeVecF32x4Min, wasm.OpcodeVecF32x4Max, wasm.OpcodeVecF64x2Min, wasm.OpcodeVecF64x2Max,
wasm.OpcodeVecF32x4Pmin, wasm.OpcodeVecF32x4Pmax, wasm.OpcodeVecF64x2Pmin, wasm.OpcodeVecF64x2Pmax,
wasm.OpcodeVecI16x8Q15mulrSatS,
wasm.OpcodeVecI16x8ExtMulLowI8x16S, wasm.OpcodeVecI16x8ExtMulHighI8x16S, wasm.OpcodeVecI16x8ExtMulLowI8x16U, wasm.OpcodeVecI16x8ExtMulHighI8x16U,
wasm.OpcodeVecI32x4ExtMulLowI16x8S, wasm.OpcodeVecI32x4ExtMulHighI16x8S, wasm.OpcodeVecI32x4ExtMulLowI16x8U, wasm.OpcodeVecI32x4ExtMulHighI16x8U,
wasm.OpcodeVecI64x2ExtMulLowI32x4S, wasm.OpcodeVecI64x2ExtMulHighI32x4S, wasm.OpcodeVecI64x2ExtMulLowI32x4U, wasm.OpcodeVecI64x2ExtMulHighI32x4U,
wasm.OpcodeVecI32x4DotI16x8S,
wasm.OpcodeVecI8x16NarrowI16x8S, wasm.OpcodeVecI8x16NarrowI16x8U, wasm.OpcodeVecI16x8NarrowI32x4S, wasm.OpcodeVecI16x8NarrowI32x4U:
return signature_V128V128_V128, nil
default:
return nil, fmt.Errorf("unsupported vector instruction in interpreterir: %s", wasm.VectorInstructionName(vecOp))
}
case wasm.OpcodeAtomicPrefix:
switch atomicOp := c.body[c.pc+1]; atomicOp {
case wasm.OpcodeAtomicMemoryNotify:
return signature_I32I32_I32, nil
case wasm.OpcodeAtomicMemoryWait32:
return signature_I32I32I64_I32, nil
case wasm.OpcodeAtomicMemoryWait64:
return signature_I32I64I64_I32, nil
case wasm.OpcodeAtomicFence:
return signature_None_None, nil
case wasm.OpcodeAtomicI32Load, wasm.OpcodeAtomicI32Load8U, wasm.OpcodeAtomicI32Load16U:
return signature_I32_I32, nil
case wasm.OpcodeAtomicI64Load, wasm.OpcodeAtomicI64Load8U, wasm.OpcodeAtomicI64Load16U, wasm.OpcodeAtomicI64Load32U:
return signature_I32_I64, nil
case wasm.OpcodeAtomicI32Store, wasm.OpcodeAtomicI32Store8, wasm.OpcodeAtomicI32Store16:
return signature_I32I32_None, nil
case wasm.OpcodeAtomicI64Store, wasm.OpcodeAtomicI64Store8, wasm.OpcodeAtomicI64Store16, wasm.OpcodeAtomicI64Store32:
return signature_I32I64_None, nil
case wasm.OpcodeAtomicI32RmwAdd, wasm.OpcodeAtomicI32RmwSub, wasm.OpcodeAtomicI32RmwAnd, wasm.OpcodeAtomicI32RmwOr, wasm.OpcodeAtomicI32RmwXor, wasm.OpcodeAtomicI32RmwXchg,
wasm.OpcodeAtomicI32Rmw8AddU, wasm.OpcodeAtomicI32Rmw8SubU, wasm.OpcodeAtomicI32Rmw8AndU, wasm.OpcodeAtomicI32Rmw8OrU, wasm.OpcodeAtomicI32Rmw8XorU, wasm.OpcodeAtomicI32Rmw8XchgU,
wasm.OpcodeAtomicI32Rmw16AddU, wasm.OpcodeAtomicI32Rmw16SubU, wasm.OpcodeAtomicI32Rmw16AndU, wasm.OpcodeAtomicI32Rmw16OrU, wasm.OpcodeAtomicI32Rmw16XorU, wasm.OpcodeAtomicI32Rmw16XchgU:
return signature_I32I32_I32, nil
case wasm.OpcodeAtomicI64RmwAdd, wasm.OpcodeAtomicI64RmwSub, wasm.OpcodeAtomicI64RmwAnd, wasm.OpcodeAtomicI64RmwOr, wasm.OpcodeAtomicI64RmwXor, wasm.OpcodeAtomicI64RmwXchg,
wasm.OpcodeAtomicI64Rmw8AddU, wasm.OpcodeAtomicI64Rmw8SubU, wasm.OpcodeAtomicI64Rmw8AndU, wasm.OpcodeAtomicI64Rmw8OrU, wasm.OpcodeAtomicI64Rmw8XorU, wasm.OpcodeAtomicI64Rmw8XchgU,
wasm.OpcodeAtomicI64Rmw16AddU, wasm.OpcodeAtomicI64Rmw16SubU, wasm.OpcodeAtomicI64Rmw16AndU, wasm.OpcodeAtomicI64Rmw16OrU, wasm.OpcodeAtomicI64Rmw16XorU, wasm.OpcodeAtomicI64Rmw16XchgU,
wasm.OpcodeAtomicI64Rmw32AddU, wasm.OpcodeAtomicI64Rmw32SubU, wasm.OpcodeAtomicI64Rmw32AndU, wasm.OpcodeAtomicI64Rmw32OrU, wasm.OpcodeAtomicI64Rmw32XorU, wasm.OpcodeAtomicI64Rmw32XchgU:
return signature_I32I64_I64, nil
case wasm.OpcodeAtomicI32RmwCmpxchg, wasm.OpcodeAtomicI32Rmw8CmpxchgU, wasm.OpcodeAtomicI32Rmw16CmpxchgU:
return signature_I32I32I32_I32, nil
case wasm.OpcodeAtomicI64RmwCmpxchg, wasm.OpcodeAtomicI64Rmw8CmpxchgU, wasm.OpcodeAtomicI64Rmw16CmpxchgU, wasm.OpcodeAtomicI64Rmw32CmpxchgU:
return signature_I32I64I64_I64, nil
default:
return nil, fmt.Errorf("unsupported atomic instruction in interpreterir: %s", wasm.AtomicInstructionName(atomicOp))
}
default:
return nil, fmt.Errorf("unsupported instruction in interpreterir: 0x%x", op)
}
}
// funcTypeToIRSignatures is the central cache for a module to get the *signature
// for function calls.
type funcTypeToIRSignatures struct {
directCalls []*signature
indirectCalls []*signature
wasmTypes []wasm.FunctionType
}
// get returns the *signature for the direct or indirect function call against functions whose type is at `typeIndex`.
func (f *funcTypeToIRSignatures) get(typeIndex wasm.Index, indirect bool) *signature {
var sig *signature
if indirect {
sig = f.indirectCalls[typeIndex]
} else {
sig = f.directCalls[typeIndex]
}
if sig != nil {
return sig
}
tp := &f.wasmTypes[typeIndex]
if indirect {
sig = &signature{
in: make([]unsignedType, 0, len(tp.Params)+1), // +1 to reserve space for call indirect index.
out: make([]unsignedType, 0, len(tp.Results)),
}
} else {
sig = &signature{
in: make([]unsignedType, 0, len(tp.Params)),
out: make([]unsignedType, 0, len(tp.Results)),
}
}
for _, vt := range tp.Params {
sig.in = append(sig.in, wasmValueTypeTounsignedType(vt))
}
for _, vt := range tp.Results {
sig.out = append(sig.out, wasmValueTypeTounsignedType(vt))
}
if indirect {
sig.in = append(sig.in, unsignedTypeI32)
f.indirectCalls[typeIndex] = sig
} else {
f.directCalls[typeIndex] = sig
}
return sig
}
func wasmValueTypeTounsignedType(vt wasm.ValueType) unsignedType {
switch vt {
case wasm.ValueTypeI32:
return unsignedTypeI32
case wasm.ValueTypeI64,
// From interpreterir layer, ref type values are opaque 64-bit pointers.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
return unsignedTypeI64
case wasm.ValueTypeF32:
return unsignedTypeF32
case wasm.ValueTypeF64:
return unsignedTypeF64
case wasm.ValueTypeV128:
return unsignedTypeV128
}
panic("unreachable")
}
func wasmValueTypeToUnsignedOutSignature(vt wasm.ValueType) *signature {
switch vt {
case wasm.ValueTypeI32:
return signature_None_I32
case wasm.ValueTypeI64,
// From interpreterir layer, ref type values are opaque 64-bit pointers.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
return signature_None_I64
case wasm.ValueTypeF32:
return signature_None_F32
case wasm.ValueTypeF64:
return signature_None_F64
case wasm.ValueTypeV128:
return signature_None_V128
}
panic("unreachable")
}
func wasmValueTypeToUnsignedInSignature(vt wasm.ValueType) *signature {
switch vt {
case wasm.ValueTypeI32:
return signature_I32_None
case wasm.ValueTypeI64,
// From interpreterir layer, ref type values are opaque 64-bit pointers.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
return signature_I64_None
case wasm.ValueTypeF32:
return signature_F32_None
case wasm.ValueTypeF64:
return signature_F64_None
case wasm.ValueTypeV128:
return signature_V128_None
}
panic("unreachable")
}
func wasmValueTypeToUnsignedInOutSignature(vt wasm.ValueType) *signature {
switch vt {
case wasm.ValueTypeI32:
return signature_I32_I32
case wasm.ValueTypeI64,
// At interpreterir layer, ref type values are opaque 64-bit pointers.
wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
return signature_I64_I64
case wasm.ValueTypeF32:
return signature_F32_F32
case wasm.ValueTypeF64:
return signature_F64_F64
case wasm.ValueTypeV128:
return signature_V128_V128
}
panic("unreachable")
}
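To make the direct versus indirect distinction concrete: for a function type (i32, f32) -> i64, the indirect signature gains one trailing i32 that carries the call_indirect table offset. A hedged sketch, which would only compile inside this (internal) package:

// Illustrative only; exampleSignatures is not part of wazero.
func exampleSignatures() (direct, indirect *signature) {
	sigs := &funcTypeToIRSignatures{
		directCalls:   make([]*signature, 1),
		indirectCalls: make([]*signature, 1),
		wasmTypes: []wasm.FunctionType{{
			Params:  []wasm.ValueType{wasm.ValueTypeI32, wasm.ValueTypeF32},
			Results: []wasm.ValueType{wasm.ValueTypeI64},
		}},
	}
	direct = sigs.get(0, false)  // in: [i32, f32]      out: [i64]
	indirect = sigs.get(0, true) // in: [i32, f32, i32] out: [i64]; the extra i32 is the table offset
	return
}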

View file

@ -0,0 +1,170 @@
package backend
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
type (
// FunctionABI represents the ABI information for a function which corresponds to a ssa.Signature.
FunctionABI struct {
Initialized bool
Args, Rets []ABIArg
ArgStackSize, RetStackSize int64
ArgIntRealRegs byte
ArgFloatRealRegs byte
RetIntRealRegs byte
RetFloatRealRegs byte
}
// ABIArg represents either argument or return value's location.
ABIArg struct {
// Index is the index of the argument.
Index int
// Kind is the kind of the argument.
Kind ABIArgKind
// Reg is valid if Kind == ABIArgKindReg.
// This VReg must be based on RealReg.
Reg regalloc.VReg
// Offset is valid if Kind == ABIArgKindStack.
// This is the offset from the beginning of either arg or ret stack slot.
Offset int64
// Type is the type of the argument.
Type ssa.Type
}
// ABIArgKind is the kind of ABI argument.
ABIArgKind byte
)
const (
// ABIArgKindReg represents an argument passed in a register.
ABIArgKindReg = iota
// ABIArgKindStack represents an argument passed on the stack.
ABIArgKindStack
)
// String implements fmt.Stringer.
func (a *ABIArg) String() string {
return fmt.Sprintf("args[%d]: %s", a.Index, a.Kind)
}
// String implements fmt.Stringer.
func (a ABIArgKind) String() string {
switch a {
case ABIArgKindReg:
return "reg"
case ABIArgKindStack:
return "stack"
default:
panic("BUG")
}
}
// Init initializes the FunctionABI for the given signature.
func (a *FunctionABI) Init(sig *ssa.Signature, argResultInts, argResultFloats []regalloc.RealReg) {
if len(a.Rets) < len(sig.Results) {
a.Rets = make([]ABIArg, len(sig.Results))
}
a.Rets = a.Rets[:len(sig.Results)]
a.RetStackSize = a.setABIArgs(a.Rets, sig.Results, argResultInts, argResultFloats)
if argsNum := len(sig.Params); len(a.Args) < argsNum {
a.Args = make([]ABIArg, argsNum)
}
a.Args = a.Args[:len(sig.Params)]
a.ArgStackSize = a.setABIArgs(a.Args, sig.Params, argResultInts, argResultFloats)
// Gather the real registers usages in arg/return.
a.ArgIntRealRegs, a.ArgFloatRealRegs = 0, 0
a.RetIntRealRegs, a.RetFloatRealRegs = 0, 0
for i := range a.Rets {
r := &a.Rets[i]
if r.Kind == ABIArgKindReg {
if r.Type.IsInt() {
a.RetIntRealRegs++
} else {
a.RetFloatRealRegs++
}
}
}
for i := range a.Args {
arg := &a.Args[i]
if arg.Kind == ABIArgKindReg {
if arg.Type.IsInt() {
a.ArgIntRealRegs++
} else {
a.ArgFloatRealRegs++
}
}
}
a.Initialized = true
}
// setABIArgs sets the ABI arguments in the given slice. This assumes that len(s) >= len(types),
// where, if len(s) > len(types), the last elements of s are for the multi-return slot.
func (a *FunctionABI) setABIArgs(s []ABIArg, types []ssa.Type, ints, floats []regalloc.RealReg) (stackSize int64) {
il, fl := len(ints), len(floats)
var stackOffset int64
intParamIndex, floatParamIndex := 0, 0
for i, typ := range types {
arg := &s[i]
arg.Index = i
arg.Type = typ
if typ.IsInt() {
if intParamIndex >= il {
arg.Kind = ABIArgKindStack
const slotSize = 8 // Align 8 bytes.
arg.Offset = stackOffset
stackOffset += slotSize
} else {
arg.Kind = ABIArgKindReg
arg.Reg = regalloc.FromRealReg(ints[intParamIndex], regalloc.RegTypeInt)
intParamIndex++
}
} else {
if floatParamIndex >= fl {
arg.Kind = ABIArgKindStack
slotSize := int64(8) // Align at least 8 bytes.
if typ.Bits() == 128 { // Vector.
slotSize = 16
}
arg.Offset = stackOffset
stackOffset += slotSize
} else {
arg.Kind = ABIArgKindReg
arg.Reg = regalloc.FromRealReg(floats[floatParamIndex], regalloc.RegTypeFloat)
floatParamIndex++
}
}
}
return stackOffset
}
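// setABIArgsSketch is an editor-added, hypothetical illustration (not part of the
// original source) of the classification above. Given two available integer
// registers (ints must hold at least two entries), the first two i64 params are
// assigned to registers and the third spills to an 8-byte stack slot at offset 0,
// so the returned stack size is 8.
func setABIArgsSketch(ints []regalloc.RealReg) int64 {
	a := &FunctionABI{}
	args := make([]ABIArg, 3)
	types := []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64}
	return a.setABIArgs(args, types, ints[:2], nil)
}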
func (a *FunctionABI) AlignedArgResultStackSlotSize() uint32 {
stackSlotSize := a.RetStackSize + a.ArgStackSize
// Align stackSlotSize to 16 bytes.
stackSlotSize = (stackSlotSize + 15) &^ 15
// Check overflow 32-bit.
if stackSlotSize > 0xFFFFFFFF {
panic("ABI stack slot size overflow")
}
return uint32(stackSlotSize)
}
func (a *FunctionABI) ABIInfoAsUint64() uint64 {
return uint64(a.ArgIntRealRegs)<<56 |
uint64(a.ArgFloatRealRegs)<<48 |
uint64(a.RetIntRealRegs)<<40 |
uint64(a.RetFloatRealRegs)<<32 |
uint64(a.AlignedArgResultStackSlotSize())
}
func ABIInfoFromUint64(info uint64) (argIntRealRegs, argFloatRealRegs, retIntRealRegs, retFloatRealRegs byte, stackSlotSize uint32) {
return byte(info >> 56), byte(info >> 48), byte(info >> 40), byte(info >> 32), uint32(info)
}
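// abiInfoRoundTripSketch is an editor-added, hypothetical illustration (not part of
// the original source) of the packing above: each per-kind register count occupies
// one byte in the upper half of the uint64, and the 16-byte-aligned arg/result
// stack slot size occupies the lower 32 bits, so the values round-trip losslessly.
func abiInfoRoundTripSketch() (argInts, argFloats, retInts, retFloats byte, stackSlotSize uint32) {
	abi := FunctionABI{ArgIntRealRegs: 2, ArgFloatRealRegs: 1, RetIntRealRegs: 1}
	// Expect 2, 1, 1, 0 and a stack slot size of 0 back.
	return ABIInfoFromUint64(abi.ABIInfoAsUint64())
}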


@ -0,0 +1,3 @@
// Package backend must be free of Wasm-specific concept. In other words,
// this package must not import internal/wasm package.
package backend


@ -0,0 +1,417 @@
package backend
import (
"context"
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// NewCompiler returns a new Compiler that can generate machine code.
func NewCompiler(ctx context.Context, mach Machine, builder ssa.Builder) Compiler {
return newCompiler(ctx, mach, builder)
}
func newCompiler(_ context.Context, mach Machine, builder ssa.Builder) *compiler {
argResultInts, argResultFloats := mach.ArgsResultsRegs()
c := &compiler{
mach: mach, ssaBuilder: builder,
nextVRegID: regalloc.VRegIDNonReservedBegin,
argResultInts: argResultInts,
argResultFloats: argResultFloats,
}
mach.SetCompiler(c)
return c
}
// Compiler is the backend of wazevo which takes ssa.Builder and Machine and
// uses the information there to emit the final machine code.
type Compiler interface {
// SSABuilder returns the ssa.Builder used by this compiler.
SSABuilder() ssa.Builder
// Compile executes the following steps:
// 1. Lower()
// 2. RegAlloc()
// 3. Finalize()
// 4. Encode()
//
// Each step can be called individually for testing purposes, therefore they are exposed in this interface too.
//
// The returned byte slices are the machine code and the relocation information for the machine code.
// The caller is responsible for copying them immediately since the compiler may reuse the buffer.
Compile(ctx context.Context) (_ []byte, _ []RelocationInfo, _ error)
// Lower lowers the SSA instructions in the ssa.Builder to machine-specific instructions.
Lower()
// RegAlloc performs the register allocation after Lower is called.
RegAlloc()
// Finalize performs the finalization of the compilation, including machine code emission.
// This must be called after RegAlloc.
Finalize(ctx context.Context) error
// Buf returns the buffer of the encoded machine code. This is only used for testing purposes.
Buf() []byte
BufPtr() *[]byte
// Format returns the debug string of the current state of the compiler.
Format() string
// Init initializes the internal state of the compiler for the next compilation.
Init()
// AllocateVReg allocates a new virtual register of the given type.
AllocateVReg(typ ssa.Type) regalloc.VReg
// ValueDefinition returns the definition of the given value.
ValueDefinition(ssa.Value) *SSAValueDefinition
// VRegOf returns the virtual register of the given ssa.Value.
VRegOf(value ssa.Value) regalloc.VReg
// TypeOf returns the ssa.Type of the given virtual register.
TypeOf(regalloc.VReg) ssa.Type
// MatchInstr returns true if the given definition is from an instruction with the given opcode, the current group ID,
// and a refcount of 1. That means the instruction can be merged/swapped within the current instruction group.
MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool
// MatchInstrOneOf is the same as MatchInstr but for multiple opcodes. If it matches one of ssa.Opcode,
// this returns the opcode. Otherwise, this returns ssa.OpcodeInvalid.
//
// Note: callers should be careful to avoid excessive allocation of the opcodes slice.
MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode
// AddRelocationInfo appends the relocation information for the function reference at the current buffer offset.
AddRelocationInfo(funcRef ssa.FuncRef)
// AddSourceOffsetInfo appends the source offset information for the given offset.
AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset)
// SourceOffsetInfo returns the source offset information for the current buffer offset.
SourceOffsetInfo() []SourceOffsetInfo
// EmitByte appends a byte to the buffer. Used during the code emission.
EmitByte(b byte)
// Emit4Bytes appends 4 bytes to the buffer. Used during the code emission.
Emit4Bytes(b uint32)
// Emit8Bytes appends 8 bytes to the buffer. Used during the code emission.
Emit8Bytes(b uint64)
// GetFunctionABI returns the ABI information for the given signature.
GetFunctionABI(sig *ssa.Signature) *FunctionABI
}
// RelocationInfo represents the relocation information for a call instruction.
type RelocationInfo struct {
// Offset represents the offset from the beginning of the machine code of either a function or the entire module.
Offset int64
// FuncRef is the target function of the call instruction.
FuncRef ssa.FuncRef
}
// compiler implements Compiler.
type compiler struct {
mach Machine
currentGID ssa.InstructionGroupID
ssaBuilder ssa.Builder
// nextVRegID is the next virtual register ID to be allocated.
nextVRegID regalloc.VRegID
// ssaValueToVRegs maps ssa.ValueID to regalloc.VReg.
ssaValueToVRegs [] /* ssa.ValueID to */ regalloc.VReg
// ssaValueDefinitions maps ssa.ValueID to its definition.
ssaValueDefinitions []SSAValueDefinition
// ssaValueRefCounts is a cached list obtained by ssa.Builder.ValueRefCounts().
ssaValueRefCounts []int
// returnVRegs is the list of virtual registers that store the return values.
returnVRegs []regalloc.VReg
varEdges [][2]regalloc.VReg
varEdgeTypes []ssa.Type
constEdges []struct {
cInst *ssa.Instruction
dst regalloc.VReg
}
vRegSet []bool
vRegIDs []regalloc.VRegID
tempRegs []regalloc.VReg
tmpVals []ssa.Value
ssaTypeOfVRegID [] /* VRegID to */ ssa.Type
buf []byte
relocations []RelocationInfo
sourceOffsets []SourceOffsetInfo
// abis maps ssa.SignatureID to the ABI implementation.
abis []FunctionABI
argResultInts, argResultFloats []regalloc.RealReg
}
// SourceOffsetInfo associates a source offset with its executable offset.
type SourceOffsetInfo struct {
// SourceOffset is the source offset in the original source code.
SourceOffset ssa.SourceOffset
// ExecutableOffset is the offset in the compiled executable.
ExecutableOffset int64
}
// Compile implements Compiler.Compile.
func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
c.Lower()
if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
}
c.RegAlloc()
if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
}
if err := c.Finalize(ctx); err != nil {
return nil, nil, err
}
if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Finalization", c.Format())
}
return c.buf, c.relocations, nil
}
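// compileStepsSketch is an editor-added, hypothetical illustration (not part of the
// original source) of driving the pipeline above step by step instead of calling
// Compile directly: Lower, RegAlloc, and Finalize must run in this order, and the
// returned buffer must be copied before the compiler is reused for another function.
func compileStepsSketch(ctx context.Context, mach Machine, builder ssa.Builder) ([]byte, error) {
	c := NewCompiler(ctx, mach, builder)
	c.Lower()
	c.RegAlloc()
	if err := c.Finalize(ctx); err != nil {
		return nil, err
	}
	// Copy the machine code out of the compiler-owned buffer.
	return append([]byte(nil), c.Buf()...), nil
}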
// RegAlloc implements Compiler.RegAlloc.
func (c *compiler) RegAlloc() {
c.mach.RegAlloc()
}
// Finalize implements Compiler.Finalize.
func (c *compiler) Finalize(ctx context.Context) error {
c.mach.PostRegAlloc()
return c.mach.Encode(ctx)
}
// setCurrentGroupID sets the current instruction group ID.
func (c *compiler) setCurrentGroupID(gid ssa.InstructionGroupID) {
c.currentGID = gid
}
// assignVirtualRegisters assigns a virtual register to each valid ssa.ValueID in the ssa.Builder.
func (c *compiler) assignVirtualRegisters() {
builder := c.ssaBuilder
refCounts := builder.ValueRefCounts()
c.ssaValueRefCounts = refCounts
need := len(refCounts)
if need >= len(c.ssaValueToVRegs) {
c.ssaValueToVRegs = append(c.ssaValueToVRegs, make([]regalloc.VReg, need+1)...)
}
if need >= len(c.ssaValueDefinitions) {
c.ssaValueDefinitions = append(c.ssaValueDefinitions, make([]SSAValueDefinition, need+1)...)
}
for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
// First we assign a virtual register to each parameter.
for i := 0; i < blk.Params(); i++ {
p := blk.Param(i)
pid := p.ID()
typ := p.Type()
vreg := c.AllocateVReg(typ)
c.ssaValueToVRegs[pid] = vreg
c.ssaValueDefinitions[pid] = SSAValueDefinition{BlockParamValue: p, BlkParamVReg: vreg}
c.ssaTypeOfVRegID[vreg.ID()] = p.Type()
}
// Assign a virtual register to each value produced by instructions.
for cur := blk.Root(); cur != nil; cur = cur.Next() {
r, rs := cur.Returns()
var N int
if r.Valid() {
id := r.ID()
ssaTyp := r.Type()
typ := r.Type()
vReg := c.AllocateVReg(typ)
c.ssaValueToVRegs[id] = vReg
c.ssaValueDefinitions[id] = SSAValueDefinition{
Instr: cur,
N: 0,
RefCount: refCounts[id],
}
c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
N++
}
for _, r := range rs {
id := r.ID()
ssaTyp := r.Type()
vReg := c.AllocateVReg(ssaTyp)
c.ssaValueToVRegs[id] = vReg
c.ssaValueDefinitions[id] = SSAValueDefinition{
Instr: cur,
N: N,
RefCount: refCounts[id],
}
c.ssaTypeOfVRegID[vReg.ID()] = ssaTyp
N++
}
}
}
for i, retBlk := 0, builder.ReturnBlock(); i < retBlk.Params(); i++ {
typ := retBlk.Param(i).Type()
vReg := c.AllocateVReg(typ)
c.returnVRegs = append(c.returnVRegs, vReg)
c.ssaTypeOfVRegID[vReg.ID()] = typ
}
}
// AllocateVReg implements Compiler.AllocateVReg.
func (c *compiler) AllocateVReg(typ ssa.Type) regalloc.VReg {
regType := regalloc.RegTypeOf(typ)
r := regalloc.VReg(c.nextVRegID).SetRegType(regType)
id := r.ID()
if int(id) >= len(c.ssaTypeOfVRegID) {
c.ssaTypeOfVRegID = append(c.ssaTypeOfVRegID, make([]ssa.Type, id+1)...)
}
c.ssaTypeOfVRegID[id] = typ
c.nextVRegID++
return r
}
// Init implements Compiler.Init.
func (c *compiler) Init() {
c.currentGID = 0
c.nextVRegID = regalloc.VRegIDNonReservedBegin
c.returnVRegs = c.returnVRegs[:0]
c.mach.Reset()
c.varEdges = c.varEdges[:0]
c.constEdges = c.constEdges[:0]
c.buf = c.buf[:0]
c.sourceOffsets = c.sourceOffsets[:0]
c.relocations = c.relocations[:0]
}
// ValueDefinition implements Compiler.ValueDefinition.
func (c *compiler) ValueDefinition(value ssa.Value) *SSAValueDefinition {
return &c.ssaValueDefinitions[value.ID()]
}
// VRegOf implements Compiler.VRegOf.
func (c *compiler) VRegOf(value ssa.Value) regalloc.VReg {
return c.ssaValueToVRegs[value.ID()]
}
// Format implements Compiler.Format.
func (c *compiler) Format() string {
return c.mach.Format()
}
// TypeOf implements Compiler.TypeOf.
func (c *compiler) TypeOf(v regalloc.VReg) ssa.Type {
return c.ssaTypeOfVRegID[v.ID()]
}
// MatchInstr implements Compiler.MatchInstr.
func (c *compiler) MatchInstr(def *SSAValueDefinition, opcode ssa.Opcode) bool {
instr := def.Instr
return def.IsFromInstr() &&
instr.Opcode() == opcode &&
instr.GroupID() == c.currentGID &&
def.RefCount < 2
}
// MatchInstrOneOf implements Compiler.MatchInstrOneOf.
func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode) ssa.Opcode {
instr := def.Instr
if !def.IsFromInstr() {
return ssa.OpcodeInvalid
}
if instr.GroupID() != c.currentGID {
return ssa.OpcodeInvalid
}
if def.RefCount >= 2 {
return ssa.OpcodeInvalid
}
opcode := instr.Opcode()
for _, op := range opcodes {
if opcode == op {
return opcode
}
}
return ssa.OpcodeInvalid
}
// SSABuilder implements Compiler.SSABuilder.
func (c *compiler) SSABuilder() ssa.Builder {
return c.ssaBuilder
}
// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo.
func (c *compiler) AddSourceOffsetInfo(executableOffset int64, sourceOffset ssa.SourceOffset) {
c.sourceOffsets = append(c.sourceOffsets, SourceOffsetInfo{
SourceOffset: sourceOffset,
ExecutableOffset: executableOffset,
})
}
// SourceOffsetInfo implements Compiler.SourceOffsetInfo.
func (c *compiler) SourceOffsetInfo() []SourceOffsetInfo {
return c.sourceOffsets
}
// AddRelocationInfo implements Compiler.AddRelocationInfo.
func (c *compiler) AddRelocationInfo(funcRef ssa.FuncRef) {
c.relocations = append(c.relocations, RelocationInfo{
Offset: int64(len(c.buf)),
FuncRef: funcRef,
})
}
// Emit8Bytes implements Compiler.Emit8Bytes.
func (c *compiler) Emit8Bytes(b uint64) {
c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24), byte(b>>32), byte(b>>40), byte(b>>48), byte(b>>56))
}
// Emit4Bytes implements Compiler.Emit4Bytes.
func (c *compiler) Emit4Bytes(b uint32) {
c.buf = append(c.buf, byte(b), byte(b>>8), byte(b>>16), byte(b>>24))
}
// EmitByte implements Compiler.EmitByte.
func (c *compiler) EmitByte(b byte) {
c.buf = append(c.buf, b)
}
// Buf implements Compiler.Buf.
func (c *compiler) Buf() []byte {
return c.buf
}
// BufPtr implements Compiler.BufPtr.
func (c *compiler) BufPtr() *[]byte {
return &c.buf
}
func (c *compiler) GetFunctionABI(sig *ssa.Signature) *FunctionABI {
if int(sig.ID) >= len(c.abis) {
c.abis = append(c.abis, make([]FunctionABI, int(sig.ID)+1)...)
}
abi := &c.abis[sig.ID]
if abi.Initialized {
return abi
}
abi.Init(sig, c.argResultInts, c.argResultFloats)
return abi
}
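// functionABISketch is an editor-added, hypothetical illustration (not part of the
// original source) of the memoization above: the ABI for a given ssa.SignatureID is
// computed once, and subsequent lookups return the same cached *FunctionABI.
func functionABISketch(c Compiler, sig *ssa.Signature) bool {
	first := c.GetFunctionABI(sig)
	second := c.GetFunctionABI(sig)
	return first == second // true: both point at the cached entry.
}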


@ -0,0 +1,226 @@
package backend
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// Lower implements Compiler.Lower.
func (c *compiler) Lower() {
c.assignVirtualRegisters()
c.mach.SetCurrentABI(c.GetFunctionABI(c.ssaBuilder.Signature()))
c.mach.ExecutableContext().StartLoweringFunction(c.ssaBuilder.BlockIDMax())
c.lowerBlocks()
}
// lowerBlocks lowers each block in the ssa.Builder.
func (c *compiler) lowerBlocks() {
builder := c.ssaBuilder
for blk := builder.BlockIteratorReversePostOrderBegin(); blk != nil; blk = builder.BlockIteratorReversePostOrderNext() {
c.lowerBlock(blk)
}
ectx := c.mach.ExecutableContext()
// After lowering all blocks, we need to link adjacent blocks to lay out one single instruction list.
var prev ssa.BasicBlock
for next := builder.BlockIteratorReversePostOrderBegin(); next != nil; next = builder.BlockIteratorReversePostOrderNext() {
if prev != nil {
ectx.LinkAdjacentBlocks(prev, next)
}
prev = next
}
}
func (c *compiler) lowerBlock(blk ssa.BasicBlock) {
mach := c.mach
ectx := mach.ExecutableContext()
ectx.StartBlock(blk)
// We traverse the instructions in reverse order because we might want to lower multiple
// instructions together.
cur := blk.Tail()
// First gather the branching instructions at the end of the blocks.
var br0, br1 *ssa.Instruction
if cur.IsBranching() {
br0 = cur
cur = cur.Prev()
if cur != nil && cur.IsBranching() {
br1 = cur
cur = cur.Prev()
}
}
if br0 != nil {
c.lowerBranches(br0, br1)
}
if br1 != nil && br0 == nil {
panic("BUG? when a block has conditional branch but doesn't end with an unconditional branch?")
}
// Now start lowering the non-branching instructions.
for ; cur != nil; cur = cur.Prev() {
c.setCurrentGroupID(cur.GroupID())
if cur.Lowered() {
continue
}
switch cur.Opcode() {
case ssa.OpcodeReturn:
rets := cur.ReturnVals()
if len(rets) > 0 {
c.mach.LowerReturns(rets)
}
c.mach.InsertReturn()
default:
mach.LowerInstr(cur)
}
ectx.FlushPendingInstructions()
}
// Finally, if this is the entry block, we have to insert copies of arguments from the real location to the VReg.
if blk.EntryBlock() {
c.lowerFunctionArguments(blk)
}
ectx.EndBlock()
}
// lowerBranches is called right after StartBlock and before any LowerInstr call if
// there are branches to the given block. br0 is the instruction at the very end of the block, and br1 is the instruction just before br0 if it exists.
// At least br0 is not nil, but br1 can be nil if there's no branching before br0.
//
// See ssa.Instruction IsBranching, and the comment on ssa.BasicBlock.
func (c *compiler) lowerBranches(br0, br1 *ssa.Instruction) {
ectx := c.mach.ExecutableContext()
c.setCurrentGroupID(br0.GroupID())
c.mach.LowerSingleBranch(br0)
ectx.FlushPendingInstructions()
if br1 != nil {
c.setCurrentGroupID(br1.GroupID())
c.mach.LowerConditionalBranch(br1)
ectx.FlushPendingInstructions()
}
if br0.Opcode() == ssa.OpcodeJump {
_, args, target := br0.BranchData()
argExists := len(args) != 0
if argExists && br1 != nil {
panic("BUG: critical edge split failed")
}
if argExists && target.ReturnBlock() {
if len(args) > 0 {
c.mach.LowerReturns(args)
}
} else if argExists {
c.lowerBlockArguments(args, target)
}
}
ectx.FlushPendingInstructions()
}
func (c *compiler) lowerFunctionArguments(entry ssa.BasicBlock) {
ectx := c.mach.ExecutableContext()
c.tmpVals = c.tmpVals[:0]
for i := 0; i < entry.Params(); i++ {
p := entry.Param(i)
if c.ssaValueRefCounts[p.ID()] > 0 {
c.tmpVals = append(c.tmpVals, p)
} else {
// If the argument is not used, we can just pass an invalid value.
c.tmpVals = append(c.tmpVals, ssa.ValueInvalid)
}
}
c.mach.LowerParams(c.tmpVals)
ectx.FlushPendingInstructions()
}
// lowerBlockArguments lowers how to pass arguments to the given successor block.
func (c *compiler) lowerBlockArguments(args []ssa.Value, succ ssa.BasicBlock) {
if len(args) != succ.Params() {
panic("BUG: mismatched number of arguments")
}
c.varEdges = c.varEdges[:0]
c.varEdgeTypes = c.varEdgeTypes[:0]
c.constEdges = c.constEdges[:0]
for i := 0; i < len(args); i++ {
dst := succ.Param(i)
src := args[i]
dstReg := c.VRegOf(dst)
srcDef := c.ssaValueDefinitions[src.ID()]
if srcDef.IsFromInstr() && srcDef.Instr.Constant() {
c.constEdges = append(c.constEdges, struct {
cInst *ssa.Instruction
dst regalloc.VReg
}{cInst: srcDef.Instr, dst: dstReg})
} else {
srcReg := c.VRegOf(src)
// Even when src == dst, insert the move so that we can keep such registers alive.
c.varEdges = append(c.varEdges, [2]regalloc.VReg{srcReg, dstReg})
c.varEdgeTypes = append(c.varEdgeTypes, src.Type())
}
}
// Check if there's an overlap among the dsts and srcs in varEdges.
c.vRegIDs = c.vRegIDs[:0]
for _, edge := range c.varEdges {
src := edge[0].ID()
if int(src) >= len(c.vRegSet) {
c.vRegSet = append(c.vRegSet, make([]bool, src+1)...)
}
c.vRegSet[src] = true
c.vRegIDs = append(c.vRegIDs, src)
}
separated := true
for _, edge := range c.varEdges {
dst := edge[1].ID()
if int(dst) >= len(c.vRegSet) {
c.vRegSet = append(c.vRegSet, make([]bool, dst+1)...)
} else {
if c.vRegSet[dst] {
separated = false
break
}
}
}
for _, id := range c.vRegIDs {
c.vRegSet[id] = false // reset for the next use.
}
if separated {
// If there's no overlap, we can simply move the source to destination.
for i, edge := range c.varEdges {
src, dst := edge[0], edge[1]
c.mach.InsertMove(dst, src, c.varEdgeTypes[i])
}
} else {
// Otherwise, we route the values through temporary registers to avoid clobbering.
//
// First move all of the sources to temporary registers.
c.tempRegs = c.tempRegs[:0]
for i, edge := range c.varEdges {
src := edge[0]
typ := c.varEdgeTypes[i]
temp := c.AllocateVReg(typ)
c.tempRegs = append(c.tempRegs, temp)
c.mach.InsertMove(temp, src, typ)
}
// Then move the temporary registers to the destination.
for i, edge := range c.varEdges {
temp := c.tempRegs[i]
dst := edge[1]
c.mach.InsertMove(dst, temp, c.varEdgeTypes[i])
}
}
// Finally, move the constants.
for _, edge := range c.constEdges {
cInst, dst := edge.cInst, edge.dst
c.mach.InsertLoadConstantBlockArg(cInst, dst)
}
}
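// parallelMoveSketch is an editor-added, hypothetical illustration (not part of the
// original source) of why the overlap check above matters: when one edge's source is
// another edge's destination (e.g. a swap), copying directly would clobber a value,
// so the sources are first staged in temporaries, mirroring the non-separated path.
func parallelMoveSketch() (int, int) {
	x, y := 1, 2
	// Two direct moves (x = y; y = x) would yield 2, 2 and lose the original x.
	tmpX, tmpY := x, y // Stage the sources in temporaries.
	x, y = tmpY, tmpX  // Then write the destinations.
	return x, y // 2, 1: the swap is preserved.
}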


@ -0,0 +1,219 @@
package backend
import (
"fmt"
"math"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type ExecutableContext interface {
// StartLoweringFunction is called when the lowering of the given function is started.
// maximumBlockID is the maximum value of ssa.BasicBlockID existing in the function.
StartLoweringFunction(maximumBlockID ssa.BasicBlockID)
// LinkAdjacentBlocks is called after finished lowering all blocks in order to create one single instruction list.
LinkAdjacentBlocks(prev, next ssa.BasicBlock)
// StartBlock is called when the compilation of the given block is started.
// The order of this being called is the reverse post order of the ssa.BasicBlock(s) as we iterate with
// ssa.Builder BlockIteratorReversePostOrderBegin and BlockIteratorReversePostOrderNext.
StartBlock(ssa.BasicBlock)
// EndBlock is called when the compilation of the current block is finished.
EndBlock()
// FlushPendingInstructions flushes the pending instructions to the buffer.
// This will be called after the lowering of each SSA Instruction.
FlushPendingInstructions()
}
type ExecutableContextT[Instr any] struct {
CurrentSSABlk ssa.BasicBlock
// InstructionPool is the pool of instructions.
InstructionPool wazevoapi.Pool[Instr]
asNop func(*Instr)
setNext func(*Instr, *Instr)
setPrev func(*Instr, *Instr)
// RootInstr is the root instruction of the executable.
RootInstr *Instr
labelPositionPool wazevoapi.Pool[LabelPosition[Instr]]
NextLabel Label
// LabelPositions maps a label to the instructions of the region which the label represents.
LabelPositions map[Label]*LabelPosition[Instr]
OrderedBlockLabels []*LabelPosition[Instr]
// PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock.
PerBlockHead, PerBlockEnd *Instr
// PendingInstructions are the instructions which are not yet emitted into the instruction list.
PendingInstructions []*Instr
// SsaBlockIDToLabels maps an SSA block ID to the label.
SsaBlockIDToLabels []Label
}
func NewExecutableContextT[Instr any](
resetInstruction func(*Instr),
setNext func(*Instr, *Instr),
setPrev func(*Instr, *Instr),
asNop func(*Instr),
) *ExecutableContextT[Instr] {
return &ExecutableContextT[Instr]{
InstructionPool: wazevoapi.NewPool[Instr](resetInstruction),
asNop: asNop,
setNext: setNext,
setPrev: setPrev,
labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]),
LabelPositions: make(map[Label]*LabelPosition[Instr]),
NextLabel: LabelInvalid,
}
}
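// sketchInstr and newSketchExecutableContext are an editor-added, hypothetical
// illustration (not part of the original source) of how a backend wires its
// machine-specific instruction type into ExecutableContextT: a real ISA passes its
// own instruction struct plus the reset, linked-list, and nop accessors.
type sketchInstr struct {
	prev, next *sketchInstr
	nop        bool
}

func newSketchExecutableContext() *ExecutableContextT[sketchInstr] {
	return NewExecutableContextT[sketchInstr](
		func(i *sketchInstr) { *i = sketchInstr{} },  // resetInstruction
		func(i, next *sketchInstr) { i.next = next }, // setNext
		func(i, prev *sketchInstr) { i.prev = prev }, // setPrev
		func(i *sketchInstr) { i.nop = true },        // asNop
	)
}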
func resetLabelPosition[T any](l *LabelPosition[T]) {
*l = LabelPosition[T]{}
}
// StartLoweringFunction implements ExecutableContext.
func (e *ExecutableContextT[Instr]) StartLoweringFunction(max ssa.BasicBlockID) {
imax := int(max)
if len(e.SsaBlockIDToLabels) <= imax {
// Eagerly allocate labels for the blocks since the underlying slice will be used for the next iteration.
e.SsaBlockIDToLabels = append(e.SsaBlockIDToLabels, make([]Label, imax+1)...)
}
}
func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) {
e.CurrentSSABlk = blk
l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()]
if l == LabelInvalid {
l = e.AllocateLabel()
e.SsaBlockIDToLabels[blk.ID()] = l
}
end := e.allocateNop0()
e.PerBlockHead, e.PerBlockEnd = end, end
labelPos, ok := e.LabelPositions[l]
if !ok {
labelPos = e.AllocateLabelPosition(l)
e.LabelPositions[l] = labelPos
}
e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos)
labelPos.Begin, labelPos.End = end, end
labelPos.SB = blk
}
// EndBlock implements ExecutableContext.
func (e *ExecutableContextT[T]) EndBlock() {
// Insert nop0 as the head of the block for convenience to simplify the logic of inserting instructions.
e.insertAtPerBlockHead(e.allocateNop0())
l := e.SsaBlockIDToLabels[e.CurrentSSABlk.ID()]
e.LabelPositions[l].Begin = e.PerBlockHead
if e.CurrentSSABlk.EntryBlock() {
e.RootInstr = e.PerBlockHead
}
}
func (e *ExecutableContextT[T]) insertAtPerBlockHead(i *T) {
if e.PerBlockHead == nil {
e.PerBlockHead = i
e.PerBlockEnd = i
return
}
e.setNext(i, e.PerBlockHead)
e.setPrev(e.PerBlockHead, i)
e.PerBlockHead = i
}
// FlushPendingInstructions implements ExecutableContext.
func (e *ExecutableContextT[T]) FlushPendingInstructions() {
l := len(e.PendingInstructions)
if l == 0 {
return
}
for i := l - 1; i >= 0; i-- { // reverse because we lower instructions in reverse order.
e.insertAtPerBlockHead(e.PendingInstructions[i])
}
e.PendingInstructions = e.PendingInstructions[:0]
}
func (e *ExecutableContextT[T]) Reset() {
e.labelPositionPool.Reset()
e.InstructionPool.Reset()
for l := Label(0); l <= e.NextLabel; l++ {
delete(e.LabelPositions, l)
}
e.PendingInstructions = e.PendingInstructions[:0]
e.OrderedBlockLabels = e.OrderedBlockLabels[:0]
e.RootInstr = nil
e.SsaBlockIDToLabels = e.SsaBlockIDToLabels[:0]
e.PerBlockHead, e.PerBlockEnd = nil, nil
e.NextLabel = LabelInvalid
}
// AllocateLabel allocates an unused label.
func (e *ExecutableContextT[T]) AllocateLabel() Label {
e.NextLabel++
return e.NextLabel
}
func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] {
l := e.labelPositionPool.Allocate()
l.L = la
return l
}
func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label {
if blk.ReturnBlock() {
return LabelReturn
}
l := e.SsaBlockIDToLabels[blk.ID()]
if l == LabelInvalid {
l = e.AllocateLabel()
e.SsaBlockIDToLabels[blk.ID()] = l
}
return l
}
func (e *ExecutableContextT[T]) allocateNop0() *T {
i := e.InstructionPool.Allocate()
e.asNop(i)
return i
}
// LinkAdjacentBlocks implements ExecutableContext.
func (e *ExecutableContextT[T]) LinkAdjacentBlocks(prev, next ssa.BasicBlock) {
prevLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(prev)]
nextLabelPos := e.LabelPositions[e.GetOrAllocateSSABlockLabel(next)]
e.setNext(prevLabelPos.End, nextLabelPos.Begin)
}
// LabelPosition represents the regions of the generated code which the label represents.
type LabelPosition[Instr any] struct {
SB ssa.BasicBlock
L Label
Begin, End *Instr
BinaryOffset int64
}
// Label represents a position in the generated code which is either
// a real instruction or the constant pool (e.g. jump tables).
//
// This is exactly the same as the traditional "label" in assembly code.
type Label uint32
const (
LabelInvalid Label = 0
LabelReturn Label = math.MaxUint32
)
// String implements fmt.Stringer.
func (l Label) String() string {
return fmt.Sprintf("L%d", l)
}


@ -0,0 +1,33 @@
package backend
import "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
// GoFunctionCallRequiredStackSize returns the size of the stack required for the Go function call.
// argBegin is the index of the first argument in the signature which is not either execution context or module context.
func GoFunctionCallRequiredStackSize(sig *ssa.Signature, argBegin int) (ret, retUnaligned int64) {
var paramNeededInBytes, resultNeededInBytes int64
for _, p := range sig.Params[argBegin:] {
s := int64(p.Size())
if s < 8 {
s = 8 // We use uint64 for all basic types, except SIMD v128.
}
paramNeededInBytes += s
}
for _, r := range sig.Results {
s := int64(r.Size())
if s < 8 {
s = 8 // We use uint64 for all basic types, except SIMD v128.
}
resultNeededInBytes += s
}
if paramNeededInBytes > resultNeededInBytes {
ret = paramNeededInBytes
} else {
ret = resultNeededInBytes
}
retUnaligned = ret
// Align to 16 bytes.
ret = (ret + 15) &^ 15
return
}
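// goCallStackSizeSketch is an editor-added, hypothetical illustration (not part of
// the original source): with argBegin=1 skipping the execution context, params
// (i32, f64) each occupy 8 bytes in the Go []uint64 view (16 total) and the single
// v128 result needs 16 bytes, so both the aligned and unaligned sizes are 16.
func goCallStackSizeSketch() (aligned, unaligned int64) {
	sig := &ssa.Signature{
		Params:  []ssa.Type{ssa.TypeI64 /* execution context */, ssa.TypeI32, ssa.TypeF64},
		Results: []ssa.Type{ssa.TypeV128},
	}
	return GoFunctionCallRequiredStackSize(sig, 1)
}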


@ -0,0 +1,186 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// For the details of the ABI, see:
// https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#amd64-architecture
var (
intArgResultRegs = []regalloc.RealReg{rax, rbx, rcx, rdi, rsi, r8, r9, r10, r11}
floatArgResultRegs = []regalloc.RealReg{xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7}
)
var regInfo = &regalloc.RegisterInfo{
AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
regalloc.RegTypeInt: {
rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15,
},
regalloc.RegTypeFloat: {
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
},
},
CalleeSavedRegisters: regalloc.NewRegSet(
rdx, r12, r13, r14, r15,
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
),
CallerSavedRegisters: regalloc.NewRegSet(
rax, rcx, rbx, rsi, rdi, r8, r9, r10, r11,
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
),
RealRegToVReg: []regalloc.VReg{
rax: raxVReg, rcx: rcxVReg, rdx: rdxVReg, rbx: rbxVReg, rsp: rspVReg, rbp: rbpVReg, rsi: rsiVReg, rdi: rdiVReg,
r8: r8VReg, r9: r9VReg, r10: r10VReg, r11: r11VReg, r12: r12VReg, r13: r13VReg, r14: r14VReg, r15: r15VReg,
xmm0: xmm0VReg, xmm1: xmm1VReg, xmm2: xmm2VReg, xmm3: xmm3VReg, xmm4: xmm4VReg, xmm5: xmm5VReg, xmm6: xmm6VReg,
xmm7: xmm7VReg, xmm8: xmm8VReg, xmm9: xmm9VReg, xmm10: xmm10VReg, xmm11: xmm11VReg, xmm12: xmm12VReg,
xmm13: xmm13VReg, xmm14: xmm14VReg, xmm15: xmm15VReg,
},
RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
RealRegType: func(r regalloc.RealReg) regalloc.RegType {
if r < xmm0 {
return regalloc.RegTypeInt
}
return regalloc.RegTypeFloat
},
}
// ArgsResultsRegs implements backend.Machine.
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
return intArgResultRegs, floatArgResultRegs
}
// LowerParams implements backend.Machine.
func (m *machine) LowerParams(args []ssa.Value) {
a := m.currentABI
for i, ssaArg := range args {
if !ssaArg.Valid() {
continue
}
reg := m.c.VRegOf(ssaArg)
arg := &a.Args[i]
if arg.Kind == backend.ABIArgKindReg {
m.InsertMove(reg, arg.Reg, arg.Type)
} else {
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <-- RBP
// | ........... |
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ........... |
// | spill slot 0 |
// RSP--> +-----------------+
// (low address)
// Load the value from the arg stack slot above the current RBP.
load := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmRBPReg(uint32(arg.Offset + 16)))
switch arg.Type {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, mem, reg)
case ssa.TypeI64:
load.asMov64MR(mem, reg)
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, mem, reg)
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, reg)
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, reg)
default:
panic("BUG")
}
m.insert(load)
}
}
}
// LowerReturns implements backend.Machine.
func (m *machine) LowerReturns(rets []ssa.Value) {
// Load the XMM registers first as it might need a temporary register to inline
// a constant return value.
a := m.currentABI
for i, ret := range rets {
r := &a.Rets[i]
if !r.Type.IsInt() {
m.LowerReturn(ret, r)
}
}
// Then load the GPR registers.
for i, ret := range rets {
r := &a.Rets[i]
if r.Type.IsInt() {
m.LowerReturn(ret, r)
}
}
}
func (m *machine) LowerReturn(ret ssa.Value, r *backend.ABIArg) {
reg := m.c.VRegOf(ret)
if def := m.c.ValueDefinition(ret); def.IsFromInstr() {
// Constant instructions are inlined.
if inst := def.Instr; inst.Constant() {
m.insertLoadConstant(inst, reg)
}
}
if r.Kind == backend.ABIArgKindReg {
m.InsertMove(r.Reg, reg, ret.Type())
} else {
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <-- RBP
// | ........... |
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ........... |
// | spill slot 0 |
// RSP--> +-----------------+
// (low address)
// Store the value to the return stack slot above the current RBP.
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmRBPReg(uint32(m.currentABI.ArgStackSize + 16 + r.Offset)))
switch r.Type {
case ssa.TypeI32:
store.asMovRM(reg, mem, 4)
case ssa.TypeI64:
store.asMovRM(reg, mem, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, reg, mem)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, reg, mem)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, reg, mem)
}
m.insert(store)
}
}


@ -0,0 +1,9 @@
package amd64
// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below.
// This implements wazevo.entrypoint; see the comments there for details.
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)
// afterGoFunctionCallEntrypoint enters the machine code after growing the stack.
// This implements wazevo.afterGoFunctionCallEntrypoint; see the comments there for details.
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)


@ -0,0 +1,29 @@
#include "funcdata.h"
#include "textflag.h"
// entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)
TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48
MOVQ preambleExecutable+0(FP), R11
MOVQ functionExecutable+8(FP), R14
MOVQ executionContextPtr+16(FP), AX // First argument is passed in AX.
MOVQ moduleContextPtr+24(FP), BX // Second argument is passed in BX.
MOVQ paramResultSlicePtr+32(FP), R12
MOVQ goAllocatedStackSlicePtr+40(FP), R13
JMP R11
// afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)
TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32
MOVQ executable+0(FP), CX
MOVQ executionContextPtr+8(FP), AX // First argument is passed in AX.
// Save the stack pointer and frame pointer.
MOVQ BP, 16(AX) // 16 == ExecutionContextOffsetOriginalFramePointer
MOVQ SP, 24(AX) // 24 == ExecutionContextOffsetOriginalStackPointer
// Then set the stack pointer and frame pointer to the values we got from the Go runtime.
MOVQ framePointer+24(FP), BP
// WARNING: do not update SP before BP, because the Go translates (FP) as (SP) + 8.
MOVQ stackPointer+16(FP), SP
JMP CX


@ -0,0 +1,248 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
var (
executionContextPtrReg = raxVReg
// The following are callee-saved registers. They can be used freely in the entry preamble
// since the preamble is called via a Go assembly function which has a stack-based ABI.
// savedExecutionContextPtr must also be a callee-saved reg so that it can be used in both the prologue and epilogue.
savedExecutionContextPtr = rdxVReg
// paramResultSlicePtr must match with entrypoint function in abi_entry_amd64.s.
paramResultSlicePtr = r12VReg
// goAllocatedStackPtr must match with entrypoint function in abi_entry_amd64.s.
goAllocatedStackPtr = r13VReg
// functionExecutable must match with entrypoint function in abi_entry_amd64.s.
functionExecutable = r14VReg
tmpIntReg = r15VReg
tmpXmmReg = xmm15VReg
)
// CompileEntryPreamble implements backend.Machine.
func (m *machine) CompileEntryPreamble(sig *ssa.Signature) []byte {
root := m.compileEntryPreamble(sig)
m.encodeWithoutSSA(root)
buf := m.c.Buf()
return buf
}
func (m *machine) compileEntryPreamble(sig *ssa.Signature) *instruction {
abi := backend.FunctionABI{}
abi.Init(sig, intArgResultRegs, floatArgResultRegs)
root := m.allocateNop()
//// ----------------------------------- prologue ----------------------------------- ////
// First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well.
// mov %executionContextPtrReg, %savedExecutionContextPtr
cur := m.move64(executionContextPtrReg, savedExecutionContextPtr, root)
// Next is to save the original RBP and RSP into the execution context.
cur = m.saveOriginalRSPRBP(cur)
// Now set the RSP to the Go-allocated stack pointer.
// mov %goAllocatedStackPtr, %rsp
cur = m.move64(goAllocatedStackPtr, rspVReg, cur)
if stackSlotSize := abi.AlignedArgResultStackSlotSize(); stackSlotSize > 0 {
// Allocate stack slots for the arguments and return values.
// sub $stackSlotSize, %rsp
spDec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(stackSlotSize)), rspVReg, true)
cur = linkInstr(cur, spDec)
}
var offset uint32
for i := range abi.Args {
if i < 2 {
// execution context ptr and module context ptr are passed in rax and rbx respectively by the Go assembly function.
continue
}
arg := &abi.Args[i]
cur = m.goEntryPreamblePassArg(cur, paramResultSlicePtr, offset, arg)
if arg.Type == ssa.TypeV128 {
offset += 16
} else {
offset += 8
}
}
// Zero out RBP so that the unwind/stack growth code can correctly detect the end of the stack.
zerosRbp := m.allocateInstr().asAluRmiR(aluRmiROpcodeXor, newOperandReg(rbpVReg), rbpVReg, true)
cur = linkInstr(cur, zerosRbp)
// Now we are ready to call the real function. Note that at this point the stack pointer is already set to the
// Go-allocated stack, which is aligned to 16 bytes.
call := m.allocateInstr().asCallIndirect(newOperandReg(functionExecutable), &abi)
cur = linkInstr(cur, call)
//// ----------------------------------- epilogue ----------------------------------- ////
// Read the results from regs and the stack, and set them correctly into the paramResultSlicePtr.
offset = 0
for i := range abi.Rets {
r := &abi.Rets[i]
cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, offset, r, uint32(abi.ArgStackSize))
if r.Type == ssa.TypeV128 {
offset += 16
} else {
offset += 8
}
}
// Finally, restore the original RBP and RSP.
cur = m.restoreOriginalRSPRBP(cur)
ret := m.allocateInstr().asRet()
linkInstr(cur, ret)
return root
}
// saveOriginalRSPRBP saves the original RSP and RBP into the execution context.
func (m *machine) saveOriginalRSPRBP(cur *instruction) *instruction {
// mov %rbp, wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg)
// mov %rsp, wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg)
cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, true, cur)
cur = m.loadOrStore64AtExecutionCtx(executionContextPtrReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, true, cur)
return cur
}
// restoreOriginalRSPRBP restores the original RSP and RBP from the execution context.
func (m *machine) restoreOriginalRSPRBP(cur *instruction) *instruction {
// mov wazevoapi.ExecutionContextOffsetOriginalFramePointer(%executionContextPtrReg), %rbp
// mov wazevoapi.ExecutionContextOffsetOriginalStackPointer(%executionContextPtrReg), %rsp
cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalFramePointer, rbpVReg, false, cur)
cur = m.loadOrStore64AtExecutionCtx(savedExecutionContextPtr, wazevoapi.ExecutionContextOffsetOriginalStackPointer, rspVReg, false, cur)
return cur
}
func (m *machine) move64(src, dst regalloc.VReg, prev *instruction) *instruction {
mov := m.allocateInstr().asMovRR(src, dst, true)
return linkInstr(prev, mov)
}
func (m *machine) loadOrStore64AtExecutionCtx(execCtx regalloc.VReg, offset wazevoapi.Offset, r regalloc.VReg, store bool, prev *instruction) *instruction {
mem := newOperandMem(m.newAmodeImmReg(offset.U32(), execCtx))
instr := m.allocateInstr()
if store {
instr.asMovRM(r, mem, 8)
} else {
instr.asMov64MR(mem, r)
}
return linkInstr(prev, instr)
}
// This is for debugging.
func (m *machine) linkUD2(cur *instruction) *instruction { //nolint
return linkInstr(cur, m.allocateInstr().asUD2())
}
func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, offsetInParamSlice uint32, arg *backend.ABIArg) *instruction {
var dst regalloc.VReg
argTyp := arg.Type
if arg.Kind == backend.ABIArgKindStack {
// For a stack-passed argument, stage the value in a temporary register first.
switch argTyp {
case ssa.TypeI32, ssa.TypeI64:
dst = tmpIntReg
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
dst = tmpXmmReg
default:
panic("BUG")
}
} else {
dst = arg.Reg
}
load := m.allocateInstr()
a := newOperandMem(m.newAmodeImmReg(offsetInParamSlice, paramSlicePtr))
switch arg.Type {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, a, dst)
case ssa.TypeI64:
load.asMov64MR(a, dst)
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, a, dst)
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, a, dst)
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, dst)
}
cur = linkInstr(cur, load)
if arg.Kind == backend.ABIArgKindStack {
// Store back to the stack.
store := m.allocateInstr()
a := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset), rspVReg))
switch arg.Type {
case ssa.TypeI32:
store.asMovRM(dst, a, 4)
case ssa.TypeI64:
store.asMovRM(dst, a, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, dst, a)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, dst, a)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, dst, a)
}
cur = linkInstr(cur, store)
}
return cur
}
func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, offsetInResultSlice uint32, result *backend.ABIArg, resultStackSlotBeginOffset uint32) *instruction {
var r regalloc.VReg
if result.Kind == backend.ABIArgKindStack {
// Load the value to the temporary.
load := m.allocateInstr()
offset := resultStackSlotBeginOffset + uint32(result.Offset)
a := newOperandMem(m.newAmodeImmReg(offset, rspVReg))
switch result.Type {
case ssa.TypeI32:
r = tmpIntReg
load.asMovzxRmR(extModeLQ, a, r)
case ssa.TypeI64:
r = tmpIntReg
load.asMov64MR(a, r)
case ssa.TypeF32:
r = tmpXmmReg
load.asXmmUnaryRmR(sseOpcodeMovss, a, r)
case ssa.TypeF64:
r = tmpXmmReg
load.asXmmUnaryRmR(sseOpcodeMovsd, a, r)
case ssa.TypeV128:
r = tmpXmmReg
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, r)
default:
panic("BUG")
}
cur = linkInstr(cur, load)
} else {
r = result.Reg
}
store := m.allocateInstr()
a := newOperandMem(m.newAmodeImmReg(offsetInResultSlice, resultSlicePtr))
switch result.Type {
case ssa.TypeI32:
store.asMovRM(r, a, 4)
case ssa.TypeI64:
store.asMovRM(r, a, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, r, a)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, r, a)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, r, a)
}
return linkInstr(cur, store)
}


@ -0,0 +1,443 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
var calleeSavedVRegs = []regalloc.VReg{
rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
}
// CompileGoFunctionTrampoline implements backend.Machine.
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
ectx := m.ectx
argBegin := 1 // Skips exec context by default.
if needModuleContextPtr {
argBegin++
}
abi := &backend.FunctionABI{}
abi.Init(sig, intArgResultRegs, floatArgResultRegs)
m.currentABI = abi
cur := m.allocateNop()
ectx.RootInstr = cur
// Execution context is always the first argument.
execCtrPtr := raxVReg
// First we update RBP and RSP just like the normal prologue.
//
// (high address) (high address)
// RBP ----> +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | ====> | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | Return Addr | | Return Addr |
// RSP ----> +-----------------+ | Caller_RBP |
// (low address) +-----------------+ <----- RSP, RBP
//
cur = m.setupRBPRSP(cur)
goSliceSizeAligned, goSliceSizeAlignedUnaligned := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
cur = m.insertStackBoundsCheck(goSliceSizeAligned+8 /* size of the Go slice */, cur)
// Save the callee saved registers.
cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
if needModuleContextPtr {
moduleCtrPtr := rbxVReg // Module context is always the second argument.
mem := m.newAmodeImmReg(
wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.U32(),
execCtrPtr)
store := m.allocateInstr().asMovRM(moduleCtrPtr, newOperandMem(mem), 8)
cur = linkInstr(cur, store)
}
// Now let's advance the RSP to the stack slot for the arguments.
//
// (high address) (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | =======> | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | Return Addr | | Return Addr |
// | Caller_RBP | | Caller_RBP |
// RBP,RSP --> +-----------------+ +-----------------+ <----- RBP
// (low address) | arg[N]/ret[M] |
// | .......... |
// | arg[1]/ret[1] |
// | arg[0]/ret[0] |
// +-----------------+ <----- RSP
// (low address)
//
// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
// the arguments/return values to/from Go function.
cur = m.addRSP(-int32(goSliceSizeAligned), cur)
// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
var offsetInGoSlice int32
for i := range abi.Args[argBegin:] {
arg := &abi.Args[argBegin+i]
var v regalloc.VReg
if arg.Kind == backend.ABIArgKindReg {
v = arg.Reg
} else {
// We have saved callee saved registers, so we can use them.
if arg.Type.IsInt() {
v = r15VReg
} else {
v = xmm15VReg
}
mem := newOperandMem(m.newAmodeImmReg(uint32(arg.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
load := m.allocateInstr()
switch arg.Type {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, mem, v)
case ssa.TypeI64:
load.asMov64MR(mem, v)
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
default:
panic("BUG")
}
cur = linkInstr(cur, load)
}
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
switch arg.Type {
case ssa.TypeI32:
store.asMovRM(v, mem, 4)
offsetInGoSlice += 8 // always uint64 rep.
case ssa.TypeI64:
store.asMovRM(v, mem, 8)
offsetInGoSlice += 8
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, v, mem)
offsetInGoSlice += 8 // always uint64 rep.
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
offsetInGoSlice += 8
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
offsetInGoSlice += 16
default:
panic("BUG")
}
cur = linkInstr(cur, store)
}
// Finally we push the size of the slice to the stack so the stack looks like:
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | Return Addr |
// | Caller_RBP |
// +-----------------+ <----- RBP
// | arg[N]/ret[M] |
// | .......... |
// | arg[1]/ret[1] |
// | arg[0]/ret[0] |
// | slice size |
// +-----------------+ <----- RSP
// (low address)
//
// push $sliceSize
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandImm32(uint32(goSliceSizeAlignedUnaligned))))
// Load the exitCode to the register.
exitCodeReg := r12VReg // Callee saved which is already saved.
cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(exitCode), false))
saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
cur = linkInstr(cur, setExitCode)
cur = linkInstr(cur, saveRsp)
cur = linkInstr(cur, saveRbp)
// Ready to exit the execution.
cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
// We don't need the slice size anymore, so pop it.
cur = m.addRSP(8, cur)
// Ready to set up the results.
offsetInGoSlice = 0
// To avoid the result overwriting the execution context pointer, we track the offset here,
// and defer the restoration of that result to the end of this function.
var argOverlapWithExecCtxOffset int32 = -1
for i := range abi.Rets {
r := &abi.Rets[i]
var v regalloc.VReg
isRegResult := r.Kind == backend.ABIArgKindReg
if isRegResult {
v = r.Reg
if v.RealReg() == execCtrPtr.RealReg() {
argOverlapWithExecCtxOffset = offsetInGoSlice
offsetInGoSlice += 8 // always uint64 rep.
continue
}
} else {
if r.Type.IsInt() {
v = r15VReg
} else {
v = xmm15VReg
}
}
load := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetInGoSlice), rspVReg))
switch r.Type {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, mem, v)
offsetInGoSlice += 8 // always uint64 rep.
case ssa.TypeI64:
load.asMov64MR(mem, v)
offsetInGoSlice += 8
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, mem, v)
offsetInGoSlice += 8 // always uint64 rep.
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, mem, v)
offsetInGoSlice += 8
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
offsetInGoSlice += 16
default:
panic("BUG")
}
cur = linkInstr(cur, load)
if !isRegResult {
// We need to store it back to the result slot above rbp.
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(abi.ArgStackSize+r.Offset+16 /* to skip caller_rbp and ret_addr */), rbpVReg))
switch r.Type {
case ssa.TypeI32:
store.asMovRM(v, mem, 4)
case ssa.TypeI64:
store.asMovRM(v, mem, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, v, mem)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
default:
panic("BUG")
}
cur = linkInstr(cur, store)
}
}
// Before return, we need to restore the callee saved registers.
cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, calleeSavedVRegs)
if argOverlapWithExecCtxOffset >= 0 {
// At this point the execution context pointer register is not used anymore, so we can finally store the
// result to the register which overlaps with the execution context pointer.
mem := newOperandMem(m.newAmodeImmReg(uint32(argOverlapWithExecCtxOffset), rspVReg))
load := m.allocateInstr().asMov64MR(mem, execCtrPtr)
cur = linkInstr(cur, load)
}
// Finally ready to return.
cur = m.revertRBPRSP(cur)
linkInstr(cur, m.allocateInstr().asRet())
m.encodeWithoutSSA(ectx.RootInstr)
return m.c.Buf()
}
func (m *machine) saveRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
switch v.RegType() {
case regalloc.RegTypeInt:
store.asMovRM(v, mem, 8)
case regalloc.RegTypeFloat:
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
default:
panic("BUG")
}
cur = linkInstr(cur, store)
offset += 16 // See the execution context struct. Each register slot is 16-byte aligned unconditionally.
}
return cur
}
func (m *machine) restoreRegistersInExecutionContext(cur *instruction, execCtx regalloc.VReg, regs []regalloc.VReg) *instruction {
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
load := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offset), execCtx))
switch v.RegType() {
case regalloc.RegTypeInt:
load.asMov64MR(mem, v)
case regalloc.RegTypeFloat:
load.asXmmUnaryRmR(sseOpcodeMovdqu, mem, v)
default:
panic("BUG")
}
cur = linkInstr(cur, load)
offset += 16 // See the execution context struct. Each register slot is 16-byte aligned unconditionally.
}
return cur
}
func (m *machine) storeReturnAddressAndExit(cur *instruction, execCtx regalloc.VReg) *instruction {
readRip := m.allocateInstr()
cur = linkInstr(cur, readRip)
ripReg := r12VReg // Callee saved which is already saved.
saveRip := m.allocateInstr().asMovRM(
ripReg,
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetGoCallReturnAddress.U32(), execCtx)),
8,
)
cur = linkInstr(cur, saveRip)
exit := m.allocateExitSeq(execCtx)
cur = linkInstr(cur, exit)
nop, l := m.allocateBrTarget()
cur = linkInstr(cur, nop)
readRip.asLEA(newOperandLabel(l), ripReg)
return cur
}
// stackGrowSaveVRegs is the set of registers that must be saved/restored while growing the stack when there's insufficient
// stack space left. Basically this is all the allocatable registers except for RSP, RBP, and RAX, which contains the
// execution context pointer. The ExecCtx pointer is always the first argument so we don't need to save it.
var stackGrowSaveVRegs = []regalloc.VReg{
rdxVReg, r12VReg, r13VReg, r14VReg, r15VReg,
rcxVReg, rbxVReg, rsiVReg, rdiVReg, r8VReg, r9VReg, r10VReg, r11VReg,
xmm8VReg, xmm9VReg, xmm10VReg, xmm11VReg, xmm12VReg, xmm13VReg, xmm14VReg, xmm15VReg,
xmm0VReg, xmm1VReg, xmm2VReg, xmm3VReg, xmm4VReg, xmm5VReg, xmm6VReg, xmm7VReg,
}
// CompileStackGrowCallSequence implements backend.Machine.
func (m *machine) CompileStackGrowCallSequence() []byte {
ectx := m.ectx
cur := m.allocateNop()
ectx.RootInstr = cur
cur = m.setupRBPRSP(cur)
// Execution context is always the first argument.
execCtrPtr := raxVReg
// Save the callee saved and argument registers.
cur = m.saveRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
// Load the exitCode to the register.
exitCodeReg := r12VReg // Already saved.
cur = linkInstr(cur, m.allocateInstr().asImm(exitCodeReg, uint64(wazevoapi.ExitCodeGrowStack), false))
saveRsp, saveRbp, setExitCode := m.allocateExitInstructions(execCtrPtr, exitCodeReg)
cur = linkInstr(cur, setExitCode)
cur = linkInstr(cur, saveRsp)
cur = linkInstr(cur, saveRbp)
// Ready to exit the execution.
cur = m.storeReturnAddressAndExit(cur, execCtrPtr)
// After the exit, restore the saved registers.
cur = m.restoreRegistersInExecutionContext(cur, execCtrPtr, stackGrowSaveVRegs)
// Finally ready to return.
cur = m.revertRBPRSP(cur)
linkInstr(cur, m.allocateInstr().asRet())
m.encodeWithoutSSA(ectx.RootInstr)
return m.c.Buf()
}
// insertStackBoundsCheck inserts the instructions after `cur` to check the
// stack bounds, and if there isn't sufficient stack space for the function,
// exits the execution and tries growing the stack in the Go world.
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
// add $requiredStackSize, %rsp ;; Temporarily update the sp.
// cmp ExecutionContextOffsetStackBottomPtr(%rax), %rsp ;; Compare the stack bottom and the sp.
// ja .ok
// sub $requiredStackSize, %rsp ;; Reverse the temporary update.
// pushq r15 ;; save the temporary.
// mov $requiredStackSize, %r15
// mov %r15, ExecutionContextOffsetStackGrowRequiredSize(%rax) ;; Set the required size in the execution context.
// popq r15 ;; restore the temporary.
// callq *ExecutionContextOffsetStackGrowCallTrampolineAddress(%rax) ;; Call the Go function to grow the stack.
// jmp .cont
// .ok:
// sub $requiredStackSize, %rsp ;; Reverse the temporary update.
// .cont:
cur = m.addRSP(-int32(requiredStackSize), cur)
cur = linkInstr(cur, m.allocateInstr().asCmpRmiR(true,
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackBottomPtr.U32(), raxVReg)),
rspVReg, true))
ja := m.allocateInstr()
cur = linkInstr(cur, ja)
cur = m.addRSP(int32(requiredStackSize), cur)
// Save the temporary.
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r15VReg)))
// Load the required size to the temporary.
cur = linkInstr(cur, m.allocateInstr().asImm(r15VReg, uint64(requiredStackSize), true))
// Set the required size in the execution context.
cur = linkInstr(cur, m.allocateInstr().asMovRM(r15VReg,
newOperandMem(m.newAmodeImmReg(wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.U32(), raxVReg)), 8))
// Restore the temporary.
cur = linkInstr(cur, m.allocateInstr().asPop64(r15VReg))
// Call the Go function to grow the stack.
cur = linkInstr(cur, m.allocateInstr().asCallIndirect(newOperandMem(m.newAmodeImmReg(
wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.U32(), raxVReg)), nil))
// Jump to the continuation.
jmpToCont := m.allocateInstr()
cur = linkInstr(cur, jmpToCont)
// .ok:
okInstr, ok := m.allocateBrTarget()
cur = linkInstr(cur, okInstr)
ja.asJmpIf(condNBE, newOperandLabel(ok))
// On the ok path, we only need to reverse the temporary update.
cur = m.addRSP(int32(requiredStackSize), cur)
// .cont:
contInstr, cont := m.allocateBrTarget()
cur = linkInstr(cur, contInstr)
jmpToCont.asJmp(newOperandLabel(cont))
return cur
}

View file

@ -0,0 +1,168 @@
package amd64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
type cond byte
const (
// condO represents (overflow) condition.
condO cond = iota
// condNO represents (no overflow) condition.
condNO
// condB represents (< unsigned) condition.
condB
// condNB represents (>= unsigned) condition.
condNB
// condZ represents (zero) condition.
condZ
// condNZ represents (not-zero) condition.
condNZ
// condBE represents (<= unsigned) condition.
condBE
// condNBE represents (> unsigned) condition.
condNBE
// condS represents (negative) condition.
condS
// condNS represents (not-negative) condition.
condNS
// condP represents (parity) condition.
condP
// condNP represents (not parity) condition.
condNP
// condL represents (< signed) condition.
condL
// condNL represents (>= signed) condition.
condNL
// condLE represents (<= signed) condition.
condLE
// condNLE represents (> signed) condition.
condNLE
condInvalid
)
func (c cond) String() string {
switch c {
case condO:
return "o"
case condNO:
return "no"
case condB:
return "b"
case condNB:
return "nb"
case condZ:
return "z"
case condNZ:
return "nz"
case condBE:
return "be"
case condNBE:
return "nbe"
case condS:
return "s"
case condNS:
return "ns"
case condL:
return "l"
case condNL:
return "nl"
case condLE:
return "le"
case condNLE:
return "nle"
case condP:
return "p"
case condNP:
return "np"
default:
panic("unreachable")
}
}
func condFromSSAIntCmpCond(origin ssa.IntegerCmpCond) cond {
switch origin {
case ssa.IntegerCmpCondEqual:
return condZ
case ssa.IntegerCmpCondNotEqual:
return condNZ
case ssa.IntegerCmpCondSignedLessThan:
return condL
case ssa.IntegerCmpCondSignedGreaterThanOrEqual:
return condNL
case ssa.IntegerCmpCondSignedGreaterThan:
return condNLE
case ssa.IntegerCmpCondSignedLessThanOrEqual:
return condLE
case ssa.IntegerCmpCondUnsignedLessThan:
return condB
case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual:
return condNB
case ssa.IntegerCmpCondUnsignedGreaterThan:
return condNBE
case ssa.IntegerCmpCondUnsignedLessThanOrEqual:
return condBE
default:
panic("unreachable")
}
}
func condFromSSAFloatCmpCond(origin ssa.FloatCmpCond) cond {
switch origin {
case ssa.FloatCmpCondGreaterThanOrEqual:
return condNB
case ssa.FloatCmpCondGreaterThan:
return condNBE
case ssa.FloatCmpCondEqual, ssa.FloatCmpCondNotEqual, ssa.FloatCmpCondLessThan, ssa.FloatCmpCondLessThanOrEqual:
panic(fmt.Sprintf("cond %s must be treated as a special case", origin))
default:
panic("unreachable")
}
}
func (c cond) encoding() byte {
return byte(c)
}
func (c cond) invert() cond {
switch c {
case condO:
return condNO
case condNO:
return condO
case condB:
return condNB
case condNB:
return condB
case condZ:
return condNZ
case condNZ:
return condZ
case condBE:
return condNBE
case condNBE:
return condBE
case condS:
return condNS
case condNS:
return condS
case condP:
return condNP
case condNP:
return condP
case condL:
return condNL
case condNL:
return condL
case condLE:
return condNLE
case condNLE:
return condLE
default:
panic("unreachable")
}
}

View file

@ -0,0 +1,35 @@
package amd64
// extMode represents the mode of extension in movzx/movsx.
type extMode byte
const (
// extModeBL represents Byte -> Longword.
extModeBL extMode = iota
// extModeBQ represents Byte -> Quadword.
extModeBQ
// extModeWL represents Word -> Longword.
extModeWL
// extModeWQ represents Word -> Quadword.
extModeWQ
// extModeLQ represents Longword -> Quadword.
extModeLQ
)
// String implements fmt.Stringer.
func (e extMode) String() string {
switch e {
case extModeBL:
return "bl"
case extModeBQ:
return "bq"
case extModeWL:
return "wl"
case extModeWQ:
return "wq"
case extModeLQ:
return "lq"
default:
panic("BUG: invalid ext mode")
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,71 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
val := instr.Return()
valType := val.Type()
vr = m.c.AllocateVReg(valType)
m.insertLoadConstant(instr, vr)
return
}
// InsertLoadConstantBlockArg implements backend.Machine.
func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
m.insertLoadConstant(instr, vr)
}
func (m *machine) insertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) {
val := instr.Return()
valType := val.Type()
v := instr.ConstantVal()
bits := valType.Bits()
if bits < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
v = v & ((1 << valType.Bits()) - 1)
}
switch valType {
case ssa.TypeF32, ssa.TypeF64:
m.lowerFconst(vr, v, bits == 64)
case ssa.TypeI32, ssa.TypeI64:
m.lowerIconst(vr, v, bits == 64)
default:
panic("BUG")
}
}
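// lowerFconst materializes a floating-point constant: a zero constant is emitted with asZeros (a
// register-clearing idiom), while any other bit pattern is first loaded into a temporary general-purpose
// register and then moved into the destination XMM register, since x86 has no load-immediate form for
// XMM registers.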
func (m *machine) lowerFconst(dst regalloc.VReg, c uint64, _64 bool) {
if c == 0 {
xor := m.allocateInstr().asZeros(dst)
m.insert(xor)
} else {
var tmpType ssa.Type
if _64 {
tmpType = ssa.TypeI64
} else {
tmpType = ssa.TypeI32
}
tmpInt := m.c.AllocateVReg(tmpType)
loadToGP := m.allocateInstr().asImm(tmpInt, c, _64)
m.insert(loadToGP)
movToXmm := m.allocateInstr().asGprToXmm(sseOpcodeMovq, newOperandReg(tmpInt), dst, _64)
m.insert(movToXmm)
}
}
func (m *machine) lowerIconst(dst regalloc.VReg, c uint64, _64 bool) {
i := m.allocateInstr()
if c == 0 {
i.asZeros(dst)
} else {
i.asImm(dst, c, _64)
}
m.insert(i)
}

View file

@ -0,0 +1,187 @@
package amd64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
var addendsMatchOpcodes = [...]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst, ssa.OpcodeIshl}
type addend struct {
r regalloc.VReg
off int64
shift byte
}
func (a addend) String() string {
return fmt.Sprintf("addend{r=%s, off=%d, shift=%d}", a.r, a.off, a.shift)
}
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
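// For example, given SSA along the lines of
//
// v2 = Ishl v0, 3
// v3 = Iadd v1, v2
// Load v3, 0x10
//
// the Iadd, the constant-amount Ishl, and the offset can all be folded into a single address mode
// such as 0x10(v1, v0, 8), i.e. base + index*scale + displacement.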
func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32) (am *amode) {
def := m.c.ValueDefinition(ptr)
if offsetBase&0x80000000 != 0 {
// Special-case a huge base offset whose MSB is set. In x64, the immediate is always
// sign-extended, but our IR semantics requires the offset base to always be unsigned.
// Note that this should be extremely rare, or may never be hit in a real application,
// so we don't bother optimizing this case.
a := m.lowerAddend(def)
off64 := a.off + int64(offsetBase)
offsetBaseReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(offsetBaseReg, uint64(off64), true)
if a.r != regalloc.VRegInvalid {
return m.newAmodeRegRegShift(0, offsetBaseReg, a.r, a.shift)
} else {
return m.newAmodeImmReg(0, offsetBaseReg)
}
}
if op := m.c.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op == ssa.OpcodeIadd {
add := def.Instr
x, y := add.Arg2()
xDef, yDef := m.c.ValueDefinition(x), m.c.ValueDefinition(y)
ax := m.lowerAddend(xDef)
ay := m.lowerAddend(yDef)
add.MarkLowered()
return m.lowerAddendsToAmode(ax, ay, offsetBase)
} else {
// If it is not an Iadd, then we lower the one addend.
a := m.lowerAddend(def)
// off is always 0 if r is valid.
if a.r != regalloc.VRegInvalid {
if a.shift != 0 {
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(tmpReg, 0, true)
return m.newAmodeRegRegShift(offsetBase, tmpReg, a.r, a.shift)
}
return m.newAmodeImmReg(offsetBase, a.r)
} else {
off64 := a.off + int64(offsetBase)
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(tmpReg, uint64(off64), true)
return m.newAmodeImmReg(0, tmpReg)
}
}
}
func (m *machine) lowerAddendsToAmode(x, y addend, offBase uint32) *amode {
if x.r != regalloc.VRegInvalid && x.off != 0 || y.r != regalloc.VRegInvalid && y.off != 0 {
panic("invalid input")
}
u64 := uint64(x.off+y.off) + uint64(offBase)
if u64 != 0 {
if _, ok := asImm32(u64, false); !ok {
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(tmpReg, u64, true)
// Blank u64 as it has already been lowered.
u64 = 0
if x.r == regalloc.VRegInvalid {
x.r = tmpReg
} else if y.r == regalloc.VRegInvalid {
y.r = tmpReg
} else {
// Unreachable: at least one of x.r and y.r must be invalid here,
// and it has been overwritten with the temporary register above.
panic("BUG")
}
}
}
u32 := uint32(u64)
switch {
// We assume x.r and y.r are valid iff x.off and y.off are 0.
case x.r != regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
switch {
case x.shift != 0 && y.shift != 0:
// Cannot absorb two shifted registers, must lower one to a shift instruction.
shifted := m.allocateInstr()
shifted.asShiftR(shiftROpShiftLeft, newOperandImm32(uint32(x.shift)), x.r, true)
m.insert(shifted)
return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
case x.shift != 0 && y.shift == 0:
// Swap base and index.
x, y = y, x
fallthrough
default:
return m.newAmodeRegRegShift(u32, x.r, y.r, y.shift)
}
case x.r == regalloc.VRegInvalid && y.r != regalloc.VRegInvalid:
x, y = y, x
fallthrough
case x.r != regalloc.VRegInvalid && y.r == regalloc.VRegInvalid:
if x.shift != 0 {
zero := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(zero, 0, true)
return m.newAmodeRegRegShift(u32, zero, x.r, x.shift)
}
return m.newAmodeImmReg(u32, x.r)
default: // Both are invalid: use the offset.
tmpReg := m.c.AllocateVReg(ssa.TypeI64)
m.lowerIconst(tmpReg, u64, true)
return m.newAmodeImmReg(0, tmpReg)
}
}
func (m *machine) lowerAddend(x *backend.SSAValueDefinition) addend {
if x.IsFromBlockParam() {
return addend{x.BlkParamVReg, 0, 0}
}
// Ensure the addend is not referenced in multiple places; we will discard nested Iadds.
op := m.c.MatchInstrOneOf(x, addendsMatchOpcodes[:])
if op != ssa.OpcodeInvalid && op != ssa.OpcodeIadd {
return m.lowerAddendFromInstr(x.Instr)
}
p := m.getOperand_Reg(x)
return addend{p.reg(), 0, 0}
}
// lowerAddendFromInstr takes an instruction and returns a VReg and an offset that can be used in an address mode.
// The VReg is regalloc.VRegInvalid if the addend cannot be lowered to a register.
// The offset is 0 if the addend can be lowered to a register.
func (m *machine) lowerAddendFromInstr(instr *ssa.Instruction) addend {
instr.MarkLowered()
switch op := instr.Opcode(); op {
case ssa.OpcodeIconst:
u64 := instr.ConstantVal()
if instr.Return().Type().Bits() == 32 {
return addend{regalloc.VRegInvalid, int64(int32(u64)), 0} // sign-extend.
} else {
return addend{regalloc.VRegInvalid, int64(u64), 0}
}
case ssa.OpcodeUExtend, ssa.OpcodeSExtend:
input := instr.Arg()
inputDef := m.c.ValueDefinition(input)
if input.Type().Bits() != 32 {
panic("BUG: invalid input type " + input.Type().String())
}
constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant()
switch {
case constInst && op == ssa.OpcodeSExtend:
return addend{regalloc.VRegInvalid, int64(uint32(inputDef.Instr.ConstantVal())), 0}
case constInst && op == ssa.OpcodeUExtend:
return addend{regalloc.VRegInvalid, int64(int32(inputDef.Instr.ConstantVal())), 0} // sign-extend!
default:
r := m.getOperand_Reg(inputDef)
return addend{r.reg(), 0, 0}
}
case ssa.OpcodeIshl:
// If the addend is a shift, we can only handle it if the shift amount is a constant.
x, amount := instr.Arg2()
amountDef := m.c.ValueDefinition(amount)
if amountDef.IsFromInstr() && amountDef.Instr.Constant() && amountDef.Instr.ConstantVal() <= 3 {
r := m.getOperand_Reg(m.c.ValueDefinition(x))
return addend{r.reg(), 0, uint8(amountDef.Instr.ConstantVal())}
}
r := m.getOperand_Reg(m.c.ValueDefinition(x))
return addend{r.reg(), 0, 0}
}
panic("BUG: invalid opcode")
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,304 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
)
// PostRegAlloc implements backend.Machine.
func (m *machine) PostRegAlloc() {
m.setupPrologue()
m.postRegAlloc()
}
func (m *machine) setupPrologue() {
cur := m.ectx.RootInstr
prevInitInst := cur.next
// At this point, we have the stack layout as follows:
//
// (high address)
// +-----------------+ <----- RBP (somewhere in the middle of the stack)
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | Return Addr |
// RSP ----> +-----------------+
// (low address)
// First, we push the RBP, and update the RBP to the current RSP.
//
// (high address) (high address)
// RBP ----> +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | ====> | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | Return Addr | | Return Addr |
// RSP ----> +-----------------+ | Caller_RBP |
// (low address) +-----------------+ <----- RSP, RBP
//
cur = m.setupRBPRSP(cur)
if !m.stackBoundsCheckDisabled {
cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
}
//
// (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | xxxxx | | xxxxx |
// | Return Addr | | Return Addr |
// | Caller_RBP | ====> | Caller_RBP |
// RBP,RSP->+-----------------+ +-----------------+ <----- RBP
// (low address) | clobbered M |
// | clobbered 1 |
// | ........... |
// | clobbered 0 |
// +-----------------+ <----- RSP
//
if regs := m.clobberedRegs; len(regs) > 0 {
for i := range regs {
r := regs[len(regs)-1-i] // Reverse order.
if r.RegType() == regalloc.RegTypeInt {
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(r)))
} else {
// Pushing an XMM register is not supported by the PUSH instruction.
cur = m.addRSP(-16, cur)
push := m.allocateInstr().asXmmMovRM(
sseOpcodeMovdqu, r, newOperandMem(m.newAmodeImmReg(0, rspVReg)),
)
cur = linkInstr(cur, push)
}
}
}
if size := m.spillSlotSize; size > 0 {
// Simply decrease the RSP to allocate the spill slots.
// sub $size, %rsp
cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(uint32(size)), rspVReg, true))
// At this point, we have the stack layout as follows:
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <--- RBP
// | clobbered M |
// | ............ |
// | clobbered 1 |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 0 |
// +-----------------+ <--- RSP
// (low address)
}
linkInstr(cur, prevInitInst)
}
// postRegAlloc does multiple things while walking through the instructions:
// 1. Inserts the epilogue code.
// 2. Removes redundant copy instructions.
// 3. Inserts the dec/inc RSP instructions right before/after call instructions.
// 4. Performs the lowering that is supposed to be done after regalloc.
func (m *machine) postRegAlloc() {
ectx := m.ectx
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
switch k := cur.kind; k {
case ret:
m.setupEpilogueAfter(cur.prev)
continue
case fcvtToSintSequence, fcvtToUintSequence:
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
if k == fcvtToSintSequence {
m.lowerFcvtToSintSequenceAfterRegalloc(cur)
} else {
m.lowerFcvtToUintSequenceAfterRegalloc(cur)
}
prev := cur.prev
next := cur.next
cur := prev
for _, instr := range m.ectx.PendingInstructions {
cur = linkInstr(cur, instr)
}
linkInstr(cur, next)
continue
case xmmCMov:
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
m.lowerXmmCmovAfterRegAlloc(cur)
prev := cur.prev
next := cur.next
cur := prev
for _, instr := range m.ectx.PendingInstructions {
cur = linkInstr(cur, instr)
}
linkInstr(cur, next)
continue
case idivRemSequence:
m.ectx.PendingInstructions = m.ectx.PendingInstructions[:0]
m.lowerIDivRemSequenceAfterRegAlloc(cur)
prev := cur.prev
next := cur.next
cur := prev
for _, instr := range m.ectx.PendingInstructions {
cur = linkInstr(cur, instr)
}
linkInstr(cur, next)
continue
case call, callIndirect:
// At this point, reg alloc is done, therefore we can safely insert the dec/inc RSP instructions
// right before/after the call instruction. If this were done before reg alloc, the stack slot
// could point to the wrong location and therefore result in a wrong value.
call := cur
next := call.next
_, _, _, _, size := backend.ABIInfoFromUint64(call.u2)
if size > 0 {
dec := m.allocateInstr().asAluRmiR(aluRmiROpcodeSub, newOperandImm32(size), rspVReg, true)
linkInstr(call.prev, dec)
linkInstr(dec, call)
inc := m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(size), rspVReg, true)
linkInstr(call, inc)
linkInstr(inc, next)
}
continue
}
// Removes the redundant copy instruction.
if cur.IsCopy() && cur.op1.reg().RealReg() == cur.op2.reg().RealReg() {
prev, next := cur.prev, cur.next
// Remove the copy instruction.
prev.next = next
if next != nil {
next.prev = prev
}
}
}
}
func (m *machine) setupEpilogueAfter(cur *instruction) {
prevNext := cur.next
// At this point, we have the stack layout as follows:
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <--- RBP
// | clobbered M |
// | ............ |
// | clobbered 1 |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 0 |
// +-----------------+ <--- RSP
// (low address)
if size := m.spillSlotSize; size > 0 {
// Simply increase the RSP to free the spill slots.
// add $size, %rsp
cur = linkInstr(cur, m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(uint32(size)), rspVReg, true))
}
//
// (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | ReturnAddress | | ReturnAddress |
// | Caller_RBP | | Caller_RBP |
// RBP ---> +-----------------+ ========> +-----------------+ <---- RSP, RBP
// | clobbered M |
// | ............ |
// | clobbered 1 |
// | clobbered 0 |
// RSP ---> +-----------------+
// (low address)
//
if regs := m.clobberedRegs; len(regs) > 0 {
for _, r := range regs {
if r.RegType() == regalloc.RegTypeInt {
cur = linkInstr(cur, m.allocateInstr().asPop64(r))
} else {
// Popping an XMM register is not supported by the POP instruction.
pop := m.allocateInstr().asXmmUnaryRmR(
sseOpcodeMovdqu, newOperandMem(m.newAmodeImmReg(0, rspVReg)), r,
)
cur = linkInstr(cur, pop)
cur = m.addRSP(16, cur)
}
}
}
// Now roll back the RSP to RBP, and pop the caller's RBP.
cur = m.revertRBPRSP(cur)
linkInstr(cur, prevNext)
}
func (m *machine) addRSP(offset int32, cur *instruction) *instruction {
if offset == 0 {
return cur
}
opcode := aluRmiROpcodeAdd
if offset < 0 {
opcode = aluRmiROpcodeSub
offset = -offset
}
return linkInstr(cur, m.allocateInstr().asAluRmiR(opcode, newOperandImm32(uint32(offset)), rspVReg, true))
}
func (m *machine) setupRBPRSP(cur *instruction) *instruction {
cur = linkInstr(cur, m.allocateInstr().asPush64(newOperandReg(rbpVReg)))
cur = linkInstr(cur, m.allocateInstr().asMovRR(rspVReg, rbpVReg, true))
return cur
}
func (m *machine) revertRBPRSP(cur *instruction) *instruction {
cur = linkInstr(cur, m.allocateInstr().asMovRR(rbpVReg, rspVReg, true))
cur = linkInstr(cur, m.allocateInstr().asPop64(rbpVReg))
return cur
}

View file

@ -0,0 +1,153 @@
package amd64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// InsertMoveBefore implements backend.RegAllocFunctionMachine.
func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
typ := src.RegType()
if typ != dst.RegType() {
panic("BUG: src and dst must have the same type")
}
mov := m.allocateInstr()
if typ == regalloc.RegTypeInt {
mov.asMovRR(src, dst, true)
} else {
mov.asXmmUnaryRmR(sseOpcodeMovdqu, newOperandReg(src), dst)
}
cur := instr.prev
prevNext := cur.next
cur = linkInstr(cur, mov)
linkInstr(cur, prevNext)
}
// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine.
func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
if !v.IsRealReg() {
panic("BUG: VReg must be backed by real reg to be stored")
}
typ := m.c.TypeOf(v)
var prevNext, cur *instruction
if after {
cur, prevNext = instr, instr.next
} else {
cur, prevNext = instr.prev, instr
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
store := m.allocateInstr()
mem := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
switch typ {
case ssa.TypeI32:
store.asMovRM(v, mem, 4)
case ssa.TypeI64:
store.asMovRM(v, mem, 8)
case ssa.TypeF32:
store.asXmmMovRM(sseOpcodeMovss, v, mem)
case ssa.TypeF64:
store.asXmmMovRM(sseOpcodeMovsd, v, mem)
case ssa.TypeV128:
store.asXmmMovRM(sseOpcodeMovdqu, v, mem)
}
cur = linkInstr(cur, store)
return linkInstr(cur, prevNext)
}
// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine.
func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
if !v.IsRealReg() {
panic("BUG: VReg must be backed by real reg to be stored")
}
typ := m.c.TypeOf(v)
var prevNext, cur *instruction
if after {
cur, prevNext = instr, instr.next
} else {
cur, prevNext = instr.prev, instr
}
// Load the value to the temporary.
load := m.allocateInstr()
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
a := newOperandMem(m.newAmodeImmReg(uint32(offsetFromSP), rspVReg))
switch typ {
case ssa.TypeI32:
load.asMovzxRmR(extModeLQ, a, v)
case ssa.TypeI64:
load.asMov64MR(a, v)
case ssa.TypeF32:
load.asXmmUnaryRmR(sseOpcodeMovss, a, v)
case ssa.TypeF64:
load.asXmmUnaryRmR(sseOpcodeMovsd, a, v)
case ssa.TypeV128:
load.asXmmUnaryRmR(sseOpcodeMovdqu, a, v)
default:
panic("BUG")
}
cur = linkInstr(cur, load)
return linkInstr(cur, prevNext)
}
// ClobberedRegisters implements backend.RegAllocFunctionMachine.
func (m *machine) ClobberedRegisters(regs []regalloc.VReg) {
m.clobberedRegs = append(m.clobberedRegs[:0], regs...)
}
// Swap implements backend.RegAllocFunctionMachine.
func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {
if x1.RegType() == regalloc.RegTypeInt {
prevNext := cur.next
xc := m.allocateInstr().asXCHG(x1, newOperandReg(x2), 8)
cur = linkInstr(cur, xc)
linkInstr(cur, prevNext)
} else {
if tmp.Valid() {
prevNext := cur.next
m.InsertMoveBefore(tmp, x1, prevNext)
m.InsertMoveBefore(x1, x2, prevNext)
m.InsertMoveBefore(x2, tmp, prevNext)
} else {
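// Without a free temporary, the swap goes through x1's spill slot: x1 is stored to the stack,
// x2 is moved into x1's register, and the saved value is then reloaded into x2's register (r2).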
prevNext := cur.next
r2 := x2.RealReg()
// Temporarily spill x1 to stack.
cur = m.InsertStoreRegisterAt(x1, cur, true).prev
// Then move x2 to x1.
cur = linkInstr(cur, m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqa, newOperandReg(x2), x1))
linkInstr(cur, prevNext)
// Then reload the original value of x1 from the stack into r2.
m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true)
}
}
}
// LastInstrForInsertion implements backend.RegAllocFunctionMachine.
func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {
cur := end
for cur.kind == nop0 {
cur = cur.prev
if cur == begin {
return end
}
}
switch cur.kind {
case jmp:
return cur
default:
return end
}
}
// SSABlockLabel implements backend.RegAllocFunctionMachine.
func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label {
return m.ectx.SsaBlockIDToLabels[id]
}

View file

@ -0,0 +1,992 @@
package amd64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
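// swizzleMask supports lowerSwizzle below: adding 0x70 to every index byte with unsigned saturation
// (PADDUSB) keeps in-range indices (0..15) intact in their low 4 bits, while any out-of-range index ends
// up with its most significant bit set, which makes the following PSHUFB zero that lane, matching
// WebAssembly's swizzle semantics.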
var swizzleMask = [16]byte{
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
}
func (m *machine) lowerSwizzle(x, y ssa.Value, ret ssa.Value) {
masklabel := m.getOrAllocateConstLabel(&m.constSwizzleMaskConstIndex, swizzleMask[:])
// Load mask to maskReg.
maskReg := m.c.AllocateVReg(ssa.TypeV128)
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(masklabel)), maskReg)
m.insert(loadMask)
// Copy x and y to tmp registers.
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
tmpDst := m.copyToTmp(xx.reg())
yy := m.getOperand_Reg(m.c.ValueDefinition(y))
tmpX := m.copyToTmp(yy.reg())
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddusb, newOperandReg(maskReg), tmpX))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpX), tmpDst))
// Copy the result to the destination register.
m.copyTo(tmpDst, m.c.VRegOf(ret))
}
func (m *machine) lowerInsertLane(x, y ssa.Value, index byte, ret ssa.Value, lane ssa.VecLane) {
// Copy x to tmp.
tmpDst := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, m.getOperand_Mem_Reg(m.c.ValueDefinition(x)), tmpDst))
yy := m.getOperand_Reg(m.c.ValueDefinition(y))
switch lane {
case ssa.VecLaneI8x16:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, index, yy, tmpDst))
case ssa.VecLaneI16x8:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, index, yy, tmpDst))
case ssa.VecLaneI32x4:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, index, yy, tmpDst))
case ssa.VecLaneI64x2:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, index, yy, tmpDst))
case ssa.VecLaneF32x4:
// In the INSERTPS instruction, the destination index is encoded in bits 4 and 5 of the immediate argument.
// See https://www.felixcloutier.com/x86/insertps
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeInsertps, index<<4, yy, tmpDst))
case ssa.VecLaneF64x2:
if index == 0 {
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, yy, tmpDst))
} else {
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMovlhps, yy, tmpDst))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.copyTo(tmpDst, m.c.VRegOf(ret))
}
func (m *machine) lowerExtractLane(x ssa.Value, index byte, signed bool, ret ssa.Value, lane ssa.VecLane) {
// Pextr variants are used to extract a lane from a vector register.
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
tmpDst := m.c.AllocateVReg(ret.Type())
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
switch lane {
case ssa.VecLaneI8x16:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrb, index, xx, tmpDst))
if signed {
m.insert(m.allocateInstr().asMovsxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
} else {
m.insert(m.allocateInstr().asMovzxRmR(extModeBL, newOperandReg(tmpDst), tmpDst))
}
case ssa.VecLaneI16x8:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrw, index, xx, tmpDst))
if signed {
m.insert(m.allocateInstr().asMovsxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
} else {
m.insert(m.allocateInstr().asMovzxRmR(extModeWL, newOperandReg(tmpDst), tmpDst))
}
case ssa.VecLaneI32x4:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrd, index, xx, tmpDst))
case ssa.VecLaneI64x2:
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, index, xx, tmpDst))
case ssa.VecLaneF32x4:
if index == 0 {
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovss, xx, tmpDst))
} else {
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, index, xx, tmpDst))
}
case ssa.VecLaneF64x2:
if index == 0 {
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovsd, xx, tmpDst))
} else {
m.copyTo(xx.reg(), tmpDst)
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0b00_00_11_10, newOperandReg(tmpDst), tmpDst))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.copyTo(tmpDst, m.c.VRegOf(ret))
}
var sqmulRoundSat = [16]byte{
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
}
func (m *machine) lowerSqmulRoundSat(x, y, ret ssa.Value) {
// See https://github.com/WebAssembly/simd/pull/365 for the following logic.
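// Roughly: PMULHRSW computes the Q15 rounding multiply, and the only result that needs saturation is
// 0x8000, which it produces exactly when both inputs are 0x8000 (i.e. -1.0 * -1.0 in Q15); lanes of the
// result that compare equal to the 0x8000 mask are then flipped to the saturated 0x7fff by the XOR.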
maskLabel := m.getOrAllocateConstLabel(&m.constSqmulRoundSatIndex, sqmulRoundSat[:])
tmp := m.c.AllocateVReg(ssa.TypeV128)
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp)
m.insert(loadMask)
xx, yy := m.getOperand_Reg(m.c.ValueDefinition(x)), m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
tmpX := m.copyToTmp(xx.reg())
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmulhrsw, yy, tmpX))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmpX), tmp))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmpX))
m.copyTo(tmpX, m.c.VRegOf(ret))
}
func (m *machine) lowerVUshr(x, y, ret ssa.Value, lane ssa.VecLane) {
switch lane {
case ssa.VecLaneI8x16:
m.lowerVUshri8x16(x, y, ret)
case ssa.VecLaneI16x8, ssa.VecLaneI32x4, ssa.VecLaneI64x2:
m.lowerShr(x, y, ret, lane, false)
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
}
// i8x16LogicalSHRMaskTable is necessary for emulating non-existent packed byte logical right shifts on amd64.
// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
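// For example, to shift every byte right by 3, PSRLW shifts each 16-bit word right by 3, which leaks the
// low 3 bits of the upper byte into the top 3 bits of the lower byte of that word; ANDing with the
// "for 3 shift" row below (0x1f in every byte) clears those leaked bits, leaving the correct 8-bit
// logical shift in every lane.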
var i8x16LogicalSHRMaskTable = [8 * 16]byte{ // (the number of possible shift amounts 0, 1, ..., 7) * 16 bytes.
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // for 1 shift
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, // for 2 shift
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, // for 3 shift
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, // for 4 shift
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // for 5 shift
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, // for 6 shift
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // for 7 shift
}
func (m *machine) lowerVUshri8x16(x, y, ret ssa.Value) {
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
// Load the modulo 8 mask to tmpReg.
m.lowerIconst(tmpGpReg, 0x7, false)
// Take the modulo 8 of the shift amount.
shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, tmpGpReg, false))
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
vecTmp := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), vecTmp, false))
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrlw, newOperandReg(vecTmp), xx))
maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16LogicalSHRMaskTableIndex, i8x16LogicalSHRMaskTable[:])
base := m.c.AllocateVReg(ssa.TypeI64)
lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
m.insert(lea)
// Shift tmpGpReg by 4 to multiply the shift amount by 16.
m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), vecTmp)
m.insert(loadMask)
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(vecTmp), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
func (m *machine) lowerVSshr(x, y, ret ssa.Value, lane ssa.VecLane) {
switch lane {
case ssa.VecLaneI8x16:
m.lowerVSshri8x16(x, y, ret)
case ssa.VecLaneI16x8, ssa.VecLaneI32x4:
m.lowerShr(x, y, ret, lane, true)
case ssa.VecLaneI64x2:
m.lowerVSshri64x2(x, y, ret)
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
}
func (m *machine) lowerVSshri8x16(x, y, ret ssa.Value) {
shiftAmtReg := m.c.AllocateVReg(ssa.TypeI32)
// Load the modulo 8 mask to tmpReg.
m.lowerIconst(shiftAmtReg, 0x7, false)
// Take the modulo 8 of the shift amount.
shiftAmt := m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y))
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd, shiftAmt, shiftAmtReg, false))
// Copy the x value to two temporary registers.
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
vecTmp := m.c.AllocateVReg(ssa.TypeV128)
m.copyTo(xx, vecTmp)
// Assuming that we have
// xx = [b1, ..., b16]
// vecTmp = [b1, ..., b16]
// at this point, then we use PUNPCKLBW and PUNPCKHBW to produce:
// xx = [b1, b1, b2, b2, ..., b8, b8]
// vecTmp = [b9, b9, b10, b10, ..., b16, b16]
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpcklbw, newOperandReg(xx), xx))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePunpckhbw, newOperandReg(vecTmp), vecTmp))
// Add 8 to the shift amount, and then move the amount to vecTmp2.
vecTmp2 := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAdd, newOperandImm32(8), shiftAmtReg, false))
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(shiftAmtReg), vecTmp2, false))
// Perform the packed word arithmetic right shifts on xx and vecTmp.
// This changes these two registers as:
// xx = [xxx, b1 >> s, xxx, b2 >> s, ..., xxx, b8 >> s]
// vecTmp = [xxx, b9 >> s, xxx, b10 >> s, ..., xxx, b16 >> s]
// where xxx is 1 or 0 depending on each byte's sign, and ">>" is the arithmetic shift on a byte.
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), xx))
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsraw, newOperandReg(vecTmp2), vecTmp))
// Finally, we can get the result by packing these two word vectors.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePacksswb, newOperandReg(vecTmp), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
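// There is no packed 64-bit arithmetic right shift in SSE (PSRAQ only exists with AVX-512), so
// lowerVSshri64x2 extracts each of the two lanes into a general-purpose register with PEXTRQ, shifts it
// with SAR (the amount living in CL), and writes it back with PINSRQ.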
func (m *machine) lowerVSshri64x2(x, y, ret ssa.Value) {
// Load the shift amount to RCX.
shiftAmt := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, shiftAmt, rcxVReg))
tmpGp := m.c.AllocateVReg(ssa.TypeI64)
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xxReg := m.copyToTmp(_xx.reg())
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 0, newOperandReg(xxReg), tmpGp))
m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), xxReg))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePextrq, 1, newOperandReg(xxReg), tmpGp))
m.insert(m.allocateInstr().asShiftR(shiftROpShiftRightArithmetic, newOperandReg(rcxVReg), tmpGp, true))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), xxReg))
m.copyTo(xxReg, m.c.VRegOf(ret))
}
func (m *machine) lowerShr(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
var modulo uint64
var shiftOp sseOpcode
switch lane {
case ssa.VecLaneI16x8:
modulo = 0xf
if signed {
shiftOp = sseOpcodePsraw
} else {
shiftOp = sseOpcodePsrlw
}
case ssa.VecLaneI32x4:
modulo = 0x1f
if signed {
shiftOp = sseOpcodePsrad
} else {
shiftOp = sseOpcodePsrld
}
case ssa.VecLaneI64x2:
modulo = 0x3f
if signed {
panic("BUG")
}
shiftOp = sseOpcodePsrlq
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
// Load the lane-width modulo mask to tmpGpReg.
m.lowerIconst(tmpGpReg, modulo, false)
// Take the shift amount modulo the lane width.
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
// And move it to a xmm register.
tmpVec := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
// Then do the actual shift.
m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
func (m *machine) lowerVIshl(x, y, ret ssa.Value, lane ssa.VecLane) {
var modulo uint64
var shiftOp sseOpcode
var isI8x16 bool
switch lane {
case ssa.VecLaneI8x16:
isI8x16 = true
modulo = 0x7
shiftOp = sseOpcodePsllw
case ssa.VecLaneI16x8:
modulo = 0xf
shiftOp = sseOpcodePsllw
case ssa.VecLaneI32x4:
modulo = 0x1f
shiftOp = sseOpcodePslld
case ssa.VecLaneI64x2:
modulo = 0x3f
shiftOp = sseOpcodePsllq
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
tmpGpReg := m.c.AllocateVReg(ssa.TypeI32)
// Load the lane-width modulo mask to tmpGpReg.
m.lowerIconst(tmpGpReg, modulo, false)
// Take the shift amount modulo the lane width.
m.insert(m.allocateInstr().asAluRmiR(aluRmiROpcodeAnd,
m.getOperand_Mem_Imm32_Reg(m.c.ValueDefinition(y)), tmpGpReg, false))
// And move it to a xmm register.
tmpVec := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asGprToXmm(sseOpcodeMovd, newOperandReg(tmpGpReg), tmpVec, false))
// Then do the actual shift.
m.insert(m.allocateInstr().asXmmRmiReg(shiftOp, newOperandReg(tmpVec), xx))
if isI8x16 {
maskTableLabel := m.getOrAllocateConstLabel(&m.constI8x16SHLMaskTableIndex, i8x16SHLMaskTable[:])
base := m.c.AllocateVReg(ssa.TypeI64)
lea := m.allocateInstr().asLEA(newOperandLabel(maskTableLabel), base)
m.insert(lea)
// Shift tmpGpReg by 4 to multiply the shift amount by 16.
m.insert(m.allocateInstr().asShiftR(shiftROpShiftLeft, newOperandImm32(4), tmpGpReg, false))
mem := m.newAmodeRegRegShift(0, base, tmpGpReg, 0)
loadMask := m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(mem), tmpVec)
m.insert(loadMask)
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePand, newOperandReg(tmpVec), xx))
}
m.copyTo(xx, m.c.VRegOf(ret))
}
// i8x16SHLMaskTable is necessary for emulating non-existent packed byte left shifts on amd64.
// The mask is applied after performing packed word shifts on the value to clear out the unnecessary bits.
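// For example, for a left shift by 3, PSLLW leaks the top 3 bits of each lower byte into the low 3 bits
// of the byte above it; ANDing with the "for 3 shift" row below (0xf8 in every byte) clears those leaked
// bits.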
var i8x16SHLMaskTable = [8 * 16]byte{ // (the number of possible shift amounts 0, 1, ..., 7) * 16 bytes.
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // for 0 shift
0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, // for 1 shift
0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, // for 2 shift
0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, // for 3 shift
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // for 4 shift
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, // for 5 shift
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, // for 6 shift
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // for 7 shift
}
func (m *machine) lowerVRound(x, ret ssa.Value, imm byte, _64 bool) {
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
var round sseOpcode
if _64 {
round = sseOpcodeRoundpd
} else {
round = sseOpcodeRoundps
}
m.insert(m.allocateInstr().asXmmUnaryRmRImm(round, imm, xx, m.c.VRegOf(ret)))
}
var (
allOnesI8x16 = [16]byte{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}
allOnesI16x8 = [16]byte{0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0}
extAddPairwiseI16x8uMask1 = [16]byte{0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80}
extAddPairwiseI16x8uMask2 = [16]byte{0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00}
)
func (m *machine) lowerExtIaddPairwise(x, ret ssa.Value, srcLane ssa.VecLane, signed bool) {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
switch srcLane {
case ssa.VecLaneI8x16:
allOneReg := m.c.AllocateVReg(ssa.TypeV128)
mask := m.getOrAllocateConstLabel(&m.constAllOnesI8x16Index, allOnesI8x16[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOneReg))
var resultReg regalloc.VReg
if signed {
resultReg = allOneReg
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(xx), resultReg))
} else {
// Interpreting tmp (all ones) as the signed operand of PMADDUBSW means xx's bytes are treated as unsigned, so the multiply-add is effectively an unsigned pairwise add.
resultReg = xx
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddubsw, newOperandReg(allOneReg), resultReg))
}
m.copyTo(resultReg, m.c.VRegOf(ret))
case ssa.VecLaneI16x8:
if signed {
allOnesReg := m.c.AllocateVReg(ssa.TypeV128)
mask := m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), allOnesReg))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(allOnesReg), xx))
m.copyTo(xx, m.c.VRegOf(ret))
} else {
maskReg := m.c.AllocateVReg(ssa.TypeV128)
mask := m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask1Index, extAddPairwiseI16x8uMask1[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
// Flip the sign bits on xx (XOR with 0x8000 in every 16-bit lane).
//
// Assuming that xx = [w1, ..., w8], now we have,
// xx[i] = int16(wi - 0x8000) for i = 1...8
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(maskReg), xx))
mask = m.getOrAllocateConstLabel(&m.constAllOnesI16x8Index, allOnesI16x8[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
// For each resulting i32x4 lane, now we have
// xx[i] = int32(wn - 0x8000) + int32(w(n+1) - 0x8000) = int32(wn + w(n+1)) - 0x10000
// c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr)
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, newOperandReg(maskReg), xx))
mask = m.getOrAllocateConstLabel(&m.constExtAddPairwiseI16x8uMask2Index, extAddPairwiseI16x8uMask2[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(mask)), maskReg))
// xx[i] = int32(wn + w(n+1)) - 0x10000 + 0x10000 = int32(wn + w(n+1)) = uint32(wn + w(n+1)).
// c.assembler.CompileRegisterToRegister(amd64.PADDD, tmp, vr)
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(maskReg), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", srcLane))
}
}
func (m *machine) lowerWidenLow(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
var sseOp sseOpcode
switch lane {
case ssa.VecLaneI8x16:
if signed {
sseOp = sseOpcodePmovsxbw
} else {
sseOp = sseOpcodePmovzxbw
}
case ssa.VecLaneI16x8:
if signed {
sseOp = sseOpcodePmovsxwd
} else {
sseOp = sseOpcodePmovzxwd
}
case ssa.VecLaneI32x4:
if signed {
sseOp = sseOpcodePmovsxdq
} else {
sseOp = sseOpcodePmovzxdq
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, xx, m.c.VRegOf(ret)))
}
func (m *machine) lowerWidenHigh(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
tmp := m.c.AllocateVReg(ssa.TypeV128)
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
m.copyTo(xx.reg(), tmp)
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePalignr, 8, newOperandReg(tmp), tmp))
var sseOp sseOpcode
switch lane {
case ssa.VecLaneI8x16:
if signed {
sseOp = sseOpcodePmovsxbw
} else {
sseOp = sseOpcodePmovzxbw
}
case ssa.VecLaneI16x8:
if signed {
sseOp = sseOpcodePmovsxwd
} else {
sseOp = sseOpcodePmovzxwd
}
case ssa.VecLaneI32x4:
if signed {
sseOp = sseOpcodePmovsxdq
} else {
sseOp = sseOpcodePmovzxdq
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOp, newOperandReg(tmp), m.c.VRegOf(ret)))
}
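// lowerLoadSplat loads a scalar of the given lane width and broadcasts it to every lane of the result:
// the scalar is loaded into a temporary GPR, inserted into lane 0 with the matching PINSR variant, and
// then broadcast via PSHUFB with an all-zero shuffle mask (8-bit lanes), PSHUFD (16/32-bit lanes, after
// duplicating the 16-bit value into both halves of the first doubleword), or a second PINSRQ (64-bit lanes).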
func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, ret ssa.Value, lane ssa.VecLane) {
tmpDst, tmpGp := m.c.AllocateVReg(ssa.TypeV128), m.c.AllocateVReg(ssa.TypeI64)
am := newOperandMem(m.lowerToAddressMode(ptr, offset))
m.insert(m.allocateInstr().asDefineUninitializedReg(tmpDst))
switch lane {
case ssa.VecLaneI8x16:
m.insert(m.allocateInstr().asMovzxRmR(extModeBQ, am, tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrb, 0, newOperandReg(tmpGp), tmpDst))
tmpZeroVec := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asZeros(tmpZeroVec))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePshufb, newOperandReg(tmpZeroVec), tmpDst))
case ssa.VecLaneI16x8:
m.insert(m.allocateInstr().asMovzxRmR(extModeWQ, am, tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 0, newOperandReg(tmpGp), tmpDst))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrw, 1, newOperandReg(tmpGp), tmpDst))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
case ssa.VecLaneI32x4:
m.insert(m.allocateInstr().asMovzxRmR(extModeLQ, am, tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrd, 0, newOperandReg(tmpGp), tmpDst))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePshufd, 0, newOperandReg(tmpDst), tmpDst))
case ssa.VecLaneI64x2:
m.insert(m.allocateInstr().asMov64MR(am, tmpGp))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 0, newOperandReg(tmpGp), tmpDst))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodePinsrq, 1, newOperandReg(tmpGp), tmpDst))
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.copyTo(tmpDst, m.c.VRegOf(ret))
}
var f64x2CvtFromIMask = [16]byte{
0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}
func (m *machine) lowerVFcvtFromInt(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
switch lane {
case ssa.VecLaneF32x4:
if signed {
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, xx, m.c.VRegOf(ret)))
} else {
xx := m.getOperand_Reg(m.c.ValueDefinition(x))
// Copy the value to two temporary registers.
tmp := m.copyToTmp(xx.reg())
tmp2 := m.copyToTmp(xx.reg())
// Clear the higher 16 bits of each 32-bit element.
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePslld, newOperandImm32(0xa), tmp))
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0xa), tmp))
// Subtract tmp (the lower bits) from tmp2: this clears the lower bits of tmp2, leaving only the higher bits.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubd, newOperandReg(tmp), tmp2))
// Convert the lower 16-bits in tmp.
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
// Logically right shift tmp2 by one (i.e. halve it) and convert it, so tmp2 holds half of the conversion result of the higher bits.
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(1), tmp2))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp2), tmp2))
// Double the converted value to undo the halving.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp2), tmp2))
// Get the conversion result by adding tmp (holding the conversion of the lower bits) into tmp2.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddps, newOperandReg(tmp), tmp2))
m.copyTo(tmp2, m.c.VRegOf(ret))
}
case ssa.VecLaneF64x2:
if signed {
xx := m.getOperand_Mem_Reg(m.c.ValueDefinition(x))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2pd, xx, m.c.VRegOf(ret)))
} else {
maskReg := m.c.AllocateVReg(ssa.TypeV128)
maskLabel := m.getOrAllocateConstLabel(&m.constF64x2CvtFromIMaskIndex, f64x2CvtFromIMask[:])
// maskReg = [0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
// Given that we have xx = [d1, d2, d3, d4], this results in
// xx = [d1, [0x00, 0x00, 0x30, 0x43], d2, [0x00, 0x00, 0x30, 0x43]]
// = [float64(uint32(d1)) + 0x1.0p52, float64(uint32(d2)) + 0x1.0p52]
// ^See https://stackoverflow.com/questions/13269523/can-all-32-bit-ints-be-exactly-represented-as-a-double
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeUnpcklps, newOperandReg(maskReg), xx))
// maskReg = [float64(0x1.0p52), float64(0x1.0p52)]
maskLabel = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), maskReg))
// Now, we get the result as
// xx = [float64(uint32(d1)), float64(uint32(d2))]
// because the following equality always satisfies:
// float64(0x1.0p52 + float64(uint32(x))) - float64(0x1.0p52 + float64(uint32(y))) = float64(uint32(x)) - float64(uint32(y))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubpd, newOperandReg(maskReg), xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
}
var (
// i32sMaxOnF64x2 holds math.MaxInt32(=2147483647.0) on two f64 lanes.
i32sMaxOnF64x2 = [16]byte{
0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xdf, 0x41, // float64(2147483647.0)
}
// i32uMaxOnF64x2 holds math.MaxUint32(=4294967295.0) on two f64 lanes.
i32uMaxOnF64x2 = [16]byte{
0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xef, 0x41, // float64(4294967295.0)
}
// twop52 holds two float64(0x1.0p52) values on two f64 lanes. 0x1.0p52 is special in the sense that,
// with this exponent, the mantissa can hold a corresponding uint32 number verbatim: adding a uint32
// value to it (or subtracting one from it) yields a floating point whose mantissa holds exactly that
// 32-bit integer's bit representation.
//
// Note: the name twop52 is common across various compiler ecosystem.
// E.g. https://github.com/llvm/llvm-project/blob/92ab024f81e5b64e258b7c3baaf213c7c26fcf40/compiler-rt/lib/builtins/floatdidf.c#L28
// E.g. https://opensource.apple.com/source/clang/clang-425.0.24/src/projects/compiler-rt/lib/floatdidf.c.auto.html
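// Concretely, the bit pattern of float64(0x1.0p52) is 0x4330000000000000, and for any x in [0, 1<<32)
// the bit pattern of float64(0x1.0p52) + float64(x) is 0x4330000000000000 | x: x is stored verbatim in
// the low 32 bits of the mantissa, and subtracting 0x1.0p52 afterwards recovers float64(uint32(x)) exactly.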
twop52 = [16]byte{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, // float64(0x1.0p52)
}
)
func (m *machine) lowerVFcvtToIntSat(x, ret ssa.Value, lane ssa.VecLane, signed bool) {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
switch lane {
case ssa.VecLaneF32x4:
if signed {
tmp := m.copyToTmp(xx)
// Assuming we have xx = [v1, v2, v3, v4].
//
// Set all bits if lane is not NaN on tmp.
// tmp[i] = 0xffffffff if vi != NaN
// = 0 if vi == NaN
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
// Clear NaN lanes on xx, meaning that
// xx[i] = vi if vi != NaN
// 0 if vi == NaN
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp), xx))
// tmp[i] = ^vi if vi != NaN
// = 0xffffffff if vi == NaN
// which means that tmp[i] & 0x80000000 != 0 if and only if vi is negative.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeXorps, newOperandReg(xx), tmp))
// xx[i] = int32(vi) if vi != NaN and xx is not overflowing.
// = 0x80000000 if vi != NaN and xx is overflowing (See https://www.felixcloutier.com/x86/cvttps2dq)
// = 0 if vi == NaN
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
// Below, we have to convert 0x80000000 into 0x7FFFFFFF for positive overflowing lane.
//
// tmp[i] = 0x80000000 if vi is positive
// = any satisfying any&0x80000000 = 0 if vi is negative or zero.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(xx), tmp))
// Arithmetic right shifting tmp by 31, meaning that we have
// tmp[i] = 0xffffffff if vi is positive, 0 otherwise.
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrad, newOperandImm32(0x1f), tmp))
// Flipping 0x80000000 if vi is positive, otherwise keep intact.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), xx))
} else {
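// High-level sketch of the unsigned f32x4 -> i32x4 saturating conversion below: NaN and negative lanes
// are first clamped to zero with MAXPS; lanes below 2^31 are converted directly with CVTTPS2DQ, while
// the portion above 2^31 is converted separately and added back in, so that lanes up to the uint32 range
// (and beyond, which saturate to 0xffffffff) come out correctly.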
tmp := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asZeros(tmp))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxps, newOperandReg(tmp), xx))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePcmpeqd, newOperandReg(tmp), tmp))
m.insert(m.allocateInstr().asXmmRmiReg(sseOpcodePsrld, newOperandImm32(0x1), tmp))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvtdq2ps, newOperandReg(tmp), tmp))
tmp2 := m.copyToTmp(xx)
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(xx), xx))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeSubps, newOperandReg(tmp), tmp2))
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmpps, uint8(cmpPredLE_OS), newOperandReg(tmp2), tmp))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttps2dq, newOperandReg(tmp2), tmp2))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp2))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(tmp), tmp))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaxsd, newOperandReg(tmp), tmp2))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePaddd, newOperandReg(tmp2), xx))
}
case ssa.VecLaneF64x2:
tmp2 := m.c.AllocateVReg(ssa.TypeV128)
if signed {
tmp := m.copyToTmp(xx)
// Set all bits for non-NaN lanes, zeros otherwise.
// I.e. tmp[i] = 0xffffffff_ffffffff if vi != NaN, 0 otherwise.
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeCmppd, uint8(cmpPredEQ_OQ), newOperandReg(tmp), tmp))
maskLabel := m.getOrAllocateConstLabel(&m.constI32sMaxOnF64x2Index, i32sMaxOnF64x2[:])
// Load the 2147483647 into tmp2's each lane.
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskLabel)), tmp2))
// tmp[i] = 2147483647 if vi != NaN, 0 otherwise.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAndps, newOperandReg(tmp2), tmp))
// MINPD returns the source register's value as-is, so we have
// xx[i] = vi if vi != NaN
// = 0 if vi == NaN
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp), xx))
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeCvttpd2dq, newOperandReg(xx), xx))
} else {
tmp := m.c.AllocateVReg(ssa.TypeV128)
m.insert(m.allocateInstr().asZeros(tmp))
// xx[i] = vi if vi != NaN && vi > 0
// = 0 if vi == NaN || vi <= 0
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMaxpd, newOperandReg(tmp), xx))
// tmp2[i] = float64(math.MaxUint32) = math.MaxUint32
maskIndex := m.getOrAllocateConstLabel(&m.constI32uMaxOnF64x2Index, i32uMaxOnF64x2[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
// xx[i] = vi if vi != NaN && vi > 0 && vi <= math.MaxUint32
// = 0 otherwise
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeMinpd, newOperandReg(tmp2), xx))
// Round the floating points into integer.
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeRoundpd, 0x3, newOperandReg(xx), xx))
// tmp2[i] = float64(0x1.0p52)
maskIndex = m.getOrAllocateConstLabel(&m.constTwop52Index, twop52[:])
m.insert(m.allocateInstr().asXmmUnaryRmR(sseOpcodeMovdqu, newOperandMem(m.newAmodeRipRel(maskIndex)), tmp2))
// xx[i] = float64(0x1.0p52) + float64(uint32(vi)) if vi != NaN && vi > 0 && vi <= math.MaxUint32
// = 0 otherwise
//
// This means that xx[i] holds exactly the same bits as uint32(vi) in its lower 32 bits.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodeAddpd, newOperandReg(tmp2), xx))
// At this point, we have
// xx = [uint32(v0), float64(0x1.0p52), uint32(v1), float64(0x1.0p52)]
// tmp = [0, 0, 0, 0]
// as 32x4 lanes. Therefore, SHUFPS with 0b00_00_10_00 results in
// xx = [xx[00], xx[10], tmp[00], tmp[00]] = [xx[00], xx[10], 0, 0]
// meaning that for i = 0 and 1, we have
// xx[i] = uint32(vi) if vi != NaN && vi > 0 && vi <= math.MaxUint32
// = 0 otherwise.
m.insert(m.allocateInstr().asXmmRmRImm(sseOpcodeShufps, 0b00_00_10_00, newOperandReg(tmp), xx))
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.copyTo(xx, m.c.VRegOf(ret))
}
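As a reading aid, here is a minimal scalar sketch (illustrative only, not part of the vendored code) of the per-lane semantics the signed F32x4 sequence above implements, i.e. the saturating i32x4.trunc_sat_f32x4_s behavior: NaN lanes become 0 and out-of-range lanes saturate to the int32 bounds.

package main

import (
	"fmt"
	"math"
)

// truncSatF32ToI32 mirrors, per lane, the result the cvttps2dq-based sequence produces.
func truncSatF32ToI32(f float32) int32 {
	switch {
	case f != f: // NaN
		return 0
	case float64(f) >= math.MaxInt32:
		return math.MaxInt32
	case float64(f) <= math.MinInt32:
		return math.MinInt32
	default:
		return int32(f)
	}
}

func main() {
	fmt.Println(truncSatF32ToI32(float32(math.NaN()))) // 0
	fmt.Println(truncSatF32ToI32(3e9))                 // 2147483647
	fmt.Println(truncSatF32ToI32(-3e9))                // -2147483648
	fmt.Println(truncSatF32ToI32(1.9))                 // 1
}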
func (m *machine) lowerNarrow(x, y, ret ssa.Value, lane ssa.VecLane, signed bool) {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
var sseOp sseOpcode
switch lane {
case ssa.VecLaneI16x8:
if signed {
sseOp = sseOpcodePacksswb
} else {
sseOp = sseOpcodePackuswb
}
case ssa.VecLaneI32x4:
if signed {
sseOp = sseOpcodePackssdw
} else {
sseOp = sseOpcodePackusdw
}
default:
panic(fmt.Sprintf("invalid lane type: %s", lane))
}
m.insert(m.allocateInstr().asXmmRmR(sseOp, yy, xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
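For reference, a tiny scalar sketch (illustrative only) of the signed saturation PACKSSWB applies per lane when narrowing i16x8 to i8x16; PACKUSWB, PACKSSDW, and PACKUSDW behave analogously for their respective widths and signedness.

package main

import "fmt"

// satI16ToI8 clamps a signed 16-bit value into the signed 8-bit range,
// which is what PACKSSWB does for every lane before packing the results.
func satI16ToI8(v int16) int8 {
	if v > 127 {
		return 127
	}
	if v < -128 {
		return -128
	}
	return int8(v)
}

func main() {
	fmt.Println(satI16ToI8(300), satI16ToI8(-500), satI16ToI8(42)) // 127 -128 42
}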
func (m *machine) lowerWideningPairwiseDotProductS(x, y, ret ssa.Value) {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
xx := m.copyToTmp(_xx.reg())
yy := m.getOperand_Mem_Reg(m.c.ValueDefinition(y))
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePmaddwd, yy, xx))
m.copyTo(xx, m.c.VRegOf(ret))
}
func (m *machine) lowerVIabs(instr *ssa.Instruction) {
x, lane := instr.ArgWithLane()
rd := m.c.VRegOf(instr.Return())
if lane == ssa.VecLaneI64x2 {
_xx := m.getOperand_Reg(m.c.ValueDefinition(x))
blendReg := xmm0VReg
m.insert(m.allocateInstr().asDefineUninitializedReg(blendReg))
tmp := m.copyToTmp(_xx.reg())
xx := m.copyToTmp(_xx.reg())
// Clear all bits on blendReg.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePxor, newOperandReg(blendReg), blendReg))
// Subtract xx from blendReg (which is zero), so blendReg = -xx.
m.insert(m.allocateInstr().asXmmRmR(sseOpcodePsubq, newOperandReg(xx), blendReg))
// Copy the subtracted value ^^ back into xx.
m.copyTo(blendReg, xx)
m.insert(m.allocateInstr().asBlendvpd(newOperandReg(tmp), xx))
m.copyTo(xx, rd)
} else {
var vecOp sseOpcode
switch lane {
case ssa.VecLaneI8x16:
vecOp = sseOpcodePabsb
case ssa.VecLaneI16x8:
vecOp = sseOpcodePabsw
case ssa.VecLaneI32x4:
vecOp = sseOpcodePabsd
}
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
i := m.allocateInstr()
i.asXmmUnaryRmR(vecOp, rn, rd)
m.insert(i)
}
}
func (m *machine) lowerVIpopcnt(instr *ssa.Instruction) {
x := instr.Arg()
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
rd := m.c.VRegOf(instr.Return())
tmp1 := m.c.AllocateVReg(ssa.TypeV128)
m.lowerVconst(tmp1, 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f)
// Copy input into tmp2.
tmp2 := m.copyToTmp(rn.reg())
// Given that we have:
// rm = [b1, ..., b16] where bn = hn:ln and hn and ln are higher and lower 4-bits of bn.
//
// Take PAND on tmp1 and tmp2, so that we mask out all the higher bits.
// tmp2 = [l1, ..., l16].
pand := m.allocateInstr()
pand.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp2)
m.insert(pand)
// Do logical (packed word) right shift by 4 on rm and PAND against the mask (tmp1); meaning that we have
// tmp3 = [h1, ...., h16].
tmp3 := m.copyToTmp(rn.reg())
psrlw := m.allocateInstr()
psrlw.asXmmRmiReg(sseOpcodePsrlw, newOperandImm32(4), tmp3)
m.insert(psrlw)
pand2 := m.allocateInstr()
pand2.asXmmRmR(sseOpcodePand, newOperandReg(tmp1), tmp3)
m.insert(pand2)
// Read the popcntTable into tmp4, and we have
// tmp4 = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04]
tmp4 := m.c.AllocateVReg(ssa.TypeV128)
m.lowerVconst(tmp4, 0x03_02_02_01_02_01_01_00, 0x04_03_03_02_03_02_02_01)
// Make a copy for later.
tmp5 := m.copyToTmp(tmp4)
// tmp4 = [popcnt(l1), ..., popcnt(l16)].
pshufb := m.allocateInstr()
pshufb.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp2), tmp4)
m.insert(pshufb)
pshufb2 := m.allocateInstr()
pshufb2.asXmmRmR(sseOpcodePshufb, newOperandReg(tmp3), tmp5)
m.insert(pshufb2)
// tmp4 + tmp5 is the result.
paddb := m.allocateInstr()
paddb.asXmmRmR(sseOpcodePaddb, newOperandReg(tmp4), tmp5)
m.insert(paddb)
m.copyTo(tmp5, rd)
}
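A scalar sketch (not part of wazero) of the nibble-table trick used above: each byte is split into its low and high 4 bits, both nibbles are looked up in a 16-entry popcount table (the PSHUFB step), and the two counts are added (the PADDB step).

package main

import "fmt"

// popcntByte counts the set bits of one byte the same way the vectorized
// sequence does for 16 bytes at once.
func popcntByte(b byte) int {
	table := [16]byte{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}
	lo := b & 0x0f                    // PAND with the 0x0f mask
	hi := (b >> 4) & 0x0f             // PSRLW by 4, then PAND
	return int(table[lo] + table[hi]) // two PSHUFB lookups, then PADDB
}

func main() {
	fmt.Println(popcntByte(0b1011_0110)) // 5
}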
func (m *machine) lowerVImul(instr *ssa.Instruction) {
x, y, lane := instr.Arg2WithLane()
rd := m.c.VRegOf(instr.Return())
if lane == ssa.VecLaneI64x2 {
rn := m.getOperand_Reg(m.c.ValueDefinition(x))
rm := m.getOperand_Reg(m.c.ValueDefinition(y))
// Assuming that we have
// rm = [p1, p2] = [p1_lo, p1_high, p2_lo, p2_high]
// rn = [q1, q2] = [q1_lo, q1_high, q2_lo, q2_high]
// where pN and qN are 64-bit (quad word) lanes, and pN_lo, pN_high, qN_lo and qN_high are 32-bit (double word) lanes.
// Copy rn into tmp1.
tmp1 := m.copyToTmp(rn.reg())
// And do the logical right shift by 32-bit on tmp1, which makes tmp1 = [0, q1_high, 0, q2_high]
shift := m.allocateInstr()
shift.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp1)
m.insert(shift)
// Execute "pmuludq rm,tmp1", which makes tmp1 = [p1_high*q1_lo, p2_high*q2_lo] where each lane is 64-bit.
mul := m.allocateInstr()
mul.asXmmRmR(sseOpcodePmuludq, rm, tmp1)
m.insert(mul)
// Copy rm value into tmp2.
tmp2 := m.copyToTmp(rm.reg())
// And do the logical right shift by 32-bit on tmp2, which makes tmp2 = [0, p1_high, 0, p2_high]
shift2 := m.allocateInstr()
shift2.asXmmRmiReg(sseOpcodePsrlq, newOperandImm32(32), tmp2)
m.insert(shift2)
// Execute "pmuludq rm,tmp2", which makes tmp2 = [p1_lo*q1_high, p2_lo*q2_high] where each lane is 64-bit.
mul2 := m.allocateInstr()
mul2.asXmmRmR(sseOpcodePmuludq, rn, tmp2)
m.insert(mul2)
// Add tmp1 and tmp2, then do the logical left shift by 32-bit,
// which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32, (p2_lo*q2_high+p2_high*q2_lo)<<32]
add := m.allocateInstr()
add.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp2), tmp1)
m.insert(add)
shift3 := m.allocateInstr()
shift3.asXmmRmiReg(sseOpcodePsllq, newOperandImm32(32), tmp1)
m.insert(shift3)
// Copy rm value into tmp3.
tmp3 := m.copyToTmp(rm.reg())
// "pmuludq rm,tmp3" makes tmp3 = [p1_lo*q1_lo, p2_lo*q2_lo] where each lane is 64-bit.
mul3 := m.allocateInstr()
mul3.asXmmRmR(sseOpcodePmuludq, rn, tmp3)
m.insert(mul3)
// Finally, we get the result by computing tmp1 + tmp3,
// which makes tmp1 = [(p1_lo*q1_high+p1_high*q1_lo)<<32+p1_lo*q1_lo, (p2_lo*q2_high+p2_high*q2_lo)<<32+p2_lo*q2_lo]
add2 := m.allocateInstr()
add2.asXmmRmR(sseOpcodePaddq, newOperandReg(tmp3), tmp1)
m.insert(add2)
m.copyTo(tmp1, rd)
} else {
var vecOp sseOpcode
switch lane {
case ssa.VecLaneI16x8:
vecOp = sseOpcodePmullw
case ssa.VecLaneI32x4:
vecOp = sseOpcodePmulld
default:
panic("unsupported: " + lane.String())
}
m.lowerVbBinOp(vecOp, x, y, instr.Return())
}
}
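The identity behind the pmuludq sequence above, checked in plain scalar Go (a sketch, not part of the vendored code): the low 64 bits of a 64x64-bit product can be assembled from three 32x32-bit products.

package main

import "fmt"

// mul64ViaHalves computes x*y modulo 2^64 from 32-bit halves, mirroring the
// PMULUDQ/PSLLQ/PADDQ sequence applied to each 64-bit lane.
func mul64ViaHalves(x, y uint64) uint64 {
	xLo, xHi := x&0xffffffff, x>>32
	yLo, yHi := y&0xffffffff, y>>32
	cross := (xLo*yHi + xHi*yLo) << 32 // the two cross terms, shifted into the high half
	return cross + xLo*yLo             // plus the low*low term
}

func main() {
	x, y := uint64(0xdeadbeefcafebabe), uint64(0x0123456789abcdef)
	fmt.Println(mul64ViaHalves(x, y) == x*y) // true
}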

View file

@ -0,0 +1,346 @@
package amd64
import (
"fmt"
"unsafe"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type operand struct {
kind operandKind
data uint64
}
type operandKind byte
const (
// operandKindReg is an operand which is a Register (integer or vector).
operandKindReg operandKind = iota + 1
// operandKindMem is a value in Memory.
// 32, 64, or 128 bit value.
operandKindMem
// operandKindImm32 is a signed-32-bit integer immediate value.
operandKindImm32
// operandKindLabel is a label.
operandKindLabel
)
// String implements fmt.Stringer.
func (o operandKind) String() string {
switch o {
case operandKindReg:
return "reg"
case operandKindMem:
return "mem"
case operandKindImm32:
return "imm32"
case operandKindLabel:
return "label"
default:
panic("BUG: invalid operand kind")
}
}
// format returns the string representation of the operand.
// _64 only applies when the operand is an integer register.
func (o *operand) format(_64 bool) string {
switch o.kind {
case operandKindReg:
return formatVRegSized(o.reg(), _64)
case operandKindMem:
return o.addressMode().String()
case operandKindImm32:
return fmt.Sprintf("$%d", int32(o.imm32()))
case operandKindLabel:
return backend.Label(o.imm32()).String()
default:
panic(fmt.Sprintf("BUG: invalid operand: %s", o.kind))
}
}
//go:inline
func (o *operand) reg() regalloc.VReg {
return regalloc.VReg(o.data)
}
//go:inline
func (o *operand) setReg(r regalloc.VReg) {
o.data = uint64(r)
}
//go:inline
func (o *operand) addressMode() *amode {
return wazevoapi.PtrFromUintptr[amode](uintptr(o.data))
}
//go:inline
func (o *operand) imm32() uint32 {
return uint32(o.data)
}
func (o *operand) label() backend.Label {
switch o.kind {
case operandKindLabel:
return backend.Label(o.data)
case operandKindMem:
mem := o.addressMode()
if mem.kind() != amodeRipRel {
panic("BUG: invalid label")
}
return backend.Label(mem.imm32)
default:
panic("BUG: invalid operand kind")
}
}
func newOperandLabel(label backend.Label) operand {
return operand{kind: operandKindLabel, data: uint64(label)}
}
func newOperandReg(r regalloc.VReg) operand {
return operand{kind: operandKindReg, data: uint64(r)}
}
func newOperandImm32(imm32 uint32) operand {
return operand{kind: operandKindImm32, data: uint64(imm32)}
}
func newOperandMem(amode *amode) operand {
return operand{kind: operandKindMem, data: uint64(uintptr(unsafe.Pointer(amode)))}
}
// amode is a memory operand (addressing mode).
type amode struct {
kindWithShift uint32
imm32 uint32
base regalloc.VReg
// For amodeRegRegShift:
index regalloc.VReg
}
type amodeKind byte
const (
// amodeImmReg calculates sign-extend-32-to-64(Immediate) + base
amodeImmReg amodeKind = iota + 1
// amodeImmRBP is the same as amodeImmReg, but the base register is fixed to RBP.
// The only difference is that it doesn't tell the register allocator to use RBP, which would be distracting for it.
amodeImmRBP
// amodeRegRegShift calculates sign-extend-32-to-64(Immediate) + base + (Register2 << Shift)
amodeRegRegShift
// amodeRipRel is a RIP-relative addressing mode specified by the label.
amodeRipRel
// TODO: there are other addressing modes such as the one without base register.
)
func (a *amode) kind() amodeKind {
return amodeKind(a.kindWithShift & 0xff)
}
func (a *amode) shift() byte {
return byte(a.kindWithShift >> 8)
}
func (a *amode) uses(rs *[]regalloc.VReg) {
switch a.kind() {
case amodeImmReg:
*rs = append(*rs, a.base)
case amodeRegRegShift:
*rs = append(*rs, a.base, a.index)
case amodeImmRBP, amodeRipRel:
default:
panic("BUG: invalid amode kind")
}
}
func (a *amode) nregs() int {
switch a.kind() {
case amodeImmReg:
return 1
case amodeRegRegShift:
return 2
case amodeImmRBP, amodeRipRel:
return 0
default:
panic("BUG: invalid amode kind")
}
}
func (a *amode) assignUses(i int, reg regalloc.VReg) {
switch a.kind() {
case amodeImmReg:
if i == 0 {
a.base = reg
} else {
panic("BUG: invalid amode assignment")
}
case amodeRegRegShift:
if i == 0 {
a.base = reg
} else if i == 1 {
a.index = reg
} else {
panic("BUG: invalid amode assignment")
}
default:
panic("BUG: invalid amode assignment")
}
}
func (m *machine) newAmodeImmReg(imm32 uint32, base regalloc.VReg) *amode {
ret := m.amodePool.Allocate()
*ret = amode{kindWithShift: uint32(amodeImmReg), imm32: imm32, base: base}
return ret
}
func (m *machine) newAmodeImmRBPReg(imm32 uint32) *amode {
ret := m.amodePool.Allocate()
*ret = amode{kindWithShift: uint32(amodeImmRBP), imm32: imm32, base: rbpVReg}
return ret
}
func (m *machine) newAmodeRegRegShift(imm32 uint32, base, index regalloc.VReg, shift byte) *amode {
if shift > 3 {
panic(fmt.Sprintf("BUG: invalid shift (must be 3>=): %d", shift))
}
ret := m.amodePool.Allocate()
*ret = amode{kindWithShift: uint32(amodeRegRegShift) | uint32(shift)<<8, imm32: imm32, base: base, index: index}
return ret
}
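A standalone sketch (the kind constants are redeclared locally purely for illustration) of the bit packing used by newAmodeRegRegShift above: the addressing-mode kind lives in the low byte of kindWithShift and the index scale shift in the next byte, exactly as the kind() and shift() accessors read them back.

package main

import "fmt"

type amodeKind byte

const (
	amodeImmReg amodeKind = iota + 1
	amodeImmRBP
	amodeRegRegShift
	amodeRipRel
)

func main() {
	// Pack kind and shift the same way newAmodeRegRegShift does.
	kindWithShift := uint32(amodeRegRegShift) | uint32(3)<<8

	// Unpack them the same way the kind() and shift() accessors do.
	fmt.Println(amodeKind(kindWithShift&0xff) == amodeRegRegShift) // true
	fmt.Println(byte(kindWithShift >> 8))                          // 3, i.e. the index is scaled by 1<<3 = 8
}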
func (m *machine) newAmodeRipRel(label backend.Label) *amode {
ret := m.amodePool.Allocate()
*ret = amode{kindWithShift: uint32(amodeRipRel), imm32: uint32(label)}
return ret
}
// String implements fmt.Stringer.
func (a *amode) String() string {
switch a.kind() {
case amodeImmReg, amodeImmRBP:
if a.imm32 == 0 {
return fmt.Sprintf("(%s)", formatVRegSized(a.base, true))
}
return fmt.Sprintf("%d(%s)", int32(a.imm32), formatVRegSized(a.base, true))
case amodeRegRegShift:
shift := 1 << a.shift()
if a.imm32 == 0 {
return fmt.Sprintf(
"(%s,%s,%d)",
formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)
}
return fmt.Sprintf(
"%d(%s,%s,%d)",
int32(a.imm32), formatVRegSized(a.base, true), formatVRegSized(a.index, true), shift)
case amodeRipRel:
return fmt.Sprintf("%s(%%rip)", backend.Label(a.imm32))
default:
panic("BUG: invalid amode kind")
}
}
func (m *machine) getOperand_Mem_Reg(def *backend.SSAValueDefinition) (op operand) {
if def.IsFromBlockParam() {
return newOperandReg(def.BlkParamVReg)
}
if def.SSAValue().Type() == ssa.TypeV128 {
// SIMD instructions require strict memory alignment, so we don't support the memory operand for V128 at the moment.
return m.getOperand_Reg(def)
}
if m.c.MatchInstr(def, ssa.OpcodeLoad) {
instr := def.Instr
ptr, offset, _ := instr.LoadData()
op = newOperandMem(m.lowerToAddressMode(ptr, offset))
instr.MarkLowered()
return op
}
return m.getOperand_Reg(def)
}
func (m *machine) getOperand_Mem_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) {
if def.IsFromBlockParam() {
return newOperandReg(def.BlkParamVReg)
}
if m.c.MatchInstr(def, ssa.OpcodeLoad) {
instr := def.Instr
ptr, offset, _ := instr.LoadData()
op = newOperandMem(m.lowerToAddressMode(ptr, offset))
instr.MarkLowered()
return op
}
return m.getOperand_Imm32_Reg(def)
}
func (m *machine) getOperand_Imm32_Reg(def *backend.SSAValueDefinition) (op operand) {
if def.IsFromBlockParam() {
return newOperandReg(def.BlkParamVReg)
}
instr := def.Instr
if instr.Constant() {
// If the operation is 64-bit, x64 sign-extends the 32-bit immediate value.
// Therefore, the immediate is usable only if it fits in 32 bits and, for 64-bit operations,
// its sign bit is not set (since sign extension would change the value).
if op, ok := asImm32Operand(instr.ConstantVal(), instr.Return().Type() == ssa.TypeI32); ok {
instr.MarkLowered()
return op
}
}
return m.getOperand_Reg(def)
}
func asImm32Operand(val uint64, allowSignExt bool) (operand, bool) {
if imm32, ok := asImm32(val, allowSignExt); ok {
return newOperandImm32(imm32), true
}
return operand{}, false
}
func asImm32(val uint64, allowSignExt bool) (uint32, bool) {
u32val := uint32(val)
if uint64(u32val) != val {
return 0, false
}
if !allowSignExt && u32val&0x80000000 != 0 {
return 0, false
}
return u32val, true
}
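A small usage sketch (the helper is repeated here verbatim so the example is self-contained) showing which constants asImm32 accepts with and without the sign-extension allowance.

package main

import "fmt"

func asImm32(val uint64, allowSignExt bool) (uint32, bool) {
	u32val := uint32(val)
	if uint64(u32val) != val {
		return 0, false
	}
	if !allowSignExt && u32val&0x80000000 != 0 {
		return 0, false
	}
	return u32val, true
}

func main() {
	fmt.Println(asImm32(0x7fffffff, false)) // 2147483647 true: fits, sign bit clear
	fmt.Println(asImm32(0x80000000, false)) // 0 false: sign extension would change the 64-bit value
	fmt.Println(asImm32(0x80000000, true))  // 2147483648 true: fine when the operation is 32-bit
	fmt.Println(asImm32(0x100000000, true)) // 0 false: does not fit in 32 bits at all
}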
func (m *machine) getOperand_Reg(def *backend.SSAValueDefinition) (op operand) {
var v regalloc.VReg
if def.IsFromBlockParam() {
v = def.BlkParamVReg
} else {
instr := def.Instr
if instr.Constant() {
// We inline all the constant instructions so that we can reduce register usage.
v = m.lowerConstant(instr)
instr.MarkLowered()
} else {
if n := def.N; n == 0 {
v = m.c.VRegOf(instr.Return())
} else {
_, rs := instr.Returns()
v = m.c.VRegOf(rs[n-1])
}
}
}
return newOperandReg(v)
}

View file

@ -0,0 +1,11 @@
//go:build !tinygo
package amd64
import "reflect"
// setSliceLimits sets both Cap and Len for the given reflected slice.
func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
s.Len = int(limit)
s.Cap = int(limit)
}

View file

@ -0,0 +1,11 @@
//go:build tinygo
package amd64
import "reflect"
// setSliceLimits sets both Cap and Len for the given reflected slice.
func setSliceLimits(s *reflect.SliceHeader, limit uintptr) {
s.Len = limit
s.Cap = limit
}

View file

@ -0,0 +1,181 @@
package amd64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
)
// Amd64-specific registers.
const (
// rax is a gp register.
rax = regalloc.RealRegInvalid + 1 + iota
// rcx is a gp register.
rcx
// rdx is a gp register.
rdx
// rbx is a gp register.
rbx
// rsp is a gp register.
rsp
// rbp is a gp register.
rbp
// rsi is a gp register.
rsi
// rdi is a gp register.
rdi
// r8 is a gp register.
r8
// r9 is a gp register.
r9
// r10 is a gp register.
r10
// r11 is a gp register.
r11
// r12 is a gp register.
r12
// r13 is a gp register.
r13
// r14 is a gp register.
r14
// r15 is a gp register.
r15
// xmm0 is a vector register.
xmm0
// xmm1 is a vector register.
xmm1
// xmm2 is a vector register.
xmm2
// xmm3 is a vector register.
xmm3
// xmm4 is a vector register.
xmm4
// xmm5 is a vector register.
xmm5
// xmm6 is a vector register.
xmm6
// xmm7 is a vector register.
xmm7
// xmm8 is a vector register.
xmm8
// xmm9 is a vector register.
xmm9
// xmm10 is a vector register.
xmm10
// xmm11 is a vector register.
xmm11
// xmm12 is a vector register.
xmm12
// xmm13 is a vector register.
xmm13
// xmm14 is a vector register.
xmm14
// xmm15 is a vector register.
xmm15
)
var (
raxVReg = regalloc.FromRealReg(rax, regalloc.RegTypeInt)
rcxVReg = regalloc.FromRealReg(rcx, regalloc.RegTypeInt)
rdxVReg = regalloc.FromRealReg(rdx, regalloc.RegTypeInt)
rbxVReg = regalloc.FromRealReg(rbx, regalloc.RegTypeInt)
rspVReg = regalloc.FromRealReg(rsp, regalloc.RegTypeInt)
rbpVReg = regalloc.FromRealReg(rbp, regalloc.RegTypeInt)
rsiVReg = regalloc.FromRealReg(rsi, regalloc.RegTypeInt)
rdiVReg = regalloc.FromRealReg(rdi, regalloc.RegTypeInt)
r8VReg = regalloc.FromRealReg(r8, regalloc.RegTypeInt)
r9VReg = regalloc.FromRealReg(r9, regalloc.RegTypeInt)
r10VReg = regalloc.FromRealReg(r10, regalloc.RegTypeInt)
r11VReg = regalloc.FromRealReg(r11, regalloc.RegTypeInt)
r12VReg = regalloc.FromRealReg(r12, regalloc.RegTypeInt)
r13VReg = regalloc.FromRealReg(r13, regalloc.RegTypeInt)
r14VReg = regalloc.FromRealReg(r14, regalloc.RegTypeInt)
r15VReg = regalloc.FromRealReg(r15, regalloc.RegTypeInt)
xmm0VReg = regalloc.FromRealReg(xmm0, regalloc.RegTypeFloat)
xmm1VReg = regalloc.FromRealReg(xmm1, regalloc.RegTypeFloat)
xmm2VReg = regalloc.FromRealReg(xmm2, regalloc.RegTypeFloat)
xmm3VReg = regalloc.FromRealReg(xmm3, regalloc.RegTypeFloat)
xmm4VReg = regalloc.FromRealReg(xmm4, regalloc.RegTypeFloat)
xmm5VReg = regalloc.FromRealReg(xmm5, regalloc.RegTypeFloat)
xmm6VReg = regalloc.FromRealReg(xmm6, regalloc.RegTypeFloat)
xmm7VReg = regalloc.FromRealReg(xmm7, regalloc.RegTypeFloat)
xmm8VReg = regalloc.FromRealReg(xmm8, regalloc.RegTypeFloat)
xmm9VReg = regalloc.FromRealReg(xmm9, regalloc.RegTypeFloat)
xmm10VReg = regalloc.FromRealReg(xmm10, regalloc.RegTypeFloat)
xmm11VReg = regalloc.FromRealReg(xmm11, regalloc.RegTypeFloat)
xmm12VReg = regalloc.FromRealReg(xmm12, regalloc.RegTypeFloat)
xmm13VReg = regalloc.FromRealReg(xmm13, regalloc.RegTypeFloat)
xmm14VReg = regalloc.FromRealReg(xmm14, regalloc.RegTypeFloat)
xmm15VReg = regalloc.FromRealReg(xmm15, regalloc.RegTypeFloat)
)
var regNames = [...]string{
rax: "rax",
rcx: "rcx",
rdx: "rdx",
rbx: "rbx",
rsp: "rsp",
rbp: "rbp",
rsi: "rsi",
rdi: "rdi",
r8: "r8",
r9: "r9",
r10: "r10",
r11: "r11",
r12: "r12",
r13: "r13",
r14: "r14",
r15: "r15",
xmm0: "xmm0",
xmm1: "xmm1",
xmm2: "xmm2",
xmm3: "xmm3",
xmm4: "xmm4",
xmm5: "xmm5",
xmm6: "xmm6",
xmm7: "xmm7",
xmm8: "xmm8",
xmm9: "xmm9",
xmm10: "xmm10",
xmm11: "xmm11",
xmm12: "xmm12",
xmm13: "xmm13",
xmm14: "xmm14",
xmm15: "xmm15",
}
func formatVRegSized(r regalloc.VReg, _64 bool) string {
if r.IsRealReg() {
if r.RegType() == regalloc.RegTypeInt {
rr := r.RealReg()
orig := regNames[rr]
if rr <= rdi {
if _64 {
return "%" + orig
} else {
return "%e" + orig[1:]
}
} else {
if _64 {
return "%" + orig
} else {
return "%" + orig + "d"
}
}
} else {
return "%" + regNames[r.RealReg()]
}
} else {
if r.RegType() == regalloc.RegTypeInt {
if _64 {
return fmt.Sprintf("%%r%d?", r.ID())
} else {
return fmt.Sprintf("%%r%dd?", r.ID())
}
} else {
return fmt.Sprintf("%%xmm%d?", r.ID())
}
}
}
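A tiny sketch (illustrative only) of the 32-bit spellings produced above for real integer registers: the legacy registers swap their leading "r" for "e", while r8..r15 get a "d" suffix.

package main

import "fmt"

// alias32 derives the 32-bit name from the 64-bit one, mirroring the two
// branches of formatVRegSized for real integer registers.
func alias32(name string, legacy bool) string {
	if legacy {
		return "%e" + name[1:] // rax -> %eax, rdi -> %edi
	}
	return "%" + name + "d" // r8 -> %r8d, r15 -> %r15d
}

func main() {
	fmt.Println(alias32("rax", true), alias32("r10", false)) // %eax %r10d
}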

View file

@ -0,0 +1,128 @@
package amd64
import (
"encoding/binary"
"reflect"
"unsafe"
"github.com/tetratelabs/wazero/internal/wasmdebug"
)
func stackView(rbp, top uintptr) []byte {
var stackBuf []byte
{
// TODO: use unsafe.Slice after floor version is set to Go 1.20.
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))
hdr.Data = rbp
setSliceLimits(hdr, top-rbp)
}
return stackBuf
}
// UnwindStack implements wazevo.unwindStack.
func UnwindStack(_, rbp, top uintptr, returnAddresses []uintptr) []uintptr {
stackBuf := stackView(rbp, top)
for i := uint64(0); i < uint64(len(stackBuf)); {
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <---- Caller_RBP
// | ........... |
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <---- RBP
// (low address)
callerRBP := binary.LittleEndian.Uint64(stackBuf[i:])
retAddr := binary.LittleEndian.Uint64(stackBuf[i+8:])
returnAddresses = append(returnAddresses, uintptr(retAddr))
i = callerRBP - uint64(rbp)
if len(returnAddresses) == wasmdebug.MaxFrames {
break
}
}
return returnAddresses
}
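A synthetic sketch (fabricated addresses, not real stack contents) of the frame-pointer walk UnwindStack performs: each frame stores the caller's RBP followed by the return address, and the walk hops from frame to frame until the offset leaves the captured stack region.

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// A fake 48-byte stack region starting at hypothetical address 1000 (== rbp).
	const base = uint64(1000)
	stack := make([]byte, 48)
	binary.LittleEndian.PutUint64(stack[0:], base+32)  // frame 0: caller RBP
	binary.LittleEndian.PutUint64(stack[8:], 0x1111)   // frame 0: return address
	binary.LittleEndian.PutUint64(stack[32:], base+48) // frame 1: caller RBP (top of region)
	binary.LittleEndian.PutUint64(stack[40:], 0x2222)  // frame 1: return address

	var rets []uint64
	for i := uint64(0); i < uint64(len(stack)); {
		callerRBP := binary.LittleEndian.Uint64(stack[i:])
		rets = append(rets, binary.LittleEndian.Uint64(stack[i+8:]))
		i = callerRBP - base // hop to the caller's frame
	}
	fmt.Printf("%#x\n", rets) // [0x1111 0x2222]
}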
// GoCallStackView implements wazevo.goCallStackView.
func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
// (high address)
// +-----------------+ <----+
// | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned.
// ^ | arg[N]/ret[M] | |
// sliceSize | | ............ | | SizeInBytes/8
// | | arg[1]/ret[1] | |
// v | arg[0]/ret[0] | <----+
// | SizeInBytes |
// +-----------------+ <---- stackPointerBeforeGoCall
// (low address)
data := unsafe.Pointer(uintptr(unsafe.Pointer(stackPointerBeforeGoCall)) + 8)
size := *stackPointerBeforeGoCall / 8
return unsafe.Slice((*uint64)(data), int(size))
}
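A sketch with synthetic values of the size-prefixed layout GoCallStackView reads: the 64-bit word at the saved stack pointer holds the byte size of the arg/ret area, and the area itself starts 8 bytes above it.

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	// buf[0] plays the role of *stackPointerBeforeGoCall: the size in bytes (3 slots = 24).
	buf := []uint64{24, 11, 22, 33}
	sp := &buf[0]

	data := unsafe.Pointer(uintptr(unsafe.Pointer(sp)) + 8)
	view := unsafe.Slice((*uint64)(data), int(*sp/8))
	fmt.Println(view) // [11 22 33]
}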
func AdjustClonedStack(oldRsp, oldTop, rsp, rbp, top uintptr) {
diff := uint64(rsp - oldRsp)
newBuf := stackView(rbp, top)
for i := uint64(0); i < uint64(len(newBuf)); {
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <---- Caller_RBP
// | ........... |
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 0 |
// | ReturnAddress |
// | Caller_RBP |
// +-----------------+ <---- RBP
// (low address)
callerRBP := binary.LittleEndian.Uint64(newBuf[i:])
if callerRBP == 0 {
// End of stack.
break
}
if i64 := int64(callerRBP); i64 < int64(oldRsp) || i64 >= int64(oldTop) {
panic("BUG: callerRBP is out of range")
}
if int(callerRBP) < 0 {
panic("BUG: callerRBP is negative")
}
adjustedCallerRBP := callerRBP + diff
if int(adjustedCallerRBP) < 0 {
panic("BUG: adjustedCallerRBP is negative")
}
binary.LittleEndian.PutUint64(newBuf[i:], adjustedCallerRBP)
i = adjustedCallerRBP - uint64(rbp)
}
}

View file

@ -0,0 +1,332 @@
package arm64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// References:
// * https://github.com/golang/go/blob/49d42128fd8594c172162961ead19ac95e247d24/src/cmd/compile/abi-internal.md#arm64-architecture
// * https://developer.arm.com/documentation/102374/0101/Procedure-Call-Standard
var (
intParamResultRegs = []regalloc.RealReg{x0, x1, x2, x3, x4, x5, x6, x7}
floatParamResultRegs = []regalloc.RealReg{v0, v1, v2, v3, v4, v5, v6, v7}
)
var regInfo = &regalloc.RegisterInfo{
AllocatableRegisters: [regalloc.NumRegType][]regalloc.RealReg{
// We don't allocate:
// - x18: Reserved by the macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
// - x28: Reserved by Go runtime.
// - x27(=tmpReg): because of the reason described on tmpReg.
regalloc.RegTypeInt: {
x8, x9, x10, x11, x12, x13, x14, x15,
x16, x17, x19, x20, x21, x22, x23, x24, x25,
x26, x29, x30,
// These are the argument/return registers. Less preferred in the allocation.
x7, x6, x5, x4, x3, x2, x1, x0,
},
regalloc.RegTypeFloat: {
v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30,
// These are the argument/return registers. Less preferred in the allocation.
v7, v6, v5, v4, v3, v2, v1, v0,
},
},
CalleeSavedRegisters: regalloc.NewRegSet(
x19, x20, x21, x22, x23, x24, x25, x26, x28,
v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
),
CallerSavedRegisters: regalloc.NewRegSet(
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x29, x30,
v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
),
RealRegToVReg: []regalloc.VReg{
x0: x0VReg, x1: x1VReg, x2: x2VReg, x3: x3VReg, x4: x4VReg, x5: x5VReg, x6: x6VReg, x7: x7VReg, x8: x8VReg, x9: x9VReg, x10: x10VReg, x11: x11VReg, x12: x12VReg, x13: x13VReg, x14: x14VReg, x15: x15VReg, x16: x16VReg, x17: x17VReg, x18: x18VReg, x19: x19VReg, x20: x20VReg, x21: x21VReg, x22: x22VReg, x23: x23VReg, x24: x24VReg, x25: x25VReg, x26: x26VReg, x27: x27VReg, x28: x28VReg, x29: x29VReg, x30: x30VReg,
v0: v0VReg, v1: v1VReg, v2: v2VReg, v3: v3VReg, v4: v4VReg, v5: v5VReg, v6: v6VReg, v7: v7VReg, v8: v8VReg, v9: v9VReg, v10: v10VReg, v11: v11VReg, v12: v12VReg, v13: v13VReg, v14: v14VReg, v15: v15VReg, v16: v16VReg, v17: v17VReg, v18: v18VReg, v19: v19VReg, v20: v20VReg, v21: v21VReg, v22: v22VReg, v23: v23VReg, v24: v24VReg, v25: v25VReg, v26: v26VReg, v27: v27VReg, v28: v28VReg, v29: v29VReg, v30: v30VReg, v31: v31VReg,
},
RealRegName: func(r regalloc.RealReg) string { return regNames[r] },
RealRegType: func(r regalloc.RealReg) regalloc.RegType {
if r < v0 {
return regalloc.RegTypeInt
}
return regalloc.RegTypeFloat
},
}
// ArgsResultsRegs implements backend.Machine.
func (m *machine) ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg) {
return intParamResultRegs, floatParamResultRegs
}
// LowerParams implements backend.FunctionABI.
func (m *machine) LowerParams(args []ssa.Value) {
a := m.currentABI
for i, ssaArg := range args {
if !ssaArg.Valid() {
continue
}
reg := m.compiler.VRegOf(ssaArg)
arg := &a.Args[i]
if arg.Kind == backend.ABIArgKindReg {
m.InsertMove(reg, arg.Reg, arg.Type)
} else {
// TODO: we could use pair load if there's consecutive loads for the same type.
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 | <-|
// | ReturnAddress | |
// +-----------------+ |
// | ........... | |
// | clobbered M | | argStackOffset: is unknown at this point of compilation.
// | ............ | |
// | clobbered 0 | |
// | spill slot N | |
// | ........... | |
// | spill slot 0 | |
// SP---> +-----------------+ <-+
// (low address)
bits := arg.Type.Bits()
// At this point of compilation, we don't yet know how much space exists below the return address.
// So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation.
amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
load := m.allocateInstr()
switch arg.Type {
case ssa.TypeI32, ssa.TypeI64:
load.asULoad(operandNR(reg), amode, bits)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
load.asFpuLoad(operandNR(reg), amode, bits)
default:
panic("BUG")
}
m.insert(load)
m.unresolvedAddressModes = append(m.unresolvedAddressModes, load)
}
}
}
// LowerReturns lowers the given returns.
func (m *machine) LowerReturns(rets []ssa.Value) {
a := m.currentABI
l := len(rets) - 1
for i := range rets {
// Reverse order in order to avoid overwriting the stack returns existing in the return registers.
ret := rets[l-i]
r := &a.Rets[l-i]
reg := m.compiler.VRegOf(ret)
if def := m.compiler.ValueDefinition(ret); def.IsFromInstr() {
// Constant instructions are inlined.
if inst := def.Instr; inst.Constant() {
val := inst.Return()
valType := val.Type()
v := inst.ConstantVal()
m.insertLoadConstant(v, valType, reg)
}
}
if r.Kind == backend.ABIArgKindReg {
m.InsertMove(r.Reg, reg, ret.Type())
} else {
// TODO: we could use pair store if there's consecutive stores for the same type.
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 | <-+
// | arg X | |
// | ....... | |
// | arg 1 | |
// | arg 0 | |
// | ReturnAddress | |
// +-----------------+ |
// | ........... | |
// | spill slot M | | retStackOffset: is unknown at this point of compilation.
// | ............ | |
// | spill slot 2 | |
// | spill slot 1 | |
// | clobbered 0 | |
// | clobbered 1 | |
// | ........... | |
// | clobbered N | |
// SP---> +-----------------+ <-+
// (low address)
bits := r.Type.Bits()
// At this point of compilation, we don't yet know how much space exists below the return address.
// So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation.
amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
store := m.allocateInstr()
store.asStore(operandNR(reg), amode, bits)
m.insert(store)
m.unresolvedAddressModes = append(m.unresolvedAddressModes, store)
}
}
}
// callerGenVRegToFunctionArg is the opposite of GenFunctionArgToVReg, which is used to generate the
// caller side of the function call.
func (m *machine) callerGenVRegToFunctionArg(a *backend.FunctionABI, argIndex int, reg regalloc.VReg, def *backend.SSAValueDefinition, slotBegin int64) {
arg := &a.Args[argIndex]
if def != nil && def.IsFromInstr() {
// Constant instructions are inlined.
if inst := def.Instr; inst.Constant() {
val := inst.Return()
valType := val.Type()
v := inst.ConstantVal()
m.insertLoadConstant(v, valType, reg)
}
}
if arg.Kind == backend.ABIArgKindReg {
m.InsertMove(arg.Reg, reg, arg.Type)
} else {
// TODO: we could use pair store if there's consecutive stores for the same type.
//
// Note that at this point, stack pointer is already adjusted.
bits := arg.Type.Bits()
amode := m.resolveAddressModeForOffset(arg.Offset-slotBegin, bits, spVReg, false)
store := m.allocateInstr()
store.asStore(operandNR(reg), amode, bits)
m.insert(store)
}
}
func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex int, reg regalloc.VReg, slotBegin int64) {
r := &a.Rets[retIndex]
if r.Kind == backend.ABIArgKindReg {
m.InsertMove(reg, r.Reg, r.Type)
} else {
// TODO: we could use pair load if there's consecutive loads for the same type.
amode := m.resolveAddressModeForOffset(a.ArgStackSize+r.Offset-slotBegin, r.Type.Bits(), spVReg, false)
ldr := m.allocateInstr()
switch r.Type {
case ssa.TypeI32, ssa.TypeI64:
ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
default:
panic("BUG")
}
m.insert(ldr)
}
}
func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
exct := m.executableContext
exct.PendingInstructions = exct.PendingInstructions[:0]
mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
for _, instr := range exct.PendingInstructions {
cur = linkInstr(cur, instr)
}
return cur, mode
}
func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
if rn.RegType() != regalloc.RegTypeInt {
panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
}
var amode addressMode
if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
} else {
var indexReg regalloc.VReg
if allowTmpRegUse {
m.lowerConstantI64(tmpRegVReg, offset)
indexReg = tmpRegVReg
} else {
indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(indexReg, offset)
}
amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
}
return amode
}
func (m *machine) lowerCall(si *ssa.Instruction) {
isDirectCall := si.Opcode() == ssa.OpcodeCall
var indirectCalleePtr ssa.Value
var directCallee ssa.FuncRef
var sigID ssa.SignatureID
var args []ssa.Value
if isDirectCall {
directCallee, sigID, args = si.CallData()
} else {
indirectCalleePtr, sigID, args, _ /* on arm64, the calling convention is compatible with the Go runtime */ = si.CallIndirectData()
}
calleeABI := m.compiler.GetFunctionABI(m.compiler.SSABuilder().ResolveSignature(sigID))
stackSlotSize := int64(calleeABI.AlignedArgResultStackSlotSize())
if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
m.maxRequiredStackSizeForCalls = stackSlotSize + 16 // return address frame.
}
for i, arg := range args {
reg := m.compiler.VRegOf(arg)
def := m.compiler.ValueDefinition(arg)
m.callerGenVRegToFunctionArg(calleeABI, i, reg, def, stackSlotSize)
}
if isDirectCall {
call := m.allocateInstr()
call.asCall(directCallee, calleeABI)
m.insert(call)
} else {
ptr := m.compiler.VRegOf(indirectCalleePtr)
callInd := m.allocateInstr()
callInd.asCallIndirect(ptr, calleeABI)
m.insert(callInd)
}
var index int
r1, rs := si.Returns()
if r1.Valid() {
m.callerGenFunctionReturnVReg(calleeABI, 0, m.compiler.VRegOf(r1), stackSlotSize)
index++
}
for _, r := range rs {
m.callerGenFunctionReturnVReg(calleeABI, index, m.compiler.VRegOf(r), stackSlotSize)
index++
}
}
func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add bool) {
if imm12Operand, ok := asImm12Operand(uint64(diff)); ok {
alu := m.allocateInstr()
var ao aluOp
if add {
ao = aluOpAdd
} else {
ao = aluOpSub
}
alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
m.insert(alu)
} else {
m.lowerConstantI64(tmpRegVReg, diff)
alu := m.allocateInstr()
var ao aluOp
if add {
ao = aluOpAdd
} else {
ao = aluOpSub
}
alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
m.insert(alu)
}
}

View file

@ -0,0 +1,9 @@
package arm64
// entrypoint enters the machine code generated by this backend which begins with the preamble generated by functionABI.EmitGoEntryPreamble below.
// This implements wazevo.entrypoint, and see the comments there for detail.
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultPtr *uint64, goAllocatedStackSlicePtr uintptr)
// afterGoFunctionCallEntrypoint enters the machine code after growing the stack.
// This implements wazevo.afterGoFunctionCallEntrypoint, and see the comments there for detail.
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)

View file

@ -0,0 +1,29 @@
//go:build arm64
#include "funcdata.h"
#include "textflag.h"
// See the comments on EmitGoEntryPreamble for what this function is supposed to do.
TEXT ·entrypoint(SB), NOSPLIT|NOFRAME, $0-48
MOVD preambleExecutable+0(FP), R27
MOVD functionExecutable+8(FP), R24
MOVD executionContextPtr+16(FP), R0
MOVD moduleContextPtr+24(FP), R1
MOVD paramResultSlicePtr+32(FP), R19
MOVD goAllocatedStackSlicePtr+40(FP), R26
JMP (R27)
TEXT ·afterGoFunctionCallEntrypoint(SB), NOSPLIT|NOFRAME, $0-32
MOVD goCallReturnAddress+0(FP), R20
MOVD executionContextPtr+8(FP), R0
MOVD stackPointer+16(FP), R19
// Save the current FP(R29), SP and LR(R30) into the wazevo.executionContext (stored in R0).
MOVD R29, 16(R0) // Store FP(R29) into [R0, #ExecutionContextOffsets.OriginalFramePointer]
MOVD RSP, R27 // Move SP to R27 (temporary register) since SP cannot be stored directly in str instructions.
MOVD R27, 24(R0) // Store R27 into [R0, #ExecutionContextOffsets.OriginalStackPointer]
MOVD R30, 32(R0) // Store R30 into [R0, #ExecutionContextOffsets.GoReturnAddress]
// Load the new stack pointer (which sits somewhere in Go-allocated stack) into SP.
MOVD R19, RSP
JMP (R20)

View file

@ -0,0 +1,230 @@
package arm64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// CompileEntryPreamble implements backend.Machine. This assumes `entrypoint` function (in abi_go_entry_arm64.s) passes:
//
// 1. The first (execution context ptr) and second arguments are already passed in x0 and x1.
// 2. param/result slice ptr in x19; the pointer to []uint64{} which is used to pass arguments and accept return values.
// 3. Go-allocated stack slice ptr in x26.
// 4. Function executable in x24.
//
// also SP and FP are correct Go-runtime-based values, and LR is the return address to the Go-side caller.
func (m *machine) CompileEntryPreamble(signature *ssa.Signature) []byte {
root := m.constructEntryPreamble(signature)
m.encode(root)
return m.compiler.Buf()
}
var (
executionContextPtrReg = x0VReg
// callee-saved regs so that they can be used in the prologue and epilogue.
paramResultSlicePtr = x19VReg
savedExecutionContextPtr = x20VReg
// goAllocatedStackPtr is not used in the epilogue.
goAllocatedStackPtr = x26VReg
// paramResultSliceCopied is not used in the epilogue.
paramResultSliceCopied = x25VReg
// tmpRegVReg is not used in the epilogue.
functionExecutable = x24VReg
)
func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regalloc.VReg, arg *backend.ABIArg, argStartOffsetFromSP int64) *instruction {
typ := arg.Type
bits := typ.Bits()
isStackArg := arg.Kind == backend.ABIArgKindStack
var loadTargetReg operand
if !isStackArg {
loadTargetReg = operandNR(arg.Reg)
} else {
switch typ {
case ssa.TypeI32, ssa.TypeI64:
loadTargetReg = operandNR(x15VReg)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
loadTargetReg = operandNR(v15VReg)
default:
panic("TODO?")
}
}
var postIndexImm int64
if typ == ssa.TypeV128 {
postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice.
} else {
postIndexImm = 8
}
loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}
instr := m.allocateInstr()
switch typ {
case ssa.TypeI32:
instr.asULoad(loadTargetReg, loadMode, 32)
case ssa.TypeI64:
instr.asULoad(loadTargetReg, loadMode, 64)
case ssa.TypeF32:
instr.asFpuLoad(loadTargetReg, loadMode, 32)
case ssa.TypeF64:
instr.asFpuLoad(loadTargetReg, loadMode, 64)
case ssa.TypeV128:
instr.asFpuLoad(loadTargetReg, loadMode, 128)
}
cur = linkInstr(cur, instr)
if isStackArg {
var storeMode addressMode
cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true)
toStack := m.allocateInstr()
toStack.asStore(loadTargetReg, storeMode, bits)
cur = linkInstr(cur, toStack)
}
return cur
}
func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr regalloc.VReg, result *backend.ABIArg, resultStartOffsetFromSP int64) *instruction {
isStackArg := result.Kind == backend.ABIArgKindStack
typ := result.Type
bits := typ.Bits()
var storeTargetReg operand
if !isStackArg {
storeTargetReg = operandNR(result.Reg)
} else {
switch typ {
case ssa.TypeI32, ssa.TypeI64:
storeTargetReg = operandNR(x15VReg)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
storeTargetReg = operandNR(v15VReg)
default:
panic("TODO?")
}
}
var postIndexImm int64
if typ == ssa.TypeV128 {
postIndexImm = 16 // v128 is represented as 2x64-bit in Go slice.
} else {
postIndexImm = 8
}
if isStackArg {
var loadMode addressMode
cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true)
toReg := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
toReg.asULoad(storeTargetReg, loadMode, bits)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
toReg.asFpuLoad(storeTargetReg, loadMode, bits)
default:
panic("TODO?")
}
cur = linkInstr(cur, toReg)
}
mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
instr := m.allocateInstr()
instr.asStore(storeTargetReg, mode, bits)
cur = linkInstr(cur, instr)
return cur
}
func (m *machine) constructEntryPreamble(sig *ssa.Signature) (root *instruction) {
abi := backend.FunctionABI{}
abi.Init(sig, intParamResultRegs, floatParamResultRegs)
root = m.allocateNop()
//// ----------------------------------- prologue ----------------------------------- ////
// First, we save executionContextPtrReg into a callee-saved register so that it can be used in epilogue as well.
// mov savedExecutionContextPtr, x0
cur := m.move64(savedExecutionContextPtr, executionContextPtrReg, root)
// Next, save the current FP, SP and LR into the wazevo.executionContext:
// str fp, [savedExecutionContextPtr, #OriginalFramePointer]
// mov tmp, sp ;; sp cannot be str'ed directly.
// str sp, [savedExecutionContextPtr, #OriginalStackPointer]
// str lr, [savedExecutionContextPtr, #GoReturnAddress]
cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, true, cur)
cur = m.move64(tmpRegVReg, spVReg, cur)
cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, true, cur)
cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, true, cur)
// Then, move the Go-allocated stack pointer to SP:
// mov sp, goAllocatedStackPtr
cur = m.move64(spVReg, goAllocatedStackPtr, cur)
prReg := paramResultSlicePtr
if len(abi.Args) > 2 && len(abi.Rets) > 0 {
// paramResultSlicePtr is modified during the execution of goEntryPreamblePassArg,
// so copy it to another reg.
cur = m.move64(paramResultSliceCopied, paramResultSlicePtr, cur)
prReg = paramResultSliceCopied
}
stackSlotSize := int64(abi.AlignedArgResultStackSlotSize())
for i := range abi.Args {
if i < 2 {
// execution context ptr and module context ptr are passed in x0 and x1 by the Go assembly function.
continue
}
arg := &abi.Args[i]
cur = m.goEntryPreamblePassArg(cur, prReg, arg, -stackSlotSize)
}
// Call the real function.
bl := m.allocateInstr()
bl.asCallIndirect(functionExecutable, &abi)
cur = linkInstr(cur, bl)
///// ----------------------------------- epilogue ----------------------------------- /////
// Store the register results into paramResultSlicePtr.
for i := range abi.Rets {
cur = m.goEntryPreamblePassResult(cur, paramResultSlicePtr, &abi.Rets[i], abi.ArgStackSize-stackSlotSize)
}
// Finally, restore the FP, SP and LR, and return to the Go code.
// ldr fp, [savedExecutionContextPtr, #OriginalFramePointer]
// ldr tmp, [savedExecutionContextPtr, #OriginalStackPointer]
// mov sp, tmp ;; sp cannot be str'ed directly.
// ldr lr, [savedExecutionContextPtr, #GoReturnAddress]
// ret ;; --> return to the Go code
cur = m.loadOrStoreAtExecutionContext(fpVReg, wazevoapi.ExecutionContextOffsetOriginalFramePointer, false, cur)
cur = m.loadOrStoreAtExecutionContext(tmpRegVReg, wazevoapi.ExecutionContextOffsetOriginalStackPointer, false, cur)
cur = m.move64(spVReg, tmpRegVReg, cur)
cur = m.loadOrStoreAtExecutionContext(lrVReg, wazevoapi.ExecutionContextOffsetGoReturnAddress, false, cur)
retInst := m.allocateInstr()
retInst.asRet()
linkInstr(cur, retInst)
return
}
func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction {
instr := m.allocateInstr()
instr.asMove64(dst, src)
return linkInstr(prev, instr)
}
func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction {
instr := m.allocateInstr()
mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
if store {
instr.asStore(operandNR(d), mode, 64)
} else {
instr.asULoad(operandNR(d), mode, 64)
}
return linkInstr(prev, instr)
}
func linkInstr(prev, next *instruction) *instruction {
prev.next = next
next.prev = prev
return next
}

View file

@ -0,0 +1,428 @@
package arm64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
var calleeSavedRegistersSorted = []regalloc.VReg{
x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg,
v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
}
// CompileGoFunctionTrampoline implements backend.Machine.
func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte {
exct := m.executableContext
argBegin := 1 // Skips exec context by default.
if needModuleContextPtr {
argBegin++
}
abi := &backend.FunctionABI{}
abi.Init(sig, intParamResultRegs, floatParamResultRegs)
m.currentABI = abi
cur := m.allocateInstr()
cur.asNop0()
exct.RootInstr = cur
// Execution context is always the first argument.
execCtrPtr := x0VReg
// In the following, we create the following stack layout:
//
// (high address)
// SP ------> +-----------------+ <----+
// | ....... | |
// | ret Y | |
// | ....... | |
// | ret 0 | |
// | arg X | | size_of_arg_ret
// | ....... | |
// | arg 1 | |
// | arg 0 | <----+ <-------- originalArg0Reg
// | size_of_arg_ret |
// | ReturnAddress |
// +-----------------+ <----+
// | xxxx | | ;; might be padded to make it 16-byte aligned.
// +--->| arg[N]/ret[M] | |
// sliceSize| | ............ | | goCallStackSize
// | | arg[1]/ret[1] | |
// +--->| arg[0]/ret[0] | <----+ <-------- arg0ret0AddrReg
// | sliceSize |
// | frame_size |
// +-----------------+
// (low address)
//
// where the region of "arg[0]/ret[0] ... arg[N]/ret[M]" is the stack used by the Go functions,
// therefore will be accessed as the usual []uint64. So that's where we need to pass/receive
// the arguments/return values.
// First of all, update the SP and create the "ReturnAddress + size_of_arg_ret" slot.
cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)
const frameInfoSize = 16 // == frame_size + sliceSize.
// Next, we should allocate the stack for the Go function call if necessary.
goCallStackSize, sliceSizeInBytes := backend.GoFunctionCallRequiredStackSize(sig, argBegin)
cur = m.insertStackBoundsCheck(goCallStackSize+frameInfoSize, cur)
originalArg0Reg := x17VReg // Caller save, so we can use it for whatever we want.
if m.currentABI.AlignedArgResultStackSlotSize() > 0 {
// At this point, SP points to `ReturnAddress`, so add 16 to get the original arg 0 slot.
cur = m.addsAddOrSubStackPointer(cur, originalArg0Reg, frameInfoSize, true)
}
// Save the callee saved registers.
cur = m.saveRegistersInExecutionContext(cur, calleeSavedRegistersSorted)
if needModuleContextPtr {
offset := wazevoapi.ExecutionContextOffsetGoFunctionCallCalleeModuleContextOpaque.I64()
if !offsetFitsInAddressModeKindRegUnsignedImm12(64, offset) {
panic("BUG: too large or un-aligned offset for goFunctionCallCalleeModuleContextOpaque in execution context")
}
// Module context is always the second argument.
moduleCtrPtr := x1VReg
store := m.allocateInstr()
amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
store.asStore(operandNR(moduleCtrPtr), amode, 64)
cur = linkInstr(cur, store)
}
// Advances the stack pointer.
cur = m.addsAddOrSubStackPointer(cur, spVReg, goCallStackSize, false)
// Copy the pointer to x15VReg.
arg0ret0AddrReg := x15VReg // Caller save, so we can use it for whatever we want.
copySp := m.allocateInstr()
copySp.asMove64(arg0ret0AddrReg, spVReg)
cur = linkInstr(cur, copySp)
// Next, we need to store all the arguments to the stack in the typical Wasm stack style.
for i := range abi.Args[argBegin:] {
arg := &abi.Args[argBegin+i]
store := m.allocateInstr()
var v regalloc.VReg
if arg.Kind == backend.ABIArgKindReg {
v = arg.Reg
} else {
cur, v = m.goFunctionCallLoadStackArg(cur, originalArg0Reg, arg,
// Caller save, so we can use it for whatever we want.
x11VReg, v11VReg)
}
var sizeInBits byte
if arg.Type == ssa.TypeV128 {
sizeInBits = 128
} else {
sizeInBits = 64
}
store.asStore(operandNR(v),
addressMode{
kind: addressModeKindPostIndex,
rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8),
}, sizeInBits)
cur = linkInstr(cur, store)
}
// Finally, now that we've advanced SP to arg[0]/ret[0], we allocate `frame_size + sliceSize`.
var frameSizeReg, sliceSizeReg regalloc.VReg
if goCallStackSize > 0 {
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, goCallStackSize)
frameSizeReg = tmpRegVReg
cur = m.lowerConstantI64AndInsert(cur, x16VReg, sliceSizeInBytes/8)
sliceSizeReg = x16VReg
} else {
frameSizeReg = xzrVReg
sliceSizeReg = xzrVReg
}
_amode := addressModePreOrPostIndex(spVReg, -16, true)
storeP := m.allocateInstr()
storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)
cur = linkInstr(cur, storeP)
// Set the exit status on the execution context.
cur = m.setExitCode(cur, x0VReg, exitCode)
// Save the current stack pointer.
cur = m.saveCurrentStackPointer(cur, x0VReg)
// Exit the execution.
cur = m.storeReturnAddressAndExit(cur)
// After the call, we need to restore the callee saved registers.
cur = m.restoreRegistersInExecutionContext(cur, calleeSavedRegistersSorted)
// Get the pointer to the arg[0]/ret[0]: We need to skip `frame_size + sliceSize`.
if len(abi.Rets) > 0 {
cur = m.addsAddOrSubStackPointer(cur, arg0ret0AddrReg, frameInfoSize, true)
}
// Advances the SP so that it points to `ReturnAddress`.
cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)
ldr := m.allocateInstr()
// And load the return address.
ldr.asULoad(operandNR(lrVReg),
addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
cur = linkInstr(cur, ldr)
originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want.
if m.currentABI.RetStackSize > 0 {
cur = m.addsAddOrSubStackPointer(cur, originalRet0Reg, m.currentABI.ArgStackSize, true)
}
// Make the SP point to the original address (above the result slot).
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
}
for i := range abi.Rets {
r := &abi.Rets[i]
if r.Kind == backend.ABIArgKindReg {
loadIntoReg := m.allocateInstr()
mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
switch r.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoReg.asULoad(operandNR(r.Reg), mode, 32)
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoReg.asULoad(operandNR(r.Reg), mode, 64)
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32)
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64)
case ssa.TypeV128:
mode.imm = 16
loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128)
default:
panic("TODO")
}
cur = linkInstr(cur, loadIntoReg)
} else {
// First we need to load the value to a temporary just like ^^.
intTmp, floatTmp := x11VReg, v11VReg
loadIntoTmpReg := m.allocateInstr()
mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
var resultReg regalloc.VReg
switch r.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32)
resultReg = intTmp
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64)
resultReg = intTmp
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32)
resultReg = floatTmp
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64)
resultReg = floatTmp
case ssa.TypeV128:
mode.imm = 16
loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128)
resultReg = floatTmp
default:
panic("TODO")
}
cur = linkInstr(cur, loadIntoTmpReg)
cur = m.goFunctionCallStoreStackResult(cur, originalRet0Reg, r, resultReg)
}
}
ret := m.allocateInstr()
ret.asRet()
linkInstr(cur, ret)
m.encode(m.executableContext.RootInstr)
return m.compiler.Buf()
}
func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
store := m.allocateInstr()
var sizeInBits byte
switch v.RegType() {
case regalloc.RegTypeInt:
sizeInBits = 64
case regalloc.RegTypeFloat:
sizeInBits = 128
}
store.asStore(operandNR(v),
addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: offset,
}, sizeInBits)
store.prev = cur
cur.next = store
cur = store
offset += 16 // Imm12 must be 16-byte aligned for vector regs, so we unconditionally store regs at offsets that are multiples of 16.
}
return cur
}
func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []regalloc.VReg) *instruction {
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
load := m.allocateInstr()
var as func(dst operand, amode addressMode, sizeInBits byte)
var sizeInBits byte
switch v.RegType() {
case regalloc.RegTypeInt:
as = load.asULoad
sizeInBits = 64
case regalloc.RegTypeFloat:
as = load.asFpuLoad
sizeInBits = 128
}
as(operandNR(v),
addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: offset,
}, sizeInBits)
cur = linkInstr(cur, load)
offset += 16 // Imm12 must be 16-byte aligned for vector regs, so we unconditionally load regs at offsets that are multiples of 16.
}
return cur
}
func (m *machine) lowerConstantI64AndInsert(cur *instruction, dst regalloc.VReg, v int64) *instruction {
exct := m.executableContext
exct.PendingInstructions = exct.PendingInstructions[:0]
m.lowerConstantI64(dst, v)
for _, instr := range exct.PendingInstructions {
cur = linkInstr(cur, instr)
}
return cur
}
func (m *machine) lowerConstantI32AndInsert(cur *instruction, dst regalloc.VReg, v int32) *instruction {
exct := m.executableContext
exct.PendingInstructions = exct.PendingInstructions[:0]
m.lowerConstantI32(dst, v)
for _, instr := range exct.PendingInstructions {
cur = linkInstr(cur, instr)
}
return cur
}
func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode wazevoapi.ExitCode) *instruction {
constReg := x17VReg // caller-saved, so we can use it.
cur = m.lowerConstantI32AndInsert(cur, constReg, int32(exitCode))
// Set the exit status on the execution context.
setExitStatus := m.allocateInstr()
setExitStatus.asStore(operandNR(constReg),
addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
}, 32)
cur = linkInstr(cur, setExitStatus)
return cur
}
func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {
// Read the return address into tmp, and store it in the execution context.
adr := m.allocateInstr()
adr.asAdr(tmpRegVReg, exitSequenceSize+8)
cur = linkInstr(cur, adr)
storeReturnAddr := m.allocateInstr()
storeReturnAddr.asStore(operandNR(tmpRegVReg),
addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
}, 64)
cur = linkInstr(cur, storeReturnAddr)
// Exit the execution.
trapSeq := m.allocateInstr()
trapSeq.asExitSequence(x0VReg)
cur = linkInstr(cur, trapSeq)
return cur
}
func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VReg) *instruction {
// Save the current stack pointer:
// mov tmp, sp,
// str tmp, [exec_ctx, #stackPointerBeforeGoCall]
movSp := m.allocateInstr()
movSp.asMove64(tmpRegVReg, spVReg)
cur = linkInstr(cur, movSp)
strSp := m.allocateInstr()
strSp.asStore(operandNR(tmpRegVReg),
addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
}, 64)
cur = linkInstr(cur, strSp)
return cur
}
func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {
load := m.allocateInstr()
var result regalloc.VReg
mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
switch arg.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
load.asULoad(operandNR(intVReg), mode, 32)
result = intVReg
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
load.asULoad(operandNR(intVReg), mode, 64)
result = intVReg
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
load.asFpuLoad(operandNR(floatVReg), mode, 32)
result = floatVReg
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
load.asFpuLoad(operandNR(floatVReg), mode, 64)
result = floatVReg
case ssa.TypeV128:
mode.imm = 16
load.asFpuLoad(operandNR(floatVReg), mode, 128)
result = floatVReg
default:
panic("TODO")
}
cur = linkInstr(cur, load)
return cur, result
}
func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {
store := m.allocateInstr()
mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
var sizeInBits byte
switch result.Type {
case ssa.TypeI32, ssa.TypeF32:
mode.imm = 8
sizeInBits = 32
case ssa.TypeI64, ssa.TypeF64:
mode.imm = 8
sizeInBits = 64
case ssa.TypeV128:
mode.imm = 16
sizeInBits = 128
default:
panic("TODO")
}
store.asStore(operandNR(resultVReg), mode, sizeInBits)
return linkInstr(cur, store)
}
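As a side note on the two helpers above: every scalar argument or result occupies a full 8-byte slot (16 bytes for v128), and the post-index address mode advances the base register after each access. The following standalone sketch is hypothetical (not part of wazero) and only models that walk over an assumed slot sequence:

package main

import "fmt"

func main() {
	// Assumed slot sequence, for illustration only.
	slots := []string{"i32", "i64", "f32", "f64", "v128"}
	offset := 0 // displacement from the original arg0 pointer.
	for _, t := range slots {
		size := 8 // every scalar occupies a full 8-byte slot...
		if t == "v128" {
			size = 16 // ...while a v128 occupies 16 bytes.
		}
		fmt.Printf("%-4s read at arg0+%d, base register then advances by %d\n", t, offset, size)
		offset += size
	}
	fmt.Println("total area walked:", offset, "bytes") // 48
}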

View file

@ -0,0 +1,215 @@
package arm64
import (
"strconv"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
type (
cond uint64
condKind byte
)
const (
// condKindRegisterZero represents a condition which checks if the register is zero.
// This indicates that the instruction must be encoded as CBZ:
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero-
condKindRegisterZero condKind = iota
// condKindRegisterNotZero indicates that the instruction must be encoded as CBNZ:
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero-
condKindRegisterNotZero
// condKindCondFlagSet indicates that the instruction must be encoded as B.cond:
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-
condKindCondFlagSet
)
// kind returns the kind of condition which is stored in the first two bits.
func (c cond) kind() condKind {
return condKind(c & 0b11)
}
func (c cond) asUint64() uint64 {
return uint64(c)
}
// register returns the register for register conditions.
// This panics if the condition is not a register condition (condKindRegisterZero or condKindRegisterNotZero).
func (c cond) register() regalloc.VReg {
if c.kind() != condKindRegisterZero && c.kind() != condKindRegisterNotZero {
panic("condition is not a register")
}
return regalloc.VReg(c >> 2)
}
func registerAsRegZeroCond(r regalloc.VReg) cond {
return cond(r)<<2 | cond(condKindRegisterZero)
}
func registerAsRegNotZeroCond(r regalloc.VReg) cond {
return cond(r)<<2 | cond(condKindRegisterNotZero)
}
func (c cond) flag() condFlag {
if c.kind() != condKindCondFlagSet {
panic("condition is not a flag")
}
return condFlag(c >> 2)
}
func (c condFlag) asCond() cond {
return cond(c)<<2 | cond(condKindCondFlagSet)
}
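To make the bit-packing above concrete, here is a minimal standalone sketch (with assumed stand-in types, not wazero's) showing that the low two bits carry the condition kind while the remaining bits carry either a register ID or a condition flag:

package main

import "fmt"

type cond uint64

const (
	kindRegisterZero    = 0
	kindRegisterNotZero = 1
	kindCondFlagSet     = 2
)

func regZeroCond(reg uint64) cond { return cond(reg<<2 | kindRegisterZero) }
func flagCond(flag uint64) cond   { return cond(flag<<2 | kindCondFlagSet) }
func (c cond) kind() uint64       { return uint64(c) & 0b11 }
func (c cond) payload() uint64    { return uint64(c) >> 2 }

func main() {
	c := regZeroCond(17) // e.g. "branch if virtual register 17 is zero".
	fmt.Println(c.kind() == kindRegisterZero, c.payload() == 17) // true true

	f := flagCond(10) // e.g. the condition flag "ge" (value 10 in the enum above).
	fmt.Println(f.kind() == kindCondFlagSet, f.payload() == 10) // true true
}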
// condFlag represents a condition flag for conditional branches.
// The value matches the encoding of condition flags in the ARM64 instruction set.
// https://developer.arm.com/documentation/den0024/a/The-A64-instruction-set/Data-processing-instructions/Conditional-instructions
type condFlag uint8
const (
eq condFlag = iota // eq represents "equal"
ne // ne represents "not equal"
hs // hs represents "higher or same"
lo // lo represents "lower"
mi // mi represents "minus or negative result"
pl // pl represents "plus or positive result"
vs // vs represents "overflow set"
vc // vc represents "overflow clear"
hi // hi represents "higher"
ls // ls represents "lower or same"
ge // ge represents "greater or equal"
lt // lt represents "less than"
gt // gt represents "greater than"
le // le represents "less than or equal"
al // al represents "always"
nv // nv represents "never"
)
// invert returns the inverted condition.
func (c condFlag) invert() condFlag {
switch c {
case eq:
return ne
case ne:
return eq
case hs:
return lo
case lo:
return hs
case mi:
return pl
case pl:
return mi
case vs:
return vc
case vc:
return vs
case hi:
return ls
case ls:
return hi
case ge:
return lt
case lt:
return ge
case gt:
return le
case le:
return gt
case al:
return nv
case nv:
return al
default:
panic(c)
}
}
// String implements fmt.Stringer.
func (c condFlag) String() string {
switch c {
case eq:
return "eq"
case ne:
return "ne"
case hs:
return "hs"
case lo:
return "lo"
case mi:
return "mi"
case pl:
return "pl"
case vs:
return "vs"
case vc:
return "vc"
case hi:
return "hi"
case ls:
return "ls"
case ge:
return "ge"
case lt:
return "lt"
case gt:
return "gt"
case le:
return "le"
case al:
return "al"
case nv:
return "nv"
default:
panic(strconv.Itoa(int(c)))
}
}
// condFlagFromSSAIntegerCmpCond returns the condition flag for the given ssa.IntegerCmpCond.
func condFlagFromSSAIntegerCmpCond(c ssa.IntegerCmpCond) condFlag {
switch c {
case ssa.IntegerCmpCondEqual:
return eq
case ssa.IntegerCmpCondNotEqual:
return ne
case ssa.IntegerCmpCondSignedLessThan:
return lt
case ssa.IntegerCmpCondSignedGreaterThanOrEqual:
return ge
case ssa.IntegerCmpCondSignedGreaterThan:
return gt
case ssa.IntegerCmpCondSignedLessThanOrEqual:
return le
case ssa.IntegerCmpCondUnsignedLessThan:
return lo
case ssa.IntegerCmpCondUnsignedGreaterThanOrEqual:
return hs
case ssa.IntegerCmpCondUnsignedGreaterThan:
return hi
case ssa.IntegerCmpCondUnsignedLessThanOrEqual:
return ls
default:
panic(c)
}
}
// condFlagFromSSAFloatCmpCond returns the condition flag for the given ssa.FloatCmpCond.
func condFlagFromSSAFloatCmpCond(c ssa.FloatCmpCond) condFlag {
switch c {
case ssa.FloatCmpCondEqual:
return eq
case ssa.FloatCmpCondNotEqual:
return ne
case ssa.FloatCmpCondLessThan:
return mi
case ssa.FloatCmpCondLessThanOrEqual:
return ls
case ssa.FloatCmpCondGreaterThan:
return gt
case ssa.FloatCmpCondGreaterThanOrEqual:
return ge
default:
panic(c)
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,301 @@
package arm64
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
val := instr.Return()
valType := val.Type()
vr = m.compiler.AllocateVReg(valType)
v := instr.ConstantVal()
m.insertLoadConstant(v, valType, vr)
return
}
// InsertLoadConstantBlockArg implements backend.Machine.
func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
val := instr.Return()
valType := val.Type()
v := instr.ConstantVal()
load := m.allocateInstr()
load.asLoadConstBlockArg(v, valType, vr)
m.insert(load)
}
func (m *machine) lowerLoadConstantBlockArgAfterRegAlloc(i *instruction) {
v, typ, dst := i.loadConstBlockArgData()
m.insertLoadConstant(v, typ, dst)
}
func (m *machine) insertLoadConstant(v uint64, valType ssa.Type, vr regalloc.VReg) {
if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
v = v & ((1 << valType.Bits()) - 1)
}
switch valType {
case ssa.TypeF32:
loadF := m.allocateInstr()
loadF.asLoadFpuConst32(vr, v)
m.insert(loadF)
case ssa.TypeF64:
loadF := m.allocateInstr()
loadF.asLoadFpuConst64(vr, v)
m.insert(loadF)
case ssa.TypeI32:
if v == 0 {
m.InsertMove(vr, xzrVReg, ssa.TypeI32)
} else {
m.lowerConstantI32(vr, int32(v))
}
case ssa.TypeI64:
if v == 0 {
m.InsertMove(vr, xzrVReg, ssa.TypeI64)
} else {
m.lowerConstantI64(vr, int64(v))
}
default:
panic("TODO")
}
}
// The following logic is based on the old asm/arm64 package.
// https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go
func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) {
// Following the logic here:
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
ic := int64(uint32(c))
if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
if isBitMaskImmediate(uint64(c), false) {
m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false)
return
}
}
if t := const16bitAligned(int64(uint32(c))); t >= 0 {
// If the const fits within a single 16-bit aligned half-word, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
// we can load it into the register with a single movz.
m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false)
} else if t := const16bitAligned(int64(^c)); t >= 0 {
// Also, if the inverse of the const can fit within 16-bit range, do the same ^^.
m.insertMOVN(dst, uint64(^c>>(16*t)), t, false)
} else if isBitMaskImmediate(uint64(uint32(c)), false) {
m.lowerConstViaBitMaskImmediate(uint64(c), dst, false)
} else {
// Otherwise, we use MOVZ and MOVK to load it.
c16 := uint16(c)
m.insertMOVZ(dst, uint64(c16), 0, false)
c16 = uint16(uint32(c) >> 16)
m.insertMOVK(dst, uint64(c16), 1, false)
}
}
func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) {
// Following the logic here:
// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
if isBitMaskImmediate(uint64(c), true) {
m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
return
}
}
if t := const16bitAligned(c); t >= 0 {
// If the const fits within a single 16-bit aligned half-word, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
// we can load it into the register with a single movz.
m.insertMOVZ(dst, uint64(c)>>(16*t), t, true)
} else if t := const16bitAligned(^c); t >= 0 {
// Also, if the reverse of the const can fit within 16-bit range, do the same ^^.
m.insertMOVN(dst, uint64(^c)>>(16*t), t, true)
} else if isBitMaskImmediate(uint64(c), true) {
m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
} else {
m.load64bitConst(c, dst)
}
}
func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) {
instr := m.allocateInstr()
instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64)
m.insert(instr)
}
// isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
//
// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
//
// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
func isBitMaskImmediate(x uint64, _64 bool) bool {
// All zeros and ones are not "bitmask immediate" by definition.
if x == 0 || (_64 && x == 0xffff_ffff_ffff_ffff) || (!_64 && x == 0xffff_ffff) {
return false
}
switch {
case x != x>>32|x<<32:
// e = 64
case x != x>>16|x<<48:
// e = 32 (x == x>>32|x<<32).
// e.g. 0x00ff_ff00_00ff_ff00
x = uint64(int32(x))
case x != x>>8|x<<56:
// e = 16 (x == x>>16|x<<48).
// e.g. 0x00ff_00ff_00ff_00ff
x = uint64(int16(x))
case x != x>>4|x<<60:
// e = 8 (x == x>>8|x<<56).
// e.g. 0x0f0f_0f0f_0f0f_0f0f
x = uint64(int8(x))
default:
// e = 4 or 2.
return true
}
return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
}
// sequenceOfSetbits returns true if the number's binary representation is a single consecutive sequence of set bits (1s).
// For example: 0b1110 -> true, 0b1010 -> false
func sequenceOfSetbits(x uint64) bool {
y := getLowestBit(x)
// If x is a sequence of set bits, this should result in a number
// with only one set bit (i.e. a power of two).
y += x
return (y-1)&y == 0
}
func getLowestBit(x uint64) uint64 {
return x & (^x + 1)
}
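A quick worked check of sequenceOfSetbits on a few values, using standalone copies of the two helpers above so the snippet runs in isolation:

package main

import "fmt"

func getLowestBit(x uint64) uint64 { return x & (^x + 1) }

func sequenceOfSetbits(x uint64) bool {
	y := getLowestBit(x)
	y += x
	return (y-1)&y == 0
}

func main() {
	fmt.Println(sequenceOfSetbits(0b1110))                // true: one contiguous run of ones.
	fmt.Println(sequenceOfSetbits(0b1010))                // false: the ones are split.
	fmt.Println(sequenceOfSetbits(0x0000_ffff_0000_0000)) // true: a contiguous run in the middle.
}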
// const16bitAligned checks if all the set bits of the value fit within a single 16-bit aligned half-word.
// If so, it returns the shift amount divided by 16; otherwise, -1.
func const16bitAligned(v int64) (ret int) {
ret = -1
for s := 0; s < 64; s += 16 {
if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
ret = s / 16
break
}
}
return
}
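A standalone illustration of const16bitAligned's return value, assuming the same semantics as above (the index of the 16-bit half-word that holds all the set bits, or -1):

package main

import "fmt"

func const16bitAligned(v int64) int {
	for s := 0; s < 64; s += 16 {
		if uint64(v)&^(uint64(0xffff)<<uint(s)) == 0 {
			return s / 16
		}
	}
	return -1
}

func main() {
	fmt.Println(const16bitAligned(0xffff))      // 0: fits in bits 0..15.
	fmt.Println(const16bitAligned(0xffff_0000)) // 1: fits in bits 16..31.
	fmt.Println(const16bitAligned(0x1_0001))    // -1: spans two half-words.
}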
// load64bitConst loads a 64-bit constant into the register, following the same logic to decide how to load large 64-bit
// consts as in the Go assembler.
//
// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
var bits [4]uint64
var zeros, negs int
for i := 0; i < 4; i++ {
bits[i] = uint64(c) >> uint(i*16) & 0xffff
if v := bits[i]; v == 0 {
zeros++
} else if v == 0xffff {
negs++
}
}
if zeros == 3 {
// one MOVZ instruction.
for i, v := range bits {
if v != 0 {
m.insertMOVZ(dst, v, i, true)
}
}
} else if negs == 3 {
// one MOVN instruction.
for i, v := range bits {
if v != 0xffff {
v = ^v
m.insertMOVN(dst, v, i, true)
}
}
} else if zeros == 2 {
// one MOVZ then one MOVK.
var movz bool
for i, v := range bits {
if !movz && v != 0 { // MOVZ.
m.insertMOVZ(dst, v, i, true)
movz = true
} else if v != 0 {
m.insertMOVK(dst, v, i, true)
}
}
} else if negs == 2 {
// one MOVN then one MOVK.
var movn bool
for i, v := range bits { // Emit MOVN.
if !movn && v != 0xffff {
v = ^v
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
m.insertMOVN(dst, v, i, true)
movn = true
} else if v != 0xffff {
m.insertMOVK(dst, v, i, true)
}
}
} else if zeros == 1 {
// one MOVZ then two MOVK.
var movz bool
for i, v := range bits {
if !movz && v != 0 { // MOVZ.
m.insertMOVZ(dst, v, i, true)
movz = true
} else if v != 0 {
m.insertMOVK(dst, v, i, true)
}
}
} else if negs == 1 {
// one MOVN then two MOVK.
var movn bool
for i, v := range bits { // Emit MOVN.
if !movn && v != 0xffff {
v = ^v
// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
m.insertMOVN(dst, v, i, true)
movn = true
} else if v != 0xffff {
m.insertMOVK(dst, v, i, true)
}
}
} else {
// one MOVZ then up to three MOVK.
var movz bool
for i, v := range bits {
if !movz && v != 0 { // MOVZ.
m.insertMOVZ(dst, v, i, true)
movz = true
} else if v != 0 {
m.insertMOVK(dst, v, i, true)
}
}
}
}
func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
instr.asMOVZ(dst, v, uint64(shift), dst64)
m.insert(instr)
}
func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
instr.asMOVK(dst, v, uint64(shift), dst64)
m.insert(instr)
}
func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
instr.asMOVN(dst, v, uint64(shift), dst64)
m.insert(instr)
}
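To see how load64bitConst chooses a sequence, the sketch below (hypothetical and standalone) splits a sample constant into its four 16-bit pieces and counts the all-zero and all-one ones, mirroring the decision logic above:

package main

import "fmt"

func main() {
	c := uint64(0x0000_1234_0000_5678)
	var bits [4]uint64
	zeros, negs := 0, 0
	for i := 0; i < 4; i++ {
		bits[i] = c >> uint(i*16) & 0xffff
		if bits[i] == 0 {
			zeros++
		} else if bits[i] == 0xffff {
			negs++
		}
	}
	fmt.Printf("halfwords=%x zeros=%d negs=%d\n", bits, zeros, negs)
	// zeros=2, negs=0: one MOVZ for the first non-zero half-word, one MOVK for the other, i.e.
	//   movz x?, #0x5678, lsl #0
	//   movk x?, #0x1234, lsl #32
}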

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,350 @@
package arm64
// This file contains the logic to "find and determine operands" for instructions.
// In order to finalize the form of an operand, we might end up merging/eliminating
// the source instructions into an operand whenever possible.
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
type (
// operand represents an operand of an instruction whose type is determined by the kind.
operand struct {
kind operandKind
data, data2 uint64
}
operandKind byte
)
// Here's the list of operand kinds. We use the abbreviation of the kind name not only for these consts,
// but also for the names of the functions which return operands of those kinds.
const (
// operandKindNR represents "NormalRegister" (NR). This is literally the register without any special operation unlike others.
operandKindNR operandKind = iota
// operandKindSR represents "Shifted Register" (SR). This is a register which is shifted by a constant.
// Some of the arm64 instructions can take this kind of operand.
operandKindSR
// operandKindER represents "Extended Register (ER). This is a register which is sign/zero-extended to a larger size.
// Some of the arm64 instructions can take this kind of operand.
operandKindER
// operandKindImm12 represents "Immediate 12" (Imm12). This is a 12-bit immediate value which can be either shifted by 12 or not.
// See asImm12 function for detail.
operandKindImm12
// operandKindShiftImm represents "Shifted Immediate" (ShiftImm) used by shift operations.
operandKindShiftImm
)
// String implements fmt.Stringer for debugging.
func (o operand) format(size byte) string {
switch o.kind {
case operandKindNR:
return formatVRegSized(o.nr(), size)
case operandKindSR:
r, amt, sop := o.sr()
return fmt.Sprintf("%s, %s #%d", formatVRegSized(r, size), sop, amt)
case operandKindER:
r, eop, _ := o.er()
return fmt.Sprintf("%s %s", formatVRegSized(r, size), eop)
case operandKindImm12:
imm12, shiftBit := o.imm12()
if shiftBit == 1 {
return fmt.Sprintf("#%#x", uint64(imm12)<<12)
} else {
return fmt.Sprintf("#%#x", imm12)
}
default:
panic(fmt.Sprintf("unknown operand kind: %d", o.kind))
}
}
// operandNR encodes the given VReg as an operand of operandKindNR.
func operandNR(r regalloc.VReg) operand {
return operand{kind: operandKindNR, data: uint64(r)}
}
// nr decodes the underlying VReg assuming the operand is of operandKindNR.
func (o operand) nr() regalloc.VReg {
return regalloc.VReg(o.data)
}
// operandER encodes the given VReg as an operand of operandKindER.
func operandER(r regalloc.VReg, eop extendOp, to byte) operand {
if to < 32 {
panic("TODO?BUG?: when we need to extend to less than 32 bits?")
}
return operand{kind: operandKindER, data: uint64(r), data2: uint64(eop)<<32 | uint64(to)}
}
// er decodes the underlying VReg, extend operation, and the target size assuming the operand is of operandKindER.
func (o operand) er() (r regalloc.VReg, eop extendOp, to byte) {
return regalloc.VReg(o.data), extendOp(o.data2>>32) & 0xff, byte(o.data2 & 0xff)
}
// operandSR encodes the given VReg as an operand of operandKindSR.
func operandSR(r regalloc.VReg, amt byte, sop shiftOp) operand {
return operand{kind: operandKindSR, data: uint64(r), data2: uint64(amt)<<32 | uint64(sop)}
}
// sr decodes the underlying VReg, shift amount, and shift operation assuming the operand is of operandKindSR.
func (o operand) sr() (r regalloc.VReg, amt byte, sop shiftOp) {
return regalloc.VReg(o.data), byte(o.data2>>32) & 0xff, shiftOp(o.data2) & 0xff
}
// operandImm12 encodes the given imm12 as an operand of operandKindImm12.
func operandImm12(imm12 uint16, shiftBit byte) operand {
return operand{kind: operandKindImm12, data: uint64(imm12) | uint64(shiftBit)<<32}
}
// imm12 decodes the underlying imm12 data assuming the operand is of operandKindImm12.
func (o operand) imm12() (v uint16, shiftBit byte) {
return uint16(o.data), byte(o.data >> 32)
}
// operandShiftImm encodes the given amount as an operand of operandKindShiftImm.
func operandShiftImm(amount byte) operand {
return operand{kind: operandKindShiftImm, data: uint64(amount)}
}
// shiftImm decodes the underlying shift amount data assuming the operand is of operandKindShiftImm.
func (o operand) shiftImm() byte {
return byte(o.data)
}
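A minimal standalone sketch of the same packing idea for shifted-register operands (with an assumed field layout mirroring the operand struct above, not the package's actual types):

package main

import "fmt"

type operand struct{ data, data2 uint64 }

// operandSR packs the register into data, and the shift amount and shift op into data2.
func operandSR(reg uint64, amt byte, sop byte) operand {
	return operand{data: reg, data2: uint64(amt)<<32 | uint64(sop)}
}

// sr unpacks the fields written by operandSR.
func (o operand) sr() (reg uint64, amt byte, sop byte) {
	return o.data, byte(o.data2>>32) & 0xff, byte(o.data2) & 0xff
}

func main() {
	o := operandSR(42, 3, 1) // hypothetical: register 42, shifted left by 3, shift op 1.
	r, amt, sop := o.sr()
	fmt.Println(r == 42, amt == 3, sop == 1) // true true true
}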
// reg returns the register of the operand if applicable.
func (o operand) reg() regalloc.VReg {
switch o.kind {
case operandKindNR:
return o.nr()
case operandKindSR:
r, _, _ := o.sr()
return r
case operandKindER:
r, _, _ := o.er()
return r
case operandKindImm12:
// Does not have a register.
case operandKindShiftImm:
// Does not have a register.
default:
panic(o.kind)
}
return regalloc.VRegInvalid
}
func (o operand) realReg() regalloc.RealReg {
return o.nr().RealReg()
}
func (o operand) assignReg(v regalloc.VReg) operand {
switch o.kind {
case operandKindNR:
return operandNR(v)
case operandKindSR:
_, amt, sop := o.sr()
return operandSR(v, amt, sop)
case operandKindER:
_, eop, to := o.er()
return operandER(v, eop, to)
case operandKindImm12:
// Does not have a register.
case operandKindShiftImm:
// Does not have a register.
}
panic(o.kind)
}
// getOperand_Imm12_ER_SR_NR returns an operand of either operandKindImm12, operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
// If the operand can be expressed as operandKindImm12, `mode` is ignored.
func (m *machine) getOperand_Imm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg)
}
instr := def.Instr
if instr.Opcode() == ssa.OpcodeIconst {
if imm12Op, ok := asImm12Operand(instr.ConstantVal()); ok {
instr.MarkLowered()
return imm12Op
}
}
return m.getOperand_ER_SR_NR(def, mode)
}
// getOperand_MaybeNegatedImm12_ER_SR_NR is almost the same as getOperand_Imm12_ER_SR_NR, but this might negate the immediate value.
// If the immediate value is negated, the second return value is true, otherwise always false.
func (m *machine) getOperand_MaybeNegatedImm12_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand, negatedImm12 bool) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg), false
}
instr := def.Instr
if instr.Opcode() == ssa.OpcodeIconst {
c := instr.ConstantVal()
if imm12Op, ok := asImm12Operand(c); ok {
instr.MarkLowered()
return imm12Op, false
}
signExtended := int64(c)
if def.SSAValue().Type().Bits() == 32 {
signExtended = (signExtended << 32) >> 32
}
negatedWithoutSign := -signExtended
if imm12Op, ok := asImm12Operand(uint64(negatedWithoutSign)); ok {
instr.MarkLowered()
return imm12Op, true
}
}
return m.getOperand_ER_SR_NR(def, mode), false
}
// getOperand_ER_SR_NR returns an operand of either operandKindER, operandKindSR, or operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg)
}
if m.compiler.MatchInstr(def, ssa.OpcodeSExtend) || m.compiler.MatchInstr(def, ssa.OpcodeUExtend) {
extInstr := def.Instr
signed := extInstr.Opcode() == ssa.OpcodeSExtend
innerExtFromBits, innerExtToBits := extInstr.ExtendFromToBits()
modeBits, modeSigned := mode.bits(), mode.signed()
if mode == extModeNone || innerExtToBits == modeBits {
eop := extendOpFrom(signed, innerExtFromBits)
extArg := m.getOperand_NR(m.compiler.ValueDefinition(extInstr.Arg()), extModeNone)
op = operandER(extArg.nr(), eop, innerExtToBits)
extInstr.MarkLowered()
return
}
if innerExtToBits > modeBits {
panic("BUG?TODO?: need the results of inner extension to be larger than the mode")
}
switch {
case (!signed && !modeSigned) || (signed && modeSigned):
// Two sign/zero extensions are equivalent to one sign/zero extension for the larger size.
eop := extendOpFrom(modeSigned, innerExtFromBits)
op = operandER(m.compiler.VRegOf(extInstr.Arg()), eop, modeBits)
extInstr.MarkLowered()
case (signed && !modeSigned) || (!signed && modeSigned):
// We need to {sign, zero}-extend the result of the {zero,sign} extension.
eop := extendOpFrom(modeSigned, innerExtToBits)
op = operandER(m.compiler.VRegOf(extInstr.Return()), eop, modeBits)
// Note that we fail to merge the inner extension instruction in this case.
}
return
}
return m.getOperand_SR_NR(def, mode)
}
// getOperand_SR_NR returns an operand of either operandKindSR or operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
func (m *machine) getOperand_SR_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg)
}
if m.compiler.MatchInstr(def, ssa.OpcodeIshl) {
// Check if the shift amount is constant instruction.
targetVal, amountVal := def.Instr.Arg2()
targetVReg := m.getOperand_NR(m.compiler.ValueDefinition(targetVal), extModeNone).nr()
amountDef := m.compiler.ValueDefinition(amountVal)
if amountDef.IsFromInstr() && amountDef.Instr.Constant() {
// If that is the case, we can use the shifted register operand (SR).
c := byte(amountDef.Instr.ConstantVal()) & (targetVal.Type().Bits() - 1) // Clears the unnecessary bits.
def.Instr.MarkLowered()
amountDef.Instr.MarkLowered()
return operandSR(targetVReg, c, shiftOpLSL)
}
}
return m.getOperand_NR(def, mode)
}
// getOperand_ShiftImm_NR returns an operand of either operandKindShiftImm or operandKindNR from the given value (defined by `def`).
func (m *machine) getOperand_ShiftImm_NR(def *backend.SSAValueDefinition, mode extMode, shiftBitWidth byte) (op operand) {
if def.IsFromBlockParam() {
return operandNR(def.BlkParamVReg)
}
instr := def.Instr
if instr.Constant() {
amount := byte(instr.ConstantVal()) & (shiftBitWidth - 1) // Clears the unnecessary bits.
return operandShiftImm(amount)
}
return m.getOperand_NR(def, mode)
}
// getOperand_NR returns an operand of operandKindNR from the given value (defined by `def`).
//
// `mode` is used to extend the operand if the bit length is smaller than mode.bits().
func (m *machine) getOperand_NR(def *backend.SSAValueDefinition, mode extMode) (op operand) {
var v regalloc.VReg
if def.IsFromBlockParam() {
v = def.BlkParamVReg
} else {
instr := def.Instr
if instr.Constant() {
// We inline all the constant instructions so that we can reduce register usage.
v = m.lowerConstant(instr)
instr.MarkLowered()
} else {
if n := def.N; n == 0 {
v = m.compiler.VRegOf(instr.Return())
} else {
_, rs := instr.Returns()
v = m.compiler.VRegOf(rs[n-1])
}
}
}
r := v
switch inBits := def.SSAValue().Type().Bits(); {
case mode == extModeNone:
case inBits == 32 && (mode == extModeZeroExtend32 || mode == extModeSignExtend32):
case inBits == 32 && mode == extModeZeroExtend64:
extended := m.compiler.AllocateVReg(ssa.TypeI64)
ext := m.allocateInstr()
ext.asExtend(extended, v, 32, 64, false)
m.insert(ext)
r = extended
case inBits == 32 && mode == extModeSignExtend64:
extended := m.compiler.AllocateVReg(ssa.TypeI64)
ext := m.allocateInstr()
ext.asExtend(extended, v, 32, 64, true)
m.insert(ext)
r = extended
case inBits == 64 && (mode == extModeZeroExtend64 || mode == extModeSignExtend64):
}
return operandNR(r)
}
func asImm12Operand(val uint64) (op operand, ok bool) {
v, shiftBit, ok := asImm12(val)
if !ok {
return operand{}, false
}
return operandImm12(v, shiftBit), true
}
func asImm12(val uint64) (v uint16, shiftBit byte, ok bool) {
const mask1, mask2 uint64 = 0xfff, 0xfff_000
if val&^mask1 == 0 {
return uint16(val), 0, true
} else if val&^mask2 == 0 {
return uint16(val >> 12), 1, true
} else {
return 0, 0, false
}
}
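A standalone copy of asImm12 showing which constants the 12-bit immediate form accepts (a 12-bit value, optionally shifted left by 12):

package main

import "fmt"

func asImm12(val uint64) (v uint16, shiftBit byte, ok bool) {
	const mask1, mask2 uint64 = 0xfff, 0xfff_000
	if val&^mask1 == 0 {
		return uint16(val), 0, true
	} else if val&^mask2 == 0 {
		return uint16(val >> 12), 1, true
	}
	return 0, 0, false
}

func main() {
	v, s, ok := asImm12(0xabc)
	fmt.Printf("%#x shift=%d ok=%v\n", v, s, ok) // 0xabc shift=0 ok=true
	v, s, ok = asImm12(0xabc000)
	fmt.Printf("%#x shift=%d ok=%v\n", v, s, ok) // 0xabc shift=1 ok=true
	v, s, ok = asImm12(0xabc001)
	fmt.Printf("%#x shift=%d ok=%v\n", v, s, ok) // not encodable: 0x0 shift=0 ok=false
}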

View file

@ -0,0 +1,440 @@
package arm64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type (
// addressMode represents an ARM64 addressing mode.
//
// https://developer.arm.com/documentation/102374/0101/Loads-and-stores---addressing
// TODO: use the bit-packed layout like operand struct.
addressMode struct {
kind addressModeKind
rn, rm regalloc.VReg
extOp extendOp
imm int64
}
// addressModeKind represents the kind of ARM64 addressing mode.
addressModeKind byte
)
const (
// addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended,
// and then scaled by bits(type)/8.
//
// e.g.
// - ldrh w1, [x2, w3, SXTW #1] ;; sign-extended and scaled by 2 (== LSL #1)
// - strh w1, [x2, w3, UXTW #1] ;; zero-extended and scaled by 2 (== LSL #1)
// - ldr w1, [x2, w3, SXTW #2] ;; sign-extended and scaled by 4 (== LSL #2)
// - str x1, [x2, w3, UXTW #3] ;; zero-extended and scaled by 8 (== LSL #3)
//
// See the following pages:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--register---Load-Register-Halfword--register--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--
addressModeKindRegScaledExtended addressModeKind = iota
// addressModeKindRegScaled is the same as addressModeKindRegScaledExtended, but without extension factor.
addressModeKindRegScaled
// addressModeKindRegExtended is the same as addressModeKindRegScaledExtended, but without the scale factor.
addressModeKindRegExtended
// addressModeKindRegReg takes a base register and an index register. The index register is neither scaled nor extended.
addressModeKindRegReg
// addressModeKindRegSignedImm9 takes a base register and a 9-bit "signed" immediate offset (-256 to 255).
// The immediate will be sign-extended, and be added to the base register.
// This is a.k.a. "unscaled" since the immediate is not scaled.
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--
addressModeKindRegSignedImm9
// addressModeKindRegUnsignedImm12 takes a base register and a 12-bit "unsigned" immediate offset scaled by
// the size of the type. In other words, the actual offset will be imm12 * bits(type)/8.
// See "Unsigned offset" in the following pages:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
addressModeKindRegUnsignedImm12
// addressModeKindPostIndex takes a base register and a 9-bit "signed" immediate offset.
// After the load/store, the base register will be updated by the offset.
//
// Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset.
//
// See "Post-index" in the following pages for examples:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
addressModeKindPostIndex
// addressModeKindPreIndex takes a base register and a 9-bit "signed" immediate offset.
// Before the load/store, the base register will be updated by the offset.
//
// Note that when this is used for pair load/store, the offset will be 7-bit "signed" immediate offset.
//
// See "Pre-index" in the following pages for examples:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRB--immediate---Load-Register-Byte--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
addressModeKindPreIndex
// addressModeKindArgStackSpace is used to resolve the address of the argument stack space
// exiting right above the stack pointer. Since we don't know the exact stack space needed for a function
// at a compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above.
addressModeKindArgStackSpace
// addressModeKindResultStackSpace is used to resolve the address of the result stack space
// exiting right above the stack pointer. Since we don't know the exact stack space needed for a function
// at a compilation phase, this is used as a placeholder and further lowered to a real addressing mode like above.
addressModeKindResultStackSpace
)
func (a addressMode) format(dstSizeBits byte) (ret string) {
base := formatVRegSized(a.rn, 64)
if rn := a.rn; rn.RegType() != regalloc.RegTypeInt {
panic("invalid base register type: " + a.rn.RegType().String())
} else if rn.IsRealReg() && v0 <= a.rn.RealReg() && a.rn.RealReg() <= v30 {
panic("BUG: likely a bug in reg alloc or reset behavior")
}
switch a.kind {
case addressModeKindRegScaledExtended:
amount := a.sizeInBitsToShiftAmount(dstSizeBits)
ret = fmt.Sprintf("[%s, %s, %s #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp, amount)
case addressModeKindRegScaled:
amount := a.sizeInBitsToShiftAmount(dstSizeBits)
ret = fmt.Sprintf("[%s, %s, lsl #%#x]", base, formatVRegSized(a.rm, a.indexRegBits()), amount)
case addressModeKindRegExtended:
ret = fmt.Sprintf("[%s, %s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()), a.extOp)
case addressModeKindRegReg:
ret = fmt.Sprintf("[%s, %s]", base, formatVRegSized(a.rm, a.indexRegBits()))
case addressModeKindRegSignedImm9:
if a.imm != 0 {
ret = fmt.Sprintf("[%s, #%#x]", base, a.imm)
} else {
ret = fmt.Sprintf("[%s]", base)
}
case addressModeKindRegUnsignedImm12:
if a.imm != 0 {
ret = fmt.Sprintf("[%s, #%#x]", base, a.imm)
} else {
ret = fmt.Sprintf("[%s]", base)
}
case addressModeKindPostIndex:
ret = fmt.Sprintf("[%s], #%#x", base, a.imm)
case addressModeKindPreIndex:
ret = fmt.Sprintf("[%s, #%#x]!", base, a.imm)
case addressModeKindArgStackSpace:
ret = fmt.Sprintf("[#arg_space, #%#x]", a.imm)
case addressModeKindResultStackSpace:
ret = fmt.Sprintf("[#ret_space, #%#x]", a.imm)
}
return
}
func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode {
if !offsetFitsInAddressModeKindRegSignedImm9(imm) {
panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))
}
if preIndex {
return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
} else {
return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
}
}
func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool {
divisor := int64(dstSizeInBits) / 8
return 0 < offset && offset%divisor == 0 && offset/divisor < 4096
}
func offsetFitsInAddressModeKindRegSignedImm9(offset int64) bool {
return -256 <= offset && offset <= 255
}
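Standalone copies of the two predicates above, illustrating the encodable ranges with a few sample offsets: the unsigned 12-bit form must be a positive multiple of the access size and below size*4096, while the signed 9-bit form covers -256..255 regardless of size.

package main

import "fmt"

func fitsUnsignedImm12(dstSizeInBits byte, offset int64) bool {
	divisor := int64(dstSizeInBits) / 8
	return 0 < offset && offset%divisor == 0 && offset/divisor < 4096
}

func fitsSignedImm9(offset int64) bool { return -256 <= offset && offset <= 255 }

func main() {
	fmt.Println(fitsUnsignedImm12(64, 32760)) // true: 8-byte access, 4095*8 is the largest encodable offset.
	fmt.Println(fitsUnsignedImm12(64, 4))     // false: not a multiple of the 8-byte access size.
	fmt.Println(fitsSignedImm9(-16))          // true: negative offsets need the signed 9-bit form.
}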
func (a addressMode) indexRegBits() byte {
bits := a.extOp.srcBits()
if bits != 32 && bits != 64 {
panic("invalid index register for address mode. it must be either 32 or 64 bits")
}
return bits
}
func (a addressMode) sizeInBitsToShiftAmount(sizeInBits byte) (lsl byte) {
switch sizeInBits {
case 8:
lsl = 0
case 16:
lsl = 1
case 32:
lsl = 2
case 64:
lsl = 3
}
return
}
func extLoadSignSize(op ssa.Opcode) (size byte, signed bool) {
switch op {
case ssa.OpcodeUload8:
size, signed = 8, false
case ssa.OpcodeUload16:
size, signed = 16, false
case ssa.OpcodeUload32:
size, signed = 32, false
case ssa.OpcodeSload8:
size, signed = 8, true
case ssa.OpcodeSload16:
size, signed = 16, true
case ssa.OpcodeSload32:
size, signed = 32, true
default:
panic("BUG")
}
return
}
func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret regalloc.VReg) {
size, signed := extLoadSignSize(op)
amode := m.lowerToAddressMode(ptr, offset, size)
load := m.allocateInstr()
if signed {
load.asSLoad(operandNR(ret), amode, size)
} else {
load.asULoad(operandNR(ret), amode, size)
}
m.insert(load)
}
func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.Value) {
amode := m.lowerToAddressMode(ptr, offset, typ.Bits())
dst := m.compiler.VRegOf(ret)
load := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
load.asULoad(operandNR(dst), amode, typ.Bits())
case ssa.TypeF32, ssa.TypeF64:
load.asFpuLoad(operandNR(dst), amode, typ.Bits())
case ssa.TypeV128:
load.asFpuLoad(operandNR(dst), amode, 128)
default:
panic("TODO")
}
m.insert(load)
}
func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane, ret ssa.Value) {
// vecLoad1R has offset address mode (base+imm) only for post index, so we simply add the offset to the base.
base := m.getOperand_NR(m.compiler.ValueDefinition(ptr), extModeNone).nr()
offsetReg := m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(offsetReg, int64(offset))
addedBase := m.addReg64ToReg64(base, offsetReg)
rd := operandNR(m.compiler.VRegOf(ret))
ld1r := m.allocateInstr()
ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane))
m.insert(ld1r)
}
func (m *machine) lowerStore(si *ssa.Instruction) {
// TODO: merge consecutive stores into a single pair store instruction.
value, ptr, offset, storeSizeInBits := si.StoreData()
amode := m.lowerToAddressMode(ptr, offset, storeSizeInBits)
valueOp := m.getOperand_NR(m.compiler.ValueDefinition(value), extModeNone)
store := m.allocateInstr()
store.asStore(valueOp, amode, storeSizeInBits)
m.insert(store)
}
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) {
// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and
// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed
// to support more efficient address resolution.
a32s, a64s, offset := m.collectAddends(ptr)
offset += int64(offsetBase)
return m.lowerToAddressModeFromAddends(a32s, a64s, size, offset)
}
// lowerToAddressModeFromAddends creates an addressMode from a list of addends collected by collectAddends.
// During the construction, this might emit additional instructions.
//
// Extracted as a separate function for easy testing.
func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); {
case a64sExist && a32sExist:
var base regalloc.VReg
base = a64s.Dequeue()
var a32 addend32
a32 = a32s.Dequeue()
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
var base regalloc.VReg
base = a64s.Dequeue()
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
offset = 0
case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
var base regalloc.VReg
base = a64s.Dequeue()
amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
offset = 0
case a64sExist:
var base regalloc.VReg
base = a64s.Dequeue()
if !a64s.Empty() {
index := a64s.Dequeue()
amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
} else {
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
}
case a32sExist:
base32 := a32s.Dequeue()
// First we need 64-bit base.
base := m.compiler.AllocateVReg(ssa.TypeI64)
baseExt := m.allocateInstr()
var signed bool
if base32.ext == extendOpSXTW {
signed = true
}
baseExt.asExtend(base, base32.r, 32, 64, signed)
m.insert(baseExt)
if !a32s.Empty() {
index := a32s.Dequeue()
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
} else {
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
}
default: // Only static offsets.
tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(tmpReg, offset)
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
offset = 0
}
baseReg := amode.rn
if offset > 0 {
baseReg = m.addConstToReg64(baseReg, offset) // baseReg += offset
}
for !a64s.Empty() {
a64 := a64s.Dequeue()
baseReg = m.addReg64ToReg64(baseReg, a64) // baseReg += a64
}
for !a32s.Empty() {
a32 := a32s.Dequeue()
baseReg = m.addRegToReg64Ext(baseReg, a32.r, a32.ext) // baseReg += (a32 extended to 64-bit)
}
amode.rn = baseReg
return
}
var addendsMatchOpcodes = [4]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst}
func (m *machine) collectAddends(ptr ssa.Value) (addends32 *wazevoapi.Queue[addend32], addends64 *wazevoapi.Queue[regalloc.VReg], offset int64) {
m.addendsWorkQueue.Reset()
m.addends32.Reset()
m.addends64.Reset()
m.addendsWorkQueue.Enqueue(ptr)
for !m.addendsWorkQueue.Empty() {
v := m.addendsWorkQueue.Dequeue()
def := m.compiler.ValueDefinition(v)
switch op := m.compiler.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op {
case ssa.OpcodeIadd:
// If the addend is an add, we recursively collect its operands.
x, y := def.Instr.Arg2()
m.addendsWorkQueue.Enqueue(x)
m.addendsWorkQueue.Enqueue(y)
def.Instr.MarkLowered()
case ssa.OpcodeIconst:
// If the addend is constant, we just statically merge it into the offset.
ic := def.Instr
u64 := ic.ConstantVal()
if ic.Return().Type().Bits() == 32 {
offset += int64(int32(u64)) // sign-extend.
} else {
offset += int64(u64)
}
def.Instr.MarkLowered()
case ssa.OpcodeUExtend, ssa.OpcodeSExtend:
input := def.Instr.Arg()
if input.Type().Bits() != 32 {
panic("illegal size: " + input.Type().String())
}
var ext extendOp
if op == ssa.OpcodeUExtend {
ext = extendOpUXTW
} else {
ext = extendOpSXTW
}
inputDef := m.compiler.ValueDefinition(input)
constInst := inputDef.IsFromInstr() && inputDef.Instr.Constant()
switch {
case constInst && ext == extendOpUXTW:
// Zero-extension of a 32-bit constant can be merged into the offset.
offset += int64(uint32(inputDef.Instr.ConstantVal()))
case constInst && ext == extendOpSXTW:
// Sign-extension of a 32-bit constant can be merged into the offset.
offset += int64(int32(inputDef.Instr.ConstantVal())) // sign-extend!
default:
m.addends32.Enqueue(addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext})
}
def.Instr.MarkLowered()
continue
default:
// If the addend is not one of them, we simply use it as-is (without merging!), optionally zero-extending it.
m.addends64.Enqueue(m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr())
}
}
return &m.addends32, &m.addends64, offset
}
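One detail worth making concrete: when a 32-bit iconst (or a sign-/zero-extended 32-bit constant) is folded into the offset above, the extension kind decides how the value is widened. A tiny standalone illustration (hypothetical values, not wazero code):

package main

import "fmt"

func main() {
	u64 := uint64(0xffff_fff0) // a 32-bit constant whose sign bit is set (-16 as int32).

	asSigned := int64(int32(u64))   // how a 32-bit iconst or sextend'ed constant is folded: -16.
	asZeroExt := int64(uint32(u64)) // how a uextend'ed constant is folded: 4294967280.

	fmt.Println(asSigned, asZeroExt)
}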
func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
if imm12Op, ok := asImm12Operand(uint64(c)); ok {
alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true)
} else if imm12Op, ok = asImm12Operand(uint64(-c)); ok {
alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true)
} else {
tmp := m.compiler.AllocateVReg(ssa.TypeI64)
m.load64bitConst(c, tmp)
alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true)
}
m.insert(alu)
return
}
func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true)
m.insert(alu)
return
}
func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true)
m.insert(alu)
return
}

View file

@ -0,0 +1,515 @@
package arm64
import (
"context"
"fmt"
"strings"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type (
// machine implements backend.Machine.
machine struct {
compiler backend.Compiler
executableContext *backend.ExecutableContextT[instruction]
currentABI *backend.FunctionABI
regAlloc regalloc.Allocator
regAllocFn *backend.RegAllocFunction[*instruction, *machine]
// addendsWorkQueue is used during address lowering, defined here for reuse.
addendsWorkQueue wazevoapi.Queue[ssa.Value]
addends32 wazevoapi.Queue[addend32]
// addends64 is used during address lowering, defined here for reuse.
addends64 wazevoapi.Queue[regalloc.VReg]
unresolvedAddressModes []*instruction
// condBrRelocs holds the conditional branches which need offset relocation.
condBrRelocs []condBrReloc
// jmpTableTargets holds the labels of the jump table targets.
jmpTableTargets [][]uint32
// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
// During the execution of the function, the stack looks like:
//
//
// (high address)
// +-----------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | xxxxx |
// | ReturnAddress |
// +-----------------+ <<-|
// | ........... | |
// | spill slot M | | <--- spillSlotSize
// | ............ | |
// | spill slot 2 | |
// | spill slot 1 | <<-+
// | clobbered N |
// | ........... |
// | clobbered 1 |
// | clobbered 0 |
// SP---> +-----------------+
// (low address)
//
// and it represents the size of the space between FP and the first spilled slot. This must be a multiple of 16.
// Also note that this is only known after register allocation.
spillSlotSize int64
spillSlots map[regalloc.VRegID]int64 // regalloc.VRegID to offset.
// clobberedRegs holds real-register backed VRegs saved at the function prologue, and restored at the epilogue.
clobberedRegs []regalloc.VReg
maxRequiredStackSizeForCalls int64
stackBoundsCheckDisabled bool
regAllocStarted bool
}
addend32 struct {
r regalloc.VReg
ext extendOp
}
condBrReloc struct {
cbr *instruction
// currentLabelPos is the labelPosition within which condBr is defined.
currentLabelPos *labelPosition
// Next block's labelPosition.
nextLabel label
offset int64
}
labelPosition = backend.LabelPosition[instruction]
label = backend.Label
)
const (
labelReturn = backend.LabelReturn
labelInvalid = backend.LabelInvalid
)
// NewBackend returns a new backend for arm64.
func NewBackend() backend.Machine {
m := &machine{
spillSlots: make(map[regalloc.VRegID]int64),
executableContext: newExecutableContext(),
regAlloc: regalloc.NewAllocator(regInfo),
}
return m
}
func newExecutableContext() *backend.ExecutableContextT[instruction] {
return backend.NewExecutableContextT[instruction](resetInstruction, setNext, setPrev, asNop0)
}
// ExecutableContext implements backend.Machine.
func (m *machine) ExecutableContext() backend.ExecutableContext {
return m.executableContext
}
// RegAlloc implements backend.Machine Function.
func (m *machine) RegAlloc() {
rf := m.regAllocFn
for _, pos := range m.executableContext.OrderedBlockLabels {
rf.AddBlock(pos.SB, pos.L, pos.Begin, pos.End)
}
m.regAllocStarted = true
m.regAlloc.DoAllocation(rf)
// Now that we know the final spill slot size, we must align spillSlotSize to 16 bytes.
m.spillSlotSize = (m.spillSlotSize + 15) &^ 15
}
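The masking expression above rounds the spill-slot area up to the next 16-byte boundary; a tiny standalone illustration:

package main

import "fmt"

// alignUp16 mirrors (v + 15) &^ 15: round v up to the nearest multiple of 16.
func alignUp16(v int64) int64 { return (v + 15) &^ 15 }

func main() {
	fmt.Println(alignUp16(0), alignUp16(8), alignUp16(16), alignUp16(24)) // 0 16 16 32
}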
// Reset implements backend.Machine.
func (m *machine) Reset() {
m.clobberedRegs = m.clobberedRegs[:0]
for key := range m.spillSlots {
m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key))
}
for _, key := range m.clobberedRegs {
delete(m.spillSlots, regalloc.VRegID(key))
}
m.clobberedRegs = m.clobberedRegs[:0]
m.regAllocStarted = false
m.regAlloc.Reset()
m.regAllocFn.Reset()
m.spillSlotSize = 0
m.unresolvedAddressModes = m.unresolvedAddressModes[:0]
m.maxRequiredStackSizeForCalls = 0
m.executableContext.Reset()
m.jmpTableTargets = m.jmpTableTargets[:0]
}
// SetCurrentABI implements backend.Machine SetCurrentABI.
func (m *machine) SetCurrentABI(abi *backend.FunctionABI) {
m.currentABI = abi
}
// DisableStackCheck implements backend.Machine DisableStackCheck.
func (m *machine) DisableStackCheck() {
m.stackBoundsCheckDisabled = true
}
// SetCompiler implements backend.Machine.
func (m *machine) SetCompiler(ctx backend.Compiler) {
m.compiler = ctx
m.regAllocFn = backend.NewRegAllocFunction[*instruction, *machine](m, ctx.SSABuilder(), ctx)
}
func (m *machine) insert(i *instruction) {
ectx := m.executableContext
ectx.PendingInstructions = append(ectx.PendingInstructions, i)
}
func (m *machine) insertBrTargetLabel() label {
nop, l := m.allocateBrTarget()
m.insert(nop)
return l
}
func (m *machine) allocateBrTarget() (nop *instruction, l label) {
ectx := m.executableContext
l = ectx.AllocateLabel()
nop = m.allocateInstr()
nop.asNop0WithLabel(l)
pos := ectx.AllocateLabelPosition(l)
pos.Begin, pos.End = nop, nop
ectx.LabelPositions[l] = pos
return
}
// allocateInstr allocates an instruction.
func (m *machine) allocateInstr() *instruction {
instr := m.executableContext.InstructionPool.Allocate()
if !m.regAllocStarted {
instr.addedBeforeRegAlloc = true
}
return instr
}
func resetInstruction(i *instruction) {
*i = instruction{}
}
func (m *machine) allocateNop() *instruction {
instr := m.allocateInstr()
instr.asNop0()
return instr
}
func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
amode := &i.amode
switch amode.kind {
case addressModeKindResultStackSpace:
amode.imm += ret0offset
case addressModeKindArgStackSpace:
amode.imm += arg0offset
default:
panic("BUG")
}
var sizeInBits byte
switch i.kind {
case store8, uLoad8:
sizeInBits = 8
case store16, uLoad16:
sizeInBits = 16
case store32, fpuStore32, uLoad32, fpuLoad32:
sizeInBits = 32
case store64, fpuStore64, uLoad64, fpuLoad64:
sizeInBits = 64
case fpuStore128, fpuLoad128:
sizeInBits = 128
default:
panic("BUG")
}
if offsetFitsInAddressModeKindRegUnsignedImm12(sizeInBits, amode.imm) {
amode.kind = addressModeKindRegUnsignedImm12
} else {
// In this case, we load the offset into the temporary register,
// and then use it as the index register.
newPrev := m.lowerConstantI64AndInsert(i.prev, tmpRegVReg, amode.imm)
linkInstr(newPrev, i)
*amode = addressMode{kind: addressModeKindRegReg, rn: amode.rn, rm: tmpRegVReg, extOp: extendOpUXTX /* indicates rm reg is 64-bit */}
}
}
// resolveRelativeAddresses resolves the relative addresses before encoding.
func (m *machine) resolveRelativeAddresses(ctx context.Context) {
ectx := m.executableContext
for {
if len(m.unresolvedAddressModes) > 0 {
arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP()
for _, i := range m.unresolvedAddressModes {
m.resolveAddressingMode(arg0offset, ret0offset, i)
}
}
// Reuse the slice to gather the unresolved conditional branches.
m.condBrRelocs = m.condBrRelocs[:0]
var fn string
var fnIndex int
var labelToSSABlockID map[label]ssa.BasicBlockID
if wazevoapi.PerfMapEnabled {
fn = wazevoapi.GetCurrentFunctionName(ctx)
labelToSSABlockID = make(map[label]ssa.BasicBlockID)
for i, l := range ectx.SsaBlockIDToLabels {
labelToSSABlockID[l] = ssa.BasicBlockID(i)
}
fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)
}
// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
var offset int64
for i, pos := range ectx.OrderedBlockLabels {
pos.BinaryOffset = offset
var size int64
for cur := pos.Begin; ; cur = cur.next {
switch cur.kind {
case nop0:
l := cur.nop0Label()
if pos, ok := ectx.LabelPositions[l]; ok {
pos.BinaryOffset = offset + size
}
case condBr:
if !cur.condBrOffsetResolved() {
var nextLabel label
if i < len(ectx.OrderedBlockLabels)-1 {
// Note: this is only used when the block ends with fallthrough,
// therefore it can safely be assumed that the next block exists when it's needed.
nextLabel = ectx.OrderedBlockLabels[i+1].L
}
m.condBrRelocs = append(m.condBrRelocs, condBrReloc{
cbr: cur, currentLabelPos: pos, offset: offset + size,
nextLabel: nextLabel,
})
}
}
size += cur.size()
if cur == pos.End {
break
}
}
if wazevoapi.PerfMapEnabled {
if size > 0 {
l := pos.L
var labelStr string
if blkID, ok := labelToSSABlockID[l]; ok {
labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID)
} else {
labelStr = l.String()
}
wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr))
}
}
offset += size
}
// Before resolving any offsets, we need to check if all the conditional branches can be resolved.
var needRerun bool
for i := range m.condBrRelocs {
reloc := &m.condBrRelocs[i]
cbr := reloc.cbr
offset := reloc.offset
target := cbr.condBrLabel()
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
diff := offsetOfTarget - offset
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
// In this case, the conditional branch target is too far away. We place the trampoline instructions at the end of the current block,
// and jump to it.
m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel)
// Then, we need to rerun this layout loop to fix up the label offsets
// as they have changed after the trampoline is inserted.
needRerun = true
}
}
if needRerun {
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Clear()
}
} else {
break
}
}
var currentOffset int64
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
switch cur.kind {
case br:
target := cur.brLabel()
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
diff := offsetOfTarget - currentOffset
divided := diff >> 2
if divided < minSignedInt26 || divided > maxSignedInt26 {
// This means the currently compiled single function is extremely large.
panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range")
}
cur.brOffsetResolve(diff)
case condBr:
if !cur.condBrOffsetResolved() {
target := cur.condBrLabel()
offsetOfTarget := ectx.LabelPositions[target].BinaryOffset
diff := offsetOfTarget - currentOffset
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly")
}
cur.condBrOffsetResolve(diff)
}
case brTableSequence:
tableIndex := cur.u1
targets := m.jmpTableTargets[tableIndex]
for i := range targets {
l := label(targets[i])
offsetOfTarget := ectx.LabelPositions[l].BinaryOffset
diff := offsetOfTarget - (currentOffset + brTableSequenceOffsetTableBegin)
targets[i] = uint32(diff)
}
cur.brTableSequenceOffsetsResolved()
case emitSourceOffsetInfo:
m.compiler.AddSourceOffsetInfo(currentOffset, cur.sourceOffsetInfo())
}
currentOffset += cur.size()
}
}
const (
maxSignedInt26 = 1<<25 - 1
minSignedInt26 = -(1 << 25)
maxSignedInt19 = 1<<18 - 1
minSignedInt19 = -(1 << 18)
)
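For reference, the arithmetic behind these limits: conditional branches encode a 19-bit signed word offset and unconditional branches a 26-bit one, so the reachable ranges in bytes are roughly ±1 MiB and ±128 MiB. A standalone check:

package main

import "fmt"

func main() {
	const maxSignedInt19 = 1<<18 - 1
	const maxSignedInt26 = 1<<25 - 1
	fmt.Println(maxSignedInt19 * 4) // 1048572 bytes, ~1 MiB forward reach for B.cond/CBZ/CBNZ.
	fmt.Println(maxSignedInt26 * 4) // 134217724 bytes, ~128 MiB forward reach for B.
}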
func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) {
cur := currentBlk.End
originalTarget := cbr.condBrLabel()
endNext := cur.next
if cur.kind != br {
// If the current block ends with an unconditional branch, we can just insert the trampoline after it.
// Otherwise, we need to insert a "skip" branch so that fallthrough execution jumps over the trampoline instructions.
skip := m.allocateInstr()
skip.asBr(nextLabel)
cur = linkInstr(cur, skip)
}
cbrNewTargetInstr, cbrNewTargetLabel := m.allocateBrTarget()
cbr.setCondBrTargets(cbrNewTargetLabel)
cur = linkInstr(cur, cbrNewTargetInstr)
// Then insert the unconditional branch to the original, which should be possible to get encoded
// as 26-bit offset should be enough for any practical application.
br := m.allocateInstr()
br.asBr(originalTarget)
cur = linkInstr(cur, br)
// Update the end of the current block.
currentBlk.End = cur
linkInstr(cur, endNext)
}
// Format implements backend.Machine.
func (m *machine) Format() string {
ectx := m.executableContext
begins := map[*instruction]label{}
for l, pos := range ectx.LabelPositions {
begins[pos.Begin] = l
}
irBlocks := map[label]ssa.BasicBlockID{}
for i, l := range ectx.SsaBlockIDToLabels {
irBlocks[l] = ssa.BasicBlockID(i)
}
var lines []string
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
if l, ok := begins[cur]; ok {
var labelStr string
if blkID, ok := irBlocks[l]; ok {
labelStr = fmt.Sprintf("%s (SSA Block: %s):", l, blkID)
} else {
labelStr = fmt.Sprintf("%s:", l)
}
lines = append(lines, labelStr)
}
if cur.kind == nop0 {
continue
}
lines = append(lines, "\t"+cur.String())
}
return "\n" + strings.Join(lines, "\n") + "\n"
}
// InsertReturn implements backend.Machine.
func (m *machine) InsertReturn() {
i := m.allocateInstr()
i.asRet()
m.insert(i)
}
func (m *machine) getVRegSpillSlotOffsetFromSP(id regalloc.VRegID, size byte) int64 {
offset, ok := m.spillSlots[id]
if !ok {
offset = m.spillSlotSize
// TODO: this should be aligned depending on the `size` to use Imm12 offset load/store as much as possible.
m.spillSlots[id] = offset
m.spillSlotSize += int64(size)
}
return offset + 16 // spill slot starts above the clobbered registers and the frame size.
}
func (m *machine) clobberedRegSlotSize() int64 {
return int64(len(m.clobberedRegs) * 16)
}
func (m *machine) arg0OffsetFromSP() int64 {
return m.frameSize() +
16 + // 16-byte aligned return address
16 // frame size saved below the clobbered registers.
}
func (m *machine) ret0OffsetFromSP() int64 {
return m.arg0OffsetFromSP() + m.currentABI.ArgStackSize
}
func (m *machine) requiredStackSize() int64 {
return m.maxRequiredStackSizeForCalls +
m.frameSize() +
16 + // 16-byte aligned return address.
16 // frame size saved below the clobbered registers.
}
func (m *machine) frameSize() int64 {
s := m.clobberedRegSlotSize() + m.spillSlotSize
if s&0xf != 0 {
panic(fmt.Errorf("BUG: frame size %d is not 16-byte aligned", s))
}
return s
}
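// A worked example of the offsets above (hypothetical numbers, not taken from a real compilation):
// with 2 clobbered registers (2*16 = 32 bytes) and spillSlotSize = 16, frameSize() returns 48,
// arg0OffsetFromSP() returns 48 + 16 + 16 = 80, and requiredStackSize() is
// maxRequiredStackSizeForCalls + 80.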
func (m *machine) addJmpTableTarget(targets []ssa.BasicBlock) (index int) {
// TODO: reuse the slice!
labels := make([]uint32, len(targets))
for j, target := range targets {
labels[j] = uint32(m.executableContext.GetOrAllocateSSABlockLabel(target))
}
index = len(m.jmpTableTargets)
m.jmpTableTargets = append(m.jmpTableTargets, labels)
return
}


@ -0,0 +1,469 @@
package arm64
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// PostRegAlloc implements backend.Machine.
func (m *machine) PostRegAlloc() {
m.setupPrologue()
m.postRegAlloc()
}
// setupPrologue initializes the prologue of the function.
func (m *machine) setupPrologue() {
ectx := m.executableContext
cur := ectx.RootInstr
prevInitInst := cur.next
//
// (high address) (high address)
// SP----> +-----------------+ +------------------+ <----+
// | ....... | | ....... | |
// | ret Y | | ret Y | |
// | ....... | | ....... | |
// | ret 0 | | ret 0 | |
// | arg X | | arg X | | size_of_arg_ret.
// | ....... | ====> | ....... | |
// | arg 1 | | arg 1 | |
// | arg 0 | | arg 0 | <----+
// |-----------------| | size_of_arg_ret |
// | return address |
// +------------------+ <---- SP
// (low address) (low address)
// Saves the return address (lr) and the size_of_arg_ret below the SP.
// size_of_arg_ret is used for stack unwinding.
cur = m.createReturnAddrAndSizeOfArgRetSlot(cur)
if !m.stackBoundsCheckDisabled {
cur = m.insertStackBoundsCheck(m.requiredStackSize(), cur)
}
// Decrement SP if spillSlotSize > 0.
if m.spillSlotSize == 0 && len(m.spillSlots) != 0 {
panic(fmt.Sprintf("BUG: spillSlotSize=%d, spillSlots=%v\n", m.spillSlotSize, m.spillSlots))
}
if regs := m.clobberedRegs; len(regs) > 0 {
//
// (high address) (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | size_of_arg_ret | | size_of_arg_ret |
// | ReturnAddress | | ReturnAddress |
// SP----> +-----------------+ ====> +-----------------+
// (low address) | clobbered M |
// | ............ |
// | clobbered 0 |
// +-----------------+ <----- SP
// (low address)
//
_amode := addressModePreOrPostIndex(spVReg,
-16, // stack pointer must be 16-byte aligned.
true, // Decrement before store.
)
for _, vr := range regs {
// TODO: pair stores to reduce the number of instructions.
store := m.allocateInstr()
store.asStore(operandNR(vr), _amode, regTypeToRegisterSizeInBits(vr.RegType()))
cur = linkInstr(cur, store)
}
}
if size := m.spillSlotSize; size > 0 {
// Check if size is 16-byte aligned.
if size&0xf != 0 {
panic(fmt.Errorf("BUG: spill slot size %d is not 16-byte aligned", size))
}
cur = m.addsAddOrSubStackPointer(cur, spVReg, size, false)
// At this point, the stack looks like:
//
// (high address)
// +------------------+
// | ....... |
// | ret Y |
// | ....... |
// | ret 0 |
// | arg X |
// | ....... |
// | arg 1 |
// | arg 0 |
// | size_of_arg_ret |
// | ReturnAddress |
// +------------------+
// | clobbered M |
// | ............ |
// | clobbered 0 |
// | spill slot N |
// | ............ |
// | spill slot 2 |
// | spill slot 0 |
// SP----> +------------------+
// (low address)
}
// We push the frame size onto the stack so that the stack can be unwound:
//
//
// (high address) (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | size_of_arg_ret | | size_of_arg_ret |
// | ReturnAddress | | ReturnAddress |
// +-----------------+ ==> +-----------------+ <----+
// | clobbered M | | clobbered M | |
// | ............ | | ............ | |
// | clobbered 2 | | clobbered 2 | |
// | clobbered 1 | | clobbered 1 | | frame size
// | clobbered 0 | | clobbered 0 | |
// | spill slot N | | spill slot N | |
// | ............ | | ............ | |
// | spill slot 0 | | spill slot 0 | <----+
// SP---> +-----------------+ | xxxxxx | ;; unused space to make it 16-byte aligned.
// | frame_size |
// +-----------------+ <---- SP
// (low address)
//
cur = m.createFrameSizeSlot(cur, m.frameSize())
linkInstr(cur, prevInitInst)
}
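// As a rough sketch, for a function with a 16-byte arg/ret area, one clobbered vector register
// (v18), and 32 bytes of spill slots, the prologue built above is roughly equivalent to the
// following (immediates and constant-materialization simplified for illustration):
//
//	movz x27, #16                 ; size_of_arg_ret
//	sub  sp, sp, x27              ; allocate the arg/ret area
//	stp  x30, x27, [sp, #-16]!    ; save return address and size_of_arg_ret
//	                              ; (stack bounds check omitted here)
//	str  q18, [sp, #-16]!         ; save the clobbered register
//	sub  sp, sp, #32              ; allocate the spill slots
//	movz x27, #48                 ; frame size = 16 (clobbered) + 32 (spill)
//	str  x27, [sp, #-16]!         ; push frame_size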
func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruction {
// First we decrement the stack pointer so that it points to the arg0 slot.
var sizeOfArgRetReg regalloc.VReg
s := int64(m.currentABI.AlignedArgResultStackSlotSize())
if s > 0 {
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
sizeOfArgRetReg = tmpRegVReg
subSp := m.allocateInstr()
subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
cur = linkInstr(cur, subSp)
} else {
sizeOfArgRetReg = xzrVReg
}
// Saves the return address (lr) and the size_of_arg_ret below the SP.
// size_of_arg_ret is used for stack unwinding.
pstr := m.allocateInstr()
amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */)
pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)
cur = linkInstr(cur, pstr)
return cur
}
func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {
var frameSizeReg regalloc.VReg
if s > 0 {
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, s)
frameSizeReg = tmpRegVReg
} else {
frameSizeReg = xzrVReg
}
_amode := addressModePreOrPostIndex(spVReg,
-16, // stack pointer must be 16-byte aligned.
true, // Decrement before store.
)
store := m.allocateInstr()
store.asStore(operandNR(frameSizeReg), _amode, 64)
cur = linkInstr(cur, store)
return cur
}
// postRegAlloc does multiple things while walking through the instructions:
// 1. Removes the redundant copy instruction.
// 2. Inserts the epilogue.
func (m *machine) postRegAlloc() {
ectx := m.executableContext
for cur := ectx.RootInstr; cur != nil; cur = cur.next {
switch cur.kind {
case ret:
m.setupEpilogueAfter(cur.prev)
case loadConstBlockArg:
lc := cur
next := lc.next
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
m.lowerLoadConstantBlockArgAfterRegAlloc(lc)
for _, instr := range m.executableContext.PendingInstructions {
cur = linkInstr(cur, instr)
}
linkInstr(cur, next)
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
default:
// Removes the redundant copy instruction.
if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() {
prev, next := cur.prev, cur.next
// Remove the copy instruction.
prev.next = next
if next != nil {
next.prev = prev
}
}
}
}
}
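// For example, a register-allocated sequence like "mov x1, x1" (a copy whose source and destination
// were assigned the same real register) is unlinked here, so it never reaches encoding.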
func (m *machine) setupEpilogueAfter(cur *instruction) {
prevNext := cur.next
// We've stored the frame size in the prologue, and now that we are about to return from this function, we won't need it anymore.
cur = m.addsAddOrSubStackPointer(cur, spVReg, 16, true)
if s := m.spillSlotSize; s > 0 {
// Adjust SP to the original value:
//
// (high address) (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | xxxxx | | xxxxx |
// | ReturnAddress | | ReturnAddress |
// +-----------------+ ====> +-----------------+
// | clobbered M | | clobbered M |
// | ............ | | ............ |
// | clobbered 1 | | clobbered 1 |
// | clobbered 0 | | clobbered 0 |
// | spill slot N | +-----------------+ <---- SP
// | ............ |
// | spill slot 0 |
// SP---> +-----------------+
// (low address)
//
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
}
// First we need to restore the clobbered registers.
if len(m.clobberedRegs) > 0 {
// (high address)
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | xxxxx | | xxxxx |
// | ReturnAddress | | ReturnAddress |
// +-----------------+ ========> +-----------------+ <---- SP
// | clobbered M |
// | ........... |
// | clobbered 1 |
// | clobbered 0 |
// SP---> +-----------------+
// (low address)
l := len(m.clobberedRegs) - 1
for i := range m.clobberedRegs {
vr := m.clobberedRegs[l-i] // reverse order to restore.
load := m.allocateInstr()
amode := addressModePreOrPostIndex(spVReg,
16, // stack pointer must be 16-byte aligned.
false, // Increment after load.
)
// TODO: pair loads to reduce the number of instructions.
switch regTypeToRegisterSizeInBits(vr.RegType()) {
case 64: // restore int reg.
load.asULoad(operandNR(vr), amode, 64)
case 128: // restore vector reg.
load.asFpuLoad(operandNR(vr), amode, 128)
}
cur = linkInstr(cur, load)
}
}
// Reload the return address (lr).
//
// +-----------------+ +-----------------+
// | ....... | | ....... |
// | ret Y | | ret Y |
// | ....... | | ....... |
// | ret 0 | | ret 0 |
// | arg X | | arg X |
// | ....... | ===> | ....... |
// | arg 1 | | arg 1 |
// | arg 0 | | arg 0 |
// | xxxxx | +-----------------+ <---- SP
// | ReturnAddress |
// SP----> +-----------------+
ldr := m.allocateInstr()
ldr.asULoad(operandNR(lrVReg),
addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
cur = linkInstr(cur, ldr)
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
cur = m.addsAddOrSubStackPointer(cur, spVReg, s, true)
}
linkInstr(cur, prevNext)
}
// saveRequiredRegs is the set of registers that must be saved/restored while growing the stack when there's insufficient
// stack space left. Essentially this is the combination of CalleeSavedRegisters plus the argument registers, except for x0,
// which always points to the execution context whenever the native code is entered from Go.
var saveRequiredRegs = []regalloc.VReg{
x1VReg, x2VReg, x3VReg, x4VReg, x5VReg, x6VReg, x7VReg,
x19VReg, x20VReg, x21VReg, x22VReg, x23VReg, x24VReg, x25VReg, x26VReg, x28VReg, lrVReg,
v0VReg, v1VReg, v2VReg, v3VReg, v4VReg, v5VReg, v6VReg, v7VReg,
v18VReg, v19VReg, v20VReg, v21VReg, v22VReg, v23VReg, v24VReg, v25VReg, v26VReg, v27VReg, v28VReg, v29VReg, v30VReg, v31VReg,
}
// insertStackBoundsCheck inserts the instructions after `cur` to check the stack bounds,
// and if there is not sufficient space for the function, exits the execution and
// tries growing the stack in the Go world.
//
// TODO: we should be able to share the instructions across all the functions to reduce the size of compiled executable.
func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instruction) *instruction {
if requiredStackSize%16 != 0 {
panic("BUG")
}
if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {
// sub tmp, sp, #requiredStackSize
sub := m.allocateInstr()
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true)
cur = linkInstr(cur, sub)
} else {
// In this case, we first load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
// Then subtract it.
sub := m.allocateInstr()
sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true)
cur = linkInstr(cur, sub)
}
tmp2 := x11VReg // Caller save, so it is safe to use it here in the prologue.
// ldr tmp2, [executionContext #StackBottomPtr]
ldr := m.allocateInstr()
ldr.asULoad(operandNR(tmp2), addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument.
imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(),
}, 64)
cur = linkInstr(cur, ldr)
// subs xzr, tmp, tmp2
subs := m.allocateInstr()
subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true)
cur = linkInstr(cur, subs)
// b.ge #imm
cbr := m.allocateInstr()
cbr.asCondBr(ge.asCond(), labelInvalid, false /* ignored */)
cur = linkInstr(cur, cbr)
// Set the required stack size and set it to the exec context.
{
// First load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
setRequiredStackSize := m.allocateInstr()
setRequiredStackSize.asStore(operandNR(tmpRegVReg),
addressMode{
kind: addressModeKindRegUnsignedImm12,
// Execution context is always the first argument.
rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
}, 64)
cur = linkInstr(cur, setRequiredStackSize)
}
ldrAddress := m.allocateInstr()
ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument
imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
}, 64)
cur = linkInstr(cur, ldrAddress)
// Then jumps to the stack grow call sequence's address, meaning
// transferring the control to the code compiled by CompileStackGrowCallSequence.
bl := m.allocateInstr()
bl.asCallIndirect(tmpRegVReg, nil)
cur = linkInstr(cur, bl)
// Now that we know the entire code, we can finalize how many bytes
// we have to skip when the stack size is sufficient.
var cbrOffset int64
for _cur := cbr; ; _cur = _cur.next {
cbrOffset += _cur.size()
if _cur == cur {
break
}
}
cbr.condBrOffsetResolve(cbrOffset)
return cur
}
// CompileStackGrowCallSequence implements backend.Machine.
func (m *machine) CompileStackGrowCallSequence() []byte {
ectx := m.executableContext
cur := m.allocateInstr()
cur.asNop0()
ectx.RootInstr = cur
// Save the callee saved and argument registers.
cur = m.saveRegistersInExecutionContext(cur, saveRequiredRegs)
// Save the current stack pointer.
cur = m.saveCurrentStackPointer(cur, x0VReg)
// Set the exit status on the execution context.
cur = m.setExitCode(cur, x0VReg, wazevoapi.ExitCodeGrowStack)
// Exit the execution.
cur = m.storeReturnAddressAndExit(cur)
// After the exit, restore the saved registers.
cur = m.restoreRegistersInExecutionContext(cur, saveRequiredRegs)
// Then return to the original address that called this stack grow sequence.
ret := m.allocateInstr()
ret.asRet()
linkInstr(cur, ret)
m.encode(ectx.RootInstr)
return m.compiler.Buf()
}
func (m *machine) addsAddOrSubStackPointer(cur *instruction, rd regalloc.VReg, diff int64, add bool) *instruction {
ectx := m.executableContext
ectx.PendingInstructions = ectx.PendingInstructions[:0]
m.insertAddOrSubStackPointer(rd, diff, add)
for _, inserted := range ectx.PendingInstructions {
cur = linkInstr(cur, inserted)
}
return cur
}


@ -0,0 +1,152 @@
package arm64
// This file implements the interfaces required for register allocations. See backend.RegAllocFunctionMachine.
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// ClobberedRegisters implements backend.RegAllocFunctionMachine.
func (m *machine) ClobberedRegisters(regs []regalloc.VReg) {
m.clobberedRegs = append(m.clobberedRegs[:0], regs...)
}
// Swap implements backend.RegAllocFunctionMachine.
func (m *machine) Swap(cur *instruction, x1, x2, tmp regalloc.VReg) {
prevNext := cur.next
var mov1, mov2, mov3 *instruction
if x1.RegType() == regalloc.RegTypeInt {
if !tmp.Valid() {
tmp = tmpRegVReg
}
mov1 = m.allocateInstr().asMove64(tmp, x1)
mov2 = m.allocateInstr().asMove64(x1, x2)
mov3 = m.allocateInstr().asMove64(x2, tmp)
cur = linkInstr(cur, mov1)
cur = linkInstr(cur, mov2)
cur = linkInstr(cur, mov3)
linkInstr(cur, prevNext)
} else {
if !tmp.Valid() {
r2 := x2.RealReg()
// Temporarily spill x1 to stack.
cur = m.InsertStoreRegisterAt(x1, cur, true).prev
// Then move x2 to x1.
cur = linkInstr(cur, m.allocateInstr().asFpuMov128(x1, x2))
linkInstr(cur, prevNext)
// Then reload the original value on x1 from stack to r2.
m.InsertReloadRegisterAt(x1.SetRealReg(r2), cur, true)
} else {
mov1 = m.allocateInstr().asFpuMov128(tmp, x1)
mov2 = m.allocateInstr().asFpuMov128(x1, x2)
mov3 = m.allocateInstr().asFpuMov128(x2, tmp)
cur = linkInstr(cur, mov1)
cur = linkInstr(cur, mov2)
cur = linkInstr(cur, mov3)
linkInstr(cur, prevNext)
}
}
}
// InsertMoveBefore implements backend.RegAllocFunctionMachine.
func (m *machine) InsertMoveBefore(dst, src regalloc.VReg, instr *instruction) {
typ := src.RegType()
if typ != dst.RegType() {
panic("BUG: src and dst must have the same type")
}
mov := m.allocateInstr()
if typ == regalloc.RegTypeInt {
mov.asMove64(dst, src)
} else {
mov.asFpuMov128(dst, src)
}
cur := instr.prev
prevNext := cur.next
cur = linkInstr(cur, mov)
linkInstr(cur, prevNext)
}
// SSABlockLabel implements backend.RegAllocFunctionMachine.
func (m *machine) SSABlockLabel(id ssa.BasicBlockID) backend.Label {
return m.executableContext.SsaBlockIDToLabels[id]
}
// InsertStoreRegisterAt implements backend.RegAllocFunctionMachine.
func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
if !v.IsRealReg() {
panic("BUG: VReg must be backed by real reg to be stored")
}
typ := m.compiler.TypeOf(v)
var prevNext, cur *instruction
if after {
cur, prevNext = instr, instr.next
} else {
cur, prevNext = instr.prev, instr
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
var amode addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
store := m.allocateInstr()
store.asStore(operandNR(v), amode, typ.Bits())
cur = linkInstr(cur, store)
return linkInstr(cur, prevNext)
}
// InsertReloadRegisterAt implements backend.RegAllocFunctionMachine.
func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, after bool) *instruction {
if !v.IsRealReg() {
panic("BUG: VReg must be backed by real reg to be stored")
}
typ := m.compiler.TypeOf(v)
var prevNext, cur *instruction
if after {
cur, prevNext = instr, instr.next
} else {
cur, prevNext = instr.prev, instr
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
var amode addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
load := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
load.asULoad(operandNR(v), amode, typ.Bits())
case ssa.TypeF32, ssa.TypeF64:
load.asFpuLoad(operandNR(v), amode, typ.Bits())
case ssa.TypeV128:
load.asFpuLoad(operandNR(v), amode, 128)
default:
panic("TODO")
}
cur = linkInstr(cur, load)
return linkInstr(cur, prevNext)
}
// LastInstrForInsertion implements backend.RegAllocFunctionMachine.
func (m *machine) LastInstrForInsertion(begin, end *instruction) *instruction {
cur := end
for cur.kind == nop0 {
cur = cur.prev
if cur == begin {
return end
}
}
switch cur.kind {
case br:
return cur
default:
return end
}
}


@ -0,0 +1,117 @@
package arm64
import (
"encoding/binary"
"fmt"
"math"
"sort"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
)
const (
// trampolineCallSize is the size of the trampoline instruction sequence for each function in an island.
trampolineCallSize = 4*4 + 4 // Four instructions + 32-bit immediate.
// Unconditional branch offset is encoded as divided by 4 in imm26.
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en
maxUnconditionalBranchOffset = maxSignedInt26 * 4
minUnconditionalBranchOffset = minSignedInt26 * 4
// trampolineIslandInterval is the interval at which trampoline islands are placed in the executable.
// It is half the unconditional branch range so that any call site stays within branch range of its nearest island.
trampolineIslandInterval = maxUnconditionalBranchOffset / 2
// maxNumFunctions explicitly specifies the maximum number of functions that can be allowed in a single executable.
maxNumFunctions = trampolineIslandInterval >> 6
// maxFunctionExecutableSize is the maximum allowed executable size of a single function.
// Conservatively set to 1/4 of the trampoline island interval.
maxFunctionExecutableSize = trampolineIslandInterval >> 2
)
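// Plugging in the numbers (approximate, for intuition): maxUnconditionalBranchOffset is about
// ±128MiB, so trampolineIslandInterval is about 64MiB, maxNumFunctions is about one million
// (trampolineIslandInterval >> 6), and maxFunctionExecutableSize is about 16MiB.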
// CallTrampolineIslandInfo implements backend.Machine CallTrampolineIslandInfo.
func (m *machine) CallTrampolineIslandInfo(numFunctions int) (interval, size int, err error) {
if numFunctions > maxNumFunctions {
return 0, 0, fmt.Errorf("too many functions: %d > %d", numFunctions, maxNumFunctions)
}
return trampolineIslandInterval, trampolineCallSize * numFunctions, nil
}
// ResolveRelocations implements backend.Machine ResolveRelocations.
func (m *machine) ResolveRelocations(
refToBinaryOffset []int,
executable []byte,
relocations []backend.RelocationInfo,
callTrampolineIslandOffsets []int,
) {
for _, islandOffset := range callTrampolineIslandOffsets {
encodeCallTrampolineIsland(refToBinaryOffset, islandOffset, executable)
}
for _, r := range relocations {
instrOffset := r.Offset
calleeFnOffset := refToBinaryOffset[r.FuncRef]
diff := int64(calleeFnOffset) - (instrOffset)
// Check if the diff is within the range of the branch instruction.
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
// Find the near trampoline island from callTrampolineIslandOffsets.
islandOffset := searchTrampolineIsland(callTrampolineIslandOffsets, int(instrOffset))
islandTargetOffset := islandOffset + trampolineCallSize*int(r.FuncRef)
diff = int64(islandTargetOffset) - (instrOffset)
if diff < minUnconditionalBranchOffset || diff > maxUnconditionalBranchOffset {
panic("BUG in trampoline placement")
}
}
binary.LittleEndian.PutUint32(executable[instrOffset:instrOffset+4], encodeUnconditionalBranch(true, diff))
}
}
// encodeCallTrampolineIsland encodes a trampoline island for the given functions.
// Each island consists of a trampoline instruction sequence for each function.
// Each trampoline instruction sequence consists of 4 instructions + 32-bit immediate.
func encodeCallTrampolineIsland(refToBinaryOffset []int, islandOffset int, executable []byte) {
for i := 0; i < len(refToBinaryOffset); i++ {
trampolineOffset := islandOffset + trampolineCallSize*i
fnOffset := refToBinaryOffset[i]
diff := fnOffset - (trampolineOffset + 16)
if diff > math.MaxInt32 || diff < math.MinInt32 {
// Even amd64 can't handle this case: the displacement doesn't fit in 32 bits.
panic("too big binary")
}
// tmpReg and tmpReg2 are safe to overwrite (in fact any caller-saved register is safe to use).
tmpReg, tmpReg2 := regNumberInEncoding[tmpRegVReg.RealReg()], regNumberInEncoding[x11]
// adr tmpReg, PC+16: load the address of #diff into tmpReg.
binary.LittleEndian.PutUint32(executable[trampolineOffset:], encodeAdr(tmpReg, 16))
// ldrsw tmpReg2, [tmpReg]: Load #diff into tmpReg2.
binary.LittleEndian.PutUint32(executable[trampolineOffset+4:],
encodeLoadOrStore(sLoad32, tmpReg2, addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpRegVReg}))
// add tmpReg, tmpReg2, tmpReg: add #diff to the address of #diff, getting the absolute address of the function.
binary.LittleEndian.PutUint32(executable[trampolineOffset+8:],
encodeAluRRR(aluOpAdd, tmpReg, tmpReg, tmpReg2, true, false))
// br tmpReg: branch to the function without overwriting the link register.
binary.LittleEndian.PutUint32(executable[trampolineOffset+12:], encodeUnconditionalBranchReg(tmpReg, false))
// #diff
binary.LittleEndian.PutUint32(executable[trampolineOffset+16:], uint32(diff))
}
}
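// Each trampoline therefore occupies trampolineCallSize = 4*4 + 4 = 20 bytes: four 4-byte
// instructions followed by the 4-byte #diff literal read by the ldrsw above.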
// searchTrampolineIsland finds the nearest trampoline island from callTrampolineIslandOffsets.
// Note that even if the offset is in the middle of two islands, it returns the latter one.
// That is ok because the island is always placed in the middle of the range.
//
// precondition: callTrampolineIslandOffsets is sorted in ascending order.
func searchTrampolineIsland(callTrampolineIslandOffsets []int, offset int) int {
l := len(callTrampolineIslandOffsets)
n := sort.Search(l, func(i int) bool {
return callTrampolineIslandOffsets[i] >= offset
})
if n == l {
n = l - 1
}
return callTrampolineIslandOffsets[n]
}
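// For example (hypothetical offsets), with callTrampolineIslandOffsets = [0x4000000, 0xC000000]:
// an offset of 0x5000000 lies between the two islands and the latter one (0xC000000) is returned,
// while an offset beyond the last island (e.g. 0xD000000) returns the last one (0xC000000).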


@ -0,0 +1,397 @@
package arm64
import (
"fmt"
"strconv"
"strings"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
)
// Arm64-specific registers.
//
// See https://developer.arm.com/documentation/dui0801/a/Overview-of-AArch64-state/Predeclared-core-register-names-in-AArch64-state
const (
// General purpose registers. Note that we do not distinguish wn and xn registers
// because they are the same from the perspective of the register allocator, and
// the size can be determined by the type of the instruction.
x0 = regalloc.RealRegInvalid + 1 + iota
x1
x2
x3
x4
x5
x6
x7
x8
x9
x10
x11
x12
x13
x14
x15
x16
x17
x18
x19
x20
x21
x22
x23
x24
x25
x26
x27
x28
x29
x30
// Vector registers. Note that we do not distinguish vn and dn, ... registers
// because they are the same from the perspective of the register allocator, and
// the size can be determined by the type of the instruction.
v0
v1
v2
v3
v4
v5
v6
v7
v8
v9
v10
v11
v12
v13
v14
v15
v16
v17
v18
v19
v20
v21
v22
v23
v24
v25
v26
v27
v28
v29
v30
v31
// Special registers
xzr
sp
lr = x30
fp = x29
tmp = x27
)
var (
x0VReg = regalloc.FromRealReg(x0, regalloc.RegTypeInt)
x1VReg = regalloc.FromRealReg(x1, regalloc.RegTypeInt)
x2VReg = regalloc.FromRealReg(x2, regalloc.RegTypeInt)
x3VReg = regalloc.FromRealReg(x3, regalloc.RegTypeInt)
x4VReg = regalloc.FromRealReg(x4, regalloc.RegTypeInt)
x5VReg = regalloc.FromRealReg(x5, regalloc.RegTypeInt)
x6VReg = regalloc.FromRealReg(x6, regalloc.RegTypeInt)
x7VReg = regalloc.FromRealReg(x7, regalloc.RegTypeInt)
x8VReg = regalloc.FromRealReg(x8, regalloc.RegTypeInt)
x9VReg = regalloc.FromRealReg(x9, regalloc.RegTypeInt)
x10VReg = regalloc.FromRealReg(x10, regalloc.RegTypeInt)
x11VReg = regalloc.FromRealReg(x11, regalloc.RegTypeInt)
x12VReg = regalloc.FromRealReg(x12, regalloc.RegTypeInt)
x13VReg = regalloc.FromRealReg(x13, regalloc.RegTypeInt)
x14VReg = regalloc.FromRealReg(x14, regalloc.RegTypeInt)
x15VReg = regalloc.FromRealReg(x15, regalloc.RegTypeInt)
x16VReg = regalloc.FromRealReg(x16, regalloc.RegTypeInt)
x17VReg = regalloc.FromRealReg(x17, regalloc.RegTypeInt)
x18VReg = regalloc.FromRealReg(x18, regalloc.RegTypeInt)
x19VReg = regalloc.FromRealReg(x19, regalloc.RegTypeInt)
x20VReg = regalloc.FromRealReg(x20, regalloc.RegTypeInt)
x21VReg = regalloc.FromRealReg(x21, regalloc.RegTypeInt)
x22VReg = regalloc.FromRealReg(x22, regalloc.RegTypeInt)
x23VReg = regalloc.FromRealReg(x23, regalloc.RegTypeInt)
x24VReg = regalloc.FromRealReg(x24, regalloc.RegTypeInt)
x25VReg = regalloc.FromRealReg(x25, regalloc.RegTypeInt)
x26VReg = regalloc.FromRealReg(x26, regalloc.RegTypeInt)
x27VReg = regalloc.FromRealReg(x27, regalloc.RegTypeInt)
x28VReg = regalloc.FromRealReg(x28, regalloc.RegTypeInt)
x29VReg = regalloc.FromRealReg(x29, regalloc.RegTypeInt)
x30VReg = regalloc.FromRealReg(x30, regalloc.RegTypeInt)
v0VReg = regalloc.FromRealReg(v0, regalloc.RegTypeFloat)
v1VReg = regalloc.FromRealReg(v1, regalloc.RegTypeFloat)
v2VReg = regalloc.FromRealReg(v2, regalloc.RegTypeFloat)
v3VReg = regalloc.FromRealReg(v3, regalloc.RegTypeFloat)
v4VReg = regalloc.FromRealReg(v4, regalloc.RegTypeFloat)
v5VReg = regalloc.FromRealReg(v5, regalloc.RegTypeFloat)
v6VReg = regalloc.FromRealReg(v6, regalloc.RegTypeFloat)
v7VReg = regalloc.FromRealReg(v7, regalloc.RegTypeFloat)
v8VReg = regalloc.FromRealReg(v8, regalloc.RegTypeFloat)
v9VReg = regalloc.FromRealReg(v9, regalloc.RegTypeFloat)
v10VReg = regalloc.FromRealReg(v10, regalloc.RegTypeFloat)
v11VReg = regalloc.FromRealReg(v11, regalloc.RegTypeFloat)
v12VReg = regalloc.FromRealReg(v12, regalloc.RegTypeFloat)
v13VReg = regalloc.FromRealReg(v13, regalloc.RegTypeFloat)
v14VReg = regalloc.FromRealReg(v14, regalloc.RegTypeFloat)
v15VReg = regalloc.FromRealReg(v15, regalloc.RegTypeFloat)
v16VReg = regalloc.FromRealReg(v16, regalloc.RegTypeFloat)
v17VReg = regalloc.FromRealReg(v17, regalloc.RegTypeFloat)
v18VReg = regalloc.FromRealReg(v18, regalloc.RegTypeFloat)
v19VReg = regalloc.FromRealReg(v19, regalloc.RegTypeFloat)
v20VReg = regalloc.FromRealReg(v20, regalloc.RegTypeFloat)
v21VReg = regalloc.FromRealReg(v21, regalloc.RegTypeFloat)
v22VReg = regalloc.FromRealReg(v22, regalloc.RegTypeFloat)
v23VReg = regalloc.FromRealReg(v23, regalloc.RegTypeFloat)
v24VReg = regalloc.FromRealReg(v24, regalloc.RegTypeFloat)
v25VReg = regalloc.FromRealReg(v25, regalloc.RegTypeFloat)
v26VReg = regalloc.FromRealReg(v26, regalloc.RegTypeFloat)
v27VReg = regalloc.FromRealReg(v27, regalloc.RegTypeFloat)
// lr (link register) holds the return address at the function entry.
lrVReg = x30VReg
// tmpReg is used to perform spill/load on large stack offsets, and load large constants.
// Therefore, be cautious about using this register in the middle of the compilation, especially before register allocation.
// This is the same as golang/go, but it's only described in the source code:
// https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L59
// https://github.com/golang/go/blob/18e17e2cb12837ea2c8582ecdb0cc780f49a1aac/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go#L13-L15
tmpRegVReg = regalloc.FromRealReg(tmp, regalloc.RegTypeInt)
v28VReg = regalloc.FromRealReg(v28, regalloc.RegTypeFloat)
v29VReg = regalloc.FromRealReg(v29, regalloc.RegTypeFloat)
v30VReg = regalloc.FromRealReg(v30, regalloc.RegTypeFloat)
v31VReg = regalloc.FromRealReg(v31, regalloc.RegTypeFloat)
xzrVReg = regalloc.FromRealReg(xzr, regalloc.RegTypeInt)
spVReg = regalloc.FromRealReg(sp, regalloc.RegTypeInt)
fpVReg = regalloc.FromRealReg(fp, regalloc.RegTypeInt)
)
var regNames = [...]string{
x0: "x0",
x1: "x1",
x2: "x2",
x3: "x3",
x4: "x4",
x5: "x5",
x6: "x6",
x7: "x7",
x8: "x8",
x9: "x9",
x10: "x10",
x11: "x11",
x12: "x12",
x13: "x13",
x14: "x14",
x15: "x15",
x16: "x16",
x17: "x17",
x18: "x18",
x19: "x19",
x20: "x20",
x21: "x21",
x22: "x22",
x23: "x23",
x24: "x24",
x25: "x25",
x26: "x26",
x27: "x27",
x28: "x28",
x29: "x29",
x30: "x30",
xzr: "xzr",
sp: "sp",
v0: "v0",
v1: "v1",
v2: "v2",
v3: "v3",
v4: "v4",
v5: "v5",
v6: "v6",
v7: "v7",
v8: "v8",
v9: "v9",
v10: "v10",
v11: "v11",
v12: "v12",
v13: "v13",
v14: "v14",
v15: "v15",
v16: "v16",
v17: "v17",
v18: "v18",
v19: "v19",
v20: "v20",
v21: "v21",
v22: "v22",
v23: "v23",
v24: "v24",
v25: "v25",
v26: "v26",
v27: "v27",
v28: "v28",
v29: "v29",
v30: "v30",
v31: "v31",
}
func formatVRegSized(r regalloc.VReg, size byte) (ret string) {
if r.IsRealReg() {
ret = regNames[r.RealReg()]
switch ret[0] {
case 'x':
switch size {
case 32:
ret = strings.Replace(ret, "x", "w", 1)
case 64:
default:
panic("BUG: invalid register size: " + strconv.Itoa(int(size)))
}
case 'v':
switch size {
case 32:
ret = strings.Replace(ret, "v", "s", 1)
case 64:
ret = strings.Replace(ret, "v", "d", 1)
case 128:
ret = strings.Replace(ret, "v", "q", 1)
default:
panic("BUG: invalid register size")
}
}
} else {
switch r.RegType() {
case regalloc.RegTypeInt:
switch size {
case 32:
ret = fmt.Sprintf("w%d?", r.ID())
case 64:
ret = fmt.Sprintf("x%d?", r.ID())
default:
panic("BUG: invalid register size: " + strconv.Itoa(int(size)))
}
case regalloc.RegTypeFloat:
switch size {
case 32:
ret = fmt.Sprintf("s%d?", r.ID())
case 64:
ret = fmt.Sprintf("d%d?", r.ID())
case 128:
ret = fmt.Sprintf("q%d?", r.ID())
default:
panic("BUG: invalid register size")
}
default:
panic(fmt.Sprintf("BUG: invalid register type: %d for %s", r.RegType(), r))
}
}
return
}
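// A few illustrative outputs (given the register definitions above): formatVRegSized(x1VReg, 32)
// yields "w1", formatVRegSized(v0VReg, 64) yields "d0", and a non-real integer VReg with ID 5 and
// size 64 yields "x5?".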
func formatVRegWidthVec(r regalloc.VReg, width vecArrangement) (ret string) {
var id string
wspec := strings.ToLower(width.String())
if r.IsRealReg() {
id = regNames[r.RealReg()][1:]
} else {
id = fmt.Sprintf("%d?", r.ID())
}
ret = fmt.Sprintf("%s%s", wspec, id)
return
}
func formatVRegVec(r regalloc.VReg, arr vecArrangement, index vecIndex) (ret string) {
id := fmt.Sprintf("v%d?", r.ID())
if r.IsRealReg() {
id = regNames[r.RealReg()]
}
ret = fmt.Sprintf("%s.%s", id, strings.ToLower(arr.String()))
if index != vecIndexNone {
ret += fmt.Sprintf("[%d]", index)
}
return
}
func regTypeToRegisterSizeInBits(r regalloc.RegType) byte {
switch r {
case regalloc.RegTypeInt:
return 64
case regalloc.RegTypeFloat:
return 128
default:
panic("BUG: invalid register type")
}
}
var regNumberInEncoding = [...]uint32{
x0: 0,
x1: 1,
x2: 2,
x3: 3,
x4: 4,
x5: 5,
x6: 6,
x7: 7,
x8: 8,
x9: 9,
x10: 10,
x11: 11,
x12: 12,
x13: 13,
x14: 14,
x15: 15,
x16: 16,
x17: 17,
x18: 18,
x19: 19,
x20: 20,
x21: 21,
x22: 22,
x23: 23,
x24: 24,
x25: 25,
x26: 26,
x27: 27,
x28: 28,
x29: 29,
x30: 30,
xzr: 31,
sp: 31,
v0: 0,
v1: 1,
v2: 2,
v3: 3,
v4: 4,
v5: 5,
v6: 6,
v7: 7,
v8: 8,
v9: 9,
v10: 10,
v11: 11,
v12: 12,
v13: 13,
v14: 14,
v15: 15,
v16: 16,
v17: 17,
v18: 18,
v19: 19,
v20: 20,
v21: 21,
v22: 22,
v23: 23,
v24: 24,
v25: 25,
v26: 26,
v27: 27,
v28: 28,
v29: 29,
v30: 30,
v31: 31,
}


@ -0,0 +1,90 @@
package arm64
import (
"encoding/binary"
"reflect"
"unsafe"
"github.com/tetratelabs/wazero/internal/wasmdebug"
)
// UnwindStack implements wazevo.unwindStack.
func UnwindStack(sp, _, top uintptr, returnAddresses []uintptr) []uintptr {
l := int(top - sp)
var stackBuf []byte
{
// TODO: use unsafe.Slice once the minimum supported Go version is 1.20.
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&stackBuf))
hdr.Data = sp
hdr.Len = l
hdr.Cap = l
}
for i := uint64(0); i < uint64(l); {
// (high address)
// +-----------------+
// | ....... |
// | ret Y | <----+
// | ....... | |
// | ret 0 | |
// | arg X | | size_of_arg_ret
// | ....... | |
// | arg 1 | |
// | arg 0 | <----+
// | size_of_arg_ret |
// | ReturnAddress |
// +-----------------+ <----+
// | ........... | |
// | spill slot M | |
// | ............ | |
// | spill slot 2 | |
// | spill slot 1 | | frame size
// | spill slot 0 | |
// | clobbered N | |
// | ............ | |
// | clobbered 0 | <----+
// | xxxxxx | ;; unused space to make it 16-byte aligned.
// | frame_size |
// +-----------------+ <---- SP
// (low address)
frameSize := binary.LittleEndian.Uint64(stackBuf[i:])
i += frameSize +
16 // frame size + aligned space.
retAddr := binary.LittleEndian.Uint64(stackBuf[i:])
i += 8 // ret addr.
sizeOfArgRet := binary.LittleEndian.Uint64(stackBuf[i:])
i += 8 + sizeOfArgRet
returnAddresses = append(returnAddresses, uintptr(retAddr))
if len(returnAddresses) == wasmdebug.MaxFrames {
break
}
}
return returnAddresses
}
// GoCallStackView implements wazevo.goCallStackView.
func GoCallStackView(stackPointerBeforeGoCall *uint64) []uint64 {
// (high address)
// +-----------------+ <----+
// | xxxxxxxxxxx | | ;; optional unused space to make it 16-byte aligned.
// ^ | arg[N]/ret[M] | |
// sliceSize | | ............ | | sliceSize
// | | arg[1]/ret[1] | |
// v | arg[0]/ret[0] | <----+
// | sliceSize |
// | frame_size |
// +-----------------+ <---- stackPointerBeforeGoCall
// (low address)
ptr := unsafe.Pointer(stackPointerBeforeGoCall)
size := *(*uint64)(unsafe.Add(ptr, 8))
var view []uint64
{
sh := (*reflect.SliceHeader)(unsafe.Pointer(&view))
sh.Data = uintptr(unsafe.Add(ptr, 16)) // skips the (frame_size, sliceSize).
sh.Len = int(size)
sh.Cap = int(size)
}
return view
}


@ -0,0 +1,100 @@
package backend
import (
"context"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type (
// Machine is a backend for a specific ISA machine.
Machine interface {
ExecutableContext() ExecutableContext
// DisableStackCheck disables the stack check for the current compilation for debugging/testing.
DisableStackCheck()
// SetCurrentABI initializes the FunctionABI for the given signature.
SetCurrentABI(abi *FunctionABI)
// SetCompiler sets the compilation context used for the lifetime of Machine.
// This is only called once per Machine, i.e. before the first compilation.
SetCompiler(Compiler)
// LowerSingleBranch is called when the compilation of the given single branch is started.
LowerSingleBranch(b *ssa.Instruction)
// LowerConditionalBranch is called when the compilation of the given conditional branch is started.
LowerConditionalBranch(b *ssa.Instruction)
// LowerInstr is called for each instruction in the given block except for the ones marked as already lowered
// via Compiler.MarkLowered. The order is reverse, i.e. from the last instruction to the first one.
//
// Note: this can lower multiple instructions (which produce the inputs) at once whenever it's possible
// for optimization.
LowerInstr(*ssa.Instruction)
// Reset resets the machine state for the next compilation.
Reset()
// InsertMove inserts a move instruction from src to dst whose type is typ.
InsertMove(dst, src regalloc.VReg, typ ssa.Type)
// InsertReturn inserts the return instruction to return from the current function.
InsertReturn()
// InsertLoadConstantBlockArg inserts the instruction(s) to load the constant value into the given regalloc.VReg.
InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg)
// Format returns the string representation of the currently compiled machine code.
// This is only for testing purpose.
Format() string
// RegAlloc does the register allocation after lowering.
RegAlloc()
// PostRegAlloc does the post register allocation, e.g. setting up prologue/epilogue, redundant move elimination, etc.
PostRegAlloc()
// ResolveRelocations resolves the relocations after emitting machine code.
// * refToBinaryOffset: the map from the function reference (ssa.FuncRef) to the executable offset.
// * executable: the binary to resolve the relocations.
// * relocations: the relocations to resolve.
// * callTrampolineIslandOffsets: the offsets of the trampoline islands in the executable.
ResolveRelocations(
refToBinaryOffset []int,
executable []byte,
relocations []RelocationInfo,
callTrampolineIslandOffsets []int,
)
// Encode encodes the machine instructions to the Compiler.
Encode(ctx context.Context) error
// CompileGoFunctionTrampoline compiles the trampoline function to call a Go function of the given exit code and signature.
CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *ssa.Signature, needModuleContextPtr bool) []byte
// CompileStackGrowCallSequence returns the sequence of instructions shared by all functions to
// call the stack grow builtin function.
CompileStackGrowCallSequence() []byte
// CompileEntryPreamble returns the sequence of instructions shared by multiple functions to
// enter the function from Go.
CompileEntryPreamble(signature *ssa.Signature) []byte
// LowerParams lowers the given parameters.
LowerParams(params []ssa.Value)
// LowerReturns lowers the given returns.
LowerReturns(returns []ssa.Value)
// ArgsResultsRegs returns the registers used for arguments and return values.
ArgsResultsRegs() (argResultInts, argResultFloats []regalloc.RealReg)
// CallTrampolineIslandInfo returns the interval of the offset where the trampoline island is placed, and
// the size of the trampoline island. If islandSize is zero, the trampoline island is not used on this machine.
CallTrampolineIslandInfo(numFunctions int) (interval, islandSize int, err error)
}
)


@ -0,0 +1,319 @@
package backend
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// RegAllocFunctionMachine is the interface for the machine specific logic that will be used in RegAllocFunction.
type RegAllocFunctionMachine[I regalloc.InstrConstraint] interface {
// InsertMoveBefore inserts the move instruction from src to dst before the given instruction.
InsertMoveBefore(dst, src regalloc.VReg, instr I)
// InsertStoreRegisterAt inserts the instruction(s) to store the given virtual register at the given instruction.
// If after is true, the instruction(s) will be inserted after the given instruction, otherwise before.
InsertStoreRegisterAt(v regalloc.VReg, instr I, after bool) I
// InsertReloadRegisterAt inserts the instruction(s) to reload the given virtual register at the given instruction.
// If after is true, the instruction(s) will be inserted after the given instruction, otherwise before.
InsertReloadRegisterAt(v regalloc.VReg, instr I, after bool) I
// ClobberedRegisters is called when the register allocation is done and the clobbered registers are known.
ClobberedRegisters(regs []regalloc.VReg)
// Swap swaps the two virtual registers after the given instruction.
Swap(cur I, x1, x2, tmp regalloc.VReg)
// LastInstrForInsertion implements LastInstrForInsertion of regalloc.Function. See its comment for details.
LastInstrForInsertion(begin, end I) I
// SSABlockLabel returns the label of the given ssa.BasicBlockID.
SSABlockLabel(id ssa.BasicBlockID) Label
}
type (
// RegAllocFunction implements regalloc.Function.
RegAllocFunction[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct {
m m
ssb ssa.Builder
c Compiler
// iter is the iterator for reversePostOrderBlocks
iter int
reversePostOrderBlocks []RegAllocBlock[I, m]
// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks.
labelToRegAllocBlockIndex map[Label]int
loopNestingForestRoots []ssa.BasicBlock
}
// RegAllocBlock implements regalloc.Block.
RegAllocBlock[I regalloc.InstrConstraint, m RegAllocFunctionMachine[I]] struct {
// f is the function this instruction belongs to. Used to reuse the regAllocFunctionImpl.predsSlice slice for Defs() and Uses().
f *RegAllocFunction[I, m]
sb ssa.BasicBlock
l Label
begin, end I
loopNestingForestChildren []ssa.BasicBlock
cur I
id int
cachedLastInstrForInsertion I
}
)
// NewRegAllocFunction returns a new RegAllocFunction.
func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] {
return &RegAllocFunction[I, M]{
m: m,
ssb: ssb,
c: c,
labelToRegAllocBlockIndex: make(map[Label]int),
}
}
// AddBlock adds a new block to the function.
func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end I) {
i := len(f.reversePostOrderBlocks)
f.reversePostOrderBlocks = append(f.reversePostOrderBlocks, RegAllocBlock[I, M]{
f: f,
sb: sb,
l: l,
begin: begin,
end: end,
id: int(sb.ID()),
})
f.labelToRegAllocBlockIndex[l] = i
}
// Reset resets the function for the next compilation.
func (f *RegAllocFunction[I, M]) Reset() {
f.reversePostOrderBlocks = f.reversePostOrderBlocks[:0]
f.iter = 0
}
// StoreRegisterAfter implements regalloc.Function StoreRegisterAfter.
func (f *RegAllocFunction[I, M]) StoreRegisterAfter(v regalloc.VReg, instr regalloc.Instr) {
m := f.m
m.InsertStoreRegisterAt(v, instr.(I), true)
}
// ReloadRegisterBefore implements regalloc.Function ReloadRegisterBefore.
func (f *RegAllocFunction[I, M]) ReloadRegisterBefore(v regalloc.VReg, instr regalloc.Instr) {
m := f.m
m.InsertReloadRegisterAt(v, instr.(I), false)
}
// ReloadRegisterAfter implements regalloc.Function ReloadRegisterAfter.
func (f *RegAllocFunction[I, M]) ReloadRegisterAfter(v regalloc.VReg, instr regalloc.Instr) {
m := f.m
m.InsertReloadRegisterAt(v, instr.(I), true)
}
// StoreRegisterBefore implements regalloc.Function StoreRegisterBefore.
func (f *RegAllocFunction[I, M]) StoreRegisterBefore(v regalloc.VReg, instr regalloc.Instr) {
m := f.m
m.InsertStoreRegisterAt(v, instr.(I), false)
}
// ClobberedRegisters implements regalloc.Function ClobberedRegisters.
func (f *RegAllocFunction[I, M]) ClobberedRegisters(regs []regalloc.VReg) {
f.m.ClobberedRegisters(regs)
}
// SwapBefore implements regalloc.Function SwapBefore.
func (f *RegAllocFunction[I, M]) SwapBefore(x1, x2, tmp regalloc.VReg, instr regalloc.Instr) {
f.m.Swap(instr.Prev().(I), x1, x2, tmp)
}
// PostOrderBlockIteratorBegin implements regalloc.Function PostOrderBlockIteratorBegin.
func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorBegin() regalloc.Block {
f.iter = len(f.reversePostOrderBlocks) - 1
return f.PostOrderBlockIteratorNext()
}
// PostOrderBlockIteratorNext implements regalloc.Function PostOrderBlockIteratorNext.
func (f *RegAllocFunction[I, M]) PostOrderBlockIteratorNext() regalloc.Block {
if f.iter < 0 {
return nil
}
b := &f.reversePostOrderBlocks[f.iter]
f.iter--
return b
}
// ReversePostOrderBlockIteratorBegin implements regalloc.Function ReversePostOrderBlockIteratorBegin.
func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorBegin() regalloc.Block {
f.iter = 0
return f.ReversePostOrderBlockIteratorNext()
}
// ReversePostOrderBlockIteratorNext implements regalloc.Function ReversePostOrderBlockIteratorNext.
func (f *RegAllocFunction[I, M]) ReversePostOrderBlockIteratorNext() regalloc.Block {
if f.iter >= len(f.reversePostOrderBlocks) {
return nil
}
b := &f.reversePostOrderBlocks[f.iter]
f.iter++
return b
}
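// Illustrative use of the iterator pairs above by a register allocator (a sketch, not a verbatim
// excerpt from the allocator; names as defined in this package):
//
//	for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() {
//		for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() {
//			// e.g. backward liveness analysis visits each instruction here.
//		}
//	}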
// LoopNestingForestRoots implements regalloc.Function LoopNestingForestRoots.
func (f *RegAllocFunction[I, M]) LoopNestingForestRoots() int {
f.loopNestingForestRoots = f.ssb.LoopNestingForestRoots()
return len(f.loopNestingForestRoots)
}
// LoopNestingForestRoot implements regalloc.Function LoopNestingForestRoot.
func (f *RegAllocFunction[I, M]) LoopNestingForestRoot(i int) regalloc.Block {
blk := f.loopNestingForestRoots[i]
l := f.m.SSABlockLabel(blk.ID())
index := f.labelToRegAllocBlockIndex[l]
return &f.reversePostOrderBlocks[index]
}
// InsertMoveBefore implements regalloc.Function InsertMoveBefore.
func (f *RegAllocFunction[I, M]) InsertMoveBefore(dst, src regalloc.VReg, instr regalloc.Instr) {
f.m.InsertMoveBefore(dst, src, instr.(I))
}
// LowestCommonAncestor implements regalloc.Function LowestCommonAncestor.
func (f *RegAllocFunction[I, M]) LowestCommonAncestor(blk1, blk2 regalloc.Block) regalloc.Block {
ret := f.ssb.LowestCommonAncestor(blk1.(*RegAllocBlock[I, M]).sb, blk2.(*RegAllocBlock[I, M]).sb)
l := f.m.SSABlockLabel(ret.ID())
index := f.labelToRegAllocBlockIndex[l]
return &f.reversePostOrderBlocks[index]
}
// Idom implements regalloc.Function Idom.
func (f *RegAllocFunction[I, M]) Idom(blk regalloc.Block) regalloc.Block {
builder := f.ssb
idom := builder.Idom(blk.(*RegAllocBlock[I, M]).sb)
if idom == nil {
panic("BUG: idom must not be nil")
}
l := f.m.SSABlockLabel(idom.ID())
index := f.labelToRegAllocBlockIndex[l]
return &f.reversePostOrderBlocks[index]
}
// ID implements regalloc.Block.
func (r *RegAllocBlock[I, m]) ID() int32 { return int32(r.id) }
// BlockParams implements regalloc.Block.
func (r *RegAllocBlock[I, m]) BlockParams(regs *[]regalloc.VReg) []regalloc.VReg {
c := r.f.c
*regs = (*regs)[:0]
for i := 0; i < r.sb.Params(); i++ {
v := c.VRegOf(r.sb.Param(i))
*regs = append(*regs, v)
}
return *regs
}
// InstrIteratorBegin implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrIteratorBegin() regalloc.Instr {
r.cur = r.begin
return r.cur
}
// InstrIteratorNext implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrIteratorNext() regalloc.Instr {
for {
if r.cur == r.end {
return nil
}
instr := r.cur.Next()
r.cur = instr.(I)
if instr == nil {
return nil
} else if instr.AddedBeforeRegAlloc() {
// Only concerned about the instruction added before regalloc.
return instr
}
}
}
// InstrRevIteratorBegin implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrRevIteratorBegin() regalloc.Instr {
r.cur = r.end
return r.cur
}
// InstrRevIteratorNext implements regalloc.Block.
func (r *RegAllocBlock[I, m]) InstrRevIteratorNext() regalloc.Instr {
for {
if r.cur == r.begin {
return nil
}
instr := r.cur.Prev()
r.cur = instr.(I)
if instr == nil {
return nil
} else if instr.AddedBeforeRegAlloc() {
// Only concerned about the instruction added before regalloc.
return instr
}
}
}
// FirstInstr implements regalloc.Block.
func (r *RegAllocBlock[I, m]) FirstInstr() regalloc.Instr {
return r.begin
}
// EndInstr implements regalloc.Block.
func (r *RegAllocBlock[I, m]) EndInstr() regalloc.Instr {
return r.end
}
// LastInstrForInsertion implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LastInstrForInsertion() regalloc.Instr {
var nil I
if r.cachedLastInstrForInsertion == nil {
r.cachedLastInstrForInsertion = r.f.m.LastInstrForInsertion(r.begin, r.end)
}
return r.cachedLastInstrForInsertion
}
// Preds implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Preds() int { return r.sb.Preds() }
// Pred implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Pred(i int) regalloc.Block {
sb := r.sb
pred := sb.Pred(i)
l := r.f.m.SSABlockLabel(pred.ID())
index := r.f.labelToRegAllocBlockIndex[l]
return &r.f.reversePostOrderBlocks[index]
}
// Entry implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Entry() bool { return r.sb.EntryBlock() }
// Succs implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Succs() int {
return r.sb.Succs()
}
// Succ implements regalloc.Block.
func (r *RegAllocBlock[I, m]) Succ(i int) regalloc.Block {
sb := r.sb
succ := sb.Succ(i)
if succ.ReturnBlock() {
return nil
}
l := r.f.m.SSABlockLabel(succ.ID())
index := r.f.labelToRegAllocBlockIndex[l]
return &r.f.reversePostOrderBlocks[index]
}
// LoopHeader implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopHeader() bool {
return r.sb.LoopHeader()
}
// LoopNestingForestChildren implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopNestingForestChildren() int {
r.loopNestingForestChildren = r.sb.LoopNestingForestChildren()
return len(r.loopNestingForestChildren)
}
// LoopNestingForestChild implements regalloc.Block.
func (r *RegAllocBlock[I, m]) LoopNestingForestChild(i int) regalloc.Block {
blk := r.loopNestingForestChildren[i]
l := r.f.m.SSABlockLabel(blk.ID())
index := r.f.labelToRegAllocBlockIndex[l]
return &r.f.reversePostOrderBlocks[index]
}


@ -0,0 +1,136 @@
package regalloc
import "fmt"
// These interfaces are implemented by ISA-specific backends to abstract away the details, and allow the register
// allocators to work on any ISA.
//
// TODO: the interfaces are not stabilized yet, especially x64 will need some changes. E.g. x64 has an addressing mode
// where index can be in memory. That kind of info will be useful to reduce the register pressure, and should be leveraged
// by the register allocators, like https://docs.rs/regalloc2/latest/regalloc2/enum.OperandConstraint.html
type (
// Function is the top-level interface to do register allocation, which corresponds to a CFG containing
// Blocks(s).
Function interface {
// PostOrderBlockIteratorBegin returns the first block in the post-order traversal of the CFG.
// In other words, the last blocks in the CFG will be returned first.
PostOrderBlockIteratorBegin() Block
// PostOrderBlockIteratorNext returns the next block in the post-order traversal of the CFG.
PostOrderBlockIteratorNext() Block
// ReversePostOrderBlockIteratorBegin returns the first block in the reverse post-order traversal of the CFG.
// In other words, the first blocks in the CFG will be returned first.
ReversePostOrderBlockIteratorBegin() Block
// ReversePostOrderBlockIteratorNext returns the next block in the reverse post-order traversal of the CFG.
ReversePostOrderBlockIteratorNext() Block
// ClobberedRegisters tell the clobbered registers by this function.
ClobberedRegisters([]VReg)
// LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function.
LoopNestingForestRoots() int
// LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function.
LoopNestingForestRoot(i int) Block
// LowestCommonAncestor returns the lowest common ancestor of two blocks in the dominator tree.
LowestCommonAncestor(blk1, blk2 Block) Block
// Idom returns the immediate dominator of the given block.
Idom(blk Block) Block
// The following methods are for rewriting the function.
// SwapBefore swaps the two virtual registers immediately before the given instruction.
SwapBefore(x1, x2, tmp VReg, instr Instr)
// StoreRegisterBefore inserts store instruction(s) before the given instruction for the given virtual register.
StoreRegisterBefore(v VReg, instr Instr)
// StoreRegisterAfter inserts store instruction(s) after the given instruction for the given virtual register.
StoreRegisterAfter(v VReg, instr Instr)
// ReloadRegisterBefore inserts reload instruction(s) before the given instruction for the given virtual register.
ReloadRegisterBefore(v VReg, instr Instr)
// ReloadRegisterAfter inserts reload instruction(s) after the given instruction for the given virtual register.
ReloadRegisterAfter(v VReg, instr Instr)
// InsertMoveBefore inserts move instruction(s) before the given instruction for the given virtual registers.
InsertMoveBefore(dst, src VReg, instr Instr)
}
// Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s).
Block interface {
// ID returns the unique identifier of this block which is ordered in the reverse post-order traversal of the CFG.
ID() int32
// BlockParams returns the virtual registers used as the parameters of this block.
BlockParams(*[]VReg) []VReg
// InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped.
// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
InstrIteratorBegin() Instr
// InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped.
// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
InstrIteratorNext() Instr
// InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order.
InstrRevIteratorBegin() Instr
// InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order.
InstrRevIteratorNext() Instr
// FirstInstr returns the first instruction in this block; instructions may be inserted after it.
FirstInstr() Instr
// EndInstr returns the end instruction in this block.
EndInstr() Instr
// LastInstrForInsertion returns the last instruction in this block where instructions will be inserted before it.
// Such insertions only happen when we need to insert spill/reload instructions to adjust the merge edges.
// At the time of register allocation, all the critical edges are already split, so there is no need
// to worry about the case where branching instruction has multiple successors.
// Therefore, usually, it is the nop instruction, but if the block ends with an unconditional branch, then that branch is returned instead of the nop.
// In other words, the result is either a nop or an unconditional branch.
LastInstrForInsertion() Instr
// Preds returns the number of predecessors of this block in the CFG.
Preds() int
// Pred returns the i-th predecessor of this block in the CFG.
Pred(i int) Block
// Entry returns true if the block is for the entry block.
Entry() bool
// Succs returns the number of successors of this block in the CFG.
Succs() int
// Succ returns the i-th successor of this block in the CFG.
Succ(i int) Block
// LoopHeader returns true if this block is a loop header.
LoopHeader() bool
// LoopNestingForestChildren returns the number of children of this block in the loop nesting forest.
LoopNestingForestChildren() int
// LoopNestingForestChild returns the i-th child of this block in the loop nesting forest.
LoopNestingForestChild(i int) Block
}
// Instr is an instruction in a block, abstracting away the underlying ISA.
Instr interface {
fmt.Stringer
// Next returns the next instruction in the same block.
Next() Instr
// Prev returns the previous instruction in the same block.
Prev() Instr
// Defs returns the virtual registers defined by this instruction.
Defs(*[]VReg) []VReg
// Uses returns the virtual registers used by this instruction.
// Note: multiple returned []VReg will not be held at the same time, so it's safe to use the same slice for this.
Uses(*[]VReg) []VReg
// AssignUse assigns the RealReg-allocated virtual register used by this instruction at the given index.
AssignUse(index int, v VReg)
// AssignDef assigns a RealReg-allocated virtual register defined by this instruction.
// This only accepts one register because we don't allocate registers for multi-def instructions (e.g. call instructions).
AssignDef(VReg)
// IsCopy returns true if this instruction is a move instruction between two registers.
// If true, the instruction is of the form of dst = src, and if the src and dst do not interfere with each other,
// we could coalesce them, and hence the copy can be eliminated from the final code.
IsCopy() bool
// IsCall returns true if this instruction is a call instruction. The result is used to insert
// caller saved register spills and restores.
IsCall() bool
// IsIndirectCall returns true if this instruction is an indirect call instruction which calls a function pointer.
// The result is used to insert caller saved register spills and restores.
IsIndirectCall() bool
// IsReturn returns true if this instruction is a return instruction.
IsReturn() bool
// AddedBeforeRegAlloc returns true if this instruction is added before register allocation.
AddedBeforeRegAlloc() bool
}
// InstrConstraint is an interface for arch-specific instruction constraints.
InstrConstraint interface {
comparable
Instr
}
)
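To make the Block/Instr contract above concrete, here is a minimal sketch (not part of the vendored source) of a pass walking a block with the forward iterator; it assumes, as the allocator's own passes do, that InstrIteratorNext returns nil once the block is exhausted:

// Illustrative only: count register-to-register copies in a block using the
// interfaces above. Assumes the iterator yields nil at the end of the block.
func countCopies(b Block) int {
	n := 0
	for instr := b.InstrIteratorBegin(); instr != nil; instr = b.InstrIteratorNext() {
		if instr.IsCopy() {
			n++
		}
	}
	return n
}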

View file

@ -0,0 +1,123 @@
package regalloc
import (
"fmt"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// VReg represents a register assigned to an SSA value. This is used to represent a register in the backend.
// A VReg may or may not be backed by a physical register, and the physical register info, if any, can be obtained via RealReg.
type VReg uint64
// VRegID is the lower 32 bits of VReg, which is the pure identifier of a VReg without the RealReg info.
type VRegID uint32
// RealReg returns the RealReg of this VReg.
func (v VReg) RealReg() RealReg {
return RealReg(v >> 32)
}
// IsRealReg returns true if this VReg is backed by a physical register.
func (v VReg) IsRealReg() bool {
return v.RealReg() != RealRegInvalid
}
// FromRealReg returns a VReg from the given RealReg and RegType.
// This is used to represent a specific pre-colored register in the backend.
func FromRealReg(r RealReg, typ RegType) VReg {
rid := VRegID(r)
if rid > vRegIDReservedForRealNum {
panic(fmt.Sprintf("invalid real reg %d", r))
}
return VReg(r).SetRealReg(r).SetRegType(typ)
}
// SetRealReg sets the RealReg of this VReg and returns the updated VReg.
func (v VReg) SetRealReg(r RealReg) VReg {
return VReg(r)<<32 | (v & 0xff_00_ffffffff)
}
// RegType returns the RegType of this VReg.
func (v VReg) RegType() RegType {
return RegType(v >> 40)
}
// SetRegType sets the RegType of this VReg and returns the updated VReg.
func (v VReg) SetRegType(t RegType) VReg {
return VReg(t)<<40 | (v & 0x00_ff_ffffffff)
}
// ID returns the VRegID of this VReg.
func (v VReg) ID() VRegID {
return VRegID(v & 0xffffffff)
}
// Valid returns true if this VReg is valid.
func (v VReg) Valid() bool {
return v.ID() != vRegIDInvalid && v.RegType() != RegTypeInvalid
}
// RealReg represents a physical register.
type RealReg byte
const RealRegInvalid RealReg = 0
const (
vRegIDInvalid VRegID = 1 << 31
VRegIDNonReservedBegin = vRegIDReservedForRealNum
vRegIDReservedForRealNum VRegID = 128
VRegInvalid = VReg(vRegIDInvalid)
)
// String implements fmt.Stringer.
func (r RealReg) String() string {
switch r {
case RealRegInvalid:
return "invalid"
default:
return fmt.Sprintf("r%d", r)
}
}
// String implements fmt.Stringer.
func (v VReg) String() string {
if v.IsRealReg() {
return fmt.Sprintf("r%d", v.ID())
}
return fmt.Sprintf("v%d?", v.ID())
}
// RegType represents the type of a register.
type RegType byte
const (
RegTypeInvalid RegType = iota
RegTypeInt
RegTypeFloat
NumRegType
)
// String implements fmt.Stringer.
func (r RegType) String() string {
switch r {
case RegTypeInt:
return "int"
case RegTypeFloat:
return "float"
default:
return "invalid"
}
}
// RegTypeOf returns the RegType of the given ssa.Type.
func RegTypeOf(p ssa.Type) RegType {
switch p {
case ssa.TypeI32, ssa.TypeI64:
return RegTypeInt
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
return RegTypeFloat
default:
panic("invalid type")
}
}
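As an illustrative check of the bit layout above (not part of the vendored source): the ID occupies the low 32 bits, the RealReg bits 32-39, and the RegType bits 40-47, so a pre-colored register round-trips like this:

// Illustrative only: demonstrates the VReg packing described above.
func exampleVRegPacking() {
	v := FromRealReg(3, RegTypeInt) // pre-colored integer register r3
	fmt.Println(v.ID())        // 3   (low 32 bits)
	fmt.Println(v.RealReg())   // r3  (bits 32-39)
	fmt.Println(v.RegType())   // int (bits 40-47)
	fmt.Println(v.IsRealReg()) // true, since RealReg != RealRegInvalid
}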

File diff suppressed because it is too large.

View file

@ -0,0 +1,108 @@
package regalloc
import (
"fmt"
"strings"
)
// NewRegSet returns a new RegSet with the given registers.
func NewRegSet(regs ...RealReg) RegSet {
var ret RegSet
for _, r := range regs {
ret = ret.add(r)
}
return ret
}
// RegSet represents a set of registers.
type RegSet uint64
func (rs RegSet) format(info *RegisterInfo) string { //nolint:unused
var ret []string
for i := 0; i < 64; i++ {
if rs&(1<<uint(i)) != 0 {
ret = append(ret, info.RealRegName(RealReg(i)))
}
}
return strings.Join(ret, ", ")
}
func (rs RegSet) has(r RealReg) bool {
return rs&(1<<uint(r)) != 0
}
func (rs RegSet) add(r RealReg) RegSet {
if r >= 64 {
return rs
}
return rs | 1<<uint(r)
}
func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
for i := 0; i < 64; i++ {
if rs&(1<<uint(i)) != 0 {
f(RealReg(i))
}
}
}
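A RegSet is just a 64-bit bitmask keyed by RealReg, so membership tests and iteration reduce to bit operations; a minimal sketch (illustrative, same package assumed):

// Illustrative only: basic RegSet usage.
func exampleRegSet() {
	rs := NewRegSet(RealReg(1), RealReg(5))
	fmt.Println(rs.has(RealReg(5))) // true
	fmt.Println(rs.has(RealReg(2))) // false
	rs.Range(func(r RealReg) {
		fmt.Println(r) // prints r1, then r5
	})
}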
type regInUseSet struct {
set RegSet
vrs [64]VReg
}
func (rs *regInUseSet) reset() {
rs.set = 0
for i := range rs.vrs {
rs.vrs[i] = VRegInvalid
}
}
func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
var ret []string
for i := 0; i < 64; i++ {
if rs.set&(1<<uint(i)) != 0 {
vr := rs.vrs[i]
ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
}
}
return strings.Join(ret, ", ")
}
func (rs *regInUseSet) has(r RealReg) bool {
if r >= 64 {
return false
}
return rs.set&(1<<uint(r)) != 0
}
func (rs *regInUseSet) get(r RealReg) VReg {
if r >= 64 {
return VRegInvalid
}
return rs.vrs[r]
}
func (rs *regInUseSet) remove(r RealReg) {
if r >= 64 {
return
}
rs.set &= ^(1 << uint(r))
rs.vrs[r] = VRegInvalid
}
func (rs *regInUseSet) add(r RealReg, vr VReg) {
if r >= 64 {
return
}
rs.set |= 1 << uint(r)
rs.vrs[r] = vr
}
func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
for i := 0; i < 64; i++ {
if rs.set&(1<<uint(i)) != 0 {
f(RealReg(i), rs.vrs[i])
}
}
}

View file

@ -0,0 +1,43 @@
package backend
import (
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)
// SSAValueDefinition represents a definition of an SSA value.
type SSAValueDefinition struct {
// BlockParamValue is valid if Instr == nil
BlockParamValue ssa.Value
// BlkParamVReg is valid if Instr == nil
BlkParamVReg regalloc.VReg
// Instr is not nil if this is a definition from an instruction.
Instr *ssa.Instruction
// N is the index of the return value in the instr's return values list.
N int
// RefCount is the number of references to the result.
RefCount int
}
func (d *SSAValueDefinition) IsFromInstr() bool {
return d.Instr != nil
}
func (d *SSAValueDefinition) IsFromBlockParam() bool {
return d.Instr == nil
}
func (d *SSAValueDefinition) SSAValue() ssa.Value {
if d.IsFromBlockParam() {
return d.BlockParamValue
} else {
r, rs := d.Instr.Returns()
if d.N == 0 {
return r
} else {
return rs[d.N-1]
}
}
}

View file

@ -0,0 +1,722 @@
package wazevo
import (
"context"
"encoding/binary"
"fmt"
"reflect"
"runtime"
"sync/atomic"
"unsafe"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/experimental"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
"github.com/tetratelabs/wazero/internal/expctxkeys"
"github.com/tetratelabs/wazero/internal/internalapi"
"github.com/tetratelabs/wazero/internal/wasm"
"github.com/tetratelabs/wazero/internal/wasmdebug"
"github.com/tetratelabs/wazero/internal/wasmruntime"
)
type (
// callEngine implements api.Function.
callEngine struct {
internalapi.WazeroOnly
stack []byte
// stackTop is the pointer to the *aligned* top of the stack. This must be updated
// whenever the stack is changed. This is passed to the assembly function
// at the very beginning of api.Function Call/CallWithStack.
stackTop uintptr
// executable is the pointer to the executable code for this function.
executable *byte
preambleExecutable *byte
// parent is the *moduleEngine from which this callEngine is created.
parent *moduleEngine
// indexInModule is the index of the function in the module.
indexInModule wasm.Index
// sizeOfParamResultSlice is the size of the parameter/result slice.
sizeOfParamResultSlice int
requiredParams int
// execCtx holds various information to be read/written by assembly functions.
execCtx executionContext
// execCtxPtr holds the pointer to the executionContext which doesn't change after callEngine is created.
execCtxPtr uintptr
numberOfResults int
stackIteratorImpl stackIterator
}
// executionContext is the struct to be read/written by assembly functions.
executionContext struct {
// exitCode holds the wazevoapi.ExitCode describing the state of the function execution.
exitCode wazevoapi.ExitCode
// callerModuleContextPtr holds the moduleContextOpaque for Go function calls.
callerModuleContextPtr *byte
// originalFramePointer holds the original frame pointer of the caller of the assembly function.
originalFramePointer uintptr
// originalStackPointer holds the original stack pointer of the caller of the assembly function.
originalStackPointer uintptr
// goReturnAddress holds the return address to go back to the caller of the assembly function.
goReturnAddress uintptr
// stackBottomPtr holds the pointer to the bottom of the stack.
stackBottomPtr *byte
// goCallReturnAddress holds the return address to go back to the caller of the Go function.
goCallReturnAddress *byte
// stackPointerBeforeGoCall holds the stack pointer before calling a Go function.
stackPointerBeforeGoCall *uint64
// stackGrowRequiredSize holds the required size of stack grow.
stackGrowRequiredSize uintptr
// memoryGrowTrampolineAddress holds the address of memory grow trampoline function.
memoryGrowTrampolineAddress *byte
// stackGrowCallTrampolineAddress holds the address of stack grow trampoline function.
stackGrowCallTrampolineAddress *byte
// checkModuleExitCodeTrampolineAddress holds the address of check-module-exit-code function.
checkModuleExitCodeTrampolineAddress *byte
// savedRegisters is the opaque spaces for save/restore registers.
// We want to align 16 bytes for each register, so we use [64][2]uint64.
savedRegisters [64][2]uint64
// goFunctionCallCalleeModuleContextOpaque is the pointer to the target Go function's moduleContextOpaque.
goFunctionCallCalleeModuleContextOpaque uintptr
// tableGrowTrampolineAddress holds the address of table grow trampoline function.
tableGrowTrampolineAddress *byte
// refFuncTrampolineAddress holds the address of ref-func trampoline function.
refFuncTrampolineAddress *byte
// memmoveAddress holds the address of memmove function implemented by Go runtime. See memmove.go.
memmoveAddress uintptr
// framePointerBeforeGoCall holds the frame pointer before calling a Go function. Note: only used in amd64.
framePointerBeforeGoCall uintptr
// memoryWait32TrampolineAddress holds the address of memory_wait32 trampoline function.
memoryWait32TrampolineAddress *byte
// memoryWait64TrampolineAddress holds the address of memory_wait64 trampoline function.
memoryWait64TrampolineAddress *byte
// memoryNotifyTrampolineAddress holds the address of the memory_notify trampoline function.
memoryNotifyTrampolineAddress *byte
}
)
func (c *callEngine) requiredInitialStackSize() int {
const initialStackSizeDefault = 10240
stackSize := initialStackSizeDefault
paramResultInBytes := c.sizeOfParamResultSlice * 8 * 2 // * 8 because uint64 is 8 bytes, and *2 because we need both separated param/result slots.
required := paramResultInBytes + 32 + 16 // 32 is enough to accommodate the call frame info, and 16 is slack in case the []byte is not aligned to 16 bytes.
if required > stackSize {
stackSize = required
}
return stackSize
}
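As a worked example of the sizing above (illustrative numbers): with sizeOfParamResultSlice = 4096, paramResultInBytes is 4096*8*2 = 65536, so required is 65536+32+16 = 65584; that exceeds the 10240 default and becomes the initial stack size. For small signatures the 10240 default wins.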
func (c *callEngine) init() {
stackSize := c.requiredInitialStackSize()
if wazevoapi.StackGuardCheckEnabled {
stackSize += wazevoapi.StackGuardCheckGuardPageSize
}
c.stack = make([]byte, stackSize)
c.stackTop = alignedStackTop(c.stack)
if wazevoapi.StackGuardCheckEnabled {
c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize]
} else {
c.execCtx.stackBottomPtr = &c.stack[0]
}
c.execCtxPtr = uintptr(unsafe.Pointer(&c.execCtx))
}
// alignedStackTop returns the 16-byte-aligned stack top of the given stack.
// 16-byte alignment should be good for all supported platforms (arm64/amd64).
func alignedStackTop(s []byte) uintptr {
stackAddr := uintptr(unsafe.Pointer(&s[len(s)-1]))
return stackAddr - (stackAddr & (16 - 1))
}
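The rounding in alignedStackTop is plain mask arithmetic; a tiny illustrative check with a made-up address:

// Illustrative only: rounds a hypothetical address down to a 16-byte boundary.
func exampleAlignDown() {
	addr := uintptr(0x1003f)
	aligned := addr - (addr & (16 - 1))
	fmt.Printf("%#x\n", aligned) // 0x10030
}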
// Definition implements api.Function.
func (c *callEngine) Definition() api.FunctionDefinition {
return c.parent.module.Source.FunctionDefinition(c.indexInModule)
}
// Call implements api.Function.
func (c *callEngine) Call(ctx context.Context, params ...uint64) ([]uint64, error) {
if c.requiredParams != len(params) {
return nil, fmt.Errorf("expected %d params, but passed %d", c.requiredParams, len(params))
}
paramResultSlice := make([]uint64, c.sizeOfParamResultSlice)
copy(paramResultSlice, params)
if err := c.callWithStack(ctx, paramResultSlice); err != nil {
return nil, err
}
return paramResultSlice[:c.numberOfResults], nil
}
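From the caller's side, Call allocates the param/result slice on every invocation, while CallWithStack (below) lets the caller reuse one; a hedged usage sketch against the public api.Function interface:

// Illustrative only: a caller reusing one stack slice across invocations via CallWithStack.
func callTwice(ctx context.Context, fn api.Function) error {
	stack := make([]uint64, 2) // sized for max(len(params), len(results)) of this function
	stack[0], stack[1] = 1, 2
	if err := fn.CallWithStack(ctx, stack); err != nil {
		return err
	}
	stack[0], stack[1] = 3, 4
	return fn.CallWithStack(ctx, stack)
}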
func (c *callEngine) addFrame(builder wasmdebug.ErrorBuilder, addr uintptr) (def api.FunctionDefinition, listener experimental.FunctionListener) {
eng := c.parent.parent.parent
cm := eng.compiledModuleOfAddr(addr)
if cm == nil {
// In this case, the module might have been closed and deleted from the engine.
// We fall back to searching the imported modules that can be referenced from this callEngine.
// First, we check itself.
if checkAddrInBytes(addr, c.parent.parent.executable) {
cm = c.parent.parent
} else {
// Otherwise, search all imported modules. TODO: maybe recursive, but not sure it's useful in practice.
p := c.parent
for i := range p.importedFunctions {
candidate := p.importedFunctions[i].me.parent
if checkAddrInBytes(addr, candidate.executable) {
cm = candidate
break
}
}
}
}
if cm != nil {
index := cm.functionIndexOf(addr)
def = cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index)
var sources []string
if dw := cm.module.DWARFLines; dw != nil {
sourceOffset := cm.getSourceOffset(addr)
sources = dw.Line(sourceOffset)
}
builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
if len(cm.listeners) > 0 {
listener = cm.listeners[index]
}
}
return
}
// CallWithStack implements api.Function.
func (c *callEngine) CallWithStack(ctx context.Context, paramResultStack []uint64) (err error) {
if c.sizeOfParamResultSlice > len(paramResultStack) {
return fmt.Errorf("need %d params, but stack size is %d", c.sizeOfParamResultSlice, len(paramResultStack))
}
return c.callWithStack(ctx, paramResultStack)
}
// callWithStack is the implementation shared by Call and CallWithStack.
func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint64) (err error) {
snapshotEnabled := ctx.Value(expctxkeys.EnableSnapshotterKey{}) != nil
if snapshotEnabled {
ctx = context.WithValue(ctx, expctxkeys.SnapshotterKey{}, c)
}
if wazevoapi.StackGuardCheckEnabled {
defer func() {
wazevoapi.CheckStackGuardPage(c.stack)
}()
}
p := c.parent
ensureTermination := p.parent.ensureTermination
m := p.module
if ensureTermination {
select {
case <-ctx.Done():
// If the provided context is already done, close the module and return the error.
m.CloseWithCtxErr(ctx)
return m.FailIfClosed()
default:
}
}
var paramResultPtr *uint64
if len(paramResultStack) > 0 {
paramResultPtr = &paramResultStack[0]
}
defer func() {
r := recover()
if s, ok := r.(*snapshot); ok {
// An unhandled snapshot was created by a different call engine, possibly from a nested wasm invocation;
// let it propagate up to be handled by the caller.
panic(s)
}
if r != nil {
type listenerForAbort struct {
def api.FunctionDefinition
lsn experimental.FunctionListener
}
var listeners []listenerForAbort
builder := wasmdebug.NewErrorBuilder()
def, lsn := c.addFrame(builder, uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress)))
if lsn != nil {
listeners = append(listeners, listenerForAbort{def, lsn})
}
returnAddrs := unwindStack(
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)),
c.execCtx.framePointerBeforeGoCall,
c.stackTop,
nil,
)
for _, retAddr := range returnAddrs[:len(returnAddrs)-1] { // the last return addr is the trampoline, so we skip it.
def, lsn = c.addFrame(builder, retAddr)
if lsn != nil {
listeners = append(listeners, listenerForAbort{def, lsn})
}
}
err = builder.FromRecovered(r)
for _, lsn := range listeners {
lsn.lsn.Abort(ctx, m, lsn.def, err)
}
} else {
if err != wasmruntime.ErrRuntimeStackOverflow { // The stack overflow case shouldn't panic (to avoid extreme stack unwinding).
err = c.parent.module.FailIfClosed()
}
}
if err != nil {
// Ensures that we can reuse this callEngine even after an error.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
}
}()
if ensureTermination {
done := m.CloseModuleOnCanceledOrTimeout(ctx)
defer done()
}
if c.stackTop&(16-1) != 0 {
panic("BUG: stack must be aligned to 16 bytes")
}
entrypoint(c.preambleExecutable, c.executable, c.execCtxPtr, c.parent.opaquePtr, paramResultPtr, c.stackTop)
for {
switch ec := c.execCtx.exitCode; ec & wazevoapi.ExitCodeMask {
case wazevoapi.ExitCodeOK:
return nil
case wazevoapi.ExitCodeGrowStack:
oldsp := uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
oldTop := c.stackTop
oldStack := c.stack
var newsp, newfp uintptr
if wazevoapi.StackGuardCheckEnabled {
newsp, newfp, err = c.growStackWithGuarded()
} else {
newsp, newfp, err = c.growStack()
}
if err != nil {
return err
}
adjustClonedStack(oldsp, oldTop, newsp, newfp, c.stackTop)
// Old stack must be alive until the new stack is adjusted.
runtime.KeepAlive(oldStack)
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, newsp, newfp)
case wazevoapi.ExitCodeGrowMemory:
mod := c.callerModuleInstance()
mem := mod.MemoryInstance
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
argRes := &s[0]
if res, ok := mem.Grow(uint32(*argRes)); !ok {
*argRes = uint64(0xffffffff) // = -1 in signed 32-bit integer.
} else {
*argRes = uint64(res)
calleeOpaque := opaqueViewFromPtr(uintptr(unsafe.Pointer(c.execCtx.callerModuleContextPtr)))
if mod.Source.MemorySection != nil { // Local memory.
putLocalMemory(calleeOpaque, 8 /* local memory begins at 8 */, mem)
} else {
// Imported memory's owner at offset 16 of the callerModuleContextPtr.
opaquePtr := uintptr(binary.LittleEndian.Uint64(calleeOpaque[16:]))
importedMemOwner := opaqueViewFromPtr(opaquePtr)
putLocalMemory(importedMemOwner, 8 /* local memory begins at 8 */, mem)
}
}
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeTableGrow:
mod := c.callerModuleInstance()
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
tableIndex, num, ref := uint32(s[0]), uint32(s[1]), uintptr(s[2])
table := mod.Tables[tableIndex]
s[0] = uint64(uint32(int32(table.Grow(num, ref))))
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallGoFunction:
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
func() {
if snapshotEnabled {
defer snapshotRecoverFn(c)
}
f.Call(ctx, goCallStackView(c.execCtx.stackPointerBeforeGoCall))
}()
// Back to the native code.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallGoFunctionWithListener:
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
f := hostModuleGoFuncFromOpaque[api.GoFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
// Call Listener.Before.
callerModule := c.callerModuleInstance()
listener := listeners[index]
hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
def := hostModule.FunctionDefinition(wasm.Index(index))
listener.Before(ctx, callerModule, def, s, c.stackIterator(true))
// Call into the Go function.
func() {
if snapshotEnabled {
defer snapshotRecoverFn(c)
}
f.Call(ctx, s)
}()
// Call Listener.After.
listener.After(ctx, callerModule, def, s)
// Back to the native code.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallGoModuleFunction:
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
mod := c.callerModuleInstance()
func() {
if snapshotEnabled {
defer snapshotRecoverFn(c)
}
f.Call(ctx, mod, goCallStackView(c.execCtx.stackPointerBeforeGoCall))
}()
// Back to the native code.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallGoModuleFunctionWithListener:
index := wazevoapi.GoFunctionIndexFromExitCode(ec)
f := hostModuleGoFuncFromOpaque[api.GoModuleFunction](index, c.execCtx.goFunctionCallCalleeModuleContextOpaque)
listeners := hostModuleListenersSliceFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
// Call Listener.Before.
callerModule := c.callerModuleInstance()
listener := listeners[index]
hostModule := hostModuleFromOpaque(c.execCtx.goFunctionCallCalleeModuleContextOpaque)
def := hostModule.FunctionDefinition(wasm.Index(index))
listener.Before(ctx, callerModule, def, s, c.stackIterator(true))
// Call into the Go function.
func() {
if snapshotEnabled {
defer snapshotRecoverFn(c)
}
f.Call(ctx, callerModule, s)
}()
// Call Listener.After.
listener.After(ctx, callerModule, def, s)
// Back to the native code.
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallListenerBefore:
stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
index := wasm.Index(stack[0])
mod := c.callerModuleInstance()
listener := mod.Engine.(*moduleEngine).listeners[index]
def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount)
listener.Before(ctx, mod, def, stack[1:], c.stackIterator(false))
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCallListenerAfter:
stack := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
index := wasm.Index(stack[0])
mod := c.callerModuleInstance()
listener := mod.Engine.(*moduleEngine).listeners[index]
def := mod.Source.FunctionDefinition(index + mod.Source.ImportFunctionCount)
listener.After(ctx, mod, def, stack[1:])
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeCheckModuleExitCode:
// Note: this operation must be done in Go, not native code. The reason is that
// native code cannot be preempted and that means it can block forever if there are not
// enough OS threads (which we don't have control over).
if err := m.FailIfClosed(); err != nil {
panic(err)
}
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeRefFunc:
mod := c.callerModuleInstance()
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
funcIndex := wasm.Index(s[0])
ref := mod.Engine.FunctionInstanceReference(funcIndex)
s[0] = uint64(ref)
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeMemoryWait32:
mod := c.callerModuleInstance()
mem := mod.MemoryInstance
if !mem.Shared {
panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
}
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
timeout, exp, addr := int64(s[0]), uint32(s[1]), uintptr(s[2])
base := uintptr(unsafe.Pointer(&mem.Buffer[0]))
offset := uint32(addr - base)
res := mem.Wait32(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint32 {
addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset)
return atomic.LoadUint32((*uint32)(addr))
})
s[0] = res
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeMemoryWait64:
mod := c.callerModuleInstance()
mem := mod.MemoryInstance
if !mem.Shared {
panic(wasmruntime.ErrRuntimeExpectedSharedMemory)
}
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
timeout, exp, addr := int64(s[0]), uint64(s[1]), uintptr(s[2])
base := uintptr(unsafe.Pointer(&mem.Buffer[0]))
offset := uint32(addr - base)
res := mem.Wait64(offset, exp, timeout, func(mem *wasm.MemoryInstance, offset uint32) uint64 {
addr := unsafe.Add(unsafe.Pointer(&mem.Buffer[0]), offset)
return atomic.LoadUint64((*uint64)(addr))
})
s[0] = uint64(res)
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeMemoryNotify:
mod := c.callerModuleInstance()
mem := mod.MemoryInstance
s := goCallStackView(c.execCtx.stackPointerBeforeGoCall)
count, addr := uint32(s[0]), s[1]
offset := uint32(uintptr(addr) - uintptr(unsafe.Pointer(&mem.Buffer[0])))
res := mem.Notify(offset, count)
s[0] = uint64(res)
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr,
uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
case wazevoapi.ExitCodeUnreachable:
panic(wasmruntime.ErrRuntimeUnreachable)
case wazevoapi.ExitCodeMemoryOutOfBounds:
panic(wasmruntime.ErrRuntimeOutOfBoundsMemoryAccess)
case wazevoapi.ExitCodeTableOutOfBounds:
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
case wazevoapi.ExitCodeIndirectCallNullPointer:
panic(wasmruntime.ErrRuntimeInvalidTableAccess)
case wazevoapi.ExitCodeIndirectCallTypeMismatch:
panic(wasmruntime.ErrRuntimeIndirectCallTypeMismatch)
case wazevoapi.ExitCodeIntegerOverflow:
panic(wasmruntime.ErrRuntimeIntegerOverflow)
case wazevoapi.ExitCodeIntegerDivisionByZero:
panic(wasmruntime.ErrRuntimeIntegerDivideByZero)
case wazevoapi.ExitCodeInvalidConversionToInteger:
panic(wasmruntime.ErrRuntimeInvalidConversionToInteger)
case wazevoapi.ExitCodeUnalignedAtomic:
panic(wasmruntime.ErrRuntimeUnalignedAtomic)
default:
panic("BUG")
}
}
}
func (c *callEngine) callerModuleInstance() *wasm.ModuleInstance {
return moduleInstanceFromOpaquePtr(c.execCtx.callerModuleContextPtr)
}
func opaqueViewFromPtr(ptr uintptr) []byte {
var opaque []byte
sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaque))
sh.Data = ptr
setSliceLimits(sh, 24, 24)
return opaque
}
const callStackCeiling = uintptr(50000000) // in uint64 (8 bytes) == 400000000 bytes in total == 400mb.
func (c *callEngine) growStackWithGuarded() (newSP uintptr, newFP uintptr, err error) {
if wazevoapi.StackGuardCheckEnabled {
wazevoapi.CheckStackGuardPage(c.stack)
}
newSP, newFP, err = c.growStack()
if err != nil {
return
}
if wazevoapi.StackGuardCheckEnabled {
c.execCtx.stackBottomPtr = &c.stack[wazevoapi.StackGuardCheckGuardPageSize]
}
return
}
// growStack grows the stack and returns the new stack pointer and frame pointer.
func (c *callEngine) growStack() (newSP, newFP uintptr, err error) {
currentLen := uintptr(len(c.stack))
if callStackCeiling < currentLen {
err = wasmruntime.ErrRuntimeStackOverflow
return
}
newLen := 2*currentLen + c.execCtx.stackGrowRequiredSize + 16 // Stack might be aligned to 16 bytes, so add 16 bytes just in case.
newSP, newFP, c.stackTop, c.stack = c.cloneStack(newLen)
c.execCtx.stackBottomPtr = &c.stack[0]
return
}
func (c *callEngine) cloneStack(l uintptr) (newSP, newFP, newTop uintptr, newStack []byte) {
newStack = make([]byte, l)
relSp := c.stackTop - uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
relFp := c.stackTop - c.execCtx.framePointerBeforeGoCall
// Copy the existing contents in the previous Go-allocated stack into the new one.
var prevStackAligned, newStackAligned []byte
{
sh := (*reflect.SliceHeader)(unsafe.Pointer(&prevStackAligned))
sh.Data = c.stackTop - relSp
setSliceLimits(sh, relSp, relSp)
}
newTop = alignedStackTop(newStack)
{
newSP = newTop - relSp
newFP = newTop - relFp
sh := (*reflect.SliceHeader)(unsafe.Pointer(&newStackAligned))
sh.Data = newSP
setSliceLimits(sh, relSp, relSp)
}
copy(newStackAligned, prevStackAligned)
return
}
func (c *callEngine) stackIterator(onHostCall bool) experimental.StackIterator {
c.stackIteratorImpl.reset(c, onHostCall)
return &c.stackIteratorImpl
}
// stackIterator implements experimental.StackIterator.
type stackIterator struct {
retAddrs []uintptr
retAddrCursor int
eng *engine
pc uint64
currentDef *wasm.FunctionDefinition
}
func (si *stackIterator) reset(c *callEngine, onHostCall bool) {
if onHostCall {
si.retAddrs = append(si.retAddrs[:0], uintptr(unsafe.Pointer(c.execCtx.goCallReturnAddress)))
} else {
si.retAddrs = si.retAddrs[:0]
}
si.retAddrs = unwindStack(uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall, c.stackTop, si.retAddrs)
si.retAddrs = si.retAddrs[:len(si.retAddrs)-1] // the last return addr is the trampoline, so we skip it.
si.retAddrCursor = 0
si.eng = c.parent.parent.parent
}
// Next implements the same method as documented on experimental.StackIterator.
func (si *stackIterator) Next() bool {
if si.retAddrCursor >= len(si.retAddrs) {
return false
}
addr := si.retAddrs[si.retAddrCursor]
cm := si.eng.compiledModuleOfAddr(addr)
if cm != nil {
index := cm.functionIndexOf(addr)
def := cm.module.FunctionDefinition(cm.module.ImportFunctionCount + index)
si.currentDef = def
si.retAddrCursor++
si.pc = uint64(addr)
return true
}
return false
}
// ProgramCounter implements the same method as documented on experimental.StackIterator.
func (si *stackIterator) ProgramCounter() experimental.ProgramCounter {
return experimental.ProgramCounter(si.pc)
}
// Function implements the same method as documented on experimental.StackIterator.
func (si *stackIterator) Function() experimental.InternalFunction {
return si
}
// Definition implements the same method as documented on experimental.InternalFunction.
func (si *stackIterator) Definition() api.FunctionDefinition {
return si.currentDef
}
// SourceOffsetForPC implements the same method as documented on experimental.InternalFunction.
func (si *stackIterator) SourceOffsetForPC(pc experimental.ProgramCounter) uint64 {
upc := uintptr(pc)
cm := si.eng.compiledModuleOfAddr(upc)
return cm.getSourceOffset(upc)
}
// snapshot implements experimental.Snapshot
type snapshot struct {
sp, fp, top uintptr
returnAddress *byte
stack []byte
savedRegisters [64][2]uint64
ret []uint64
c *callEngine
}
// Snapshot implements the same method as documented on experimental.Snapshotter.
func (c *callEngine) Snapshot() experimental.Snapshot {
returnAddress := c.execCtx.goCallReturnAddress
oldTop, oldSp := c.stackTop, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall))
newSP, newFP, newTop, newStack := c.cloneStack(uintptr(len(c.stack)) + 16)
adjustClonedStack(oldSp, oldTop, newSP, newFP, newTop)
return &snapshot{
sp: newSP,
fp: newFP,
top: newTop,
savedRegisters: c.execCtx.savedRegisters,
returnAddress: returnAddress,
stack: newStack,
c: c,
}
}
// Restore implements the same method as documented on experimental.Snapshot.
func (s *snapshot) Restore(ret []uint64) {
s.ret = ret
panic(s)
}
func (s *snapshot) doRestore() {
spp := *(**uint64)(unsafe.Pointer(&s.sp))
view := goCallStackView(spp)
copy(view, s.ret)
c := s.c
c.stack = s.stack
c.stackTop = s.top
ec := &c.execCtx
ec.stackBottomPtr = &c.stack[0]
ec.stackPointerBeforeGoCall = spp
ec.framePointerBeforeGoCall = s.fp
ec.goCallReturnAddress = s.returnAddress
ec.savedRegisters = s.savedRegisters
}
// Error implements the error interface.
func (s *snapshot) Error() string {
return "unhandled snapshot restore, this generally indicates restore was called from a different " +
"exported function invocation than snapshot"
}
func snapshotRecoverFn(c *callEngine) {
if r := recover(); r != nil {
if s, ok := r.(*snapshot); ok && s.c == c {
s.doRestore()
} else {
panic(r)
}
}
}
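The snapshot/restore mechanism above uses panic/recover as non-local control flow: Restore panics with the *snapshot, and snapshotRecoverFn, deferred around the Go call, catches it only when it belongs to the same callEngine. A minimal sketch of that wrapping (illustrative; the real call sites are the inline closures in callWithStack):

// Illustrative only: how a host call is wrapped so that a Restore from the same
// callEngine rewinds the stack, while foreign snapshots keep propagating.
func callHostWithSnapshotSupport(c *callEngine, host func()) {
	defer snapshotRecoverFn(c)
	host() // may call (*snapshot).Restore, which panics with the snapshot
}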

View file

@ -0,0 +1,843 @@
package wazevo
import (
"context"
"encoding/hex"
"errors"
"fmt"
"runtime"
"sort"
"sync"
"unsafe"
"github.com/tetratelabs/wazero/api"
"github.com/tetratelabs/wazero/experimental"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/frontend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
"github.com/tetratelabs/wazero/internal/filecache"
"github.com/tetratelabs/wazero/internal/platform"
"github.com/tetratelabs/wazero/internal/version"
"github.com/tetratelabs/wazero/internal/wasm"
)
type (
// engine implements wasm.Engine.
engine struct {
wazeroVersion string
fileCache filecache.Cache
compiledModules map[wasm.ModuleID]*compiledModule
// sortedCompiledModules is a list of compiled modules sorted by the initial address of the executable.
sortedCompiledModules []*compiledModule
mux sync.RWMutex
// sharedFunctions is compiled functions shared by all modules.
sharedFunctions *sharedFunctions
// setFinalizer defaults to runtime.SetFinalizer, but overridable for tests.
setFinalizer func(obj interface{}, finalizer interface{})
// The following fields are reused for compiling shared functions.
machine backend.Machine
be backend.Compiler
}
sharedFunctions struct {
// memoryGrowExecutable is a compiled trampoline executable for memory.grow builtin function.
memoryGrowExecutable []byte
// checkModuleExitCode is a compiled trampoline executable for checking module instance exit code. This
// is used when ensureTermination is true.
checkModuleExitCode []byte
// stackGrowExecutable is a compiled executable for growing stack builtin function.
stackGrowExecutable []byte
// tableGrowExecutable is a compiled trampoline executable for table.grow builtin function.
tableGrowExecutable []byte
// refFuncExecutable is a compiled trampoline executable for ref.func builtin function.
refFuncExecutable []byte
// memoryWait32Executable is a compiled trampoline executable for memory.wait32 builtin function
memoryWait32Executable []byte
// memoryWait64Executable is a compiled trampoline executable for memory.wait64 builtin function
memoryWait64Executable []byte
// memoryNotifyExecutable is a compiled trampoline executable for memory.notify builtin function
memoryNotifyExecutable []byte
listenerBeforeTrampolines map[*wasm.FunctionType][]byte
listenerAfterTrampolines map[*wasm.FunctionType][]byte
}
// compiledModule is a compiled variant of a wasm.Module and ready to be used for instantiation.
compiledModule struct {
*executables
// functionOffsets maps a local function index to the offset in the executable.
functionOffsets []int
parent *engine
module *wasm.Module
ensureTermination bool
listeners []experimental.FunctionListener
listenerBeforeTrampolines []*byte
listenerAfterTrampolines []*byte
// The following fields are only available for non-host modules.
offsets wazevoapi.ModuleContextOffsetData
sharedFunctions *sharedFunctions
sourceMap sourceMap
}
executables struct {
executable []byte
entryPreambles [][]byte
}
)
// sourceMap is a mapping from the offset of the executable to the offset of the original wasm binary.
type sourceMap struct {
// executableOffsets is a sorted list of offsets into the executable. It is index-correlated with wasmBinaryOffsets:
// executableOffsets[i] is the offset in the executable that corresponds to the Wasm binary offset wasmBinaryOffsets[i].
executableOffsets []uintptr
// wasmBinaryOffsets is the counterpart of executableOffsets.
wasmBinaryOffsets []uint64
}
var _ wasm.Engine = (*engine)(nil)
// NewEngine returns the implementation of wasm.Engine.
func NewEngine(ctx context.Context, _ api.CoreFeatures, fc filecache.Cache) wasm.Engine {
machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssa.NewBuilder())
e := &engine{
compiledModules: make(map[wasm.ModuleID]*compiledModule),
setFinalizer: runtime.SetFinalizer,
machine: machine,
be: be,
fileCache: fc,
wazeroVersion: version.GetWazeroVersion(),
}
e.compileSharedFunctions()
return e
}
// CompileModule implements wasm.Engine.
func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (err error) {
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Lock()
defer wazevoapi.PerfMap.Unlock()
}
if _, ok, err := e.getCompiledModule(module, listeners, ensureTermination); ok { // cache hit!
return nil
} else if err != nil {
return err
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
ctx = wazevoapi.NewDeterministicCompilationVerifierContext(ctx, len(module.CodeSection))
}
cm, err := e.compileModule(ctx, module, listeners, ensureTermination)
if err != nil {
return err
}
if err = e.addCompiledModule(module, cm); err != nil {
return err
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
for i := 0; i < wazevoapi.DeterministicCompilationVerifyingIter; i++ {
_, err := e.compileModule(ctx, module, listeners, ensureTermination)
if err != nil {
return err
}
}
}
if len(listeners) > 0 {
cm.listeners = listeners
cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection))
cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection))
for i := range module.TypeSection {
typ := &module.TypeSection[i]
before, after := e.getListenerTrampolineForType(typ)
cm.listenerBeforeTrampolines[i] = before
cm.listenerAfterTrampolines[i] = after
}
}
return nil
}
func (exec *executables) compileEntryPreambles(m *wasm.Module, machine backend.Machine, be backend.Compiler) {
exec.entryPreambles = make([][]byte, len(m.TypeSection))
for i := range m.TypeSection {
typ := &m.TypeSection[i]
sig := frontend.SignatureForWasmFunctionType(typ)
be.Init()
buf := machine.CompileEntryPreamble(&sig)
executable := mmapExecutable(buf)
exec.entryPreambles[i] = executable
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&executable[0])),
uint64(len(executable)), fmt.Sprintf("entry_preamble::type=%s", typ.String()))
}
}
}
func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (*compiledModule, error) {
withListener := len(listeners) > 0
cm := &compiledModule{
offsets: wazevoapi.NewModuleContextOffsetData(module, withListener), parent: e, module: module,
ensureTermination: ensureTermination,
executables: &executables{},
}
if module.IsHostModule {
return e.compileHostModule(ctx, module, listeners)
}
importedFns, localFns := int(module.ImportFunctionCount), len(module.FunctionSection)
if localFns == 0 {
return cm, nil
}
rels := make([]backend.RelocationInfo, 0)
refToBinaryOffset := make([]int, importedFns+localFns)
if wazevoapi.DeterministicCompilationVerifierEnabled {
// The compilation must be deterministic regardless of the order of functions being compiled.
wazevoapi.DeterministicCompilationVerifierRandomizeIndexes(ctx)
}
needSourceInfo := module.DWARFLines != nil
// Creates new compiler instances which are reused for each function.
ssaBuilder := ssa.NewBuilder()
fe := frontend.NewFrontendCompiler(module, ssaBuilder, &cm.offsets, ensureTermination, withListener, needSourceInfo)
machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssaBuilder)
cm.executables.compileEntryPreambles(module, machine, be)
totalSize := 0 // Total binary size of the executable.
cm.functionOffsets = make([]int, localFns)
bodies := make([][]byte, localFns)
// Trampoline relocation related variables.
trampolineInterval, callTrampolineIslandSize, err := machine.CallTrampolineIslandInfo(localFns)
if err != nil {
return nil, err
}
needCallTrampoline := callTrampolineIslandSize > 0
var callTrampolineIslandOffsets []int // Holds the offsets of trampoline islands.
for i := range module.CodeSection {
if wazevoapi.DeterministicCompilationVerifierEnabled {
i = wazevoapi.DeterministicCompilationVerifierGetRandomizedLocalFunctionIndex(ctx, i)
}
fidx := wasm.Index(i + importedFns)
if wazevoapi.NeedFunctionNameInContext {
def := module.FunctionDefinition(fidx)
name := def.DebugName()
if len(def.ExportNames()) > 0 {
name = def.ExportNames()[0]
}
ctx = wazevoapi.SetCurrentFunctionName(ctx, i, fmt.Sprintf("[%d/%d]%s", i, len(module.CodeSection)-1, name))
}
needListener := len(listeners) > 0 && listeners[i] != nil
body, relsPerFunc, err := e.compileLocalWasmFunction(ctx, module, wasm.Index(i), fe, ssaBuilder, be, needListener)
if err != nil {
return nil, fmt.Errorf("compile function %d/%d: %v", i, len(module.CodeSection)-1, err)
}
// Align 16-bytes boundary.
totalSize = (totalSize + 15) &^ 15
cm.functionOffsets[i] = totalSize
if needSourceInfo {
// At the beginning of the function, we add the offset of the function body so that
// we can resolve the source location of the call site for the before-listener call.
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize))
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, module.CodeSection[i].BodyOffsetInCodeSection)
for _, info := range be.SourceOffsetInfo() {
cm.sourceMap.executableOffsets = append(cm.sourceMap.executableOffsets, uintptr(totalSize)+uintptr(info.ExecutableOffset))
cm.sourceMap.wasmBinaryOffsets = append(cm.sourceMap.wasmBinaryOffsets, uint64(info.SourceOffset))
}
}
fref := frontend.FunctionIndexToFuncRef(fidx)
refToBinaryOffset[fref] = totalSize
// At this point, relocation offsets are relative to the start of the function body,
// so we adjust them to be relative to the start of the executable.
for _, r := range relsPerFunc {
r.Offset += int64(totalSize)
rels = append(rels, r)
}
bodies[i] = body
totalSize += len(body)
if wazevoapi.PrintMachineCodeHexPerFunction {
fmt.Printf("[[[machine code for %s]]]\n%s\n\n", wazevoapi.GetCurrentFunctionName(ctx), hex.EncodeToString(body))
}
if needCallTrampoline {
// If the total size exceeds the trampoline interval, we need to add a trampoline island.
if totalSize/trampolineInterval > len(callTrampolineIslandOffsets) {
callTrampolineIslandOffsets = append(callTrampolineIslandOffsets, totalSize)
totalSize += callTrampolineIslandSize
}
}
}
// Allocate executable memory and then copy the generated machine code.
executable, err := platform.MmapCodeSegment(totalSize)
if err != nil {
panic(err)
}
cm.executable = executable
for i, b := range bodies {
offset := cm.functionOffsets[i]
copy(executable[offset:], b)
}
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
}
if needSourceInfo {
for i := range cm.sourceMap.executableOffsets {
cm.sourceMap.executableOffsets[i] += uintptr(unsafe.Pointer(&cm.executable[0]))
}
}
// Resolve relocations for local function calls.
if len(rels) > 0 {
machine.ResolveRelocations(refToBinaryOffset, executable, rels, callTrampolineIslandOffsets)
}
if runtime.GOARCH == "arm64" {
// On arm64, memory cannot be mapped rwx all at once, so we remap the segment as read-execute.
if err = platform.MprotectRX(executable); err != nil {
return nil, err
}
}
cm.sharedFunctions = e.sharedFunctions
e.setFinalizer(cm.executables, executablesFinalizer)
return cm, nil
}
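A note on the layout arithmetic above (illustrative): the expression (totalSize + 15) &^ 15 rounds each function's start offset up to a 16-byte boundary, e.g. 1001 becomes 1008; and whenever the accumulated size crosses another multiple of trampolineInterval, a call-trampoline island of callTrampolineIslandSize bytes is appended so that relative branches can always reach a trampoline.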
func (e *engine) compileLocalWasmFunction(
ctx context.Context,
module *wasm.Module,
localFunctionIndex wasm.Index,
fe *frontend.Compiler,
ssaBuilder ssa.Builder,
be backend.Compiler,
needListener bool,
) (body []byte, rels []backend.RelocationInfo, err error) {
typIndex := module.FunctionSection[localFunctionIndex]
typ := &module.TypeSection[typIndex]
codeSeg := &module.CodeSection[localFunctionIndex]
// Initializes both frontend and backend compilers.
fe.Init(localFunctionIndex, typIndex, typ, codeSeg.LocalTypes, codeSeg.Body, needListener, codeSeg.BodyOffsetInCodeSection)
be.Init()
// Lower Wasm to SSA.
fe.LowerToSSA()
if wazevoapi.PrintSSA && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "SSA", ssaBuilder.Format())
}
// Run SSA-level optimization passes.
ssaBuilder.RunPasses()
if wazevoapi.PrintOptimizedSSA && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[Optimized SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "Optimized SSA", ssaBuilder.Format())
}
// Now our ssaBuilder contains the necessary information to further lower them to
// machine code.
original, rels, err := be.Compile(ctx)
if err != nil {
return nil, nil, fmt.Errorf("ssa->machine code: %v", err)
}
// TODO: optimize as zero copy.
copied := make([]byte, len(original))
copy(copied, original)
return copied, rels, nil
}
func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener) (*compiledModule, error) {
machine := newMachine()
be := backend.NewCompiler(ctx, machine, ssa.NewBuilder())
num := len(module.CodeSection)
cm := &compiledModule{module: module, listeners: listeners, executables: &executables{}}
cm.functionOffsets = make([]int, num)
totalSize := 0 // Total binary size of the executable.
bodies := make([][]byte, num)
var sig ssa.Signature
for i := range module.CodeSection {
totalSize = (totalSize + 15) &^ 15
cm.functionOffsets[i] = totalSize
typIndex := module.FunctionSection[i]
typ := &module.TypeSection[typIndex]
// We could relax this limit to any index that still fits in an ExitCode, as wazevoapi.ExitCodeCallGoModuleFunctionWithIndex does.
// However, 1 << 16 should be large enough for real use cases.
const hostFunctionNumMaximum = 1 << 16
if i >= hostFunctionNumMaximum {
return nil, fmt.Errorf("too many host functions (maximum %d)", hostFunctionNumMaximum)
}
sig.ID = ssa.SignatureID(typIndex) // This is important since we reuse the `machine` which caches the ABI based on the SignatureID.
sig.Params = append(sig.Params[:0],
ssa.TypeI64, // First argument must be exec context.
ssa.TypeI64, // The second argument is the moduleContextOpaque of this host module.
)
for _, t := range typ.Params {
sig.Params = append(sig.Params, frontend.WasmTypeToSSAType(t))
}
sig.Results = sig.Results[:0]
for _, t := range typ.Results {
sig.Results = append(sig.Results, frontend.WasmTypeToSSAType(t))
}
c := &module.CodeSection[i]
if c.GoFunc == nil {
panic("BUG: GoFunc must be set for host module")
}
withListener := len(listeners) > 0 && listeners[i] != nil
var exitCode wazevoapi.ExitCode
fn := c.GoFunc
switch fn.(type) {
case api.GoModuleFunction:
exitCode = wazevoapi.ExitCodeCallGoModuleFunctionWithIndex(i, withListener)
case api.GoFunction:
exitCode = wazevoapi.ExitCodeCallGoFunctionWithIndex(i, withListener)
}
be.Init()
machine.CompileGoFunctionTrampoline(exitCode, &sig, true)
if err := be.Finalize(ctx); err != nil {
return nil, err
}
body := be.Buf()
if wazevoapi.PerfMapEnabled {
name := module.FunctionDefinition(wasm.Index(i)).DebugName()
wazevoapi.PerfMap.AddModuleEntry(i,
int64(totalSize),
uint64(len(body)),
fmt.Sprintf("trampoline:%s", name))
}
// TODO: optimize as zero copy.
copied := make([]byte, len(body))
copy(copied, body)
bodies[i] = copied
totalSize += len(body)
}
if totalSize == 0 {
// Empty module.
return cm, nil
}
// Allocate executable memory and then copy the generated machine code.
executable, err := platform.MmapCodeSegment(totalSize)
if err != nil {
panic(err)
}
cm.executable = executable
for i, b := range bodies {
offset := cm.functionOffsets[i]
copy(executable[offset:], b)
}
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
}
if runtime.GOARCH == "arm64" {
// On arm64, memory cannot be mapped rwx all at once, so we remap the segment as read-execute.
if err = platform.MprotectRX(executable); err != nil {
return nil, err
}
}
e.setFinalizer(cm.executables, executablesFinalizer)
return cm, nil
}
// Close implements wasm.Engine.
func (e *engine) Close() (err error) {
e.mux.Lock()
defer e.mux.Unlock()
e.sortedCompiledModules = nil
e.compiledModules = nil
e.sharedFunctions = nil
return nil
}
// CompiledModuleCount implements wasm.Engine.
func (e *engine) CompiledModuleCount() uint32 {
e.mux.RLock()
defer e.mux.RUnlock()
return uint32(len(e.compiledModules))
}
// DeleteCompiledModule implements wasm.Engine.
func (e *engine) DeleteCompiledModule(m *wasm.Module) {
e.mux.Lock()
defer e.mux.Unlock()
cm, ok := e.compiledModules[m.ID]
if ok {
if len(cm.executable) > 0 {
e.deleteCompiledModuleFromSortedList(cm)
}
delete(e.compiledModules, m.ID)
}
}
func (e *engine) addCompiledModuleToSortedList(cm *compiledModule) {
ptr := uintptr(unsafe.Pointer(&cm.executable[0]))
index := sort.Search(len(e.sortedCompiledModules), func(i int) bool {
return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr
})
e.sortedCompiledModules = append(e.sortedCompiledModules, nil)
copy(e.sortedCompiledModules[index+1:], e.sortedCompiledModules[index:])
e.sortedCompiledModules[index] = cm
}
func (e *engine) deleteCompiledModuleFromSortedList(cm *compiledModule) {
ptr := uintptr(unsafe.Pointer(&cm.executable[0]))
index := sort.Search(len(e.sortedCompiledModules), func(i int) bool {
return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) >= ptr
})
if index >= len(e.sortedCompiledModules) {
return
}
copy(e.sortedCompiledModules[index:], e.sortedCompiledModules[index+1:])
e.sortedCompiledModules = e.sortedCompiledModules[:len(e.sortedCompiledModules)-1]
}
func (e *engine) compiledModuleOfAddr(addr uintptr) *compiledModule {
e.mux.RLock()
defer e.mux.RUnlock()
index := sort.Search(len(e.sortedCompiledModules), func(i int) bool {
return uintptr(unsafe.Pointer(&e.sortedCompiledModules[i].executable[0])) > addr
})
index -= 1
if index < 0 {
return nil
}
candidate := e.sortedCompiledModules[index]
if checkAddrInBytes(addr, candidate.executable) {
// If a module has already been deleted, the binary search may land on the wrong entry, so only return the candidate if addr actually falls within its executable.
return candidate
}
return nil
}
func checkAddrInBytes(addr uintptr, b []byte) bool {
return uintptr(unsafe.Pointer(&b[0])) <= addr && addr <= uintptr(unsafe.Pointer(&b[len(b)-1]))
}
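compiledModuleOfAddr uses the usual sort.Search-then-step-back idiom to find the module whose executable starts at or below the address; an illustrative sketch of the same idiom on plain integers:

// Illustrative only: find the region whose start is <= addr, given sorted starts.
func exampleFindRegion() {
	starts := []int{100, 200, 300}
	addr := 250
	i := sort.Search(len(starts), func(i int) bool { return starts[i] > addr }) - 1
	fmt.Println(i) // 1, i.e. the region starting at 200
}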
// NewModuleEngine implements wasm.Engine.
func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.ModuleEngine, error) {
me := &moduleEngine{}
// Note: imported functions are resolved in moduleEngine.ResolveImportedFunction.
me.importedFunctions = make([]importedFunction, m.ImportFunctionCount)
compiled, ok := e.getCompiledModuleFromMemory(m)
if !ok {
return nil, errors.New("source module must be compiled before instantiation")
}
me.parent = compiled
me.module = mi
me.listeners = compiled.listeners
if m.IsHostModule {
me.opaque = buildHostModuleOpaque(m, compiled.listeners)
me.opaquePtr = &me.opaque[0]
} else {
if size := compiled.offsets.TotalSize; size != 0 {
opaque := newAlignedOpaque(size)
me.opaque = opaque
me.opaquePtr = &opaque[0]
}
}
return me, nil
}
func (e *engine) compileSharedFunctions() {
e.sharedFunctions = &sharedFunctions{
listenerBeforeTrampolines: make(map[*wasm.FunctionType][]byte),
listenerAfterTrampolines: make(map[*wasm.FunctionType][]byte),
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeGrowMemory, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32},
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_grow_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeTableGrow, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* table index */, ssa.TypeI32 /* num */, ssa.TypeI64 /* ref */},
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.tableGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.tableGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "table_grow_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCheckModuleExitCode, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI32 /* exec context */},
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.checkModuleExitCode = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.checkModuleExitCode
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "check_module_exit_code_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeRefFunc, &ssa.Signature{
Params: []ssa.Type{ssa.TypeI64 /* exec context */, ssa.TypeI32 /* function index */},
Results: []ssa.Type{ssa.TypeI64}, // returns the function reference.
}, false)
e.sharedFunctions.refFuncExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.refFuncExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "ref_func_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileStackGrowCallSequence()
e.sharedFunctions.stackGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.stackGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "stack_grow_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait32, &ssa.Signature{
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryWait32Executable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryWait32Executable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait32_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryWait64, &ssa.Signature{
// exec context, timeout, expected, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI64, ssa.TypeI64, ssa.TypeI64},
// Returns the status.
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryWait64Executable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryWait64Executable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_wait64_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeMemoryNotify, &ssa.Signature{
// exec context, count, addr
Params: []ssa.Type{ssa.TypeI64, ssa.TypeI32, ssa.TypeI64},
// Returns the number notified.
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryNotifyExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryNotifyExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_notify_trampoline")
}
}
e.setFinalizer(e.sharedFunctions, sharedFunctionsFinalizer)
}
func sharedFunctionsFinalizer(sf *sharedFunctions) {
if err := platform.MunmapCodeSegment(sf.memoryGrowExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.checkModuleExitCode); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.stackGrowExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.tableGrowExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.refFuncExecutable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.memoryWait32Executable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.memoryWait64Executable); err != nil {
panic(err)
}
if err := platform.MunmapCodeSegment(sf.memoryNotifyExecutable); err != nil {
panic(err)
}
for _, f := range sf.listenerBeforeTrampolines {
if err := platform.MunmapCodeSegment(f); err != nil {
panic(err)
}
}
for _, f := range sf.listenerAfterTrampolines {
if err := platform.MunmapCodeSegment(f); err != nil {
panic(err)
}
}
sf.memoryGrowExecutable = nil
sf.checkModuleExitCode = nil
sf.stackGrowExecutable = nil
sf.tableGrowExecutable = nil
sf.refFuncExecutable = nil
sf.memoryWait32Executable = nil
sf.memoryWait64Executable = nil
sf.memoryNotifyExecutable = nil
sf.listenerBeforeTrampolines = nil
sf.listenerAfterTrampolines = nil
}
func executablesFinalizer(exec *executables) {
if len(exec.executable) > 0 {
if err := platform.MunmapCodeSegment(exec.executable); err != nil {
panic(err)
}
}
exec.executable = nil
for _, f := range exec.entryPreambles {
if err := platform.MunmapCodeSegment(f); err != nil {
panic(err)
}
}
exec.entryPreambles = nil
}
func mmapExecutable(src []byte) []byte {
executable, err := platform.MmapCodeSegment(len(src))
if err != nil {
panic(err)
}
copy(executable, src)
if runtime.GOARCH == "arm64" {
// On arm64, a mapping cannot be both writable and executable at the same time, so switch it to read/execute now that the code has been copied in.
if err = platform.MprotectRX(executable); err != nil {
panic(err)
}
}
return executable
}
func (cm *compiledModule) functionIndexOf(addr uintptr) wasm.Index {
addr -= uintptr(unsafe.Pointer(&cm.executable[0]))
offset := cm.functionOffsets
index := sort.Search(len(offset), func(i int) bool {
return offset[i] > int(addr)
})
index--
if index < 0 {
panic("BUG")
}
return wasm.Index(index)
}
func (e *engine) getListenerTrampolineForType(functionType *wasm.FunctionType) (before, after *byte) {
e.mux.Lock()
defer e.mux.Unlock()
beforeBuf, ok := e.sharedFunctions.listenerBeforeTrampolines[functionType]
afterBuf := e.sharedFunctions.listenerAfterTrampolines[functionType]
if ok {
return &beforeBuf[0], &afterBuf[0]
}
beforeSig, afterSig := frontend.SignatureForListener(functionType)
e.be.Init()
buf := e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerBefore, beforeSig, false)
beforeBuf = mmapExecutable(buf)
e.be.Init()
buf = e.machine.CompileGoFunctionTrampoline(wazevoapi.ExitCodeCallListenerAfter, afterSig, false)
afterBuf = mmapExecutable(buf)
e.sharedFunctions.listenerBeforeTrampolines[functionType] = beforeBuf
e.sharedFunctions.listenerAfterTrampolines[functionType] = afterBuf
return &beforeBuf[0], &afterBuf[0]
}
func (cm *compiledModule) getSourceOffset(pc uintptr) uint64 {
offsets := cm.sourceMap.executableOffsets
if len(offsets) == 0 {
return 0
}
index := sort.Search(len(offsets), func(i int) bool {
return offsets[i] >= pc
})
index--
if index < 0 {
return 0
}
return cm.sourceMap.wasmBinaryOffsets[index]
}
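Note that functionIndexOf and getSourceOffset rely on the same lookup idiom: sort.Search finds the first recorded offset past the target address, and stepping back one index yields the entry whose range covers it. A self-contained sketch of that idiom with made-up values:
// exampleOffsetLookup is illustrative only; the offsets and target are invented.
func exampleOffsetLookup() int {
	offsets := []uintptr{0, 0x40, 0x90, 0x120} // sorted start offsets, one per function
	target := uintptr(0x95)                    // address to attribute to a function
	i := sort.Search(len(offsets), func(i int) bool { return offsets[i] > target })
	i-- // offsets[i] is now the greatest start offset <= target
	return i // 2: the entry starting at 0x90 covers 0x95
}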

View file

@ -0,0 +1,296 @@
package wazevo
import (
"bytes"
"context"
"crypto/sha256"
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"runtime"
"unsafe"
"github.com/tetratelabs/wazero/experimental"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
"github.com/tetratelabs/wazero/internal/filecache"
"github.com/tetratelabs/wazero/internal/platform"
"github.com/tetratelabs/wazero/internal/u32"
"github.com/tetratelabs/wazero/internal/u64"
"github.com/tetratelabs/wazero/internal/wasm"
)
var crc = crc32.MakeTable(crc32.Castagnoli)
// fileCacheKey returns a key for the file cache.
// In order to avoid collisions with the existing compiler, we do not use m.ID directly,
// but instead we rehash it with magic.
func fileCacheKey(m *wasm.Module) (ret filecache.Key) {
s := sha256.New()
s.Write(m.ID[:])
s.Write(magic)
s.Sum(ret[:0])
return
}
func (e *engine) addCompiledModule(module *wasm.Module, cm *compiledModule) (err error) {
e.addCompiledModuleToMemory(module, cm)
if !module.IsHostModule && e.fileCache != nil {
err = e.addCompiledModuleToCache(module, cm)
}
return
}
func (e *engine) getCompiledModule(module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (cm *compiledModule, ok bool, err error) {
cm, ok = e.getCompiledModuleFromMemory(module)
if ok {
return
}
cm, ok, err = e.getCompiledModuleFromCache(module)
if ok {
cm.parent = e
cm.module = module
cm.sharedFunctions = e.sharedFunctions
cm.ensureTermination = ensureTermination
cm.offsets = wazevoapi.NewModuleContextOffsetData(module, len(listeners) > 0)
if len(listeners) > 0 {
cm.listeners = listeners
cm.listenerBeforeTrampolines = make([]*byte, len(module.TypeSection))
cm.listenerAfterTrampolines = make([]*byte, len(module.TypeSection))
for i := range module.TypeSection {
typ := &module.TypeSection[i]
before, after := e.getListenerTrampolineForType(typ)
cm.listenerBeforeTrampolines[i] = before
cm.listenerAfterTrampolines[i] = after
}
}
e.addCompiledModuleToMemory(module, cm)
ssaBuilder := ssa.NewBuilder()
machine := newMachine()
be := backend.NewCompiler(context.Background(), machine, ssaBuilder)
cm.executables.compileEntryPreambles(module, machine, be)
// Set the finalizer.
e.setFinalizer(cm.executables, executablesFinalizer)
}
return
}
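The two lookups above are layered: the in-memory map is consulted first, then the file cache, and only a miss on both leaves compilation to the rest of the engine. A hedged sketch of that flow (the full-compilation branch is paraphrased and not shown in this diff):
// exampleGetOrCompile is illustrative, not vendored code.
func exampleGetOrCompile(e *engine, m *wasm.Module) (*compiledModule, error) {
	cm, ok, err := e.getCompiledModule(m, nil, false)
	if err != nil {
		return nil, err
	}
	if ok {
		return cm, nil // served from the in-memory map or from the file cache
	}
	// On a miss the engine compiles m from scratch elsewhere and then calls
	// e.addCompiledModule(m, cm), which fills both the in-memory map and the file cache.
	return nil, nil
}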
func (e *engine) addCompiledModuleToMemory(m *wasm.Module, cm *compiledModule) {
e.mux.Lock()
defer e.mux.Unlock()
e.compiledModules[m.ID] = cm
if len(cm.executable) > 0 {
e.addCompiledModuleToSortedList(cm)
}
}
func (e *engine) getCompiledModuleFromMemory(module *wasm.Module) (cm *compiledModule, ok bool) {
e.mux.RLock()
defer e.mux.RUnlock()
cm, ok = e.compiledModules[module.ID]
return
}
func (e *engine) addCompiledModuleToCache(module *wasm.Module, cm *compiledModule) (err error) {
if e.fileCache == nil || module.IsHostModule {
return
}
err = e.fileCache.Add(fileCacheKey(module), serializeCompiledModule(e.wazeroVersion, cm))
return
}
func (e *engine) getCompiledModuleFromCache(module *wasm.Module) (cm *compiledModule, hit bool, err error) {
if e.fileCache == nil || module.IsHostModule {
return
}
// Check if the entries exist in the external cache.
var cached io.ReadCloser
cached, hit, err = e.fileCache.Get(fileCacheKey(module))
if !hit || err != nil {
return
}
// Otherwise, we hit the external cache.
// Deserialize the compiled module from `cached`.
var staleCache bool
// Note: cached.Close is ensured to be called in deserializeCompiledModule.
cm, staleCache, err = deserializeCompiledModule(e.wazeroVersion, cached)
if err != nil {
hit = false
return
} else if staleCache {
return nil, false, e.fileCache.Delete(fileCacheKey(module))
}
return
}
var magic = []byte{'W', 'A', 'Z', 'E', 'V', 'O'}
func serializeCompiledModule(wazeroVersion string, cm *compiledModule) io.Reader {
buf := bytes.NewBuffer(nil)
// First 6 bytes: WAZEVO header.
buf.Write(magic)
// Next 1 byte: length of the wazero version string.
buf.WriteByte(byte(len(wazeroVersion)))
// Version of wazero.
buf.WriteString(wazeroVersion)
// Number of functions (== locally defined functions in the module): 4 bytes.
buf.Write(u32.LeBytes(uint32(len(cm.functionOffsets))))
for _, offset := range cm.functionOffsets {
// The offset of this function in the executable (8 bytes).
buf.Write(u64.LeBytes(uint64(offset)))
}
// The length of code segment (8 bytes).
buf.Write(u64.LeBytes(uint64(len(cm.executable))))
// Append the native code.
buf.Write(cm.executable)
// Append checksum.
checksum := crc32.Checksum(cm.executable, crc)
buf.Write(u32.LeBytes(checksum))
if sm := cm.sourceMap; len(sm.executableOffsets) > 0 {
buf.WriteByte(1) // indicates that source map is present.
l := len(sm.wasmBinaryOffsets)
buf.Write(u64.LeBytes(uint64(l)))
executableAddr := uintptr(unsafe.Pointer(&cm.executable[0]))
for i := 0; i < l; i++ {
buf.Write(u64.LeBytes(sm.wasmBinaryOffsets[i]))
// executableOffsets holds absolute addresses, so subtract executableAddr to store a relative offset.
buf.Write(u64.LeBytes(uint64(sm.executableOffsets[i] - executableAddr)))
}
} else {
buf.WriteByte(0) // indicates that source map is not present.
}
return bytes.NewReader(buf.Bytes())
}
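Read together, the writes above define the cache blob layout. The summary below is an editorial aid derived from those writes; the field names are descriptive and do not appear in the source:
// Layout of the blob produced by serializeCompiledModule (summary, not vendored code):
//
//	magic            6 bytes                 "WAZEVO"
//	versionLen       1 byte                  length of the wazero version string
//	version          versionLen bytes
//	numFunctions     4 bytes                 little-endian uint32
//	functionOffsets  numFunctions * 8 bytes  little-endian uint64 each
//	executableLen    8 bytes                 little-endian uint64
//	executable       executableLen bytes     native code
//	checksum         4 bytes                 CRC-32 (Castagnoli) of the executable
//	sourceMapFlag    1 byte                  0 or 1
//	sourceMap        if flag == 1: an 8-byte count, then count pairs of
//	                 (wasm binary offset, executable-relative offset), 8 bytes each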
func deserializeCompiledModule(wazeroVersion string, reader io.ReadCloser) (cm *compiledModule, staleCache bool, err error) {
defer reader.Close()
cacheHeaderSize := len(magic) + 1 /* version size */ + len(wazeroVersion) + 4 /* number of functions */
// Read the header before the native code.
header := make([]byte, cacheHeaderSize)
n, err := reader.Read(header)
if err != nil {
return nil, false, fmt.Errorf("compilationcache: error reading header: %v", err)
}
if n != cacheHeaderSize {
return nil, false, fmt.Errorf("compilationcache: invalid header length: %d", n)
}
if !bytes.Equal(header[:len(magic)], magic) {
return nil, false, fmt.Errorf(
"compilationcache: invalid magic number: got %s but want %s", header[:len(magic)], magic)
}
// Check the version compatibility.
versionSize := int(header[len(magic)])
cachedVersionBegin, cachedVersionEnd := len(magic)+1, len(magic)+1+versionSize
if cachedVersionEnd >= len(header) {
staleCache = true
return
} else if cachedVersion := string(header[cachedVersionBegin:cachedVersionEnd]); cachedVersion != wazeroVersion {
staleCache = true
return
}
functionsNum := binary.LittleEndian.Uint32(header[len(header)-4:])
cm = &compiledModule{functionOffsets: make([]int, functionsNum), executables: &executables{}}
var eightBytes [8]byte
for i := uint32(0); i < functionsNum; i++ {
// Read the offset of each function in the executable.
var offset uint64
if offset, err = readUint64(reader, &eightBytes); err != nil {
err = fmt.Errorf("compilationcache: error reading func[%d] executable offset: %v", i, err)
return
}
cm.functionOffsets[i] = int(offset)
}
executableLen, err := readUint64(reader, &eightBytes)
if err != nil {
err = fmt.Errorf("compilationcache: error reading executable size: %v", err)
return
}
if executableLen > 0 {
executable, err := platform.MmapCodeSegment(int(executableLen))
if err != nil {
err = fmt.Errorf("compilationcache: error mmapping executable (len=%d): %v", executableLen, err)
return nil, false, err
}
_, err = io.ReadFull(reader, executable)
if err != nil {
err = fmt.Errorf("compilationcache: error reading executable (len=%d): %v", executableLen, err)
return nil, false, err
}
expected := crc32.Checksum(executable, crc)
if _, err = io.ReadFull(reader, eightBytes[:4]); err != nil {
return nil, false, fmt.Errorf("compilationcache: could not read checksum: %v", err)
} else if checksum := binary.LittleEndian.Uint32(eightBytes[:4]); expected != checksum {
return nil, false, fmt.Errorf("compilationcache: checksum mismatch (expected %d, got %d)", expected, checksum)
}
if runtime.GOARCH == "arm64" {
// On arm64, a mapping cannot be both writable and executable at the same time, so switch it to read/execute now that the code has been read in.
if err = platform.MprotectRX(executable); err != nil {
return nil, false, err
}
}
cm.executable = executable
}
if _, err := io.ReadFull(reader, eightBytes[:1]); err != nil {
return nil, false, fmt.Errorf("compilationcache: error reading source map presence: %v", err)
}
if eightBytes[0] == 1 {
sm := &cm.sourceMap
sourceMapLen, err := readUint64(reader, &eightBytes)
if err != nil {
err = fmt.Errorf("compilationcache: error reading source map length: %v", err)
return nil, false, err
}
executableOffset := uintptr(unsafe.Pointer(&cm.executable[0]))
for i := uint64(0); i < sourceMapLen; i++ {
wasmBinaryOffset, err := readUint64(reader, &eightBytes)
if err != nil {
err = fmt.Errorf("compilationcache: error reading source map[%d] wasm binary offset: %v", i, err)
return nil, false, err
}
executableRelativeOffset, err := readUint64(reader, &eightBytes)
if err != nil {
err = fmt.Errorf("compilationcache: error reading source map[%d] executable offset: %v", i, err)
return nil, false, err
}
sm.wasmBinaryOffsets = append(sm.wasmBinaryOffsets, wasmBinaryOffset)
// executableOffsets holds absolute addresses, so add the executable base address to the stored relative offset.
sm.executableOffsets = append(sm.executableOffsets, uintptr(executableRelativeOffset)+executableOffset)
}
}
return
}
// readUint64 strictly reads a uint64 in little-endian byte order, using the
// given array as a buffer. It returns io.EOF if fewer than 8 bytes were read.
func readUint64(reader io.Reader, b *[8]byte) (uint64, error) {
s := b[0:8]
n, err := reader.Read(s)
if err != nil {
return 0, err
} else if n < 8 { // more strict than reader.Read
return 0, io.EOF
}
// Read the u64 from the underlying buffer.
ret := binary.LittleEndian.Uint64(s)
return ret, nil
}
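An io.Reader may legally return fewer than 8 bytes without an error, so readUint64 deliberately converts any short read into io.EOF rather than retrying. A small usage sketch, assuming it runs inside this package and reuses the u64 helper imported above:
// exampleReadUint64 is illustrative only.
func exampleReadUint64() (uint64, error) {
	var scratch [8]byte
	src := bytes.NewReader(u64.LeBytes(42)) // eight little-endian bytes encoding 42
	return readUint64(src, &scratch)        // returns 42, nil
}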

View file

@ -0,0 +1,15 @@
//go:build amd64 && !tinygo
package wazevo
import _ "unsafe"
// entrypoint is implemented by the backend.
//
//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.entrypoint
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr)
// afterGoFunctionCallEntrypoint is implemented by the backend.
//
//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64.afterGoFunctionCallEntrypoint
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)

View file

@ -0,0 +1,15 @@
//go:build arm64 && !tinygo
package wazevo
import _ "unsafe"
// entrypoint is implemented by the backend.
//
//go:linkname entrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.entrypoint
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr)
// afterGoFunctionCallEntrypoint is implemented by the backend.
//
//go:linkname afterGoFunctionCallEntrypoint github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64.afterGoFunctionCallEntrypoint
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr)

View file

@ -0,0 +1,15 @@
//go:build (!arm64 && !amd64) || tinygo
package wazevo
import (
"runtime"
)
func entrypoint(preambleExecutable, functionExecutable *byte, executionContextPtr uintptr, moduleContextPtr *byte, paramResultStackPtr *uint64, goAllocatedStackSlicePtr uintptr) {
panic(runtime.GOARCH)
}
func afterGoFunctionCallEntrypoint(executable *byte, executionContextPtr uintptr, stackPointer, framePointer uintptr) {
panic(runtime.GOARCH)
}

Some files were not shown because too many files have changed in this diff.