diff --git a/CMakeLists.txt b/CMakeLists.txt index e4df966aed..81c56aa5e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -568,6 +568,7 @@ add_library(arm-softmmu STATIC qemu/target/arm/helper.c qemu/target/arm/iwmmxt_helper.c qemu/target/arm/m_helper.c + qemu/target/arm/mte_helper.c qemu/target/arm/neon_helper.c qemu/target/arm/op_helper.c qemu/target/arm/psci.c @@ -612,6 +613,7 @@ add_library(aarch64-softmmu STATIC qemu/target/arm/helper.c qemu/target/arm/iwmmxt_helper.c qemu/target/arm/m_helper.c + qemu/target/arm/mte_helper.c qemu/target/arm/neon_helper.c qemu/target/arm/op_helper.c qemu/target/arm/pauth_helper.c @@ -1019,6 +1021,7 @@ add_library(riscv32-softmmu STATIC qemu/target/riscv/pmp.c qemu/target/riscv/translate.c qemu/target/riscv/unicorn.c + qemu/target/riscv/vector_helper.c ) if(MSVC) @@ -1052,6 +1055,7 @@ add_library(riscv64-softmmu STATIC qemu/target/riscv/pmp.c qemu/target/riscv/translate.c qemu/target/riscv/unicorn.c + qemu/target/riscv/vector_helper.c ) if(MSVC) diff --git a/bindings/dotnet/UnicornEngine/Const/Mips.fs b/bindings/dotnet/UnicornEngine/Const/Mips.fs index 76d0c71494..74c4946442 100644 --- a/bindings/dotnet/UnicornEngine/Const/Mips.fs +++ b/bindings/dotnet/UnicornEngine/Const/Mips.fs @@ -41,8 +41,10 @@ module Mips = let UC_CPU_MIPS64_I6500 = 9 let UC_CPU_MIPS64_LOONGSON_2E = 10 let UC_CPU_MIPS64_LOONGSON_2F = 11 - let UC_CPU_MIPS64_MIPS64DSPR2 = 12 - let UC_CPU_MIPS64_ENDING = 13 + let UC_CPU_MIPS64_LOONGSON_3A1000 = 12 + let UC_CPU_MIPS64_LOONGSON_3A4000 = 13 + let UC_CPU_MIPS64_MIPS64DSPR2 = 14 + let UC_CPU_MIPS64_ENDING = 15 // MIPS registers diff --git a/bindings/dotnet/UnicornEngine/Const/Riscv.fs b/bindings/dotnet/UnicornEngine/Const/Riscv.fs index 244e5fec45..d791cfcd5f 100644 --- a/bindings/dotnet/UnicornEngine/Const/Riscv.fs +++ b/bindings/dotnet/UnicornEngine/Const/Riscv.fs @@ -10,15 +10,17 @@ module Riscv = // RISCV32 CPU let UC_CPU_RISCV32_ANY = 0 - let UC_CPU_RISCV32_BASE32 = 1 - let UC_CPU_RISCV32_SIFIVE_E31 = 2 - let UC_CPU_RISCV32_SIFIVE_U34 = 3 - let UC_CPU_RISCV32_ENDING = 4 + let UC_CPU_RISCV32_BASE = 1 + let UC_CPU_RISCV32_IBEX = 2 + let UC_CPU_RISCV32_SIFIVE_E31 = 3 + let UC_CPU_RISCV32_SIFIVE_E34 = 4 + let UC_CPU_RISCV32_SIFIVE_U34 = 5 + let UC_CPU_RISCV32_ENDING = 6 // RISCV64 CPU let UC_CPU_RISCV64_ANY = 0 - let UC_CPU_RISCV64_BASE64 = 1 + let UC_CPU_RISCV64_BASE = 1 let UC_CPU_RISCV64_SIFIVE_E51 = 2 let UC_CPU_RISCV64_SIFIVE_U54 = 3 let UC_CPU_RISCV64_ENDING = 4 diff --git a/bindings/go/unicorn/mips_const.go b/bindings/go/unicorn/mips_const.go index dfb6ddb066..c78b813e2b 100644 --- a/bindings/go/unicorn/mips_const.go +++ b/bindings/go/unicorn/mips_const.go @@ -36,8 +36,10 @@ const ( CPU_MIPS64_I6500 = 9 CPU_MIPS64_LOONGSON_2E = 10 CPU_MIPS64_LOONGSON_2F = 11 - CPU_MIPS64_MIPS64DSPR2 = 12 - CPU_MIPS64_ENDING = 13 + CPU_MIPS64_LOONGSON_3A1000 = 12 + CPU_MIPS64_LOONGSON_3A4000 = 13 + CPU_MIPS64_MIPS64DSPR2 = 14 + CPU_MIPS64_ENDING = 15 // MIPS registers diff --git a/bindings/go/unicorn/riscv_const.go b/bindings/go/unicorn/riscv_const.go index 08458f77a6..d497b50f87 100644 --- a/bindings/go/unicorn/riscv_const.go +++ b/bindings/go/unicorn/riscv_const.go @@ -5,15 +5,17 @@ const ( // RISCV32 CPU CPU_RISCV32_ANY = 0 - CPU_RISCV32_BASE32 = 1 - CPU_RISCV32_SIFIVE_E31 = 2 - CPU_RISCV32_SIFIVE_U34 = 3 - CPU_RISCV32_ENDING = 4 + CPU_RISCV32_BASE = 1 + CPU_RISCV32_IBEX = 2 + CPU_RISCV32_SIFIVE_E31 = 3 + CPU_RISCV32_SIFIVE_E34 = 4 + CPU_RISCV32_SIFIVE_U34 = 5 + CPU_RISCV32_ENDING = 6 // RISCV64 CPU CPU_RISCV64_ANY = 0 - CPU_RISCV64_BASE64 = 
1 + CPU_RISCV64_BASE = 1 CPU_RISCV64_SIFIVE_E51 = 2 CPU_RISCV64_SIFIVE_U54 = 3 CPU_RISCV64_ENDING = 4 diff --git a/bindings/java/src/main/java/unicorn/MipsConst.java b/bindings/java/src/main/java/unicorn/MipsConst.java index bf6d8cf2fe..567f55661d 100644 --- a/bindings/java/src/main/java/unicorn/MipsConst.java +++ b/bindings/java/src/main/java/unicorn/MipsConst.java @@ -38,8 +38,10 @@ public interface MipsConst { public static final int UC_CPU_MIPS64_I6500 = 9; public static final int UC_CPU_MIPS64_LOONGSON_2E = 10; public static final int UC_CPU_MIPS64_LOONGSON_2F = 11; - public static final int UC_CPU_MIPS64_MIPS64DSPR2 = 12; - public static final int UC_CPU_MIPS64_ENDING = 13; + public static final int UC_CPU_MIPS64_LOONGSON_3A1000 = 12; + public static final int UC_CPU_MIPS64_LOONGSON_3A4000 = 13; + public static final int UC_CPU_MIPS64_MIPS64DSPR2 = 14; + public static final int UC_CPU_MIPS64_ENDING = 15; // MIPS registers diff --git a/bindings/java/src/main/java/unicorn/RiscvConst.java b/bindings/java/src/main/java/unicorn/RiscvConst.java index 5814180974..3a298c5c7a 100644 --- a/bindings/java/src/main/java/unicorn/RiscvConst.java +++ b/bindings/java/src/main/java/unicorn/RiscvConst.java @@ -7,15 +7,17 @@ public interface RiscvConst { // RISCV32 CPU public static final int UC_CPU_RISCV32_ANY = 0; - public static final int UC_CPU_RISCV32_BASE32 = 1; - public static final int UC_CPU_RISCV32_SIFIVE_E31 = 2; - public static final int UC_CPU_RISCV32_SIFIVE_U34 = 3; - public static final int UC_CPU_RISCV32_ENDING = 4; + public static final int UC_CPU_RISCV32_BASE = 1; + public static final int UC_CPU_RISCV32_IBEX = 2; + public static final int UC_CPU_RISCV32_SIFIVE_E31 = 3; + public static final int UC_CPU_RISCV32_SIFIVE_E34 = 4; + public static final int UC_CPU_RISCV32_SIFIVE_U34 = 5; + public static final int UC_CPU_RISCV32_ENDING = 6; // RISCV64 CPU public static final int UC_CPU_RISCV64_ANY = 0; - public static final int UC_CPU_RISCV64_BASE64 = 1; + public static final int UC_CPU_RISCV64_BASE = 1; public static final int UC_CPU_RISCV64_SIFIVE_E51 = 2; public static final int UC_CPU_RISCV64_SIFIVE_U54 = 3; public static final int UC_CPU_RISCV64_ENDING = 4; diff --git a/bindings/pascal/unicorn/MipsConst.pas b/bindings/pascal/unicorn/MipsConst.pas index 3cbce39ac0..748b1d8568 100644 --- a/bindings/pascal/unicorn/MipsConst.pas +++ b/bindings/pascal/unicorn/MipsConst.pas @@ -39,8 +39,10 @@ interface UC_CPU_MIPS64_I6500 = 9; UC_CPU_MIPS64_LOONGSON_2E = 10; UC_CPU_MIPS64_LOONGSON_2F = 11; - UC_CPU_MIPS64_MIPS64DSPR2 = 12; - UC_CPU_MIPS64_ENDING = 13; + UC_CPU_MIPS64_LOONGSON_3A1000 = 12; + UC_CPU_MIPS64_LOONGSON_3A4000 = 13; + UC_CPU_MIPS64_MIPS64DSPR2 = 14; + UC_CPU_MIPS64_ENDING = 15; // MIPS registers @@ -242,4 +244,4 @@ interface UC_MIPS_REG_LO3 = 48; implementation -end. \ No newline at end of file +end. 
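A note on the constant changes above: the CPU model values are positional enum members, so inserting UC_CPU_RISCV32_IBEX and UC_CPU_RISCV32_SIFIVE_E34 (and the MIPS64 Loongson 3A models) shifts every later value, and any caller or binding that cached the raw integers is silently wrong after this patch. A minimal C sketch of the intended usage, selecting a model by its symbolic name; it assumes the uc_ctl_set_cpu_model() convenience macro from the Unicorn 2 control API is available in this tree:

```c
#include <unicorn/unicorn.h>
#include <stdio.h>

int main(void)
{
    uc_engine *uc;
    uc_err err = uc_open(UC_ARCH_RISCV, UC_MODE_RISCV32, &uc);
    if (err != UC_ERR_OK) {
        fprintf(stderr, "uc_open failed: %s\n", uc_strerror(err));
        return 1;
    }

    /* Select the CPU model by name, never by raw number: after this patch
     * the value 2 means UC_CPU_RISCV32_IBEX, no longer UC_CPU_RISCV32_SIFIVE_E31.
     * uc_ctl_set_cpu_model() is assumed from the Unicorn 2 control API. */
    err = uc_ctl_set_cpu_model(uc, UC_CPU_RISCV32_SIFIVE_E31);
    if (err != UC_ERR_OK) {
        fprintf(stderr, "uc_ctl_set_cpu_model failed: %s\n", uc_strerror(err));
    }

    uc_close(uc);
    return 0;
}
```

The same reasoning applies to every generated binding touched above (F#, Go, Java, Pascal, Python, Ruby, Rust, Zig): they are regenerated in lockstep with the C enums precisely so that downstream code can keep using the names while the numbers move.

diff continues: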
diff --git a/bindings/pascal/unicorn/RiscvConst.pas b/bindings/pascal/unicorn/RiscvConst.pas index 075e271c65..a4629832c8 100644 --- a/bindings/pascal/unicorn/RiscvConst.pas +++ b/bindings/pascal/unicorn/RiscvConst.pas @@ -8,15 +8,17 @@ interface // RISCV32 CPU UC_CPU_RISCV32_ANY = 0; - UC_CPU_RISCV32_BASE32 = 1; - UC_CPU_RISCV32_SIFIVE_E31 = 2; - UC_CPU_RISCV32_SIFIVE_U34 = 3; - UC_CPU_RISCV32_ENDING = 4; + UC_CPU_RISCV32_BASE = 1; + UC_CPU_RISCV32_IBEX = 2; + UC_CPU_RISCV32_SIFIVE_E31 = 3; + UC_CPU_RISCV32_SIFIVE_E34 = 4; + UC_CPU_RISCV32_SIFIVE_U34 = 5; + UC_CPU_RISCV32_ENDING = 6; // RISCV64 CPU UC_CPU_RISCV64_ANY = 0; - UC_CPU_RISCV64_BASE64 = 1; + UC_CPU_RISCV64_BASE = 1; UC_CPU_RISCV64_SIFIVE_E51 = 2; UC_CPU_RISCV64_SIFIVE_U54 = 3; UC_CPU_RISCV64_ENDING = 4; @@ -291,4 +293,4 @@ interface UC_RISCV_REG_FT11 = 189; implementation -end. \ No newline at end of file +end. diff --git a/bindings/python/unicorn/mips_const.py b/bindings/python/unicorn/mips_const.py index c60b2d0f77..63bf9f3f39 100644 --- a/bindings/python/unicorn/mips_const.py +++ b/bindings/python/unicorn/mips_const.py @@ -34,8 +34,10 @@ UC_CPU_MIPS64_I6500 = 9 UC_CPU_MIPS64_LOONGSON_2E = 10 UC_CPU_MIPS64_LOONGSON_2F = 11 -UC_CPU_MIPS64_MIPS64DSPR2 = 12 -UC_CPU_MIPS64_ENDING = 13 +UC_CPU_MIPS64_LOONGSON_3A1000 = 12 +UC_CPU_MIPS64_LOONGSON_3A4000 = 13 +UC_CPU_MIPS64_MIPS64DSPR2 = 14 +UC_CPU_MIPS64_ENDING = 15 # MIPS registers diff --git a/bindings/python/unicorn/riscv_const.py b/bindings/python/unicorn/riscv_const.py index 3e63376fd5..d1f2ccd2f9 100644 --- a/bindings/python/unicorn/riscv_const.py +++ b/bindings/python/unicorn/riscv_const.py @@ -3,15 +3,17 @@ # RISCV32 CPU UC_CPU_RISCV32_ANY = 0 -UC_CPU_RISCV32_BASE32 = 1 -UC_CPU_RISCV32_SIFIVE_E31 = 2 -UC_CPU_RISCV32_SIFIVE_U34 = 3 -UC_CPU_RISCV32_ENDING = 4 +UC_CPU_RISCV32_BASE = 1 +UC_CPU_RISCV32_IBEX = 2 +UC_CPU_RISCV32_SIFIVE_E31 = 3 +UC_CPU_RISCV32_SIFIVE_E34 = 4 +UC_CPU_RISCV32_SIFIVE_U34 = 5 +UC_CPU_RISCV32_ENDING = 6 # RISCV64 CPU UC_CPU_RISCV64_ANY = 0 -UC_CPU_RISCV64_BASE64 = 1 +UC_CPU_RISCV64_BASE = 1 UC_CPU_RISCV64_SIFIVE_E51 = 2 UC_CPU_RISCV64_SIFIVE_U54 = 3 UC_CPU_RISCV64_ENDING = 4 diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb index 374912a870..ed9520b592 100644 --- a/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/mips_const.rb @@ -36,8 +36,10 @@ module UnicornEngine UC_CPU_MIPS64_I6500 = 9 UC_CPU_MIPS64_LOONGSON_2E = 10 UC_CPU_MIPS64_LOONGSON_2F = 11 - UC_CPU_MIPS64_MIPS64DSPR2 = 12 - UC_CPU_MIPS64_ENDING = 13 + UC_CPU_MIPS64_LOONGSON_3A1000 = 12 + UC_CPU_MIPS64_LOONGSON_3A4000 = 13 + UC_CPU_MIPS64_MIPS64DSPR2 = 14 + UC_CPU_MIPS64_ENDING = 15 # MIPS registers diff --git a/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb b/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb index 33203d0a4d..99eba71355 100644 --- a/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb +++ b/bindings/ruby/unicorn_gem/lib/unicorn_engine/riscv_const.rb @@ -5,15 +5,17 @@ module UnicornEngine # RISCV32 CPU UC_CPU_RISCV32_ANY = 0 - UC_CPU_RISCV32_BASE32 = 1 - UC_CPU_RISCV32_SIFIVE_E31 = 2 - UC_CPU_RISCV32_SIFIVE_U34 = 3 - UC_CPU_RISCV32_ENDING = 4 + UC_CPU_RISCV32_BASE = 1 + UC_CPU_RISCV32_IBEX = 2 + UC_CPU_RISCV32_SIFIVE_E31 = 3 + UC_CPU_RISCV32_SIFIVE_E34 = 4 + UC_CPU_RISCV32_SIFIVE_U34 = 5 + UC_CPU_RISCV32_ENDING = 6 # RISCV64 CPU UC_CPU_RISCV64_ANY = 0 - UC_CPU_RISCV64_BASE64 = 1 + UC_CPU_RISCV64_BASE = 1 
UC_CPU_RISCV64_SIFIVE_E51 = 2 UC_CPU_RISCV64_SIFIVE_U54 = 3 UC_CPU_RISCV64_ENDING = 4 diff --git a/bindings/rust/src/riscv.rs b/bindings/rust/src/riscv.rs index 53c5990bc3..0862e91e7b 100644 --- a/bindings/rust/src/riscv.rs +++ b/bindings/rust/src/riscv.rs @@ -349,8 +349,10 @@ impl From for i32 { #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Riscv32CpuModel { UC_CPU_RISCV32_ANY = 0, - UC_CPU_RISCV32_BASE32, + UC_CPU_RISCV32_BASE, + UC_CPU_RISCV32_IBEX, UC_CPU_RISCV32_SIFIVE_E31, + UC_CPU_RISCV32_SIFIVE_E34, UC_CPU_RISCV32_SIFIVE_U34, } @@ -370,7 +372,7 @@ impl From<&Riscv32CpuModel> for i32 { #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Riscv64CpuModel { UC_CPU_RISCV64_ANY = 0, - UC_CPU_RISCV64_BASE64, + UC_CPU_RISCV64_BASE, UC_CPU_RISCV64_SIFIVE_E51, UC_CPU_RISCV64_SIFIVE_U54, } diff --git a/bindings/zig/unicorn/mips_const.zig b/bindings/zig/unicorn/mips_const.zig index 0987cb2f7b..de66623340 100644 --- a/bindings/zig/unicorn/mips_const.zig +++ b/bindings/zig/unicorn/mips_const.zig @@ -36,8 +36,10 @@ pub const mipsConst = enum(c_int) { CPU_MIPS64_I6500 = 9, CPU_MIPS64_LOONGSON_2E = 10, CPU_MIPS64_LOONGSON_2F = 11, - CPU_MIPS64_MIPS64DSPR2 = 12, - CPU_MIPS64_ENDING = 13, + CPU_MIPS64_LOONGSON_3A1000 = 12, + CPU_MIPS64_LOONGSON_3A4000 = 13, + CPU_MIPS64_MIPS64DSPR2 = 14, + CPU_MIPS64_ENDING = 15, // MIPS registers diff --git a/bindings/zig/unicorn/riscv_const.zig b/bindings/zig/unicorn/riscv_const.zig index 00a34001f7..340d1988c6 100644 --- a/bindings/zig/unicorn/riscv_const.zig +++ b/bindings/zig/unicorn/riscv_const.zig @@ -5,10 +5,12 @@ pub const riscvConst = enum(c_int) { // RISCV32 CPU CPU_RISCV32_ANY = 0, - CPU_RISCV32_BASE32 = 1, - CPU_RISCV32_SIFIVE_E31 = 2, - CPU_RISCV32_SIFIVE_U34 = 3, - CPU_RISCV32_ENDING = 4, + CPU_RISCV32_BASE = 1, + CPU_RISCV32_IBEX = 2, + CPU_RISCV32_SIFIVE_E31 = 3, + CPU_RISCV32_SIFIVE_E34 = 4, + CPU_RISCV32_SIFIVE_U34 = 5, + CPU_RISCV32_ENDING = 6, // RISCV64 CPU diff --git a/include/unicorn/mips.h b/include/unicorn/mips.h index 7a4c9c1cb8..4eeb9241d7 100644 --- a/include/unicorn/mips.h +++ b/include/unicorn/mips.h @@ -55,6 +55,8 @@ typedef enum uc_cpu_mips64 { UC_CPU_MIPS64_I6500, UC_CPU_MIPS64_LOONGSON_2E, UC_CPU_MIPS64_LOONGSON_2F, + UC_CPU_MIPS64_LOONGSON_3A1000, + UC_CPU_MIPS64_LOONGSON_3A4000, UC_CPU_MIPS64_MIPS64DSPR2, UC_CPU_MIPS64_ENDING diff --git a/include/unicorn/riscv.h b/include/unicorn/riscv.h index cf1595ae4f..e72a1aa39c 100644 --- a/include/unicorn/riscv.h +++ b/include/unicorn/riscv.h @@ -18,8 +18,10 @@ extern "C" { //> RISCV32 CPU typedef enum uc_cpu_riscv32 { UC_CPU_RISCV32_ANY = 0, - UC_CPU_RISCV32_BASE32, + UC_CPU_RISCV32_BASE, + UC_CPU_RISCV32_IBEX, UC_CPU_RISCV32_SIFIVE_E31, + UC_CPU_RISCV32_SIFIVE_E34, UC_CPU_RISCV32_SIFIVE_U34, UC_CPU_RISCV32_ENDING @@ -28,7 +30,7 @@ typedef enum uc_cpu_riscv32 { //> RISCV64 CPU typedef enum uc_cpu_riscv64 { UC_CPU_RISCV64_ANY = 0, - UC_CPU_RISCV64_BASE64, + UC_CPU_RISCV64_BASE, UC_CPU_RISCV64_SIFIVE_E51, UC_CPU_RISCV64_SIFIVE_U54, diff --git a/include/unicorn/unicorn.h b/include/unicorn/unicorn.h index fc4d447f2e..a802d9b629 100644 --- a/include/unicorn/unicorn.h +++ b/include/unicorn/unicorn.h @@ -69,6 +69,18 @@ typedef size_t uc_hook; #define UNICORN_DEPRECATED #endif +#ifdef _MSC_VER +#define UNICORN_UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNICORN_UNUSED __attribute__((unused)) +#endif + +#ifdef _MSC_VER +#define UNICORN_NONNULL +#else +#define UNICORN_NONNULL __attribute__((nonnull)) +#endif + // Unicorn API version #define UC_API_MAJOR 2 #define UC_API_MINOR 
1 diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 6f1315028d..55c3afb894 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_aarch64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_aarch64 #define tcg_gen_st_i64 tcg_gen_st_i64_aarch64 +#define tcg_gen_add_i64 tcg_gen_add_i64_aarch64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_aarch64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_aarch64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_aarch64 #define cpu_icount_to_ns cpu_icount_to_ns_aarch64 #define cpu_is_stopped cpu_is_stopped_aarch64 #define cpu_get_ticks cpu_get_ticks_aarch64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_aarch64 #define floatx80_mul floatx80_mul_aarch64 #define floatx80_div floatx80_div_aarch64 +#define floatx80_modrem floatx80_modrem_aarch64 +#define floatx80_mod floatx80_mod_aarch64 #define floatx80_rem floatx80_rem_aarch64 #define floatx80_sqrt floatx80_sqrt_aarch64 #define floatx80_eq floatx80_eq_aarch64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_aarch64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_aarch64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_aarch64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_aarch64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_aarch64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_aarch64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_aarch64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_aarch64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_aarch64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_aarch64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_aarch64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_aarch64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_aarch64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_aarch64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_aarch64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_aarch64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_aarch64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_aarch64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_aarch64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_aarch64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_aarch64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_aarch64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_aarch64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_aarch64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_aarch64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_aarch64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_aarch64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_aarch64 #define tcg_gen_shri_vec tcg_gen_shri_vec_aarch64 #define tcg_gen_sari_vec tcg_gen_sari_vec_aarch64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_aarch64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_aarch64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_aarch64 #define tcg_gen_add_vec tcg_gen_add_vec_aarch64 #define tcg_gen_sub_vec tcg_gen_sub_vec_aarch64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_aarch64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_aarch64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_aarch64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_aarch64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_aarch64 #define tcg_gen_shls_vec tcg_gen_shls_vec_aarch64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_aarch64 #define tcg_gen_sars_vec tcg_gen_sars_vec_aarch64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_aarch64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_aarch64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_aarch64 #define 
tb_htable_lookup tb_htable_lookup_aarch64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_aarch64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_aarch64 #define tlb_init tlb_init_aarch64 +#define tlb_destroy tlb_destroy_aarch64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_aarch64 #define tlb_flush tlb_flush_aarch64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_aarch64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_aarch64 #define get_page_addr_code_hostp get_page_addr_code_hostp_aarch64 #define get_page_addr_code get_page_addr_code_aarch64 +#define probe_access_flags probe_access_flags_aarch64 #define probe_access probe_access_aarch64 #define tlb_vaddr_to_host tlb_vaddr_to_host_aarch64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_aarch64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_aarch64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_aarch64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_aarch64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_aarch64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_aarch64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_aarch64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_aarch64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_aarch64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_aarch64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_aarch64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_aarch64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_aarch64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_aarch64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_aarch64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_aarch64 #define cpu_ldub_data_ra cpu_ldub_data_ra_aarch64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_aarch64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_aarch64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_aarch64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_aarch64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_aarch64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_aarch64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_aarch64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_aarch64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_aarch64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_aarch64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_aarch64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_aarch64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_aarch64 #define cpu_ldub_data cpu_ldub_data_aarch64 #define cpu_ldsb_data cpu_ldsb_data_aarch64 -#define cpu_lduw_data cpu_lduw_data_aarch64 -#define cpu_ldsw_data cpu_ldsw_data_aarch64 -#define cpu_ldl_data cpu_ldl_data_aarch64 -#define cpu_ldq_data cpu_ldq_data_aarch64 +#define cpu_lduw_be_data cpu_lduw_be_data_aarch64 +#define cpu_lduw_le_data cpu_lduw_le_data_aarch64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_aarch64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_aarch64 +#define cpu_ldl_be_data cpu_ldl_be_data_aarch64 +#define cpu_ldl_le_data cpu_ldl_le_data_aarch64 +#define cpu_ldq_le_data cpu_ldq_le_data_aarch64 +#define cpu_ldq_be_data cpu_ldq_be_data_aarch64 #define helper_ret_stb_mmu helper_ret_stb_mmu_aarch64 #define helper_le_stw_mmu helper_le_stw_mmu_aarch64 #define helper_be_stw_mmu helper_be_stw_mmu_aarch64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_aarch64 #define helper_be_stq_mmu helper_be_stq_mmu_aarch64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_aarch64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_aarch64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_aarch64 -#define 
cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_aarch64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_aarch64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_aarch64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_aarch64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_aarch64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_aarch64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_aarch64 #define cpu_stb_data_ra cpu_stb_data_ra_aarch64 -#define cpu_stw_data_ra cpu_stw_data_ra_aarch64 -#define cpu_stl_data_ra cpu_stl_data_ra_aarch64 -#define cpu_stq_data_ra cpu_stq_data_ra_aarch64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_aarch64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_aarch64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_aarch64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_aarch64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_aarch64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_aarch64 #define cpu_stb_data cpu_stb_data_aarch64 -#define cpu_stw_data cpu_stw_data_aarch64 -#define cpu_stl_data cpu_stl_data_aarch64 -#define cpu_stq_data cpu_stq_data_aarch64 +#define cpu_stw_be_data cpu_stw_be_data_aarch64 +#define cpu_stw_le_data cpu_stw_le_data_aarch64 +#define cpu_stl_be_data cpu_stl_be_data_aarch64 +#define cpu_stl_le_data cpu_stl_le_data_aarch64 +#define cpu_stq_be_data cpu_stq_be_data_aarch64 +#define cpu_stq_le_data cpu_stq_le_data_aarch64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_aarch64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_aarch64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_aarch64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_aarch64 #define cpu_ldl_code cpu_ldl_code_aarch64 #define cpu_ldq_code cpu_ldq_code_aarch64 +#define cpu_interrupt_handler cpu_interrupt_handler_aarch64 #define helper_div_i32 helper_div_i32_aarch64 #define helper_rem_i32 helper_rem_i32_aarch64 #define helper_divu_i32 helper_divu_i32_aarch64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_aarch64 #define helper_gvec_sar32i helper_gvec_sar32i_aarch64 #define helper_gvec_sar64i helper_gvec_sar64i_aarch64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_aarch64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_aarch64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_aarch64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_aarch64 #define helper_gvec_shl8v helper_gvec_shl8v_aarch64 #define helper_gvec_shl16v helper_gvec_shl16v_aarch64 #define helper_gvec_shl32v helper_gvec_shl32v_aarch64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_aarch64 #define helper_gvec_sar32v helper_gvec_sar32v_aarch64 #define helper_gvec_sar64v helper_gvec_sar64v_aarch64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_aarch64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_aarch64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_aarch64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_aarch64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_aarch64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_aarch64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_aarch64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_aarch64 #define helper_gvec_eq8 helper_gvec_eq8_aarch64 #define helper_gvec_ne8 helper_gvec_ne8_aarch64 #define helper_gvec_lt8 helper_gvec_lt8_aarch64 @@ -1615,6 +1669,11 @@ #define arm_v7m_mmu_idx_all arm_v7m_mmu_idx_all_aarch64 #define arm_v7m_mmu_idx_for_secstate_and_priv arm_v7m_mmu_idx_for_secstate_and_priv_aarch64 #define arm_v7m_mmu_idx_for_secstate arm_v7m_mmu_idx_for_secstate_aarch64 +#define 
mte_probe1 mte_probe1_aarch64 +#define mte_check1 mte_check1_aarch64 +#define mte_checkN mte_checkN_aarch64 +#define gen_helper_mte_check1 gen_helper_mte_check1_aarch64 +#define gen_helper_mte_checkN gen_helper_mte_checkN_aarch64 #define helper_neon_qadd_u8 helper_neon_qadd_u8_aarch64 #define helper_neon_qadd_u16 helper_neon_qadd_u16_aarch64 #define helper_neon_qadd_u32 helper_neon_qadd_u32_aarch64 @@ -1854,6 +1913,21 @@ #define helper_autdb helper_autdb_aarch64 #define helper_xpaci helper_xpaci_aarch64 #define helper_xpacd helper_xpacd_aarch64 +#define helper_mte_check1 helper_mte_check1_aarch64 +#define helper_mte_checkN helper_mte_checkN_aarch64 +#define helper_mte_check_zva helper_mte_check_zva_aarch64 +#define helper_irg helper_irg_aarch64 +#define helper_addsubg helper_addsubg_aarch64 +#define helper_ldg helper_ldg_aarch64 +#define helper_stg helper_stg_aarch64 +#define helper_stg_parallel helper_stg_parallel_aarch64 +#define helper_stg_stub helper_stg_stub_aarch64 +#define helper_st2g helper_st2g_aarch64 +#define helper_st2g_parallel helper_st2g_parallel_aarch64 +#define helper_st2g_stub helper_st2g_stub_aarch64 +#define helper_ldgm helper_ldgm_aarch64 +#define helper_stgm helper_stgm_aarch64 +#define helper_stzgm_tags helper_stzgm_tags_aarch64 #define arm_is_psci_call arm_is_psci_call_aarch64 #define arm_handle_psci_call arm_handle_psci_call_aarch64 #define helper_sve_predtest1 helper_sve_predtest1_aarch64 @@ -2746,6 +2820,7 @@ #define gen_a64_set_pc_im gen_a64_set_pc_im_aarch64 #define unallocated_encoding unallocated_encoding_aarch64 #define new_tmp_a64 new_tmp_a64_aarch64 +#define new_tmp_a64_local new_tmp_a64_local_aarch64 #define new_tmp_a64_zero new_tmp_a64_zero_aarch64 #define cpu_reg cpu_reg_aarch64 #define cpu_reg_sp cpu_reg_sp_aarch64 diff --git a/qemu/accel/tcg/cputlb.c b/qemu/accel/tcg/cputlb.c index f7ffee48f1..3efbf5562c 100644 --- a/qemu/accel/tcg/cputlb.c +++ b/qemu/accel/tcg/cputlb.c @@ -261,6 +261,21 @@ void tlb_init(CPUState *cpu) } } +void tlb_destroy(CPUState *cpu) +{ + CPUArchState *env = cpu->env_ptr; + int i; + + // qemu_spin_destroy(&env_tlb(env)->c.lock); + for (i = 0; i < NB_MMU_MODES; i++) { + CPUTLBDesc *desc = &env_tlb(env)->d[i]; + CPUTLBDescFast *fast = &env_tlb(env)->f[i]; + + g_free(fast->table); + g_free(desc->iotlb); + } +} + /* flush_all_helper: run fn across all cpus * * If the wait flag is set then the src cpu's helper will be queued as @@ -450,9 +465,7 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, run_on_cpu_data data) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; @@ -486,9 +499,7 @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -523,9 +534,7 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, uint16_t idxmap) { -#ifdef TARGET_ARM - struct uc_struct *uc = src_cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = src_cpu->uc; /* This should already be page aligned */ addr &= 
TARGET_PAGE_MASK; @@ -567,9 +576,7 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, target_ulong addr, uint16_t idxmap) { -#ifdef TARGET_ARM - struct uc_struct *uc = src_cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = src_cpu->uc; /* This should already be page aligned */ addr &= TARGET_PAGE_MASK; @@ -755,9 +762,7 @@ static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, so that it is no longer dirty */ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; CPUArchState *env = cpu->env_ptr; int mmu_idx; @@ -811,9 +816,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, hwaddr paddr, MemTxAttrs attrs, int prot, int mmu_idx, target_ulong size) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; CPUArchState *env = cpu->env_ptr; CPUTLB *tlb = env_tlb(env); CPUTLBDesc *desc = &tlb->d[mmu_idx]; @@ -1190,9 +1193,7 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, CPUIOTLBEntry *iotlbentry, uintptr_t retaddr, CPUTLBEntry *tlbe) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr; MemoryRegion *mr = cpu->uc->memory_mapping(cpu->uc, tlbe->paddr | (mem_vaddr & ~TARGET_PAGE_MASK)); @@ -1215,6 +1216,86 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, } } +static int probe_access_internal(CPUArchState *env, target_ulong addr, + int fault_size, MMUAccessType access_type, + int mmu_idx, bool nonfault, + void **phost, uintptr_t retaddr) +{ + struct uc_struct *uc = env->uc; + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong tlb_addr, page_addr; + size_t elt_ofs; + int flags; + + switch (access_type) { + case MMU_DATA_LOAD: + elt_ofs = offsetof(CPUTLBEntry, addr_read); + break; + case MMU_DATA_STORE: + elt_ofs = offsetof(CPUTLBEntry, addr_write); + break; + case MMU_INST_FETCH: + elt_ofs = offsetof(CPUTLBEntry, addr_code); + break; + default: + g_assert_not_reached(); + } + tlb_addr = tlb_read_ofs(entry, elt_ofs); + + page_addr = addr & TARGET_PAGE_MASK; + if (!tlb_hit_page(uc, tlb_addr, page_addr)) { + if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) { + CPUState *cs = env_cpu(env); + CPUClass *cc = CPU_GET_CLASS(cs); + + if (!cc->tlb_fill(cs, addr, fault_size, access_type, + mmu_idx, nonfault, retaddr)) { + /* Non-faulting page table read failed. */ + *phost = NULL; + return TLB_INVALID_MASK; + } + + /* TLB resize via tlb_fill may have moved the entry. */ + entry = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_read_ofs(entry, elt_ofs); + } + flags = tlb_addr & TLB_FLAGS_MASK; + + /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ + if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) { + *phost = NULL; + return TLB_MMIO; + } + + /* Everything else is RAM. */ + *phost = (void *)((uintptr_t)addr + entry->addend); + return flags; +} + +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr) +{ + int flags; + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + + flags = probe_access_internal(env, addr, 0, access_type, mmu_idx, + nonfault, phost, retaddr); + + /* Handle clean RAM pages. 
*/ + if (unlikely(flags & TLB_NOTDIRTY)) { + uintptr_t index = tlb_index(env, mmu_idx, addr); + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + + notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr, entry); + flags &= ~TLB_NOTDIRTY; + } + + return flags; +} + /* * Probe for whether the specified guest access is permitted. If it is not * permitted then an exception will be taken in the same way as if this @@ -1225,9 +1306,7 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size, void *probe_access(CPUArchState *env, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) { -#ifdef TARGET_ARM - struct uc_struct *uc = env->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = env->uc; uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); target_ulong tlb_addr; @@ -1352,9 +1431,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { -#ifdef TARGET_ARM - struct uc_struct *uc = env->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = env->uc; size_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); @@ -1951,36 +2028,54 @@ int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, full_ldub_mmu); } -uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW, - MO_TE == MO_LE - ? full_le_lduw_mmu : full_be_lduw_mmu); + return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu); } -int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW, + full_be_lduw_mmu); +} + +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu); +} + +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW, - MO_TE == MO_LE - ? full_le_lduw_mmu : full_be_lduw_mmu); + return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu); } -uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL, - MO_TE == MO_LE - ? full_le_ldul_mmu : full_be_ldul_mmu); + return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu); } -uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) { - return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ, - MO_TE == MO_LE - ? 
helper_le_ldq_mmu : helper_be_ldq_mmu); + return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW, + full_le_lduw_mmu); +} + +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu); +} + +uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu); } uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr, @@ -1994,25 +2089,50 @@ int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) +{ + return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +{ + return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) +{ + return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) +{ + return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); +} + +uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) { - return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) { - return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) { - return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } -uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr, + uintptr_t retaddr) { - return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); } uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr) @@ -2025,24 +2145,44 @@ int cpu_ldsb_data(CPUArchState *env, target_ulong ptr) return cpu_ldsb_data_ra(env, ptr, 0); } -uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr) +uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_lduw_be_data_ra(env, ptr, 0); +} + +int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_ldsw_be_data_ra(env, ptr, 0); +} + +uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_ldl_be_data_ra(env, ptr, 0); +} + +uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr) { - return cpu_lduw_data_ra(env, ptr, 0); + return cpu_ldq_be_data_ra(env, ptr, 0); } -int cpu_ldsw_data(CPUArchState *env, target_ulong ptr) +uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr) { - 
return cpu_ldsw_data_ra(env, ptr, 0); + return cpu_lduw_le_data_ra(env, ptr, 0); } -uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr) +int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr) { - return cpu_ldl_data_ra(env, ptr, 0); + return cpu_ldsw_le_data_ra(env, ptr, 0); } -uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr) +uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr) { - return cpu_ldq_data_ra(env, ptr, 0); + return cpu_ldl_le_data_ra(env, ptr, 0); +} + +uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr) +{ + return cpu_ldq_le_data_ra(env, ptr, 0); } /* @@ -2428,22 +2568,40 @@ void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB); } -void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW); + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW); } -void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL); + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL); } -void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, + int mmu_idx, uintptr_t retaddr) +{ + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ); +} + +void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ); + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW); +} + +void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, + int mmu_idx, uintptr_t retaddr) +{ + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL); +} + +void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, + int mmu_idx, uintptr_t retaddr) +{ + cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ); } void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr, @@ -2452,22 +2610,40 @@ void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr, cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } -void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) { - cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } -void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) { - cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } -void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr, - uint64_t val, uintptr_t retaddr) +void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr, + uint64_t val, uintptr_t retaddr) { - cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + 
cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); +} + +void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) +{ + cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); +} + +void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr, + uint32_t val, uintptr_t retaddr) +{ + cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); +} + +void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr, + uint64_t val, uintptr_t retaddr) +{ + cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); } void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) @@ -2475,19 +2651,34 @@ void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) cpu_stb_data_ra(env, ptr, val, 0); } -void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val) +void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) +{ + cpu_stw_be_data_ra(env, ptr, val, 0); +} + +void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) +{ + cpu_stl_be_data_ra(env, ptr, val, 0); +} + +void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val) +{ + cpu_stq_be_data_ra(env, ptr, val, 0); +} + +void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) { - cpu_stw_data_ra(env, ptr, val, 0); + cpu_stw_le_data_ra(env, ptr, val, 0); } -void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val) +void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) { - cpu_stl_data_ra(env, ptr, val, 0); + cpu_stl_le_data_ra(env, ptr, val, 0); } -void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val) +void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) { - cpu_stq_data_ra(env, ptr, val, 0); + cpu_stq_le_data_ra(env, ptr, val, 0); } /* First set of helpers allows passing in of OI and RETADDR. 
This makes diff --git a/qemu/accel/tcg/tcg-runtime-gvec.c b/qemu/accel/tcg/tcg-runtime-gvec.c index ea997c257f..41ab422366 100644 --- a/qemu/accel/tcg/tcg-runtime-gvec.c +++ b/qemu/accel/tcg/tcg-runtime-gvec.c @@ -724,6 +724,54 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) clear_high(d, oprsz, desc); } +void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + *(uint8_t *)((char *)d + i) = rol8(*(uint8_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + *(uint16_t *)((char *)d + i) = rol16(*(uint16_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + *(uint32_t *)((char *)d + i) = rol32(*(uint32_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + int shift = simd_data(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + *(uint64_t *)((char *)d + i) = rol64(*(uint64_t *)((char *)a + i), shift); + } + clear_high(d, oprsz, desc); +} + void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) { intptr_t oprsz = simd_oprsz(desc); @@ -868,6 +916,102 @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) clear_high(d, oprsz, desc); } +void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)((char *)b + i) & 7; + *(uint8_t *)((char *)d + i) = rol8(*(uint8_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)((char *)b + i) & 15; + *(uint16_t *)((char *)d + i) = rol16(*(uint16_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)((char *)b + i) & 31; + *(uint32_t *)((char *)d + i) = rol32(*(uint32_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)((char *)b + i) & 63; + *(uint64_t *)((char *)d + i) = rol64(*(uint64_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)((char *)b + i) & 7; + *(uint8_t *)((char *)d + i) = ror8(*(uint8_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = 
simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)((char *)b + i) & 15; + *(uint16_t *)((char *)d + i) = ror16(*(uint16_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)((char *)b + i) & 31; + *(uint32_t *)((char *)d + i) = ror32(*(uint32_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)((char *)b + i) & 63; + *(uint64_t *)((char *)d + i) = ror64(*(uint64_t *)((char *)a + i), sh); + } + clear_high(d, oprsz, desc); +} + #define DO_CMP1(NAME, TYPE, OP) \ void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ { \ diff --git a/qemu/accel/tcg/tcg-runtime.h b/qemu/accel/tcg/tcg-runtime.h index ab7369e8e3..b694d30e22 100644 --- a/qemu/accel/tcg/tcg-runtime.h +++ b/qemu/accel/tcg/tcg-runtime.h @@ -213,6 +213,11 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_rotl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -228,6 +233,16 @@ DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_rotr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_rotr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c index 3f6d2630f7..d240f35c87 100644 --- a/qemu/accel/tcg/translate-all.c +++ b/qemu/accel/tcg/translate-all.c @@ -1694,9 +1694,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags, int cflags) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; TCGContext *tcg_ctx = cpu->uc->tcg_ctx; CPUArchState *env = cpu->env_ptr; TranslationBlock 
*tb, *existing_tb; @@ -2155,9 +2153,7 @@ static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; /* Discard jump cache entries for any tb which might potentially overlap the flushed page. */ diff --git a/qemu/arm.h b/qemu/arm.h index 27592db350..061cd1d444 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_arm #define tcg_gen_shr_i64 tcg_gen_shr_i64_arm #define tcg_gen_st_i64 tcg_gen_st_i64_arm +#define tcg_gen_add_i64 tcg_gen_add_i64_arm +#define tcg_gen_sub_i64 tcg_gen_sub_i64_arm #define tcg_gen_xor_i64 tcg_gen_xor_i64_arm +#define tcg_gen_neg_i64 tcg_gen_neg_i64_arm #define cpu_icount_to_ns cpu_icount_to_ns_arm #define cpu_is_stopped cpu_is_stopped_arm #define cpu_get_ticks cpu_get_ticks_arm @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_arm #define floatx80_mul floatx80_mul_arm #define floatx80_div floatx80_div_arm +#define floatx80_modrem floatx80_modrem_arm +#define floatx80_mod floatx80_mod_arm #define floatx80_rem floatx80_rem_arm #define floatx80_sqrt floatx80_sqrt_arm #define floatx80_eq floatx80_eq_arm @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_arm #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_arm #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_arm +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_arm #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_arm #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_arm #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_arm @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_arm #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_arm #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_arm +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_arm +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_arm #define tcg_gen_gvec_sari tcg_gen_gvec_sari_arm +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_arm +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_arm #define tcg_gen_gvec_shls tcg_gen_gvec_shls_arm #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_arm #define tcg_gen_gvec_sars tcg_gen_gvec_sars_arm +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_arm #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_arm #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_arm #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_arm +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_arm +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_arm #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_arm #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_arm #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_arm @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_arm #define tcg_gen_shri_vec tcg_gen_shri_vec_arm #define tcg_gen_sari_vec tcg_gen_sari_vec_arm +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_arm +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_arm #define tcg_gen_cmp_vec tcg_gen_cmp_vec_arm #define tcg_gen_add_vec tcg_gen_add_vec_arm #define tcg_gen_sub_vec tcg_gen_sub_vec_arm @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_arm #define tcg_gen_shrv_vec tcg_gen_shrv_vec_arm #define tcg_gen_sarv_vec tcg_gen_sarv_vec_arm +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_arm +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_arm #define tcg_gen_shls_vec tcg_gen_shls_vec_arm #define tcg_gen_shrs_vec tcg_gen_shrs_vec_arm #define tcg_gen_sars_vec tcg_gen_sars_vec_arm +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_arm 
#define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_arm #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_arm #define tb_htable_lookup tb_htable_lookup_arm @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_arm #define cpu_loop_exit_atomic cpu_loop_exit_atomic_arm #define tlb_init tlb_init_arm +#define tlb_destroy tlb_destroy_arm #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_arm #define tlb_flush tlb_flush_arm #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_arm @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_arm #define get_page_addr_code_hostp get_page_addr_code_hostp_arm #define get_page_addr_code get_page_addr_code_arm +#define probe_access_flags probe_access_flags_arm #define probe_access probe_access_arm #define tlb_vaddr_to_host tlb_vaddr_to_host_arm #define helper_ret_ldub_mmu helper_ret_ldub_mmu_arm @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_arm #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_arm #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_arm -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_arm -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_arm -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_arm -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_arm +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_arm +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_arm +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_arm +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_arm +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_arm +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_arm +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_arm +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_arm #define cpu_ldub_data_ra cpu_ldub_data_ra_arm #define cpu_ldsb_data_ra cpu_ldsb_data_ra_arm -#define cpu_lduw_data_ra cpu_lduw_data_ra_arm -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_arm -#define cpu_ldl_data_ra cpu_ldl_data_ra_arm -#define cpu_ldq_data_ra cpu_ldq_data_ra_arm +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_arm +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_arm +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_arm +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_arm +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_arm +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_arm +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_arm +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_arm #define cpu_ldub_data cpu_ldub_data_arm #define cpu_ldsb_data cpu_ldsb_data_arm -#define cpu_lduw_data cpu_lduw_data_arm -#define cpu_ldsw_data cpu_ldsw_data_arm -#define cpu_ldl_data cpu_ldl_data_arm -#define cpu_ldq_data cpu_ldq_data_arm +#define cpu_lduw_be_data cpu_lduw_be_data_arm +#define cpu_lduw_le_data cpu_lduw_le_data_arm +#define cpu_ldsw_be_data cpu_ldsw_be_data_arm +#define cpu_ldsw_le_data cpu_ldsw_le_data_arm +#define cpu_ldl_be_data cpu_ldl_be_data_arm +#define cpu_ldl_le_data cpu_ldl_le_data_arm +#define cpu_ldq_le_data cpu_ldq_le_data_arm +#define cpu_ldq_be_data cpu_ldq_be_data_arm #define helper_ret_stb_mmu helper_ret_stb_mmu_arm #define helper_le_stw_mmu helper_le_stw_mmu_arm #define helper_be_stw_mmu helper_be_stw_mmu_arm @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_arm #define helper_be_stq_mmu helper_be_stq_mmu_arm #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_arm -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_arm -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_arm -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_arm +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_arm +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_arm +#define 
cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_arm +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_arm +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_arm +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_arm #define cpu_stb_data_ra cpu_stb_data_ra_arm -#define cpu_stw_data_ra cpu_stw_data_ra_arm -#define cpu_stl_data_ra cpu_stl_data_ra_arm -#define cpu_stq_data_ra cpu_stq_data_ra_arm +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_arm +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_arm +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_arm +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_arm +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_arm +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_arm #define cpu_stb_data cpu_stb_data_arm -#define cpu_stw_data cpu_stw_data_arm -#define cpu_stl_data cpu_stl_data_arm -#define cpu_stq_data cpu_stq_data_arm +#define cpu_stw_be_data cpu_stw_be_data_arm +#define cpu_stw_le_data cpu_stw_le_data_arm +#define cpu_stl_be_data cpu_stl_be_data_arm +#define cpu_stl_le_data cpu_stl_le_data_arm +#define cpu_stq_be_data cpu_stq_be_data_arm +#define cpu_stq_le_data cpu_stq_le_data_arm #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_arm #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_arm #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_arm @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_arm #define cpu_ldl_code cpu_ldl_code_arm #define cpu_ldq_code cpu_ldq_code_arm +#define cpu_interrupt_handler cpu_interrupt_handler_arm #define helper_div_i32 helper_div_i32_arm #define helper_rem_i32 helper_rem_i32_arm #define helper_divu_i32 helper_divu_i32_arm @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_arm #define helper_gvec_sar32i helper_gvec_sar32i_arm #define helper_gvec_sar64i helper_gvec_sar64i_arm +#define helper_gvec_rotl8i helper_gvec_rotl8i_arm +#define helper_gvec_rotl16i helper_gvec_rotl16i_arm +#define helper_gvec_rotl32i helper_gvec_rotl32i_arm +#define helper_gvec_rotl64i helper_gvec_rotl64i_arm #define helper_gvec_shl8v helper_gvec_shl8v_arm #define helper_gvec_shl16v helper_gvec_shl16v_arm #define helper_gvec_shl32v helper_gvec_shl32v_arm @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_arm #define helper_gvec_sar32v helper_gvec_sar32v_arm #define helper_gvec_sar64v helper_gvec_sar64v_arm +#define helper_gvec_rotl8v helper_gvec_rotl8v_arm +#define helper_gvec_rotl16v helper_gvec_rotl16v_arm +#define helper_gvec_rotl32v helper_gvec_rotl32v_arm +#define helper_gvec_rotl64v helper_gvec_rotl64v_arm +#define helper_gvec_rotr8v helper_gvec_rotr8v_arm +#define helper_gvec_rotr16v helper_gvec_rotr16v_arm +#define helper_gvec_rotr32v helper_gvec_rotr32v_arm +#define helper_gvec_rotr64v helper_gvec_rotr64v_arm #define helper_gvec_eq8 helper_gvec_eq8_arm #define helper_gvec_ne8 helper_gvec_ne8_arm #define helper_gvec_lt8 helper_gvec_lt8_arm @@ -1997,4 +2051,100 @@ #define sri_op sri_op_arm #define usra_op usra_op_arm #define ssra_op ssra_op_arm +#define gen_gvec_ceq0 gen_gvec_ceq0_arm +#define gen_gvec_cge0 gen_gvec_cge0_arm +#define gen_gvec_cgt0 gen_gvec_cgt0_arm +#define gen_gvec_cle0 gen_gvec_cle0_arm +#define gen_gvec_clt0 gen_gvec_clt0_arm +#define gen_gvec_cmtst gen_gvec_cmtst_arm +#define gen_gvec_mla gen_gvec_mla_arm +#define gen_gvec_mls gen_gvec_mls_arm +#define gen_gvec_saba gen_gvec_saba_arm +#define gen_gvec_sabd gen_gvec_sabd_arm +#define gen_gvec_sli gen_gvec_sli_arm +#define gen_gvec_sqadd_qc gen_gvec_sqadd_qc_arm +#define gen_gvec_sqrdmlah_qc gen_gvec_sqrdmlah_qc_arm 
+#define gen_gvec_sqrdmlsh_qc gen_gvec_sqrdmlsh_qc_arm +#define gen_gvec_sqsub_qc gen_gvec_sqsub_qc_arm +#define gen_gvec_sri gen_gvec_sri_arm +#define gen_gvec_srshr gen_gvec_srshr_arm +#define gen_gvec_srsra gen_gvec_srsra_arm +#define gen_gvec_sshl gen_gvec_sshl_arm +#define gen_gvec_ssra gen_gvec_ssra_arm +#define gen_gvec_uaba gen_gvec_uaba_arm +#define gen_gvec_uabd gen_gvec_uabd_arm +#define gen_gvec_uqadd_qc gen_gvec_uqadd_qc_arm +#define gen_gvec_uqsub_qc gen_gvec_uqsub_qc_arm +#define gen_gvec_urshr gen_gvec_urshr_arm +#define gen_gvec_ursra gen_gvec_ursra_arm +#define gen_gvec_ushl gen_gvec_ushl_arm +#define gen_gvec_usra gen_gvec_usra_arm +#define helper_crypto_rax1 helper_crypto_rax1_arm +#define helper_crypto_sha1c helper_crypto_sha1c_arm +#define helper_crypto_sha1m helper_crypto_sha1m_arm +#define helper_crypto_sha1p helper_crypto_sha1p_arm +#define helper_crypto_sha1su0 helper_crypto_sha1su0_arm +#define helper_crypto_sm3tt1a helper_crypto_sm3tt1a_arm +#define helper_crypto_sm3tt1b helper_crypto_sm3tt1b_arm +#define helper_crypto_sm3tt2a helper_crypto_sm3tt2a_arm +#define helper_crypto_sm3tt2b helper_crypto_sm3tt2b_arm +#define helper_gvec_ceq0_b helper_gvec_ceq0_b_arm +#define helper_gvec_ceq0_h helper_gvec_ceq0_h_arm +#define helper_gvec_cge0_b helper_gvec_cge0_b_arm +#define helper_gvec_cge0_h helper_gvec_cge0_h_arm +#define helper_gvec_cgt0_b helper_gvec_cgt0_b_arm +#define helper_gvec_cgt0_h helper_gvec_cgt0_h_arm +#define helper_gvec_cle0_b helper_gvec_cle0_b_arm +#define helper_gvec_cle0_h helper_gvec_cle0_h_arm +#define helper_gvec_clt0_b helper_gvec_clt0_b_arm +#define helper_gvec_clt0_h helper_gvec_clt0_h_arm +#define helper_gvec_fabd_s helper_gvec_fabd_s_arm +#define helper_gvec_saba_b helper_gvec_saba_b_arm +#define helper_gvec_saba_d helper_gvec_saba_d_arm +#define helper_gvec_saba_h helper_gvec_saba_h_arm +#define helper_gvec_saba_s helper_gvec_saba_s_arm +#define helper_gvec_sabd_b helper_gvec_sabd_b_arm +#define helper_gvec_sabd_d helper_gvec_sabd_d_arm +#define helper_gvec_sabd_h helper_gvec_sabd_h_arm +#define helper_gvec_sabd_s helper_gvec_sabd_s_arm +#define helper_gvec_sli_b helper_gvec_sli_b_arm +#define helper_gvec_sli_d helper_gvec_sli_d_arm +#define helper_gvec_sli_h helper_gvec_sli_h_arm +#define helper_gvec_sli_s helper_gvec_sli_s_arm +#define helper_gvec_sri_b helper_gvec_sri_b_arm +#define helper_gvec_sri_d helper_gvec_sri_d_arm +#define helper_gvec_sri_h helper_gvec_sri_h_arm +#define helper_gvec_sri_s helper_gvec_sri_s_arm +#define helper_gvec_srshr_b helper_gvec_srshr_b_arm +#define helper_gvec_srshr_d helper_gvec_srshr_d_arm +#define helper_gvec_srshr_h helper_gvec_srshr_h_arm +#define helper_gvec_srshr_s helper_gvec_srshr_s_arm +#define helper_gvec_srsra_b helper_gvec_srsra_b_arm +#define helper_gvec_srsra_d helper_gvec_srsra_d_arm +#define helper_gvec_srsra_h helper_gvec_srsra_h_arm +#define helper_gvec_srsra_s helper_gvec_srsra_s_arm +#define helper_gvec_ssra_b helper_gvec_ssra_b_arm +#define helper_gvec_ssra_d helper_gvec_ssra_d_arm +#define helper_gvec_ssra_h helper_gvec_ssra_h_arm +#define helper_gvec_ssra_s helper_gvec_ssra_s_arm +#define helper_gvec_uaba_b helper_gvec_uaba_b_arm +#define helper_gvec_uaba_d helper_gvec_uaba_d_arm +#define helper_gvec_uaba_h helper_gvec_uaba_h_arm +#define helper_gvec_uaba_s helper_gvec_uaba_s_arm +#define helper_gvec_uabd_b helper_gvec_uabd_b_arm +#define helper_gvec_uabd_d helper_gvec_uabd_d_arm +#define helper_gvec_uabd_h helper_gvec_uabd_h_arm +#define helper_gvec_uabd_s helper_gvec_uabd_s_arm 
+#define helper_gvec_urshr_b helper_gvec_urshr_b_arm +#define helper_gvec_urshr_d helper_gvec_urshr_d_arm +#define helper_gvec_urshr_h helper_gvec_urshr_h_arm +#define helper_gvec_urshr_s helper_gvec_urshr_s_arm +#define helper_gvec_ursra_b helper_gvec_ursra_b_arm +#define helper_gvec_ursra_d helper_gvec_ursra_d_arm +#define helper_gvec_ursra_h helper_gvec_ursra_h_arm +#define helper_gvec_ursra_s helper_gvec_ursra_s_arm +#define helper_gvec_usra_b helper_gvec_usra_b_arm +#define helper_gvec_usra_d helper_gvec_usra_d_arm +#define helper_gvec_usra_h helper_gvec_usra_h_arm +#define helper_gvec_usra_s helper_gvec_usra_s_arm #endif diff --git a/qemu/exec.c b/qemu/exec.c index 9786b19557..e9070d3448 100644 --- a/qemu/exec.c +++ b/qemu/exec.c @@ -171,9 +171,7 @@ static void phys_page_set(AddressSpaceDispatch *d, hwaddr index, uint64_t nb, uint16_t leaf) { -#ifdef TARGET_ARM - struct uc_struct *uc = d->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = d->uc; /* Wildly overreserve - it doesn't matter much. */ phys_map_node_reserve(d, &d->map, 3 * P_L2_LEVELS); @@ -254,9 +252,7 @@ static inline bool section_covers_addr(const MemoryRegionSection *section, static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr) { -#ifdef TARGET_ARM - struct uc_struct *uc = d->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = d->uc; PhysPageEntry lp = d->phys_map, *p; Node *nodes = d->map.nodes; MemoryRegionSection *sections = d->map.sections; @@ -283,9 +279,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, hwaddr addr, bool resolve_subpage) { -#ifdef TARGET_ARM - struct uc_struct *uc = d->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = d->uc; MemoryRegionSection *section = d->mru_section; subpage_t *subpage; @@ -1421,9 +1415,7 @@ static uint16_t dummy_section(struct uc_struct *uc, PhysPageMap *map, FlatView * MemoryRegionSection *iotlb_to_section(CPUState *cpu, hwaddr index, MemTxAttrs attrs) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; int asidx = cpu_asidx_from_attrs(cpu, attrs); CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx]; AddressSpaceDispatch *d = cpuas->memory_dispatch; @@ -1459,7 +1451,7 @@ AddressSpaceDispatch *address_space_dispatch_new(struct uc_struct *uc, FlatView void address_space_dispatch_clear(AddressSpaceDispatch *d) { MemoryRegionSection *section; - struct uc_struct *uc = d->uc; + UNICORN_UNUSED struct uc_struct *uc = d->uc; while (d->map.sections_nb > 0) { d->map.sections_nb--; section = &d->map.sections[d->map.sections_nb]; @@ -1891,7 +1883,7 @@ void *address_space_map(AddressSpace *as, MemoryRegion *mr; void *ptr; FlatView *fv; - struct uc_struct *uc = as->uc; + UNICORN_UNUSED struct uc_struct *uc = as->uc; if (len == 0) { return NULL; @@ -2020,9 +2012,7 @@ static inline MemoryRegion *address_space_translate_cached( int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, void *ptr, target_ulong len, bool is_write) { -#ifdef TARGET_ARM - struct uc_struct *uc = cpu->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = cpu->uc; hwaddr phys_addr; target_ulong l, page; uint8_t *buf = ptr; @@ -2030,6 +2020,7 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, while (len > 0) { int asidx; MemTxAttrs attrs; + MemTxResult res; page = addr & TARGET_PAGE_MASK; phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs); @@ -2042,12 +2033,15 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, l = len; phys_addr += (addr & ~TARGET_PAGE_MASK); if 
(is_write) { - address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, + res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, l); } else { - address_space_read(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, + res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, l); } + if (res != MEMTX_OK) { + return -1; + } len -= l; buf += l; addr += l; diff --git a/qemu/fpu/softfloat-specialize.inc.c b/qemu/fpu/softfloat-specialize.inc.c index 5ab2fa1941..034d18199c 100644 --- a/qemu/fpu/softfloat-specialize.inc.c +++ b/qemu/fpu/softfloat-specialize.inc.c @@ -93,7 +93,7 @@ this code that are retained. * 2008 revision and backward compatibility with their original choice. * Thus for MIPS we must make the choice at runtime. */ -static inline flag snan_bit_is_one(float_status *status) +static inline bool snan_bit_is_one(float_status *status) { #if defined(TARGET_MIPS) return status->snan_bit_is_one; @@ -114,7 +114,7 @@ static bool parts_is_snan_frac(uint64_t frac, float_status *status) #ifdef NO_SIGNALING_NANS return false; #else - flag msb = extract64(frac, DECOMPOSED_BINARY_POINT - 1, 1); + bool msb = extract64(frac, DECOMPOSED_BINARY_POINT - 1, 1); return msb == snan_bit_is_one(status); #endif } @@ -236,7 +236,7 @@ void float_raise(uint8_t flags, float_status *status) | Internal canonical NaN format. *----------------------------------------------------------------------------*/ typedef struct { - flag sign; + bool sign; uint64_t high, low; } commonNaNT; @@ -245,7 +245,7 @@ typedef struct { | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float16_is_quiet_nan(float16 a_, float_status *status) +bool float16_is_quiet_nan(float16 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return float16_is_any_nan(a_); @@ -254,7 +254,7 @@ int float16_is_quiet_nan(float16 a_, float_status *status) if (snan_bit_is_one(status)) { return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); } else { - return ((a & ~0x8000) >= 0x7C80); + return ((a >> 9) & 0x3F) == 0x3F; } #endif } @@ -264,14 +264,14 @@ int float16_is_quiet_nan(float16 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float16_is_signaling_nan(float16 a_, float_status *status) +bool float16_is_signaling_nan(float16 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; #else uint16_t a = float16_val(a_); if (snan_bit_is_one(status)) { - return ((a & ~0x8000) >= 0x7C80); + return ((a >> 9) & 0x3F) == 0x3F; } else { return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); } @@ -283,7 +283,7 @@ int float16_is_signaling_nan(float16 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float32_is_quiet_nan(float32 a_, float_status *status) +bool float32_is_quiet_nan(float32 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return float32_is_any_nan(a_); @@ -302,7 +302,7 @@ int float32_is_quiet_nan(float32 a_, float_status *status) | NaN; otherwise returns 0. 
*----------------------------------------------------------------------------*/ -int float32_is_signaling_nan(float32 a_, float_status *status) +bool float32_is_signaling_nan(float32 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; @@ -374,7 +374,7 @@ static float32 commonNaNToFloat32(commonNaNT a, float_status *status) *----------------------------------------------------------------------------*/ static int pickNaN(FloatClass a_cls, FloatClass b_cls, - flag aIsLargerSignificand) + bool aIsLargerSignificand) { #if defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) /* ARM mandated NaN propagation rules (see FPProcessNaNs()), take @@ -584,7 +584,7 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; uint32_t av, bv; FloatClass a_cls, b_cls; @@ -637,7 +637,7 @@ static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float64_is_quiet_nan(float64 a_, float_status *status) +bool float64_is_quiet_nan(float64 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return float64_is_any_nan(a_); @@ -657,7 +657,7 @@ int float64_is_quiet_nan(float64 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float64_is_signaling_nan(float64 a_, float_status *status) +bool float64_is_signaling_nan(float64 a_, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; @@ -722,7 +722,7 @@ static float64 commonNaNToFloat64(commonNaNT a, float_status *status) static float64 propagateFloat64NaN(float64 a, float64 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; uint64_t av, bv; FloatClass a_cls, b_cls; @@ -890,7 +890,7 @@ static floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; FloatClass a_cls, b_cls; /* This is not complete, but is good enough for pickNaN. */ @@ -939,7 +939,7 @@ floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float128_is_quiet_nan(float128 a, float_status *status) +bool float128_is_quiet_nan(float128 a, float_status *status) { #ifdef NO_SIGNALING_NANS return float128_is_any_nan(a); @@ -959,7 +959,7 @@ int float128_is_quiet_nan(float128 a, float_status *status) | signaling NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -int float128_is_signaling_nan(float128 a, float_status *status) +bool float128_is_signaling_nan(float128 a, float_status *status) { #ifdef NO_SIGNALING_NANS return 0; @@ -1038,7 +1038,7 @@ static float128 commonNaNToFloat128(commonNaNT a, float_status *status) static float128 propagateFloat128NaN(float128 a, float128 b, float_status *status) { - flag aIsLargerSignificand; + bool aIsLargerSignificand; FloatClass a_cls, b_cls; /* This is not complete, but is good enough for pickNaN. */ diff --git a/qemu/fpu/softfloat.c b/qemu/fpu/softfloat.c index 0e7938dc1c..930a2e352a 100644 --- a/qemu/fpu/softfloat.c +++ b/qemu/fpu/softfloat.c @@ -114,7 +114,7 @@ this code that are retained. 
* * The idea is thus to leverage the host FPU to (1) compute FP operations * and (2) identify whether FP exceptions occurred while avoiding - * expensive exception flag register accesses. + * expensive exception bool register accesses. * * An important optimization shown in the paper is that given that exception * flags are rarely cleared by the guest, we can avoid recomputing some flags. @@ -217,7 +217,7 @@ GEN_INPUT_FLUSH3(float64_input_flush3, float64) /* * Some targets clear the FP flags before most FP operations. This prevents - * the use of hardfloat, since hardfloat relies on the inexact flag being + * the use of hardfloat, since hardfloat relies on the inexact bool being * already set. */ #if defined(TARGET_PPC) || defined(__FAST_MATH__) @@ -342,12 +342,10 @@ static inline bool f64_is_inf(union_float64 a) return float64_is_infinity(a.s); } -/* Note: @fast_test and @post can be NULL */ static inline float32 float32_gen2(float32 xa, float32 xb, float_status *s, hard_f32_op2_fn hard, soft_f32_op2_fn soft, - f32_check_fn pre, f32_check_fn post, - f32_check_fn fast_test, soft_f32_op2_fn fast_op) + f32_check_fn pre, f32_check_fn post) { union_float32 ua, ub, ur; @@ -362,17 +360,12 @@ float32_gen2(float32 xa, float32 xb, float_status *s, if (unlikely(!pre(ua, ub))) { goto soft; } - if (fast_test && fast_test(ua, ub)) { - return fast_op(ua.s, ub.s, s); - } ur.h = hard(ua.h, ub.h); if (unlikely(f32_is_inf(ur))) { s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { - if (post == NULL || post(ua, ub)) { - goto soft; - } + } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) { + goto soft; } return ur.s; @@ -383,8 +376,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s, static inline float64 float64_gen2(float64 xa, float64 xb, float_status *s, hard_f64_op2_fn hard, soft_f64_op2_fn soft, - f64_check_fn pre, f64_check_fn post, - f64_check_fn fast_test, soft_f64_op2_fn fast_op) + f64_check_fn pre, f64_check_fn post) { union_float64 ua, ub, ur; @@ -399,17 +391,12 @@ float64_gen2(float64 xa, float64 xb, float_status *s, if (unlikely(!pre(ua, ub))) { goto soft; } - if (fast_test && fast_test(ua, ub)) { - return fast_op(ua.s, ub.s, s); - } ur.h = hard(ua.h, ub.h); if (unlikely(f64_is_inf(ur))) { s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabs(ur.h) <= DBL_MIN)) { - if (post == NULL || post(ua, ub)) { - goto soft; - } + } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) { + goto soft; } return ur.s; @@ -439,7 +426,7 @@ static inline int extractFloat32Exp(float32 a) | Returns the sign bit of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloat32Sign(float32 a) +static inline bool extractFloat32Sign(float32 a) { return float32_val(a) >> 31; } @@ -466,7 +453,7 @@ static inline int extractFloat64Exp(float64 a) | Returns the sign bit of the double-precision floating-point value `a'. 
*----------------------------------------------------------------------------*/ -static inline flag extractFloat64Sign(float64 a) +static inline bool extractFloat64Sign(float64 a) { return float64_val(a) >> 63; } @@ -786,8 +773,7 @@ static FloatParts round_canonical(FloatParts p, float_status *s, p.cls = float_class_zero; goto do_zero; } else { - bool is_tiny = (s->float_detect_tininess - == float_tininess_before_rounding) + bool is_tiny = s->tininess_before_rounding || (exp < 0) || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT); @@ -802,6 +788,8 @@ static FloatParts round_canonical(FloatParts p, float_status *s, case float_round_to_odd: inc = frac & frac_lsb ? 0 : round_mask; break; + default: + break; } flags |= float_flag_inexact; frac += inc; @@ -1149,7 +1137,7 @@ static double hard_f64_sub(double a, double b) return a - b; } -static bool f32_addsub_post(union_float32 a, union_float32 b) +static bool f32_addsubmul_post(union_float32 a, union_float32 b) { if (QEMU_HARDFLOAT_2F32_USE_FP) { return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); @@ -1157,7 +1145,7 @@ static bool f32_addsub_post(union_float32 a, union_float32 b) return !(float32_is_zero(a.s) && float32_is_zero(b.s)); } -static bool f64_addsub_post(union_float64 a, union_float64 b) +static bool f64_addsubmul_post(union_float64 a, union_float64 b) { if (QEMU_HARDFLOAT_2F64_USE_FP) { return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); @@ -1170,14 +1158,14 @@ static float32 float32_addsub(float32 a, float32 b, float_status *s, hard_f32_op2_fn hard, soft_f32_op2_fn soft) { return float32_gen2(a, b, s, hard, soft, - f32_is_zon2, f32_addsub_post, NULL, NULL); + f32_is_zon2, f32_addsubmul_post); } static float64 float64_addsub(float64 a, float64 b, float_status *s, hard_f64_op2_fn hard, soft_f64_op2_fn soft) { return float64_gen2(a, b, s, hard, soft, - f64_is_zon2, f64_addsub_post, NULL, NULL); + f64_is_zon2, f64_addsubmul_post); } float32 QEMU_FLATTEN @@ -1294,42 +1282,18 @@ static double hard_f64_mul(double a, double b) return a * b; } -static bool f32_mul_fast_test(union_float32 a, union_float32 b) -{ - return float32_is_zero(a.s) || float32_is_zero(b.s); -} - -static bool f64_mul_fast_test(union_float64 a, union_float64 b) -{ - return float64_is_zero(a.s) || float64_is_zero(b.s); -} - -static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s) -{ - bool signbit = float32_is_neg(a) ^ float32_is_neg(b); - - return float32_set_sign(float32_zero, signbit); -} - -static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s) -{ - bool signbit = float64_is_neg(a) ^ float64_is_neg(b); - - return float64_set_sign(float64_zero, signbit); -} - float32 QEMU_FLATTEN float32_mul(float32 a, float32 b, float_status *s) { return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul, - f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op); + f32_is_zon2, f32_addsubmul_post); } float64 QEMU_FLATTEN float64_mul(float64 a, float64 b, float_status *s) { return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul, - f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op); + f64_is_zon2, f64_addsubmul_post); } /* @@ -1872,14 +1836,14 @@ float32 QEMU_FLATTEN float32_div(float32 a, float32 b, float_status *s) { return float32_gen2(a, b, s, hard_f32_div, soft_f32_div, - f32_div_pre, f32_div_post, NULL, NULL); + f32_div_pre, f32_div_post); } float64 QEMU_FLATTEN float64_div(float64 a, float64 b, float_status *s) { return float64_gen2(a, b, s, hard_f64_div, soft_f64_div, - f64_div_pre, f64_div_post, NULL, NULL); + 
f64_div_pre, f64_div_post); } /* @@ -2004,7 +1968,7 @@ float32 float64_to_float32(float64 a, float_status *s) * Arithmetic. */ -static FloatParts round_to_int(FloatParts a, int rmode, +static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode, int scale, float_status *s) { switch (a.cls) { @@ -2139,7 +2103,7 @@ float64 float64_round_to_int(float64 a, float_status *s) * is returned. */ -static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale, +static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode, int scale, int64_t min, int64_t max, float_status *s) { @@ -2191,63 +2155,63 @@ static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale, } } -int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale, +int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float16_unpack_canonical(a, s), rmode, scale, INT16_MIN, INT16_MAX, s); } -int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale, +int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float16_unpack_canonical(a, s), rmode, scale, INT32_MIN, INT32_MAX, s); } -int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale, +int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float16_unpack_canonical(a, s), rmode, scale, INT64_MIN, INT64_MAX, s); } -int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale, +int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float32_unpack_canonical(a, s), rmode, scale, INT16_MIN, INT16_MAX, s); } -int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale, +int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float32_unpack_canonical(a, s), rmode, scale, INT32_MIN, INT32_MAX, s); } -int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale, +int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float32_unpack_canonical(a, s), rmode, scale, INT64_MIN, INT64_MAX, s); } -int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale, +int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float64_unpack_canonical(a, s), rmode, scale, INT16_MIN, INT16_MAX, s); } -int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale, +int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float64_unpack_canonical(a, s), rmode, scale, INT32_MIN, INT32_MAX, s); } -int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale, +int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_int_and_pack(float64_unpack_canonical(a, s), @@ -2357,7 +2321,7 @@ int64_t float64_to_int64_round_to_zero(float64 a, float_status *s) * flag. 
*/ -static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale, +static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode, int scale, uint64_t max, float_status *s) { int orig_flags = get_float_exception_flags(s); @@ -2404,63 +2368,63 @@ static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale, } } -uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale, +uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float16_unpack_canonical(a, s), rmode, scale, UINT16_MAX, s); } -uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale, +uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float16_unpack_canonical(a, s), rmode, scale, UINT32_MAX, s); } -uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale, +uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float16_unpack_canonical(a, s), rmode, scale, UINT64_MAX, s); } -uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale, +uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float32_unpack_canonical(a, s), rmode, scale, UINT16_MAX, s); } -uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale, +uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float32_unpack_canonical(a, s), rmode, scale, UINT32_MAX, s); } -uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale, +uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float32_unpack_canonical(a, s), rmode, scale, UINT64_MAX, s); } -uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale, +uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float64_unpack_canonical(a, s), rmode, scale, UINT16_MAX, s); } -uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale, +uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float64_unpack_canonical(a, s), rmode, scale, UINT32_MAX, s); } -uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale, +uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, float_status *s) { return round_to_uint_and_pack(float64_unpack_canonical(a, s), @@ -2934,8 +2898,8 @@ MINMAX(64, maxnummag, false, true, true) #undef MINMAX /* Floating point compare */ -static int compare_floats(FloatParts a, FloatParts b, bool is_quiet, - float_status *s) +static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet, + float_status *s) { if (is_nan(a.cls) || is_nan(b.cls)) { if (!is_quiet || @@ -3006,17 +2970,17 @@ COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64) #undef COMPARE -int float16_compare(float16 a, float16 b, float_status *s) +FloatRelation float16_compare(float16 a, float16 b, float_status *s) { return soft_f16_compare(a, b, false, s); } -int float16_compare_quiet(float16 a, float16 b, float_status *s) +FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s) { return soft_f16_compare(a, b, true, s); } -static int QEMU_FLATTEN +static FloatRelation QEMU_FLATTEN f32_compare(float32 xa, float32 
xb, bool is_quiet, float_status *s) { union_float32 ua, ub; @@ -3045,17 +3009,17 @@ f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s) return soft_f32_compare(ua.s, ub.s, is_quiet, s); } -int float32_compare(float32 a, float32 b, float_status *s) +FloatRelation float32_compare(float32 a, float32 b, float_status *s) { return f32_compare(a, b, false, s); } -int float32_compare_quiet(float32 a, float32 b, float_status *s) +FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s) { return f32_compare(a, b, true, s); } -static int QEMU_FLATTEN +static FloatRelation QEMU_FLATTEN f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s) { union_float64 ua, ub; @@ -3084,12 +3048,12 @@ f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s) return soft_f64_compare(ua.s, ub.s, is_quiet, s); } -int float64_compare(float64 a, float64 b, float_status *s) +FloatRelation float64_compare(float64 a, float64 b, float_status *s) { return f64_compare(a, b, false, s); } -int float64_compare_quiet(float64 a, float64 b, float_status *s) +FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s) { return f64_compare(a, b, true, s); } @@ -3420,10 +3384,10 @@ float64 float64_squash_input_denormal(float64 a, float_status *status) | positive or negative integer is returned. *----------------------------------------------------------------------------*/ -static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status) +static int32_t roundAndPackInt32(bool zSign, uint64_t absZ, float_status *status) { int8_t roundingMode; - flag roundNearestEven; + bool roundNearestEven; int8_t roundIncrement, roundBits; int32_t z; @@ -3451,7 +3415,9 @@ static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status } roundBits = absZ & 0x7F; absZ = ( absZ + roundIncrement )>>7; - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x40) && roundNearestEven) { + absZ &= ~1; + } z = absZ; if ( zSign ) z = - z; if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { @@ -3477,11 +3443,11 @@ static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status | returned. *----------------------------------------------------------------------------*/ -static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, +static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment; + bool roundNearestEven, increment; int64_t z; roundingMode = status->float_rounding_mode; @@ -3509,7 +3475,9 @@ static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, if ( increment ) { ++absZ0; if ( absZ0 == 0 ) goto overflow; - absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven ); + if (!(absZ1 << 1) && roundNearestEven) { + absZ0 &= ~1; + } } z = absZ0; if ( zSign ) z = - z; @@ -3535,11 +3503,11 @@ static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, | exception is raised and the largest unsigned integer is returned. 
*----------------------------------------------------------------------------*/ -static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0, +static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0, uint64_t absZ1, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment; + bool roundNearestEven, increment; roundingMode = status->float_rounding_mode; roundNearestEven = (roundingMode == float_round_nearest_even); @@ -3569,7 +3537,9 @@ static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0, float_raise(float_flag_invalid, status); return UINT64_MAX; } - absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven); + if (!(absZ1 << 1) && roundNearestEven) { + absZ0 &= ~1; + } } if (zSign && absZ0) { @@ -3623,13 +3593,13 @@ static void | Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, +static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, float_status *status) { int8_t roundingMode; - flag roundNearestEven; + bool roundNearestEven; int8_t roundIncrement, roundBits; - flag isTiny; + bool isTiny; roundingMode = status->float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -3670,11 +3640,9 @@ static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, float_raise(float_flag_output_denormal, status); return packFloat32(zSign, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ( zSig + roundIncrement < 0x80000000 ); + isTiny = status->tininess_before_rounding + || ( zExp < -1 ) + || ( zSig + roundIncrement < 0x80000000 ); shift32RightJamming( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x7F; @@ -3694,7 +3662,9 @@ static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, status->float_exception_flags |= float_flag_inexact; } zSig = ( zSig + roundIncrement )>>7; - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x40) && roundNearestEven) { + zSig &= ~1; + } if ( zSig == 0 ) zExp = 0; return packFloat32( zSign, zExp, zSig ); @@ -3710,7 +3680,7 @@ static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, *----------------------------------------------------------------------------*/ static float32 - normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, + normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, float_status *status) { int8_t shiftCount; @@ -3750,7 +3720,7 @@ static void | significand. *----------------------------------------------------------------------------*/ -static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig) +static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig) { return make_float64( @@ -3780,13 +3750,13 @@ static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig) | Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, +static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, float_status *status) { int8_t roundingMode; - flag roundNearestEven; + bool roundNearestEven; int roundIncrement, roundBits; - flag isTiny; + bool isTiny; roundingMode = status->float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -3826,11 +3796,9 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, float_raise(float_flag_output_denormal, status); return packFloat64(zSign, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ( zSig + roundIncrement < UINT64_C(0x8000000000000000) ); + isTiny = status->tininess_before_rounding + || ( zExp < -1 ) + || ( zSig + roundIncrement < UINT64_C(0x8000000000000000) ); shift64RightJamming( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x3FF; @@ -3850,7 +3818,9 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, status->float_exception_flags |= float_flag_inexact; } zSig = ( zSig + roundIncrement )>>10; - zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if (!(roundBits ^ 0x200) && roundNearestEven) { + zSig &= ~1; + } if ( zSig == 0 ) zExp = 0; return packFloat64( zSign, zExp, zSig ); @@ -3866,7 +3836,7 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, *----------------------------------------------------------------------------*/ static float64 - normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, + normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, float_status *status) { int8_t shiftCount; @@ -3918,12 +3888,12 @@ void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment, isTiny; + bool roundNearestEven, increment, isTiny; int64_t roundIncrement, roundMask, roundBits; roundingMode = status->float_rounding_mode; @@ -3969,11 +3939,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, float_raise(float_flag_output_denormal, status); return packFloatx80(zSign, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < 0 ) - || ( zSig0 <= zSig0 + roundIncrement ); + isTiny = status->tininess_before_rounding + || ( zExp < 0 ) + || ( zSig0 <= zSig0 + roundIncrement ); shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); zExp = 0; roundBits = zSig0 & roundMask; @@ -4047,12 +4015,10 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, floatx80_infinity_low); } if ( zExp <= 0 ) { - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < 0 ) - || ! increment - || ( zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF) ); + isTiny = status->tininess_before_rounding + || ( zExp < 0 ) + || ! 
increment + || ( zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF) ); shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); zExp = 0; if (isTiny && zSig1) { @@ -4080,8 +4046,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, } if ( increment ) { ++zSig0; - zSig0 &= - ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } if ( (int64_t) zSig0 < 0 ) zExp = 1; } return packFloatx80( zSign, zExp, zSig0 ); @@ -4097,7 +4064,9 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, zSig0 = UINT64_C(0x8000000000000000); } else { - zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } } } else { @@ -4117,7 +4086,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, *----------------------------------------------------------------------------*/ floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, - flag zSign, int32_t zExp, + bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { @@ -4176,10 +4145,10 @@ static inline int32_t extractFloat128Exp( float128 a ) | Returns the sign bit of the quadruple-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloat128Sign( float128 a ) +static inline bool extractFloat128Sign( float128 a ) { - return a.high>>63; + return a.high >> 63; } @@ -4238,7 +4207,7 @@ static void *----------------------------------------------------------------------------*/ static inline float128 - packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 ) + packFloat128( bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 ) { float128 z; @@ -4269,12 +4238,12 @@ static inline float128 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 roundAndPackFloat128(flag zSign, int32_t zExp, +static float128 roundAndPackFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2, float_status *status) { int8_t roundingMode; - flag roundNearestEven, increment, isTiny; + bool roundNearestEven, increment, isTiny; roundingMode = status->float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -4331,17 +4300,12 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, float_raise(float_flag_output_denormal, status); return packFloat128(zSign, 0, 0, 0); } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ! increment - || lt128( - zSig0, - zSig1, - UINT64_C(0x0001FFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFF) - ); + isTiny = status->tininess_before_rounding + || ( zExp < -1 ) + || ! increment + || lt128(zSig0, zSig1, + UINT64_C(0x0001FFFFFFFFFFFF), + UINT64_C(0xFFFFFFFFFFFFFFFF)); shift128ExtraRightJamming( zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); zExp = 0; @@ -4375,7 +4339,9 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, } if ( increment ) { add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); - zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); + if ((zSig2 + zSig2 == 0) && roundNearestEven) { + zSig1 &= ~1; + } } else { if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; @@ -4394,7 +4360,7 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, | point exponent. 
*----------------------------------------------------------------------------*/ -static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, +static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { @@ -4430,7 +4396,7 @@ static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, floatx80 int32_to_floatx80(int32_t a, float_status *status) { - flag zSign; + bool zSign; uint32_t absA; int8_t shiftCount; uint64_t zSig; @@ -4452,7 +4418,7 @@ floatx80 int32_to_floatx80(int32_t a, float_status *status) float128 int32_to_float128(int32_t a, float_status *status) { - flag zSign; + bool zSign; uint32_t absA; int8_t shiftCount; uint64_t zSig0; @@ -4475,7 +4441,7 @@ float128 int32_to_float128(int32_t a, float_status *status) floatx80 int64_to_floatx80(int64_t a, float_status *status) { - flag zSign; + bool zSign; uint64_t absA; int8_t shiftCount; @@ -4495,7 +4461,7 @@ floatx80 int64_to_floatx80(int64_t a, float_status *status) float128 int64_to_float128(int64_t a, float_status *status) { - flag zSign; + bool zSign; uint64_t absA; int8_t shiftCount; int32_t zExp; @@ -4543,7 +4509,7 @@ float128 uint64_to_float128(uint64_t a, float_status *status) floatx80 float32_to_floatx80(float32 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint32_t aSig; @@ -4553,7 +4519,8 @@ floatx80 float32_to_floatx80(float32 a, float_status *status) aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { if (aSig) { - return commonNaNToFloatx80(float32ToCommonNaN(a, status), status); + floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status), status); + return floatx80_silence_nan(res, status); } return packFloatx80(aSign, floatx80_infinity_high, @@ -4577,7 +4544,7 @@ floatx80 float32_to_floatx80(float32 a, float_status *status) float128 float32_to_float128(float32 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint32_t aSig; @@ -4608,7 +4575,7 @@ float128 float32_to_float128(float32 a, float_status *status) float32 float32_rem(float32 a, float32 b, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp, bExp, expDiff; uint32_t aSig, bSig; uint32_t q; @@ -4751,7 +4718,7 @@ static const float64 float32_exp2_coefficients[15] = float32 float32_exp2(float32 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint32_t aSig; float64 r, x, xn; @@ -4801,7 +4768,7 @@ float32 float32_exp2(float32 a, float_status *status) *----------------------------------------------------------------------------*/ float32 float32_log2(float32 a, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp; uint32_t aSig, zSig, i; @@ -4848,222 +4815,6 @@ float32 float32_log2(float32 a, float_status *status) return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status); } -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float32_eq(float32 a, float32 b, float_status *status) -{ - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - av = float32_val(a); - bv = float32_val(b); - return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_le(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_lt(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float32_unordered(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_eq_quiet(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return ( float32_val(a) == float32_val(b) ) || - ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_le_quiet(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float32_lt_quiet(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_unordered_quiet(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - /*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point value | `a' to the extended double-precision floating-point format. 
The conversion @@ -5073,7 +4824,7 @@ int float32_unordered_quiet(float32 a, float32 b, float_status *status) floatx80 float64_to_floatx80(float64 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint64_t aSig; @@ -5083,7 +4834,8 @@ floatx80 float64_to_floatx80(float64 a, float_status *status) aSign = extractFloat64Sign( a ); if ( aExp == 0x7FF ) { if (aSig) { - return commonNaNToFloatx80(float64ToCommonNaN(a, status), status); + floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status), status); + return floatx80_silence_nan(res, status); } return packFloatx80(aSign, floatx80_infinity_high, @@ -5108,7 +4860,7 @@ floatx80 float64_to_floatx80(float64 a, float_status *status) float128 float64_to_float128(float64 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint64_t aSig, zSig0, zSig1; @@ -5141,7 +4893,7 @@ float128 float64_to_float128(float64 a, float_status *status) float64 float64_rem(float64 a, float64 b, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp, bExp, expDiff; uint64_t aSig, bSig; uint64_t q, alternateASig; @@ -5236,7 +4988,7 @@ float64 float64_rem(float64 a, float64 b, float_status *status) *----------------------------------------------------------------------------*/ float64 float64_log2(float64 a, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int aExp; uint64_t aSig, aSig0, aSig1, zSig, i; a = float64_squash_input_denormal(a, status); @@ -5283,361 +5035,141 @@ float64 float64_log2(float64 a, float_status *status) } /*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. The invalid exception is raised -| if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic---which means in particular that the conversion +| is rounded according to the current rounding mode. If `a' is a NaN, the +| largest positive integer is returned. Otherwise, if the conversion +| overflows, the largest integer with the same sign as `a' is returned. 
*----------------------------------------------------------------------------*/ -int float64_eq(float64 a, float64 b, float_status *status) +int32_t floatx80_to_int32(floatx80 a, float_status *status) { - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); + bool aSign; + int32_t aExp, shiftCount; + uint64_t aSig; - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { + if (floatx80_invalid_encoding(a)) { float_raise(float_flag_invalid, status); - return 0; + return 1 << 31; } - av = float64_val(a); - bv = float64_val(b); - return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32(aSign, aSig, status); } /*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic, except that the conversion is always rounded +| toward zero. If `a' is a NaN, the largest positive integer is returned. +| Otherwise, if the conversion overflows, the largest integer with the same +| sign as `a' is returned. *----------------------------------------------------------------------------*/ -int float64_le(float64 a, float64 b, float_status *status) +int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) { - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); + bool aSign; + int32_t aExp, shiftCount; + uint64_t aSig, savedASig; + int32_t z; - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { + if (floatx80_invalid_encoding(a)) { float_raise(float_flag_invalid, status); + return 1 << 31; + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( 0x401E < aExp ) { + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FFF ) { + if (aExp || aSig) { + status->float_exception_flags |= float_flag_inexact; + } return 0; } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); + shiftCount = 0x403E - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise(float_flag_invalid, status); + return aSign ? 
(int32_t) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + status->float_exception_flags |= float_flag_inexact; + } + return z; } /*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 64-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic---which means in particular that the conversion +| is rounded according to the current rounding mode. If `a' is a NaN, +| the largest positive integer is returned. Otherwise, if the conversion +| overflows, the largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -int float64_lt(float64 a, float64 b, float_status *status) +int64_t floatx80_to_int64(floatx80 a, float_status *status) { - flag aSign, bSign; - uint64_t av, bv; + bool aSign; + int32_t aExp, shiftCount; + uint64_t aSig, aSigExtra; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { + if (floatx80_invalid_encoding(a)) { float_raise(float_flag_invalid, status); - return 0; + return 1ULL << 63; } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + shiftCount = 0x403E - aExp; + if ( shiftCount <= 0 ) { + if ( shiftCount ) { + float_raise(float_flag_invalid, status); + if (!aSign || floatx80_is_any_nan(a)) { + return INT64_MAX; + } + return INT64_MIN; + } + aSigExtra = 0; + } + else { + shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); + } + return roundAndPackInt64(aSign, aSig, aSigExtra, status); } /*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 64-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic, except that the conversion is always rounded +| toward zero. If `a' is a NaN, the largest positive integer is returned. +| Otherwise, if the conversion overflows, the largest integer with the same +| sign as `a' is returned.
*----------------------------------------------------------------------------*/ -int float64_unordered(float64 a, float64 b, float_status *status) +int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) { - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception.The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_eq_quiet(float64 a, float64 b, float_status *status) -{ - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - av = float64_val(a); - bv = float64_val(b); - return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_le_quiet(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float64_lt_quiet(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_unordered_quiet(float64 a, float64 b, float_status *status) -{ - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, the -| largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32_t floatx80_to_int32(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32(aSign, aSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. 
-| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig, savedASig; - int32_t z; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if (aExp || aSig) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - shiftCount = 0x403E - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<<shiftCount ) != savedASig ) { - status->float_exception_flags |= float_flag_inexact; - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 64-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, -| the largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int64_t floatx80_to_int64(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig, aSigExtra; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1ULL << 63; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = 0x403E - aExp; - if ( shiftCount <= 0 ) { - if ( shiftCount ) { - float_raise(float_flag_invalid, status); - if (!aSign || floatx80_is_any_nan(a)) { - return INT64_MAX; - } - return INT64_MIN; - } - aSigExtra = 0; - } - else { - shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); - } - return roundAndPackInt64(aSign, aSig, aSigExtra, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 64-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. -| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned.
-*----------------------------------------------------------------------------*/ - -int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) -{ - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig; int64_t z; @@ -5684,7 +5216,7 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) float32 floatx80_to_float32(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -5697,7 +5229,8 @@ float32 floatx80_to_float32(floatx80 a, float_status *status) aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status); + float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status), status); + return float32_silence_nan(res, status); } return packFloat32( aSign, 0xFF, 0 ); } @@ -5716,7 +5249,7 @@ float32 floatx80_to_float32(floatx80 a, float_status *status) float64 floatx80_to_float64(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig, zSig; @@ -5729,7 +5262,8 @@ float64 floatx80_to_float64(floatx80 a, float_status *status) aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status); + float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status), status); + return float64_silence_nan(res, status); } return packFloat64( aSign, 0x7FF, 0 ); } @@ -5748,7 +5282,7 @@ float64 floatx80_to_float64(floatx80 a, float_status *status) float128 floatx80_to_float128(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int aExp; uint64_t aSig, zSig0, zSig1; @@ -5760,7 +5294,8 @@ float128 floatx80_to_float128(floatx80 a, float_status *status) aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status); + float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status), status); + return float128_silence_nan(res, status); } shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); return packFloat128( aSign, aExp, zSig0, zSig1 ); @@ -5792,7 +5327,7 @@ floatx80 floatx80_round(floatx80 a, float_status *status) floatx80 floatx80_round_to_int(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t lastBitMask, roundBitsMask; floatx80 z; @@ -5810,7 +5345,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) } if ( aExp < 0x3FFF ) { if ( ( aExp == 0 ) - && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { + && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) { return a; } status->float_exception_flags |= float_flag_inexact; @@ -5837,6 +5372,10 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) return aSign ? packFloatx80( 1, 0, 0 ) : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000)); + case float_round_to_zero: + break; + default: + g_assert_not_reached(); } return packFloatx80( aSign, 0, 0 ); } @@ -5889,7 +5428,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, +static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -5935,6 +5474,12 @@ static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, zSig1 = 0; zSig0 = aSig + bSig; if ( aExp == 0 ) { + if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) { + /* At least one of the values is a pseudo-denormal, + * and there is a carry out of the result. */ + zExp = 1; + goto shiftRight1; + } if (zSig0 == 0) { return packFloatx80(zSign, 0, 0); } @@ -5963,7 +5508,7 @@ static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, +static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -6032,7 +5577,7 @@ static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { float_raise(float_flag_invalid, status); @@ -6057,7 +5602,7 @@ floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { float_raise(float_flag_invalid, status); @@ -6082,7 +5627,7 @@ floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig, bSig, zSig0, zSig1; @@ -6144,7 +5689,7 @@ floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig, bSig, zSig0, zSig1; uint64_t rem0, rem1, rem2, term0, term1, term2; @@ -6226,13 +5771,16 @@ floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) /*---------------------------------------------------------------------------- | Returns the remainder of the extended double-precision floating-point value | `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic, +| if 'mod' is false; if 'mod' is true, return the remainder based on truncating +| the quotient toward zero instead. '*quotient' is set to the low 64 bits of +| the absolute value of the integer quotient. 
*----------------------------------------------------------------------------*/ -floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) +floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient, float_status *status) { - flag aSign, zSign; - int32_t aExp, bExp, expDiff; + bool aSign, zSign; + int32_t aExp, bExp, expDiff, aExpOrig; uint64_t aSig0, aSig1, bSig; uint64_t q, term0, term1, alternateASig0, alternateASig1; @@ -6241,7 +5789,7 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) return floatx80_default_nan(status); } aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); + aExpOrig = aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); bSig = extractFloatx80Frac( b ); bExp = extractFloatx80Exp( b ); @@ -6256,6 +5804,13 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) if ((uint64_t)(bSig << 1)) { return propagateFloatx80NaN(a, b, status); } + if (aExp == 0 && aSig0 >> 63) { + /* + * Pseudo-denormal argument must be returned in normalized + * form. + */ + return packFloatx80(aSign, 1, aSig0); + } return a; } if ( bExp == 0 ) { @@ -6267,19 +5822,26 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; + if ( aSig0 == 0 ) return a; normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } - bSig |= UINT64_C(0x8000000000000000); zSign = aSign; expDiff = aExp - bExp; aSig1 = 0; if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; + if ( mod || expDiff < -1 ) { + if (aExp == 1 && aExpOrig == 0) { + /* + * Pseudo-denormal argument must be returned in + * normalized form. + */ + return packFloatx80(aSign, aExp, aSig0); + } + } shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); expDiff = 0; } - q = ( bSig <= aSig0 ); + *quotient = q = ( bSig <= aSig0 ); if ( q ) aSig0 -= bSig; expDiff -= 64; while ( 0 < expDiff ) { @@ -6289,6 +5851,8 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); expDiff -= 62; + *quotient <<= 62; + *quotient += q; } expDiff += 64; if ( 0 < expDiff ) { @@ -6302,19 +5866,28 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) ++q; sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); } + if (expDiff < 64) { + *quotient <<= expDiff; + } else { + *quotient = 0; + } + *quotient += q; } else { term1 = 0; term0 = bSig; } - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( q & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; + if (!mod) { + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( q & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; + ++*quotient; + } } return normalizeRoundAndPackFloatx80( @@ -6322,6 +5895,30 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) } +/*---------------------------------------------------------------------------- +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b'. 
The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) +{ + uint64_t quotient; + return floatx80_modrem(a, b, false, &quotient, status); +} + +/*---------------------------------------------------------------------------- +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b', with the quotient truncated +| toward zero. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) +{ + uint64_t quotient; + return floatx80_modrem(a, b, true, &quotient, status); +} + /*---------------------------------------------------------------------------- | Returns the square root of the extended double-precision floating-point | value `a'. The operation is performed according to the IEC/IEEE Standard @@ -6330,7 +5927,7 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_sqrt(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, zExp; uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; @@ -6393,263 +5990,6 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *status) 0, zExp, zSig0, zSig1, status); } -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is equal -| to the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_eq(floatx80 a, floatx80 b, float_status *status) -{ - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than or equal to the corresponding value `b', and 0 otherwise. The -| invalid exception is raised if either operand is a NaN. The comparison is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_le(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ?
le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_lt(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point values `a' and `b' -| cannot be compared, and 0 otherwise. The invalid exception is raised if -| either operand is a NaN. The comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -int floatx80_unordered(floatx80 a, floatx80 b, float_status *status) -{ - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status) -{ - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs -| do not cause an exception. 
Otherwise, the comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause -| an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point values `a' and `b' -| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception. -| The comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ -int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status) -{ - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 1; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - /*---------------------------------------------------------------------------- | Returns the result of converting the quadruple-precision floating-point | value `a' to the 32-bit two's complement integer format. The conversion @@ -6662,7 +6002,7 @@ int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status) int32_t float128_to_int32(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1; @@ -6691,7 +6031,7 @@ int32_t float128_to_int32(float128 a, float_status *status) int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1, savedASig; int32_t z; @@ -6741,7 +6081,7 @@ int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) int64_t float128_to_int64(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1; @@ -6784,7 +6124,7 @@ int64_t float128_to_int64(float128 a, float_status *status) int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, shiftCount; uint64_t aSig0, aSig1; int64_t z; @@ -6849,7 +6189,7 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) uint64_t float128_to_uint64(float128 a, float_status *status) { - flag aSign; + bool aSign; int aExp; int shiftCount; uint64_t aSig0, aSig1; @@ -6960,7 +6300,7 @@ uint32_t float128_to_uint32(float128 a, float_status *status) float32 float128_to_float32(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; uint32_t zSig; @@ -6995,7 +6335,7 @@ float32 float128_to_float32(float128 a, float_status *status) float64 float128_to_float64(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; @@ -7028,7 +6368,7 @@ float64 float128_to_float64(float128 a, float_status *status) floatx80 float128_to_floatx80(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; @@ -7038,7 +6378,8 @@ floatx80 float128_to_floatx80(float128 a, float_status *status) aSign = extractFloat128Sign( a ); if ( aExp == 0x7FFF ) { if ( aSig0 | aSig1 ) { - return commonNaNToFloatx80(float128ToCommonNaN(a, status), status); + floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status), status); + return floatx80_silence_nan(res, status); } return packFloatx80(aSign, floatx80_infinity_high, floatx80_infinity_low); @@ -7064,7 +6405,7 @@ floatx80 float128_to_floatx80(float128 a, float_status *status) float128 float128_round_to_int(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t lastBitMask, roundBitsMask; float128 z; @@ -7161,6 +6502,8 @@ float128 float128_round_to_int(float128 a, float_status *status) case float_round_to_odd: return 
packFloat128(aSign, 0x3FFF, 0, 0); + case float_round_to_zero: + break; } return packFloat128( aSign, 0, 0, 0 ); } @@ -7219,7 +6562,7 @@ float128 float128_round_to_int(float128 a, float_status *status) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 addFloat128Sigs(float128 a, float128 b, flag zSign, +static float128 addFloat128Sigs(float128 a, float128 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -7310,7 +6653,7 @@ static float128 addFloat128Sigs(float128 a, float128 b, flag zSign, | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 subFloat128Sigs(float128 a, float128 b, flag zSign, +static float128 subFloat128Sigs(float128 a, float128 b, bool zSign, float_status *status) { int32_t aExp, bExp, zExp; @@ -7398,7 +6741,7 @@ static float128 subFloat128Sigs(float128 a, float128 b, flag zSign, float128 float128_add(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; aSign = extractFloat128Sign( a ); bSign = extractFloat128Sign( b ); @@ -7419,7 +6762,7 @@ float128 float128_add(float128 a, float128 b, float_status *status) float128 float128_sub(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; aSign = extractFloat128Sign( a ); bSign = extractFloat128Sign( b ); @@ -7440,7 +6783,7 @@ float128 float128_sub(float128 a, float128 b, float_status *status) float128 float128_mul(float128 a, float128 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; @@ -7503,7 +6846,7 @@ float128 float128_mul(float128 a, float128 b, float_status *status) float128 float128_div(float128 a, float128 b, float_status *status) { - flag aSign, bSign, zSign; + bool aSign, bSign, zSign; int32_t aExp, bExp, zExp; uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; @@ -7590,7 +6933,7 @@ float128 float128_div(float128 a, float128 b, float_status *status) float128 float128_rem(float128 a, float128 b, float_status *status) { - flag aSign, zSign; + bool aSign, zSign; int32_t aExp, bExp, expDiff; uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; uint64_t allZero, alternateASig0, alternateASig1, sigMean1; @@ -7697,7 +7040,7 @@ float128 float128_rem(float128 a, float128 b, float_status *status) float128 float128_sqrt(float128 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp, zExp; uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; @@ -7757,248 +7100,10 @@ float128 float128_sqrt(float128 a, float_status *status) } -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float128_eq(float128 a, float128 b, float_status *status) -{ - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_le(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_lt(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float128_unordered(float128 a, float128 b, float_status *status) -{ - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_eq_quiet(float128 a, float128 b, float_status *status) -{ - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_le_quiet(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -int float128_lt_quiet(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_unordered_quiet(float128 a, float128 b, float_status *status) -{ - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - static inline int floatx80_compare_internal(floatx80 a, floatx80 b, int is_quiet, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { float_raise(float_flag_invalid, status); @@ -8027,6 +7132,13 @@ static inline int floatx80_compare_internal(floatx80 a, floatx80 b, return 1 - (2 * aSign); } } else { + /* Normalize pseudo-denormals before comparison */ + if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) { + ++a.high; + } + if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) { + ++b.high; + } if (a.low == b.low && a.high == b.high) { return float_relation_equal; } else { @@ -8035,20 +7147,20 @@ static inline int floatx80_compare_internal(floatx80 a, floatx80 b, } } -int floatx80_compare(floatx80 a, floatx80 b, float_status *status) +FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status) { return floatx80_compare_internal(a, b, 0, status); } -int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status) +FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status) { return floatx80_compare_internal(a, b, 1, status); } -static inline int float128_compare_internal(float128 a, float128 b, - int is_quiet, float_status *status) +static inline FloatRelation float128_compare_internal(float128 a, float128 b, + bool is_quiet, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; if (( ( extractFloat128Exp( a ) == 0x7fff ) && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) || @@ -8079,19 +7191,19 @@ static inline int float128_compare_internal(float128 a, float128 b, } } -int float128_compare(float128 a, float128 b, float_status *status) +FloatRelation float128_compare(float128 a,
float128 b, float_status *status) { return float128_compare_internal(a, b, 0, status); } -int float128_compare_quiet(float128 a, float128 b, float_status *status) +FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *status) { return float128_compare_internal(a, b, 1, status); } floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -8130,7 +7242,7 @@ floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) float128 float128_scalbn(float128 a, int n, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig0, aSig1; diff --git a/qemu/include/elf.h b/qemu/include/elf.h index 8fbfe60e09..5b06b55f28 100644 --- a/qemu/include/elf.h +++ b/qemu/include/elf.h @@ -160,6 +160,8 @@ typedef struct mips_elf_abiflags_v0 { #define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ +#define EM_AVR 83 /* AVR 8-bit microcontroller */ + #define EM_V850 87 /* NEC v850 */ #define EM_H8_300H 47 /* Hitachi H8/300H */ @@ -202,6 +204,8 @@ typedef struct mips_elf_abiflags_v0 { #define EM_MOXIE 223 /* Moxie processor family */ #define EM_MOXIE_OLD 0xFEED +#define EF_AVR_MACH 0x7F /* Mask for AVR e_flags to get core type */ + /* This is the info that is needed to parse the dynamic section of the file */ #define DT_NULL 0 #define DT_NEEDED 1 diff --git a/qemu/include/exec/cpu-all.h b/qemu/include/exec/cpu-all.h index ddac720740..48c7635daf 100644 --- a/qemu/include/exec/cpu-all.h +++ b/qemu/include/exec/cpu-all.h @@ -368,6 +368,7 @@ static inline bool tlb_hit(struct uc_struct *uc, target_ulong tlb_addr, target_u return tlb_hit_page(uc, tlb_addr, addr & TARGET_PAGE_MASK); } +/* Returns: 0 on success, -1 on error */ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, void *ptr, target_ulong len, bool is_write); diff --git a/qemu/include/exec/cpu-common.h b/qemu/include/exec/cpu-common.h index 28ba0e0e22..a532215518 100644 --- a/qemu/include/exec/cpu-common.h +++ b/qemu/include/exec/cpu-common.h @@ -31,9 +31,6 @@ typedef uintptr_t ram_addr_t; /* memory API */ -typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value); -typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr); - /* This should not be used by devices. */ ram_addr_t qemu_ram_addr_from_host(struct uc_struct *uc, void *ptr); RAMBlock *qemu_ram_block_from_host(struct uc_struct *uc, void *ptr, diff --git a/qemu/include/exec/cpu-defs.h b/qemu/include/exec/cpu-defs.h index 5c11015565..a7da99f7d0 100644 --- a/qemu/include/exec/cpu-defs.h +++ b/qemu/include/exec/cpu-defs.h @@ -96,8 +96,13 @@ typedef uint64_t target_ulong; * Skylake's Level-2 STLB has 16 1G entries. * Also, make sure we do not size the TLB past the guest's address space. 
*/ -# define CPU_TLB_DYN_MAX_BITS \ +# ifdef TARGET_PAGE_BITS_VARY +# define CPU_TLB_DYN_MAX_BITS \ MIN(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS) +# else +# define CPU_TLB_DYN_MAX_BITS \ + MIN_CONST(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS) +# endif # endif typedef struct CPUTLBEntry { diff --git a/qemu/include/exec/cpu_ldst.h b/qemu/include/exec/cpu_ldst.h index b8482bced1..29c21c6678 100644 --- a/qemu/include/exec/cpu_ldst.h +++ b/qemu/include/exec/cpu_ldst.h @@ -25,13 +25,13 @@ * * The syntax for the accessors is: * - * load: cpu_ld{sign}{size}_{mmusuffix}(env, ptr) - * cpu_ld{sign}{size}_{mmusuffix}_ra(env, ptr, retaddr) - * cpu_ld{sign}{size}_mmuidx_ra(env, ptr, mmu_idx, retaddr) + * load: cpu_ld{sign}{size}{end}_{mmusuffix}(env, ptr) + * cpu_ld{sign}{size}{end}_{mmusuffix}_ra(env, ptr, retaddr) + * cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmu_idx, retaddr) * - * store: cpu_st{size}_{mmusuffix}(env, ptr, val) - * cpu_st{size}_{mmusuffix}_ra(env, ptr, val, retaddr) - * cpu_st{size}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr) + * store: cpu_st{size}{end}_{mmusuffix}(env, ptr, val) + * cpu_st{size}{end}_{mmusuffix}_ra(env, ptr, val, retaddr) + * cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr) * * sign is: * (empty): for 32 and 64 bit sizes @@ -44,6 +44,11 @@ * l: 32 bits * q: 64 bits * + * end is: + * (empty): for target native endian, or for 8 bit access + * _be: for forced big endian + * _le: for forced little endian + * * mmusuffix is one of the generic suffixes "data" or "code", or "mmuidx". * The "mmuidx" suffix carries an extra mmu_idx argument that specifies * the index to use; the "data" and "code" suffixes take the index from @@ -59,32 +64,58 @@ typedef target_ulong abi_ptr; #define TARGET_ABI_FMT_ptr TARGET_ABI_FMT_lx uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr); -uint32_t cpu_lduw_data(CPUArchState *env, abi_ptr ptr); -uint32_t cpu_ldl_data(CPUArchState *env, abi_ptr ptr); -uint64_t cpu_ldq_data(CPUArchState *env, abi_ptr ptr); int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr); -int cpu_ldsw_data(CPUArchState *env, abi_ptr ptr); -uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -uint32_t cpu_lduw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -uint32_t cpu_ldl_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -uint64_t cpu_ldq_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); -int cpu_ldsw_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr); +uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr); +int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr); +uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr); +uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr); + +uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr); +int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr); +uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr); +uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr); + +uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); + +uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); + +uint32_t cpu_lduw_le_data_ra(CPUArchState *env, 
abi_ptr ptr, uintptr_t ra); +int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); +uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val); -void cpu_stw_data(CPUArchState *env, abi_ptr ptr, uint32_t val); -void cpu_stl_data(CPUArchState *env, abi_ptr ptr, uint32_t val); -void cpu_stq_data(CPUArchState *env, abi_ptr ptr, uint64_t val); + +void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val); + +void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); +void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val); void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t retaddr); -void cpu_stw_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr); -void cpu_stl_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr); -void cpu_stq_data_ra(CPUArchState *env, abi_ptr ptr, - uint64_t val, uintptr_t retaddr); + +void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr, + uint64_t val, uintptr_t ra); + +void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint32_t val, uintptr_t ra); +void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr, + uint64_t val, uintptr_t ra); + /* Needed for TCG_OVERSIZED_GUEST */ #include "tcg/tcg.h" @@ -98,9 +129,7 @@ static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry) static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, target_ulong addr) { -#ifdef TARGET_ARM - struct uc_struct *uc = env->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = env->uc; uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS; return (addr >> TARGET_PAGE_BITS) & size_mask; @@ -115,27 +144,90 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, int mmu_idx, uintptr_t ra); -uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, int mmu_idx, uintptr_t ra); -int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); + +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); + +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + 
int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra); void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, int mmu_idx, uintptr_t retaddr); -void cpu_stw_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stl_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stq_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, - int mmu_idx, uintptr_t retaddr); +void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); + +void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t retaddr); +void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t retaddr); + +#ifdef TARGET_WORDS_BIGENDIAN +# define cpu_lduw_data cpu_lduw_be_data +# define cpu_ldsw_data cpu_ldsw_be_data +# define cpu_ldl_data cpu_ldl_be_data +# define cpu_ldq_data cpu_ldq_be_data +# define cpu_lduw_data_ra cpu_lduw_be_data_ra +# define cpu_ldsw_data_ra cpu_ldsw_be_data_ra +# define cpu_ldl_data_ra cpu_ldl_be_data_ra +# define cpu_ldq_data_ra cpu_ldq_be_data_ra +# define cpu_lduw_mmuidx_ra cpu_lduw_be_mmuidx_ra +# define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra +# define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra +# define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra +# define cpu_stw_data cpu_stw_be_data +# define cpu_stl_data cpu_stl_be_data +# define cpu_stq_data cpu_stq_be_data +# define cpu_stw_data_ra cpu_stw_be_data_ra +# define cpu_stl_data_ra cpu_stl_be_data_ra +# define cpu_stq_data_ra cpu_stq_be_data_ra +# define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra +# define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra +# define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra +#else +# define cpu_lduw_data cpu_lduw_le_data +# define cpu_ldsw_data cpu_ldsw_le_data +# define cpu_ldl_data cpu_ldl_le_data +# define cpu_ldq_data cpu_ldq_le_data +# define cpu_lduw_data_ra cpu_lduw_le_data_ra +# define cpu_ldsw_data_ra cpu_ldsw_le_data_ra +# define cpu_ldl_data_ra cpu_ldl_le_data_ra +# define cpu_ldq_data_ra cpu_ldq_le_data_ra +# define cpu_lduw_mmuidx_ra cpu_lduw_le_mmuidx_ra +# define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra +# define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra +# define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra +# define cpu_stw_data cpu_stw_le_data +# define cpu_stl_data cpu_stl_le_data +# define cpu_stq_data cpu_stq_le_data +# define cpu_stw_data_ra cpu_stw_le_data_ra +# define cpu_stl_data_ra cpu_stl_le_data_ra +# define cpu_stq_data_ra cpu_stq_le_data_ra +# define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra +# define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra +# define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra +#endif uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr); uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr); diff --git a/qemu/include/exec/exec-all.h b/qemu/include/exec/exec-all.h index 68c656787f..a717c75adc 100644 --- a/qemu/include/exec/exec-all.h +++ b/qemu/include/exec/exec-all.h @@ -108,6 +108,11 @@ void cpu_address_space_init(CPUState *cpu, int asidx, MemoryRegion *mr); * @cpu: CPU whose TLB should be 
initialized */ void tlb_init(CPUState *cpu); +/** + * tlb_destroy - destroy a CPU's TLB + * @cpu: CPU whose TLB should be destroyed + */ +void tlb_destroy(CPUState *cpu); /** * tlb_flush_page: * @cpu: CPU whose TLB should be flushed @@ -264,6 +269,23 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, void tlb_set_page(CPUState *cpu, target_ulong vaddr, hwaddr paddr, int prot, int mmu_idx, target_ulong size); +/** + * probe_access: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @size: size of the access + * @access_type: read, write or execute permission + * @mmu_idx: MMU index to use for lookup + * @retaddr: return address for unwinding + * + * Look up the guest virtual address @addr. Raise an exception if the + * page does not satisfy @access_type. Raise an exception if the + * access (@addr, @size) hits a watchpoint. For writes, mark a clean + * page as dirty. + * + * Finally, return the host address for a page that is backed by RAM, + * or NULL if the page requires I/O. + */ void *probe_access(CPUArchState *env, target_ulong addr, int size, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr); @@ -279,6 +301,28 @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size, return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr); } +/** + * probe_access_flags: + * @env: CPUArchState + * @addr: guest virtual address to look up + * @access_type: read, write or execute permission + * @mmu_idx: MMU index to use for lookup + * @nonfault: suppress the fault + * @phost: return value for host address + * @retaddr: return address for unwinding + * + * Similar to probe_access, loosely returning the TLB_FLAGS_MASK for + * the page, and storing the host address for RAM in @phost. + * + * If @nonfault is set, do not raise an exception but return TLB_INVALID_MASK. + * Do not handle watchpoints, but include TLB_WATCHPOINT in the returned flags. + * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags. + * For simplicity, all "mmio-like" flags are folded to TLB_MMIO. + */ +int probe_access_flags(CPUArchState *env, target_ulong addr, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t retaddr); + #define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */ /* Estimated block size for TB allocation. */ diff --git a/qemu/include/fpu/softfloat-helpers.h b/qemu/include/fpu/softfloat-helpers.h index e0baf24c8f..9ddecba70f 100644 --- a/qemu/include/fpu/softfloat-helpers.h +++ b/qemu/include/fpu/softfloat-helpers.h @@ -53,12 +53,12 @@ this code that are retained. 
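/*
 * Illustrative sketch (not part of this patch): the probe_access_flags()
 * API documented in the exec-all.h hunk above lets a target helper probe a
 * guest page without faulting. A minimal example, assuming a caller-supplied
 * mmu_idx and an MMU_DATA_LOAD access; the helper name is hypothetical and
 * would live in a target .c file that includes "exec/exec-all.h" and
 * "exec/cpu-all.h".
 */
static bool sketch_page_is_readable_ram(CPUArchState *env, target_ulong addr,
                                        int mmu_idx, uintptr_t retaddr)
{
    void *host = NULL;
    int flags = probe_access_flags(env, addr, MMU_DATA_LOAD, mmu_idx,
                                   true /* nonfault */, &host, retaddr);

    if (flags & TLB_INVALID_MASK) {
        /* Unmapped page: nonfault=true suppressed the exception. */
        return false;
    }
    /* All "mmio-like" flags are folded to TLB_MMIO by probe_access_flags. */
    return (flags & TLB_MMIO) == 0 && host != NULL;
}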
#include "fpu/softfloat-types.h" -static inline void set_float_detect_tininess(int val, float_status *status) +static inline void set_float_detect_tininess(bool val, float_status *status) { - status->float_detect_tininess = val; + status->tininess_before_rounding = val; } -static inline void set_float_rounding_mode(int val, float_status *status) +static inline void set_float_rounding_mode(FloatRoundMode val, float_status *status) { status->float_rounding_mode = val; } @@ -74,32 +74,32 @@ static inline void set_floatx80_rounding_precision(int val, status->floatx80_rounding_precision = val; } -static inline void set_flush_to_zero(flag val, float_status *status) +static inline void set_flush_to_zero(bool val, float_status *status) { status->flush_to_zero = val; } -static inline void set_flush_inputs_to_zero(flag val, float_status *status) +static inline void set_flush_inputs_to_zero(bool val, float_status *status) { status->flush_inputs_to_zero = val; } -static inline void set_default_nan_mode(flag val, float_status *status) +static inline void set_default_nan_mode(bool val, float_status *status) { status->default_nan_mode = val; } -static inline void set_snan_bit_is_one(flag val, float_status *status) +static inline void set_snan_bit_is_one(bool val, float_status *status) { status->snan_bit_is_one = val; } static inline int get_float_detect_tininess(float_status *status) { - return status->float_detect_tininess; + return status->tininess_before_rounding; } -static inline int get_float_rounding_mode(float_status *status) +static inline FloatRoundMode get_float_rounding_mode(float_status *status) { return status->float_rounding_mode; } @@ -114,17 +114,17 @@ static inline int get_floatx80_rounding_precision(float_status *status) return status->floatx80_rounding_precision; } -static inline flag get_flush_to_zero(float_status *status) +static inline bool get_flush_to_zero(float_status *status) { return status->flush_to_zero; } -static inline flag get_flush_inputs_to_zero(float_status *status) +static inline bool get_flush_inputs_to_zero(float_status *status) { return status->flush_inputs_to_zero; } -static inline flag get_default_nan_mode(float_status *status) +static inline bool get_default_nan_mode(float_status *status) { return status->default_nan_mode; } diff --git a/qemu/include/fpu/softfloat-macros.h b/qemu/include/fpu/softfloat-macros.h index afae4f7404..38d8c97dce 100644 --- a/qemu/include/fpu/softfloat-macros.h +++ b/qemu/include/fpu/softfloat-macros.h @@ -756,10 +756,10 @@ static inline uint32_t estimateSqrt32(int aExp, uint32_t a) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 == b0 ) && ( a1 == b1 ); + return a0 == b0 && a1 == b1; } @@ -769,10 +769,10 @@ static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + return a0 < b0 || (a0 == b0 && a1 <= b1); } @@ -782,10 +782,10 @@ static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) | returns 0. 
*----------------------------------------------------------------------------*/ -static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + return a0 < b0 || (a0 == b0 && a1 < b1); } @@ -795,10 +795,10 @@ static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) +static inline bool ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { - return ( a0 != b0 ) || ( a1 != b1 ); + return a0 != b0 || a1 != b1; } diff --git a/qemu/include/fpu/softfloat-types.h b/qemu/include/fpu/softfloat-types.h index 565dced559..00bc527d4a 100644 --- a/qemu/include/fpu/softfloat-types.h +++ b/qemu/include/fpu/softfloat-types.h @@ -82,12 +82,6 @@ this code that are retained. #include -/* This 'flag' type must be able to hold at least 0 and 1. It should - * probably be replaced with 'bool' but the uses would need to be audited - * to check that they weren't accidentally relying on it being a larger type. - */ -typedef uint8_t flag; - /* * Software IEC/IEEE floating-point types. */ @@ -124,16 +118,25 @@ typedef struct { * Software IEC/IEEE floating-point underflow tininess-detection mode. */ -enum { - float_tininess_after_rounding = 0, - float_tininess_before_rounding = 1 -}; +#define float_tininess_after_rounding false +#define float_tininess_before_rounding true /* *Software IEC/IEEE floating-point rounding mode. */ -enum { +#ifdef _MSC_VER +#define ENUM_PACKED \ + __pragma(pack(push, 1)) \ + enum +#define ENUM_PACKED_END \ + __pragma(pack(pop)) +#else +#define ENUM_PACKED enum __attribute__((packed)) +#define ENUM_PACKED_END +#endif + +typedef ENUM_PACKED { float_round_nearest_even = 0, float_round_down = 1, float_round_up = 2, @@ -141,7 +144,7 @@ enum { float_round_ties_away = 4, /* Not an IEEE rounding mode: round to the closest odd mantissa value */ float_round_to_odd = 5, -}; +} ENUM_PACKED_END FloatRoundMode; /* * Software IEC/IEEE floating-point exception flags. @@ -166,17 +169,17 @@ enum { */ typedef struct float_status { - signed char float_detect_tininess; - signed char float_rounding_mode; + FloatRoundMode float_rounding_mode; uint8_t float_exception_flags; signed char floatx80_rounding_precision; + bool tininess_before_rounding; /* should denormalised results go to zero and set the inexact flag? */ - flag flush_to_zero; + bool flush_to_zero; /* should denormalised inputs go to zero and set the input_denormal flag? */ - flag flush_inputs_to_zero; - flag default_nan_mode; + bool flush_inputs_to_zero; + bool default_nan_mode; /* not always used -- see snan_bit_is_one() in softfloat-specialize.h */ - flag snan_bit_is_one; + bool snan_bit_is_one; } float_status; #endif /* SOFTFLOAT_TYPES_H */ diff --git a/qemu/include/fpu/softfloat.h b/qemu/include/fpu/softfloat.h index ecb8ba0114..76d023725c 100644 --- a/qemu/include/fpu/softfloat.h +++ b/qemu/include/fpu/softfloat.h @@ -85,12 +85,12 @@ this code that are retained. 
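/*
 * Illustrative sketch (not part of this patch): with the old 'flag' typedef
 * removed and FloatRoundMode now a packed enum, target reset code configures
 * a float_status through the softfloat-helpers.h setters shown above. The
 * function name is hypothetical; it assumes "fpu/softfloat.h" (which pulls
 * in softfloat-helpers.h) is included.
 */
static void sketch_reset_fp_status(float_status *s)
{
    set_float_detect_tininess(float_tininess_before_rounding, s);
    set_float_rounding_mode(float_round_nearest_even, s);
    set_float_exception_flags(0, s);
    set_flush_to_zero(false, s);
    set_flush_inputs_to_zero(false, s);
    set_default_nan_mode(false, s);
}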
/*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point ordering relations *----------------------------------------------------------------------------*/ -enum { +typedef enum { float_relation_less = -1, float_relation_equal = 0, float_relation_greater = 1, float_relation_unordered = 2 -}; +} FloatRelation; #include "fpu/softfloat-types.h" #include "fpu/softfloat-helpers.h" @@ -186,9 +186,9 @@ float32 float16_to_float32(float16, bool ieee, float_status *status); float16 float64_to_float16(float64 a, bool ieee, float_status *status); float64 float16_to_float64(float16 a, bool ieee, float_status *status); -int16_t float16_to_int16_scalbn(float16, int, int, float_status *status); -int32_t float16_to_int32_scalbn(float16, int, int, float_status *status); -int64_t float16_to_int64_scalbn(float16, int, int, float_status *status); +int16_t float16_to_int16_scalbn(float16, FloatRoundMode, int, float_status *status); +int32_t float16_to_int32_scalbn(float16, FloatRoundMode, int, float_status *status); +int64_t float16_to_int64_scalbn(float16, FloatRoundMode, int, float_status *status); int16_t float16_to_int16(float16, float_status *status); int32_t float16_to_int32(float16, float_status *status); @@ -198,9 +198,9 @@ int16_t float16_to_int16_round_to_zero(float16, float_status *status); int32_t float16_to_int32_round_to_zero(float16, float_status *status); int64_t float16_to_int64_round_to_zero(float16, float_status *status); -uint16_t float16_to_uint16_scalbn(float16 a, int, int, float_status *status); -uint32_t float16_to_uint32_scalbn(float16 a, int, int, float_status *status); -uint64_t float16_to_uint64_scalbn(float16 a, int, int, float_status *status); +uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode, int, float_status *status); +uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode, int, float_status *status); +uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode, int, float_status *status); uint16_t float16_to_uint16(float16 a, float_status *status); uint32_t float16_to_uint32(float16 a, float_status *status); @@ -228,34 +228,34 @@ float16 float16_maxnum(float16, float16, float_status *status); float16 float16_minnummag(float16, float16, float_status *status); float16 float16_maxnummag(float16, float16, float_status *status); float16 float16_sqrt(float16, float_status *status); -int float16_compare(float16, float16, float_status *status); -int float16_compare_quiet(float16, float16, float_status *status); +FloatRelation float16_compare(float16, float16, float_status *status); +FloatRelation float16_compare_quiet(float16, float16, float_status *status); -int float16_is_quiet_nan(float16, float_status *status); -int float16_is_signaling_nan(float16, float_status *status); +bool float16_is_quiet_nan(float16, float_status *status); +bool float16_is_signaling_nan(float16, float_status *status); float16 float16_silence_nan(float16, float_status *status); -static inline int float16_is_any_nan(float16 a) +static inline bool float16_is_any_nan(float16 a) { return ((float16_val(a) & ~0x8000) > 0x7c00); } -static inline int float16_is_neg(float16 a) +static inline bool float16_is_neg(float16 a) { return float16_val(a) >> 15; } -static inline int float16_is_infinity(float16 a) +static inline bool float16_is_infinity(float16 a) { return (float16_val(a) & 0x7fff) == 0x7c00; } -static inline int float16_is_zero(float16 a) +static inline bool float16_is_zero(float16 a) { return (float16_val(a) & 0x7fff) == 0; } -static 
inline int float16_is_zero_or_denormal(float16 a) +static inline bool float16_is_zero_or_denormal(float16 a) { return (float16_val(a) & 0x7c00) == 0; } @@ -298,9 +298,9 @@ float16 float16_default_nan(float_status *status); | Software IEC/IEEE single-precision conversion routines. *----------------------------------------------------------------------------*/ -int16_t float32_to_int16_scalbn(float32, int, int, float_status *status); -int32_t float32_to_int32_scalbn(float32, int, int, float_status *status); -int64_t float32_to_int64_scalbn(float32, int, int, float_status *status); +int16_t float32_to_int16_scalbn(float32, FloatRoundMode, int, float_status *status); +int32_t float32_to_int32_scalbn(float32, FloatRoundMode, int, float_status *status); +int64_t float32_to_int64_scalbn(float32, FloatRoundMode, int, float_status *status); int16_t float32_to_int16(float32, float_status *status); int32_t float32_to_int32(float32, float_status *status); @@ -310,9 +310,9 @@ int16_t float32_to_int16_round_to_zero(float32, float_status *status); int32_t float32_to_int32_round_to_zero(float32, float_status *status); int64_t float32_to_int64_round_to_zero(float32, float_status *status); -uint16_t float32_to_uint16_scalbn(float32, int, int, float_status *status); -uint32_t float32_to_uint32_scalbn(float32, int, int, float_status *status); -uint64_t float32_to_uint64_scalbn(float32, int, int, float_status *status); +uint16_t float32_to_uint16_scalbn(float32, FloatRoundMode, int, float_status *status); +uint32_t float32_to_uint32_scalbn(float32, FloatRoundMode, int, float_status *status); +uint64_t float32_to_uint64_scalbn(float32, FloatRoundMode, int, float_status *status); uint16_t float32_to_uint16(float32, float_status *status); uint32_t float32_to_uint32(float32, float_status *status); @@ -339,24 +339,16 @@ float32 float32_muladd(float32, float32, float32, int, float_status *status); float32 float32_sqrt(float32, float_status *status); float32 float32_exp2(float32, float_status *status); float32 float32_log2(float32, float_status *status); -int float32_eq(float32, float32, float_status *status); -int float32_le(float32, float32, float_status *status); -int float32_lt(float32, float32, float_status *status); -int float32_unordered(float32, float32, float_status *status); -int float32_eq_quiet(float32, float32, float_status *status); -int float32_le_quiet(float32, float32, float_status *status); -int float32_lt_quiet(float32, float32, float_status *status); -int float32_unordered_quiet(float32, float32, float_status *status); -int float32_compare(float32, float32, float_status *status); -int float32_compare_quiet(float32, float32, float_status *status); +FloatRelation float32_compare(float32, float32, float_status *status); +FloatRelation float32_compare_quiet(float32, float32, float_status *status); float32 float32_min(float32, float32, float_status *status); float32 float32_max(float32, float32, float_status *status); float32 float32_minnum(float32, float32, float_status *status); float32 float32_maxnum(float32, float32, float_status *status); float32 float32_minnummag(float32, float32, float_status *status); float32 float32_maxnummag(float32, float32, float_status *status); -int float32_is_quiet_nan(float32, float_status *status); -int float32_is_signaling_nan(float32, float_status *status); +bool float32_is_quiet_nan(float32, float_status *status); +bool float32_is_signaling_nan(float32, float_status *status); float32 float32_silence_nan(float32, float_status *status); float32 float32_scalbn(float32, 
int, float_status *status); @@ -376,27 +368,27 @@ static inline float32 float32_chs(float32 a) return make_float32(float32_val(a) ^ 0x80000000); } -static inline int float32_is_infinity(float32 a) +static inline bool float32_is_infinity(float32 a) { return (float32_val(a) & 0x7fffffff) == 0x7f800000; } -static inline int float32_is_neg(float32 a) +static inline bool float32_is_neg(float32 a) { return float32_val(a) >> 31; } -static inline int float32_is_zero(float32 a) +static inline bool float32_is_zero(float32 a) { return (float32_val(a) & 0x7fffffff) == 0; } -static inline int float32_is_any_nan(float32 a) +static inline bool float32_is_any_nan(float32 a) { return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL); } -static inline int float32_is_zero_or_denormal(float32 a) +static inline bool float32_is_zero_or_denormal(float32 a) { return (float32_val(a) & 0x7f800000) == 0; } @@ -421,6 +413,47 @@ static inline float32 float32_set_sign(float32 a, int sign) return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31)); } +static inline bool float32_eq(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) == float_relation_equal; +} + +static inline bool float32_le(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float32_lt(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) < float_relation_equal; +} + +static inline bool float32_unordered(float32 a, float32 b, float_status *s) +{ + return float32_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float32_eq_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float32_le_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float32_lt_quiet(float32 a, float32 b, float_status *s) +{ + return float32_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float32_unordered_quiet(float32 a, float32 b, + float_status *s) +{ + return float32_compare_quiet(a, b, s) == float_relation_unordered; +} + #define float32_zero make_float32(0) #define float32_half make_float32(0x3f000000) #define float32_one make_float32(0x3f800000) @@ -440,7 +473,7 @@ static inline float32 float32_set_sign(float32 a, int sign) | significand. *----------------------------------------------------------------------------*/ -static inline float32 packFloat32(flag zSign, int zExp, uint32_t zSig) +static inline float32 packFloat32(bool zSign, int zExp, uint32_t zSig) { return make_float32( (((uint32_t)zSign) << 31) + (((uint32_t)zExp) << 23) + zSig); @@ -455,9 +488,9 @@ float32 float32_default_nan(float_status *status); | Software IEC/IEEE double-precision conversion routines. 
*----------------------------------------------------------------------------*/ -int16_t float64_to_int16_scalbn(float64, int, int, float_status *status); -int32_t float64_to_int32_scalbn(float64, int, int, float_status *status); -int64_t float64_to_int64_scalbn(float64, int, int, float_status *status); +int16_t float64_to_int16_scalbn(float64, FloatRoundMode, int, float_status *status); +int32_t float64_to_int32_scalbn(float64, FloatRoundMode, int, float_status *status); +int64_t float64_to_int64_scalbn(float64, FloatRoundMode, int, float_status *status); int16_t float64_to_int16(float64, float_status *status); int32_t float64_to_int32(float64, float_status *status); @@ -467,9 +500,9 @@ int16_t float64_to_int16_round_to_zero(float64, float_status *status); int32_t float64_to_int32_round_to_zero(float64, float_status *status); int64_t float64_to_int64_round_to_zero(float64, float_status *status); -uint16_t float64_to_uint16_scalbn(float64, int, int, float_status *status); -uint32_t float64_to_uint32_scalbn(float64, int, int, float_status *status); -uint64_t float64_to_uint64_scalbn(float64, int, int, float_status *status); +uint16_t float64_to_uint16_scalbn(float64, FloatRoundMode, int, float_status *status); +uint32_t float64_to_uint32_scalbn(float64, FloatRoundMode, int, float_status *status); +uint64_t float64_to_uint64_scalbn(float64, FloatRoundMode, int, float_status *status); uint16_t float64_to_uint16(float64, float_status *status); uint32_t float64_to_uint32(float64, float_status *status); @@ -495,24 +528,16 @@ float64 float64_rem(float64, float64, float_status *status); float64 float64_muladd(float64, float64, float64, int, float_status *status); float64 float64_sqrt(float64, float_status *status); float64 float64_log2(float64, float_status *status); -int float64_eq(float64, float64, float_status *status); -int float64_le(float64, float64, float_status *status); -int float64_lt(float64, float64, float_status *status); -int float64_unordered(float64, float64, float_status *status); -int float64_eq_quiet(float64, float64, float_status *status); -int float64_le_quiet(float64, float64, float_status *status); -int float64_lt_quiet(float64, float64, float_status *status); -int float64_unordered_quiet(float64, float64, float_status *status); -int float64_compare(float64, float64, float_status *status); -int float64_compare_quiet(float64, float64, float_status *status); +FloatRelation float64_compare(float64, float64, float_status *status); +FloatRelation float64_compare_quiet(float64, float64, float_status *status); float64 float64_min(float64, float64, float_status *status); float64 float64_max(float64, float64, float_status *status); float64 float64_minnum(float64, float64, float_status *status); float64 float64_maxnum(float64, float64, float_status *status); float64 float64_minnummag(float64, float64, float_status *status); float64 float64_maxnummag(float64, float64, float_status *status); -int float64_is_quiet_nan(float64 a, float_status *status); -int float64_is_signaling_nan(float64, float_status *status); +bool float64_is_quiet_nan(float64 a, float_status *status); +bool float64_is_signaling_nan(float64, float_status *status); float64 float64_silence_nan(float64, float_status *status); float64 float64_scalbn(float64, int, float_status *status); @@ -532,27 +557,27 @@ static inline float64 float64_chs(float64 a) return make_float64(float64_val(a) ^ 0x8000000000000000LL); } -static inline int float64_is_infinity(float64 a) +static inline bool float64_is_infinity(float64 a) { return 
(float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL; } -static inline int float64_is_neg(float64 a) +static inline bool float64_is_neg(float64 a) { return float64_val(a) >> 63; } -static inline int float64_is_zero(float64 a) +static inline bool float64_is_zero(float64 a) { return (float64_val(a) & 0x7fffffffffffffffLL) == 0; } -static inline int float64_is_any_nan(float64 a) +static inline bool float64_is_any_nan(float64 a) { return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL); } -static inline int float64_is_zero_or_denormal(float64 a) +static inline bool float64_is_zero_or_denormal(float64 a) { return (float64_val(a) & 0x7ff0000000000000LL) == 0; } @@ -578,6 +603,47 @@ static inline float64 float64_set_sign(float64 a, int sign) | ((int64_t)sign << 63)); } +static inline bool float64_eq(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) == float_relation_equal; +} + +static inline bool float64_le(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float64_lt(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) < float_relation_equal; +} + +static inline bool float64_unordered(float64 a, float64 b, float_status *s) +{ + return float64_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float64_eq_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float64_le_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float64_lt_quiet(float64 a, float64 b, float_status *s) +{ + return float64_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float64_unordered_quiet(float64 a, float64 b, + float_status *s) +{ + return float64_compare_quiet(a, b, s) == float_relation_unordered; +} + #define float64_zero make_float64(0) #define float64_half make_float64(0x3fe0000000000000LL) #define float64_one make_float64(0x3ff0000000000000LL) @@ -617,18 +683,12 @@ floatx80 floatx80_add(floatx80, floatx80, float_status *status); floatx80 floatx80_sub(floatx80, floatx80, float_status *status); floatx80 floatx80_mul(floatx80, floatx80, float_status *status); floatx80 floatx80_div(floatx80, floatx80, float_status *status); +floatx80 floatx80_modrem(floatx80, floatx80, bool, uint64_t *, float_status *status); +floatx80 floatx80_mod(floatx80, floatx80, float_status *status); floatx80 floatx80_rem(floatx80, floatx80, float_status *status); floatx80 floatx80_sqrt(floatx80, float_status *status); -int floatx80_eq(floatx80, floatx80, float_status *status); -int floatx80_le(floatx80, floatx80, float_status *status); -int floatx80_lt(floatx80, floatx80, float_status *status); -int floatx80_unordered(floatx80, floatx80, float_status *status); -int floatx80_eq_quiet(floatx80, floatx80, float_status *status); -int floatx80_le_quiet(floatx80, floatx80, float_status *status); -int floatx80_lt_quiet(floatx80, floatx80, float_status *status); -int floatx80_unordered_quiet(floatx80, floatx80, float_status *status); -int floatx80_compare(floatx80, floatx80, float_status *status); -int floatx80_compare_quiet(floatx80, floatx80, float_status *status); +FloatRelation floatx80_compare(floatx80, floatx80, float_status *status); +FloatRelation floatx80_compare_quiet(floatx80, floatx80, float_status *status); int floatx80_is_quiet_nan(floatx80, float_status *status); int 
floatx80_is_signaling_nan(floatx80, float_status *status); floatx80 floatx80_silence_nan(floatx80, float_status *status); @@ -646,7 +706,7 @@ static inline floatx80 floatx80_chs(floatx80 a) return a; } -static inline int floatx80_is_infinity(floatx80 a) +static inline bool floatx80_is_infinity(floatx80 a) { #if defined(TARGET_M68K) return (a.high & 0x7fff) == floatx80_infinity.high && !(a.low << 1); @@ -656,26 +716,67 @@ static inline int floatx80_is_infinity(floatx80 a) #endif } -static inline int floatx80_is_neg(floatx80 a) +static inline bool floatx80_is_neg(floatx80 a) { return a.high >> 15; } -static inline int floatx80_is_zero(floatx80 a) +static inline bool floatx80_is_zero(floatx80 a) { return (a.high & 0x7fff) == 0 && a.low == 0; } -static inline int floatx80_is_zero_or_denormal(floatx80 a) +static inline bool floatx80_is_zero_or_denormal(floatx80 a) { return (a.high & 0x7fff) == 0; } -static inline int floatx80_is_any_nan(floatx80 a) +static inline bool floatx80_is_any_nan(floatx80 a) { return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1); } +static inline bool floatx80_eq(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) == float_relation_equal; +} + +static inline bool floatx80_le(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) <= float_relation_equal; +} + +static inline bool floatx80_lt(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) < float_relation_equal; +} + +static inline bool floatx80_unordered(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare(a, b, s) == float_relation_unordered; +} + +static inline bool floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool floatx80_le_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool floatx80_unordered_quiet(floatx80 a, floatx80 b, + float_status *s) +{ + return floatx80_compare_quiet(a, b, s) == float_relation_unordered; +} + /*---------------------------------------------------------------------------- | Return whether the given value is an invalid floatx80 encoding. | Invalid floatx80 encodings arise when the integer bit is not set, but @@ -688,10 +789,35 @@ static inline int floatx80_is_any_nan(floatx80 a) *----------------------------------------------------------------------------*/ static inline bool floatx80_invalid_encoding(floatx80 a) { +#if defined(TARGET_M68K) + /*------------------------------------------------------------------------- + | With m68k, the explicit integer bit can be zero in the case of: + | - zeros (exp == 0, mantissa == 0) + | - denormalized numbers (exp == 0, mantissa != 0) + | - unnormalized numbers (exp != 0, exp < 0x7FFF) + | - infinities (exp == 0x7FFF, mantissa == 0) + | - not-a-numbers (exp == 0x7FFF, mantissa != 0) + | + | For infinities and NaNs, the explicit integer bit can be either one or + | zero. + | + | The IEEE 754 standard does not define a zero integer bit. Such a number + | is an unnormalized number. Hardware does not directly support + | denormalized and unnormalized numbers, but implicitly supports them by + | trapping them as unimplemented data types, allowing efficient conversion + | in software. 
+ | + | See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL", + | "1.6 FLOATING-POINT DATA TYPES" + *------------------------------------------------------------------------*/ + return false; +#else return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0; +#endif } #define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) +#define floatx80_zero_init make_floatx80_init(0x0000, 0x0000000000000000LL) #define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL) #define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL) #define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL) @@ -722,7 +848,7 @@ static inline int32_t extractFloatx80Exp(floatx80 a) | `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloatx80Sign(floatx80 a) +static inline bool extractFloatx80Sign(floatx80 a) { return a.high >> 15; } @@ -732,7 +858,7 @@ static inline flag extractFloatx80Sign(floatx80 a) | extended double-precision floating-point value, returning the result. *----------------------------------------------------------------------------*/ -static inline floatx80 packFloatx80(flag zSign, int32_t zExp, uint64_t zSig) +static inline floatx80 packFloatx80(bool zSign, int32_t zExp, uint64_t zSig) { floatx80 z; @@ -783,7 +909,7 @@ floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status); | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -797,7 +923,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, *----------------------------------------------------------------------------*/ floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, - flag zSign, int32_t zExp, + bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -831,18 +957,10 @@ float128 float128_mul(float128, float128, float_status *status); float128 float128_div(float128, float128, float_status *status); float128 float128_rem(float128, float128, float_status *status); float128 float128_sqrt(float128, float_status *status); -int float128_eq(float128, float128, float_status *status); -int float128_le(float128, float128, float_status *status); -int float128_lt(float128, float128, float_status *status); -int float128_unordered(float128, float128, float_status *status); -int float128_eq_quiet(float128, float128, float_status *status); -int float128_le_quiet(float128, float128, float_status *status); -int float128_lt_quiet(float128, float128, float_status *status); -int float128_unordered_quiet(float128, float128, float_status *status); -int float128_compare(float128, float128, float_status *status); -int float128_compare_quiet(float128, float128, float_status *status); -int float128_is_quiet_nan(float128, float_status *status); -int float128_is_signaling_nan(float128, float_status *status); +FloatRelation float128_compare(float128, float128, float_status *status); +FloatRelation float128_compare_quiet(float128, float128, float_status *status); +bool float128_is_quiet_nan(float128, float_status *status); +bool float128_is_signaling_nan(float128, float_status *status); float128 float128_silence_nan(float128, float_status *status); float128 float128_scalbn(float128, int, float_status *status); @@ -858,22 +976,22 @@ static inline float128 
float128_chs(float128 a) return a; } -static inline int float128_is_infinity(float128 a) +static inline bool float128_is_infinity(float128 a) { return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0; } -static inline int float128_is_neg(float128 a) +static inline bool float128_is_neg(float128 a) { return a.high >> 63; } -static inline int float128_is_zero(float128 a) +static inline bool float128_is_zero(float128 a) { return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0; } -static inline int float128_is_zero_or_denormal(float128 a) +static inline bool float128_is_zero_or_denormal(float128 a) { return (a.high & 0x7fff000000000000LL) == 0; } @@ -888,12 +1006,53 @@ static inline bool float128_is_denormal(float128 a) return float128_is_zero_or_denormal(a) && !float128_is_zero(a); } -static inline int float128_is_any_nan(float128 a) +static inline bool float128_is_any_nan(float128 a) { return ((a.high >> 48) & 0x7fff) == 0x7fff && ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0)); } +static inline bool float128_eq(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) == float_relation_equal; +} + +static inline bool float128_le(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) <= float_relation_equal; +} + +static inline bool float128_lt(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) < float_relation_equal; +} + +static inline bool float128_unordered(float128 a, float128 b, float_status *s) +{ + return float128_compare(a, b, s) == float_relation_unordered; +} + +static inline bool float128_eq_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) == float_relation_equal; +} + +static inline bool float128_le_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) <= float_relation_equal; +} + +static inline bool float128_lt_quiet(float128 a, float128 b, float_status *s) +{ + return float128_compare_quiet(a, b, s) < float_relation_equal; +} + +static inline bool float128_unordered_quiet(float128 a, float128 b, + float_status *s) +{ + return float128_compare_quiet(a, b, s) == float_relation_unordered; +} + #define float128_zero make_float128(0, 0) /*---------------------------------------------------------------------------- diff --git a/qemu/include/hw/registerfields.h b/qemu/include/hw/registerfields.h index 686aca1225..972876f98c 100644 --- a/qemu/include/hw/registerfields.h +++ b/qemu/include/hw/registerfields.h @@ -66,30 +66,30 @@ #define FIELD_DP8(storage, reg, field, val, d) { \ struct { \ unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ + } _v = { .v = val }; \ d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ } #define FIELD_DP16(storage, reg, field, val, d) { \ struct { \ unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ + } _v = { .v = val }; \ d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ + R_ ## reg ## _ ## field ## _LENGTH, _v.v); \ } -#define FIELD_DP32(storage, reg, field, val, d) { \ - struct { \ - unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ - d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ +#define FIELD_DP32(storage, reg, field, val, d) { \ + struct { \ + unsigned int v:R_ ## reg ## _ ## 
field ## _LENGTH; \ + } v = { .v = val }; \ + d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, v.v); \ } -#define FIELD_DP64(storage, reg, field, val, d) { \ - struct { \ - unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ - } v = { .v = val }; \ - d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ - R_ ## reg ## _ ## field ## _LENGTH, v.v); \ +#define FIELD_DP64(storage, reg, field, val, d) { \ + struct { \ + unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ + } v = { .v = val }; \ + d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ + R_ ## reg ## _ ## field ## _LENGTH, v.v); \ } /* Deposit a field to array of registers. */ diff --git a/qemu/include/qemu/bswap.h b/qemu/include/qemu/bswap.h index 7591f6c88e..5afcf853f0 100644 --- a/qemu/include/qemu/bswap.h +++ b/qemu/include/qemu/bswap.h @@ -9,6 +9,8 @@ # include #elif defined(__FreeBSD__) # include +#elif defined(__HAIKU__) +# include #elif defined(CONFIG_BYTESWAP_H) # include diff --git a/qemu/include/qemu/compiler.h b/qemu/include/qemu/compiler.h index 971aa12721..e0cb4b3dd1 100644 --- a/qemu/include/qemu/compiler.h +++ b/qemu/include/qemu/compiler.h @@ -89,6 +89,8 @@ static union MSVC_FLOAT_HACK __NAN = {{0x00, 0x00, 0xC0, 0x7F}}; #define QEMU_FLATTEN #define QEMU_ALWAYS_INLINE __declspec(inline) +#define qemu_build_not_reached() __assume(0) + #else // Unix compilers #ifndef NAN diff --git a/qemu/include/qemu/host-utils.h b/qemu/include/qemu/host-utils.h index 0c5b30ff67..50063ffffe 100644 --- a/qemu/include/qemu/host-utils.h +++ b/qemu/include/qemu/host-utils.h @@ -100,8 +100,8 @@ static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor) } } #else -void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b); -void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b); +void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b); +void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); int divs128(int64_t *plow, int64_t *phigh, int64_t divisor); diff --git a/qemu/include/qemu/osdep.h b/qemu/include/qemu/osdep.h index ad18b8ddd6..80d0869acd 100644 --- a/qemu/include/qemu/osdep.h +++ b/qemu/include/qemu/osdep.h @@ -96,7 +96,7 @@ struct uc_struct; #include #include -#ifdef __OpenBSD__ +#ifdef HAVE_SYS_SIGNAL_H #include #endif @@ -189,6 +189,9 @@ struct uc_struct; #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0 +#endif #ifndef ENOMEDIUM #define ENOMEDIUM ENODEV #endif @@ -252,18 +255,72 @@ struct uc_struct; #define SIZE_MAX ((size_t)-1) #endif -#ifndef MIN -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +/* + * Two variations of MIN/MAX macros. The first is for runtime use, and + * evaluates arguments only once (so it is safe even with side + * effects), but will not work in constant contexts (such as array + * size declarations) because of the '{}'. The second is for constant + * expression use, where evaluating arguments twice is safe because + * the result is going to be constant anyway, but will not work in a + * runtime context because of a void expression where a value is + * expected. Thus, both gcc and clang will fail to compile if you use + * the wrong macro (even if the error may seem a bit cryptic). 
+ * + * Note that neither form is usable as an #if condition; if you truly + * need to write conditional code that depends on a minimum or maximum + * determined by the pre-processor instead of the compiler, you'll + * have to open-code it. Sadly, Coverity is severely confused by the + * constant variants, so we have to dumb things down there. + */ +#undef MIN +#ifdef _MSC_VER +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#else +#define MIN(a, b) \ + ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a < _b ? _a : _b; \ + }) +#endif + +#undef MAX +#ifdef _MSC_VER + // MSVC version + #define MAX(a, b) ((a) > (b) ? (a) : (b)) +#else + // GCC/Clang version with statement expression + #define MAX(a, b) ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a > _b ? _a : _b; \ + }) #endif -#ifndef MAX -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +#if defined(__COVERITY__) || defined(_MSC_VER) +# define MIN_CONST(a, b) ((a) < (b) ? (a) : (b)) +# define MAX_CONST(a, b) ((a) > (b) ? (a) : (b)) +#else +# define MIN_CONST(a, b) \ + __builtin_choose_expr( \ + __builtin_constant_p(a) && __builtin_constant_p(b), \ + (a) < (b) ? (a) : (b), \ + ((void)0)) +# define MAX_CONST(a, b) \ + __builtin_choose_expr( \ + __builtin_constant_p(a) && __builtin_constant_p(b), \ + (a) > (b) ? (a) : (b), \ + ((void)0)) #endif -/* Minimum function that returns zero only iff both values are zero. - * Intended for use with unsigned values only. */ +/* + * Minimum function that returns zero only if both values are zero. + * Intended for use with unsigned values only. + */ #ifndef MIN_NON_ZERO -#define MIN_NON_ZERO(a, b) ((a) == 0 ? (b) : \ - ((b) == 0 ? (a) : (MIN(a, b)))) +#define MIN_NON_ZERO(a, b) \ + ({ \ + typeof(1 ? (a) : (b)) _a = (a), _b = (b); \ + _a == 0 ? _b : (_b == 0 || _b > _a) ? _a : _b; \ + }) #endif /* Round number down to multiple */ @@ -408,7 +465,7 @@ void qemu_anon_ram_free(struct uc_struct *uc, void *ptr, size_t size); #define HAVE_CHARDEV_SERIAL 1 #elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \ || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \ - || defined(__GLIBC__) + || defined(__GLIBC__) || defined(__APPLE__) #define HAVE_CHARDEV_SERIAL 1 #endif @@ -417,6 +474,10 @@ void qemu_anon_ram_free(struct uc_struct *uc, void *ptr, size_t size); #define HAVE_CHARDEV_PARPORT 1 #endif +#if defined(__HAIKU__) +#define SIGIO SIGPOLL +#endif + #if defined(CONFIG_LINUX) #ifndef BUS_MCEERR_AR #define BUS_MCEERR_AR 4 diff --git a/qemu/include/tcg/tcg-op-gvec.h b/qemu/include/tcg/tcg-op-gvec.h index dd414fc768..5610e89f99 100644 --- a/qemu/include/tcg/tcg-op-gvec.h +++ b/qemu/include/tcg/tcg-op-gvec.h @@ -39,56 +39,61 @@ void tcg_gen_gvec_2_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, gen_helper_gvec_2 *fn); /* Similarly, passing an extra data value. */ -typedef void gen_helper_gvec_2i(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); -void tcg_gen_gvec_2i_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, TCGv_i64 c, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_2i *fn); +typedef void gen_helper_gvec_2i(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_i64, + TCGv_i32); +void tcg_gen_gvec_2i_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_2i *fn); /* Similarly, passing an extra pointer (e.g. env or float_status). 
*/ -typedef void gen_helper_gvec_2_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void gen_helper_gvec_2_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_i32); void tcg_gen_gvec_2_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, int32_t data, gen_helper_gvec_2_ptr *fn); /* Similarly, with three vector operands. */ -typedef void gen_helper_gvec_3(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_3_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_3 *fn); +typedef void gen_helper_gvec_3(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_i32); +void tcg_gen_gvec_3_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_3 *fn); /* Similarly, with four vector operands. */ typedef void gen_helper_gvec_4(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_4_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, uint32_t oprsz, uint32_t maxsz, - int32_t data, gen_helper_gvec_4 *fn); +void tcg_gen_gvec_4_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t oprsz, + uint32_t maxsz, int32_t data, gen_helper_gvec_4 *fn); /* Similarly, with five vector operands. */ -typedef void gen_helper_gvec_5(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, - TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_5_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, uint32_t xofs, uint32_t oprsz, - uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn); +typedef void gen_helper_gvec_5(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_5_ool(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t xofs, + uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_5 *fn); typedef void gen_helper_gvec_3_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_3_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, - int32_t data, gen_helper_gvec_3_ptr *fn); +void tcg_gen_gvec_3_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, TCGv_ptr ptr, uint32_t oprsz, + uint32_t maxsz, int32_t data, + gen_helper_gvec_3_ptr *fn); typedef void gen_helper_gvec_4_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_4_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz, - uint32_t maxsz, int32_t data, +void tcg_gen_gvec_4_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, TCGv_ptr ptr, + uint32_t oprsz, uint32_t maxsz, int32_t data, gen_helper_gvec_4_ptr *fn); -typedef void gen_helper_gvec_5_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, - TCGv_ptr, TCGv_ptr, TCGv_i32); -void tcg_gen_gvec_5_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t cofs, uint32_t eofs, TCGv_ptr ptr, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_5_ptr *fn); +typedef void gen_helper_gvec_5_ptr(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); +void tcg_gen_gvec_5_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, 
uint32_t cofs, uint32_t eofs, + TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_5_ptr *fn); /* Expand a gvec operation. Either inline or out-of-line depending on the actual vector size and the operations supported by the host. */ @@ -109,6 +114,8 @@ typedef struct { uint8_t vece; /* Prefer i64 to v64. */ bool prefer_i64; + /* Load dest as a 2nd source operand. */ + bool load_dest; } GVecGen2; typedef struct { @@ -201,7 +208,8 @@ typedef struct { void (*fni8)(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64); void (*fni4)(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); /* Expand inline with a host vector type. */ - void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec); + void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec, TCGv_vec, + TCGv_vec); /* Expand out-of-line helper w/descriptor. */ gen_helper_gvec_4 *fno; /* The optional opcodes, if any, utilized by .fniv. */ @@ -218,110 +226,146 @@ typedef struct { void tcg_gen_gvec_2(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *); -void tcg_gen_gvec_2i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - uint32_t maxsz, int64_t c, const GVecGen2i *); -void tcg_gen_gvec_2s(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - uint32_t maxsz, TCGv_i64 c, const GVecGen2s *); -void tcg_gen_gvec_3(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, uint32_t maxsz, const GVecGen3 *); -void tcg_gen_gvec_3i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, +void tcg_gen_gvec_2i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int64_t c, + const GVecGen2i *); +void tcg_gen_gvec_2s(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz, TCGv_i64 c, + const GVecGen2s *); +void tcg_gen_gvec_3(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz, + const GVecGen3 *); +void tcg_gen_gvec_3i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen3i *); -void tcg_gen_gvec_4(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, - uint32_t oprsz, uint32_t maxsz, const GVecGen4 *); +void tcg_gen_gvec_4(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t oprsz, + uint32_t maxsz, const GVecGen4 *); /* Expand a specific vector operation. 
*/ -void tcg_gen_gvec_mov(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_not(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_neg(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_abs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_add(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_sub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_mul(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_addi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_muli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_adds(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_subs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_muls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_mov(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_not(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_neg(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_abs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_add(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_sub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_mul(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_addi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_muli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_adds(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_subs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_muls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); /* Saturated arithmetic. 
*/ -void tcg_gen_gvec_ssadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_sssub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_usadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_ussub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_ssadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_sssub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_usadd(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_ussub(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); /* Min/max. */ -void tcg_gen_gvec_smin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_umin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_smax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_umax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_and(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_or(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_xor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_andc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_orc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_nand(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_nor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_eqv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_andi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_xori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_ori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - int64_t c, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_ands(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_xors(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); -void tcg_gen_gvec_ors(TCGContext 
*tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); - -void tcg_gen_gvec_dup_mem(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t s, uint32_t m); +void tcg_gen_gvec_smin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_umin(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_smax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_umax(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_and(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_or(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_xor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_andc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_orc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_nand(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_nor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_eqv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t bofs, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_andi(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_xori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_ori(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, int64_t c, uint32_t oprsz, uint32_t maxsz); + +void tcg_gen_gvec_ands(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_xors(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); +void tcg_gen_gvec_ors(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, TCGv_i64 c, uint32_t oprsz, + uint32_t maxsz); + +void tcg_gen_gvec_dup_mem(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, + uint32_t aofs, uint32_t s, uint32_t m); +void tcg_gen_gvec_dup_imm(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t s, + uint32_t m, uint64_t imm); void tcg_gen_gvec_dup_i32(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t s, uint32_t m, TCGv_i32); void tcg_gen_gvec_dup_i64(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t s, uint32_t m, TCGv_i64); -void tcg_gen_gvec_dup8i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint8_t x); -void tcg_gen_gvec_dup16i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint16_t x); -void tcg_gen_gvec_dup32i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint32_t x); -void tcg_gen_gvec_dup64i(TCGContext 
*tcg_ctx, uint32_t dofs, uint32_t s, uint32_t m, uint64_t x); +#if TARGET_LONG_BITS == 64 +#define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i64 +#else +#define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i32 +#endif void tcg_gen_gvec_shli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); @@ -329,6 +373,10 @@ void tcg_gen_gvec_shri(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 int64_t shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sari(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotri(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_shls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); @@ -336,6 +384,8 @@ void tcg_gen_gvec_shrs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sars(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); /* * Perform vector shift by vector element, modulo the element size. @@ -347,6 +397,10 @@ void tcg_gen_gvec_shrv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 uint32_t bofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_sarv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotlv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); +void tcg_gen_gvec_rotrv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz); void tcg_gen_gvec_cmp(TCGContext *tcg_ctx, TCGCond cond, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, @@ -383,5 +437,7 @@ void tcg_gen_vec_shr8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t) void tcg_gen_vec_shr16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_sar8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_sar16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t); +void tcg_gen_vec_rotl8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c); +void tcg_gen_vec_rotl16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c); #endif diff --git a/qemu/include/tcg/tcg-op.h b/qemu/include/tcg/tcg-op.h index 93026d1d51..5b9685da03 100644 --- a/qemu/include/tcg/tcg-op.h +++ b/qemu/include/tcg/tcg-op.h @@ -359,9 +359,9 @@ void tcg_gen_ctzi_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, uint32_t void tcg_gen_clrsb_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ctpop_i32(TCGContext *tcg_ctx, TCGv_i32 a1, TCGv_i32 a2); void tcg_gen_rotl_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); -void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); void tcg_gen_rotr_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); -void 
tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); void tcg_gen_deposit_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, unsigned int ofs, unsigned int len); void tcg_gen_deposit_z_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg, @@ -569,9 +569,9 @@ void tcg_gen_ctzi_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, uint64_t void tcg_gen_clrsb_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ctpop_i64(TCGContext *tcg_ctx, TCGv_i64 a1, TCGv_i64 a2); void tcg_gen_rotl_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); -void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); void tcg_gen_rotr_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); -void tcg_gen_rotri_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_rotri_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); void tcg_gen_deposit_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, unsigned int ofs, unsigned int len); void tcg_gen_deposit_z_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg, @@ -1078,14 +1078,19 @@ void tcg_gen_umax_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a void tcg_gen_shli_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_shri_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_sari_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_rotli_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); +void tcg_gen_rotri_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i); void tcg_gen_shls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); void tcg_gen_shrs_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); void tcg_gen_sars_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); +void tcg_gen_rotls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s); void tcg_gen_shlv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_shrv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_sarv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_rotlv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); +void tcg_gen_rotrv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s); void tcg_gen_cmp_vec(TCGContext *tcg_ctx, TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); diff --git a/qemu/include/tcg/tcg-opc.h b/qemu/include/tcg/tcg-opc.h index 22033870bf..a583ca4900 100644 --- a/qemu/include/tcg/tcg-opc.h +++ b/qemu/include/tcg/tcg-opc.h @@ -270,19 +270,28 @@ DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec)) DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) +DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec)) DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) 
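/*
 * Hedged usage sketch, not part of the patch: how a target front end might
 * call the rotate-by-immediate expander added above.  gen_vec_rotl32() is a
 * hypothetical function name; tcg_gen_gvec_rotli() and MO_32 come from the
 * patched headers and existing TCG code, and the 16-byte operand size is an
 * assumption for illustration only.
 */
static void gen_vec_rotl32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, int imm)
{
    /* Rotate every 32-bit lane of a 16-byte vector left by imm bits.
     * When the host provides no vector rotate (TCG_TARGET_HAS_roti_vec == 0),
     * the expander can fall back to shift/OR sequences or an out-of-line
     * helper such as helper_gvec_rotl32i. */
    tcg_gen_gvec_rotli(tcg_ctx, MO_32, dofs, aofs, imm & 31, 16, 16);
}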
DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec)) +DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec)) #ifdef _MSC_VER -DEF(shlv_vec, 1, 2, 0, IMPLVEC) -DEF(shrv_vec, 1, 2, 0, IMPLVEC) -DEF(sarv_vec, 1, 2, 0, IMPLVEC) +// For MSVC, pre-compute the flags since it can't evaluate the OR at compile time +#define VEC_FLAGS (TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT) +DEF(shlv_vec, 1, 2, 0, VEC_FLAGS) +DEF(shrv_vec, 1, 2, 0, VEC_FLAGS) +DEF(sarv_vec, 1, 2, 0, VEC_FLAGS) +DEF(rotlv_vec, 1, 2, 0, VEC_FLAGS) +DEF(rotrv_vec, 1, 2, 0, VEC_FLAGS) +#undef VEC_FLAGS #else -DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) -DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) -DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec)) +DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) +DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec)) #endif DEF(cmp_vec, 1, 2, 1, IMPLVEC) diff --git a/qemu/include/tcg/tcg.h b/qemu/include/tcg/tcg.h index ade583e43f..966103e25d 100644 --- a/qemu/include/tcg/tcg.h +++ b/qemu/include/tcg/tcg.h @@ -182,6 +182,9 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_andc_vec 0 #define TCG_TARGET_HAS_orc_vec 0 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 0 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 0 @@ -721,7 +724,7 @@ struct TCGContext { void *tb_ret_addr; /* target/riscv/translate.c */ - TCGv cpu_gpr[32], cpu_pc; // also target/mips/translate.c + TCGv cpu_gpr[32], cpu_pc, cpu_vl; // also target/mips/translate.c TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ TCGv load_res; TCGv load_val; diff --git a/qemu/m68k.h b/qemu/m68k.h index 1b1703d19c..065357bbe1 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_m68k #define tcg_gen_shr_i64 tcg_gen_shr_i64_m68k #define tcg_gen_st_i64 tcg_gen_st_i64_m68k +#define tcg_gen_add_i64 tcg_gen_add_i64_m68k +#define tcg_gen_sub_i64 tcg_gen_sub_i64_m68k #define tcg_gen_xor_i64 tcg_gen_xor_i64_m68k +#define tcg_gen_neg_i64 tcg_gen_neg_i64_m68k #define cpu_icount_to_ns cpu_icount_to_ns_m68k #define cpu_is_stopped cpu_is_stopped_m68k #define cpu_get_ticks cpu_get_ticks_m68k @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_m68k #define floatx80_mul floatx80_mul_m68k #define floatx80_div floatx80_div_m68k +#define floatx80_modrem floatx80_modrem_m68k +#define floatx80_mod floatx80_mod_m68k #define floatx80_rem floatx80_rem_m68k #define floatx80_sqrt floatx80_sqrt_m68k #define floatx80_eq floatx80_eq_m68k @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_m68k #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_m68k #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_m68k +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_m68k #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_m68k #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_m68k #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_m68k @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_m68k #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_m68k #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_m68k +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_m68k +#define tcg_gen_vec_rotl16i_i64 
tcg_gen_vec_rotl16i_i64_m68k #define tcg_gen_gvec_sari tcg_gen_gvec_sari_m68k +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_m68k +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_m68k #define tcg_gen_gvec_shls tcg_gen_gvec_shls_m68k #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_m68k #define tcg_gen_gvec_sars tcg_gen_gvec_sars_m68k +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_m68k #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_m68k #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_m68k #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_m68k +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_m68k +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_m68k #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_m68k #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_m68k #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_m68k @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_m68k #define tcg_gen_shri_vec tcg_gen_shri_vec_m68k #define tcg_gen_sari_vec tcg_gen_sari_vec_m68k +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_m68k +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_m68k #define tcg_gen_cmp_vec tcg_gen_cmp_vec_m68k #define tcg_gen_add_vec tcg_gen_add_vec_m68k #define tcg_gen_sub_vec tcg_gen_sub_vec_m68k @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_m68k #define tcg_gen_shrv_vec tcg_gen_shrv_vec_m68k #define tcg_gen_sarv_vec tcg_gen_sarv_vec_m68k +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_m68k +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_m68k #define tcg_gen_shls_vec tcg_gen_shls_vec_m68k #define tcg_gen_shrs_vec tcg_gen_shrs_vec_m68k #define tcg_gen_sars_vec tcg_gen_sars_vec_m68k +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_m68k #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_m68k #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_m68k #define tb_htable_lookup tb_htable_lookup_m68k @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_m68k #define cpu_loop_exit_atomic cpu_loop_exit_atomic_m68k #define tlb_init tlb_init_m68k +#define tlb_destroy tlb_destroy_m68k #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_m68k #define tlb_flush tlb_flush_m68k #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_m68k @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_m68k #define get_page_addr_code_hostp get_page_addr_code_hostp_m68k #define get_page_addr_code get_page_addr_code_m68k +#define probe_access_flags probe_access_flags_m68k #define probe_access probe_access_m68k #define tlb_vaddr_to_host tlb_vaddr_to_host_m68k #define helper_ret_ldub_mmu helper_ret_ldub_mmu_m68k @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_m68k #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_m68k #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_m68k -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_m68k -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_m68k -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_m68k -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_m68k +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_m68k +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_m68k +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_m68k +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_m68k +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_m68k +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_m68k +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_m68k +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_m68k #define cpu_ldub_data_ra cpu_ldub_data_ra_m68k #define cpu_ldsb_data_ra cpu_ldsb_data_ra_m68k -#define cpu_lduw_data_ra cpu_lduw_data_ra_m68k -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_m68k -#define 
cpu_ldl_data_ra cpu_ldl_data_ra_m68k -#define cpu_ldq_data_ra cpu_ldq_data_ra_m68k +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_m68k +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_m68k +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_m68k +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_m68k +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_m68k +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_m68k +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_m68k +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_m68k #define cpu_ldub_data cpu_ldub_data_m68k #define cpu_ldsb_data cpu_ldsb_data_m68k -#define cpu_lduw_data cpu_lduw_data_m68k -#define cpu_ldsw_data cpu_ldsw_data_m68k -#define cpu_ldl_data cpu_ldl_data_m68k -#define cpu_ldq_data cpu_ldq_data_m68k +#define cpu_lduw_be_data cpu_lduw_be_data_m68k +#define cpu_lduw_le_data cpu_lduw_le_data_m68k +#define cpu_ldsw_be_data cpu_ldsw_be_data_m68k +#define cpu_ldsw_le_data cpu_ldsw_le_data_m68k +#define cpu_ldl_be_data cpu_ldl_be_data_m68k +#define cpu_ldl_le_data cpu_ldl_le_data_m68k +#define cpu_ldq_le_data cpu_ldq_le_data_m68k +#define cpu_ldq_be_data cpu_ldq_be_data_m68k #define helper_ret_stb_mmu helper_ret_stb_mmu_m68k #define helper_le_stw_mmu helper_le_stw_mmu_m68k #define helper_be_stw_mmu helper_be_stw_mmu_m68k @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_m68k #define helper_be_stq_mmu helper_be_stq_mmu_m68k #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_m68k -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_m68k -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_m68k -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_m68k +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_m68k +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_m68k +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_m68k +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_m68k +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_m68k +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_m68k #define cpu_stb_data_ra cpu_stb_data_ra_m68k -#define cpu_stw_data_ra cpu_stw_data_ra_m68k -#define cpu_stl_data_ra cpu_stl_data_ra_m68k -#define cpu_stq_data_ra cpu_stq_data_ra_m68k +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_m68k +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_m68k +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_m68k +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_m68k +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_m68k +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_m68k #define cpu_stb_data cpu_stb_data_m68k -#define cpu_stw_data cpu_stw_data_m68k -#define cpu_stl_data cpu_stl_data_m68k -#define cpu_stq_data cpu_stq_data_m68k +#define cpu_stw_be_data cpu_stw_be_data_m68k +#define cpu_stw_le_data cpu_stw_le_data_m68k +#define cpu_stl_be_data cpu_stl_be_data_m68k +#define cpu_stl_le_data cpu_stl_le_data_m68k +#define cpu_stq_be_data cpu_stq_be_data_m68k +#define cpu_stq_le_data cpu_stq_le_data_m68k #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_m68k #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_m68k #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_m68k @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_m68k #define cpu_ldl_code cpu_ldl_code_m68k #define cpu_ldq_code cpu_ldq_code_m68k +#define cpu_interrupt_handler cpu_interrupt_handler_m68k #define helper_div_i32 helper_div_i32_m68k #define helper_rem_i32 helper_rem_i32_m68k #define helper_divu_i32 helper_divu_i32_m68k @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_m68k #define helper_gvec_sar32i helper_gvec_sar32i_m68k #define helper_gvec_sar64i 
helper_gvec_sar64i_m68k +#define helper_gvec_rotl8i helper_gvec_rotl8i_m68k +#define helper_gvec_rotl16i helper_gvec_rotl16i_m68k +#define helper_gvec_rotl32i helper_gvec_rotl32i_m68k +#define helper_gvec_rotl64i helper_gvec_rotl64i_m68k #define helper_gvec_shl8v helper_gvec_shl8v_m68k #define helper_gvec_shl16v helper_gvec_shl16v_m68k #define helper_gvec_shl32v helper_gvec_shl32v_m68k @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_m68k #define helper_gvec_sar32v helper_gvec_sar32v_m68k #define helper_gvec_sar64v helper_gvec_sar64v_m68k +#define helper_gvec_rotl8v helper_gvec_rotl8v_m68k +#define helper_gvec_rotl16v helper_gvec_rotl16v_m68k +#define helper_gvec_rotl32v helper_gvec_rotl32v_m68k +#define helper_gvec_rotl64v helper_gvec_rotl64v_m68k +#define helper_gvec_rotr8v helper_gvec_rotr8v_m68k +#define helper_gvec_rotr16v helper_gvec_rotr16v_m68k +#define helper_gvec_rotr32v helper_gvec_rotr32v_m68k +#define helper_gvec_rotr64v helper_gvec_rotr64v_m68k #define helper_gvec_eq8 helper_gvec_eq8_m68k #define helper_gvec_ne8 helper_gvec_ne8_m68k #define helper_gvec_lt8 helper_gvec_lt8_m68k @@ -1420,7 +1474,6 @@ #define helper_bfffo_mem helper_bfffo_mem_m68k #define helper_chk helper_chk_m68k #define helper_chk2 helper_chk2_m68k -#define floatx80_mod floatx80_mod_m68k #define floatx80_getman floatx80_getman_m68k #define floatx80_getexp floatx80_getexp_m68k #define floatx80_scale floatx80_scale_m68k diff --git a/qemu/mips.h b/qemu/mips.h index 3a005710c7..b55e68792d 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mips #define tcg_gen_shr_i64 tcg_gen_shr_i64_mips #define tcg_gen_st_i64 tcg_gen_st_i64_mips +#define tcg_gen_add_i64 tcg_gen_add_i64_mips +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mips #define tcg_gen_xor_i64 tcg_gen_xor_i64_mips +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mips #define cpu_icount_to_ns cpu_icount_to_ns_mips #define cpu_is_stopped cpu_is_stopped_mips #define cpu_get_ticks cpu_get_ticks_mips @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mips #define floatx80_mul floatx80_mul_mips #define floatx80_div floatx80_div_mips +#define floatx80_modrem floatx80_modrem_mips +#define floatx80_mod floatx80_mod_mips #define floatx80_rem floatx80_rem_mips #define floatx80_sqrt floatx80_sqrt_mips #define floatx80_eq floatx80_eq_mips @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mips #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mips #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mips +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mips #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mips #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mips #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mips @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mips #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mips #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mips +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mips +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mips #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mips +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mips +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mips #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mips #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mips #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mips +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mips #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mips #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_mips #define tcg_gen_gvec_sarv 
tcg_gen_gvec_sarv_mips +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mips +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mips #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mips #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mips #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mips #define tcg_gen_shri_vec tcg_gen_shri_vec_mips #define tcg_gen_sari_vec tcg_gen_sari_vec_mips +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mips +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mips #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mips #define tcg_gen_add_vec tcg_gen_add_vec_mips #define tcg_gen_sub_vec tcg_gen_sub_vec_mips @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mips #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mips #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mips +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mips +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mips #define tcg_gen_shls_vec tcg_gen_shls_vec_mips #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mips #define tcg_gen_sars_vec tcg_gen_sars_vec_mips +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mips #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mips #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mips #define tb_htable_lookup tb_htable_lookup_mips @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mips #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mips #define tlb_init tlb_init_mips +#define tlb_destroy tlb_destroy_mips #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mips #define tlb_flush tlb_flush_mips #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mips @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mips #define get_page_addr_code_hostp get_page_addr_code_hostp_mips #define get_page_addr_code get_page_addr_code_mips +#define probe_access_flags probe_access_flags_mips #define probe_access probe_access_mips #define tlb_vaddr_to_host tlb_vaddr_to_host_mips #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mips @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mips #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mips #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mips -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mips -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mips -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mips -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mips +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mips +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mips +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mips +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mips +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mips +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mips +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mips +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mips #define cpu_ldub_data_ra cpu_ldub_data_ra_mips #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mips -#define cpu_lduw_data_ra cpu_lduw_data_ra_mips -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mips -#define cpu_ldl_data_ra cpu_ldl_data_ra_mips -#define cpu_ldq_data_ra cpu_ldq_data_ra_mips +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mips +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mips +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mips +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mips +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_mips +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mips +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mips +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mips 
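/*
 * Hedged sketch, not part of the patch: the cpu_ld*_data/cpu_st*_data
 * accessors are now split into explicit big- and little-endian variants
 * (e.g. cpu_lduw_be_data_ra vs. cpu_lduw_le_data_ra).  A caller that wants
 * the target's native byte order could select one at compile time; the
 * wrapper name below is hypothetical.
 */
static inline uint32_t lduw_target_endian(CPUArchState *env, target_ulong addr,
                                          uintptr_t retaddr)
{
#ifdef TARGET_WORDS_BIGENDIAN
    return cpu_lduw_be_data_ra(env, addr, retaddr);   /* big-endian target */
#else
    return cpu_lduw_le_data_ra(env, addr, retaddr);   /* little-endian target */
#endif
}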
#define cpu_ldub_data cpu_ldub_data_mips #define cpu_ldsb_data cpu_ldsb_data_mips -#define cpu_lduw_data cpu_lduw_data_mips -#define cpu_ldsw_data cpu_ldsw_data_mips -#define cpu_ldl_data cpu_ldl_data_mips -#define cpu_ldq_data cpu_ldq_data_mips +#define cpu_lduw_be_data cpu_lduw_be_data_mips +#define cpu_lduw_le_data cpu_lduw_le_data_mips +#define cpu_ldsw_be_data cpu_ldsw_be_data_mips +#define cpu_ldsw_le_data cpu_ldsw_le_data_mips +#define cpu_ldl_be_data cpu_ldl_be_data_mips +#define cpu_ldl_le_data cpu_ldl_le_data_mips +#define cpu_ldq_le_data cpu_ldq_le_data_mips +#define cpu_ldq_be_data cpu_ldq_be_data_mips #define helper_ret_stb_mmu helper_ret_stb_mmu_mips #define helper_le_stw_mmu helper_le_stw_mmu_mips #define helper_be_stw_mmu helper_be_stw_mmu_mips @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mips #define helper_be_stq_mmu helper_be_stq_mmu_mips #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mips -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mips -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mips -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mips +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mips +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mips +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mips +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mips +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mips +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mips #define cpu_stb_data_ra cpu_stb_data_ra_mips -#define cpu_stw_data_ra cpu_stw_data_ra_mips -#define cpu_stl_data_ra cpu_stl_data_ra_mips -#define cpu_stq_data_ra cpu_stq_data_ra_mips +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mips +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_mips +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mips +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mips +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mips +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mips #define cpu_stb_data cpu_stb_data_mips -#define cpu_stw_data cpu_stw_data_mips -#define cpu_stl_data cpu_stl_data_mips -#define cpu_stq_data cpu_stq_data_mips +#define cpu_stw_be_data cpu_stw_be_data_mips +#define cpu_stw_le_data cpu_stw_le_data_mips +#define cpu_stl_be_data cpu_stl_be_data_mips +#define cpu_stl_le_data cpu_stl_le_data_mips +#define cpu_stq_be_data cpu_stq_be_data_mips +#define cpu_stq_le_data cpu_stq_le_data_mips #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_mips #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mips #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mips @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mips #define cpu_ldl_code cpu_ldl_code_mips #define cpu_ldq_code cpu_ldq_code_mips +#define cpu_interrupt_handler cpu_interrupt_handler_mips #define helper_div_i32 helper_div_i32_mips #define helper_rem_i32 helper_rem_i32_mips #define helper_divu_i32 helper_divu_i32_mips @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips #define helper_gvec_sar32i helper_gvec_sar32i_mips #define helper_gvec_sar64i helper_gvec_sar64i_mips +#define helper_gvec_rotl8i helper_gvec_rotl8i_mips +#define helper_gvec_rotl16i helper_gvec_rotl16i_mips +#define helper_gvec_rotl32i helper_gvec_rotl32i_mips +#define helper_gvec_rotl64i helper_gvec_rotl64i_mips #define helper_gvec_shl8v helper_gvec_shl8v_mips #define helper_gvec_shl16v helper_gvec_shl16v_mips #define helper_gvec_shl32v helper_gvec_shl32v_mips @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mips #define helper_gvec_sar32v helper_gvec_sar32v_mips 
#define helper_gvec_sar64v helper_gvec_sar64v_mips +#define helper_gvec_rotl8v helper_gvec_rotl8v_mips +#define helper_gvec_rotl16v helper_gvec_rotl16v_mips +#define helper_gvec_rotl32v helper_gvec_rotl32v_mips +#define helper_gvec_rotl64v helper_gvec_rotl64v_mips +#define helper_gvec_rotr8v helper_gvec_rotr8v_mips +#define helper_gvec_rotr16v helper_gvec_rotr16v_mips +#define helper_gvec_rotr32v helper_gvec_rotr32v_mips +#define helper_gvec_rotr64v helper_gvec_rotr64v_mips #define helper_gvec_eq8 helper_gvec_eq8_mips #define helper_gvec_ne8 helper_gvec_ne8_mips #define helper_gvec_lt8 helper_gvec_lt8_mips @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mips #define helper_cfc1 helper_cfc1_mips #define helper_ctc1 helper_ctc1_mips -#define ieee_ex_to_mips ieee_ex_to_mips_mips #define helper_float_sqrt_d helper_float_sqrt_d_mips #define helper_float_sqrt_s helper_float_sqrt_s_mips #define helper_float_cvtd_s helper_float_cvtd_s_mips @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mips #define helper_msa_binsli_df helper_msa_binsli_df_mips #define helper_msa_binsri_df helper_msa_binsri_df_mips -#define helper_msa_subv_df helper_msa_subv_df_mips -#define helper_msa_subs_s_df helper_msa_subs_s_df_mips -#define helper_msa_subs_u_df helper_msa_subs_u_df_mips -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mips -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mips -#define helper_msa_mulv_df helper_msa_mulv_df_mips -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mips -#define helper_msa_dotp_u_df helper_msa_dotp_u_df_mips +#define helper_msa_subv_b helper_msa_subv_b_mips +#define helper_msa_subv_h helper_msa_subv_h_mips +#define helper_msa_subv_w helper_msa_subv_w_mips +#define helper_msa_subv_d helper_msa_subv_d_mips +#define helper_msa_subs_s_b helper_msa_subs_s_b_mips +#define helper_msa_subs_s_h helper_msa_subs_s_h_mips +#define helper_msa_subs_s_w helper_msa_subs_s_w_mips +#define helper_msa_subs_s_d helper_msa_subs_s_d_mips +#define helper_msa_subs_u_b helper_msa_subs_u_b_mips +#define helper_msa_subs_u_h helper_msa_subs_u_h_mips +#define helper_msa_subs_u_w helper_msa_subs_u_w_mips +#define helper_msa_subs_u_d helper_msa_subs_u_d_mips +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mips +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mips +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mips +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mips +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mips +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mips +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mips +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mips +#define helper_msa_mulv_b helper_msa_mulv_b_mips +#define helper_msa_mulv_h helper_msa_mulv_h_mips +#define helper_msa_mulv_w helper_msa_mulv_w_mips +#define helper_msa_mulv_d helper_msa_mulv_d_mips +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mips +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mips +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mips +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mips +#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mips +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mips #define helper_msa_mul_q_df helper_msa_mul_q_df_mips #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mips #define helper_msa_sld_df helper_msa_sld_df_mips -#define helper_msa_maddv_df helper_msa_maddv_df_mips -#define helper_msa_msubv_df helper_msa_msubv_df_mips -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mips -#define 
helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mips -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mips -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mips +#define helper_msa_maddv_b helper_msa_maddv_b_mips +#define helper_msa_maddv_h helper_msa_maddv_h_mips +#define helper_msa_maddv_w helper_msa_maddv_w_mips +#define helper_msa_maddv_d helper_msa_maddv_d_mips +#define helper_msa_msubv_b helper_msa_msubv_b_mips +#define helper_msa_msubv_h helper_msa_msubv_h_mips +#define helper_msa_msubv_w helper_msa_msubv_w_mips +#define helper_msa_msubv_d helper_msa_msubv_d_mips +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mips +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mips +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mips +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mips +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mips +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mips +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mips +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mips +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mips +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mips +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mips +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mips #define helper_msa_binsl_df helper_msa_binsl_df_mips #define helper_msa_binsr_df helper_msa_binsr_df_mips #define helper_msa_madd_q_df helper_msa_madd_q_df_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index 367c6b7e79..76990196b2 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mips64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_mips64 #define tcg_gen_st_i64 tcg_gen_st_i64_mips64 +#define tcg_gen_add_i64 tcg_gen_add_i64_mips64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mips64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_mips64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mips64 #define cpu_icount_to_ns cpu_icount_to_ns_mips64 #define cpu_is_stopped cpu_is_stopped_mips64 #define cpu_get_ticks cpu_get_ticks_mips64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mips64 #define floatx80_mul floatx80_mul_mips64 #define floatx80_div floatx80_div_mips64 +#define floatx80_modrem floatx80_modrem_mips64 +#define floatx80_mod floatx80_mod_mips64 #define floatx80_rem floatx80_rem_mips64 #define floatx80_sqrt floatx80_sqrt_mips64 #define floatx80_eq floatx80_eq_mips64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mips64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mips64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mips64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mips64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mips64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mips64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mips64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mips64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mips64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mips64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mips64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mips64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mips64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mips64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mips64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mips64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mips64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mips64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mips64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mips64 #define tcg_gen_gvec_shrv 
tcg_gen_gvec_shrv_mips64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_mips64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mips64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mips64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mips64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mips64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mips64 #define tcg_gen_shri_vec tcg_gen_shri_vec_mips64 #define tcg_gen_sari_vec tcg_gen_sari_vec_mips64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mips64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mips64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mips64 #define tcg_gen_add_vec tcg_gen_add_vec_mips64 #define tcg_gen_sub_vec tcg_gen_sub_vec_mips64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mips64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mips64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mips64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mips64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mips64 #define tcg_gen_shls_vec tcg_gen_shls_vec_mips64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mips64 #define tcg_gen_sars_vec tcg_gen_sars_vec_mips64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mips64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mips64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mips64 #define tb_htable_lookup tb_htable_lookup_mips64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mips64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mips64 #define tlb_init tlb_init_mips64 +#define tlb_destroy tlb_destroy_mips64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mips64 #define tlb_flush tlb_flush_mips64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mips64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mips64 #define get_page_addr_code_hostp get_page_addr_code_hostp_mips64 #define get_page_addr_code get_page_addr_code_mips64 +#define probe_access_flags probe_access_flags_mips64 #define probe_access probe_access_mips64 #define tlb_vaddr_to_host tlb_vaddr_to_host_mips64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mips64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mips64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mips64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mips64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mips64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mips64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mips64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mips64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mips64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mips64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mips64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mips64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mips64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mips64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mips64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mips64 #define cpu_ldub_data_ra cpu_ldub_data_ra_mips64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mips64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_mips64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mips64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_mips64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_mips64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mips64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mips64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mips64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mips64 +#define cpu_ldl_be_data_ra 
cpu_ldl_be_data_ra_mips64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mips64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mips64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mips64 #define cpu_ldub_data cpu_ldub_data_mips64 #define cpu_ldsb_data cpu_ldsb_data_mips64 -#define cpu_lduw_data cpu_lduw_data_mips64 -#define cpu_ldsw_data cpu_ldsw_data_mips64 -#define cpu_ldl_data cpu_ldl_data_mips64 -#define cpu_ldq_data cpu_ldq_data_mips64 +#define cpu_lduw_be_data cpu_lduw_be_data_mips64 +#define cpu_lduw_le_data cpu_lduw_le_data_mips64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_mips64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_mips64 +#define cpu_ldl_be_data cpu_ldl_be_data_mips64 +#define cpu_ldl_le_data cpu_ldl_le_data_mips64 +#define cpu_ldq_le_data cpu_ldq_le_data_mips64 +#define cpu_ldq_be_data cpu_ldq_be_data_mips64 #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64 #define helper_le_stw_mmu helper_le_stw_mmu_mips64 #define helper_be_stw_mmu helper_be_stw_mmu_mips64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mips64 #define helper_be_stq_mmu helper_be_stq_mmu_mips64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mips64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mips64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mips64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mips64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mips64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mips64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mips64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mips64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mips64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mips64 #define cpu_stb_data_ra cpu_stb_data_ra_mips64 -#define cpu_stw_data_ra cpu_stw_data_ra_mips64 -#define cpu_stl_data_ra cpu_stl_data_ra_mips64 -#define cpu_stq_data_ra cpu_stq_data_ra_mips64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mips64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_mips64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mips64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mips64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mips64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mips64 #define cpu_stb_data cpu_stb_data_mips64 -#define cpu_stw_data cpu_stw_data_mips64 -#define cpu_stl_data cpu_stl_data_mips64 -#define cpu_stq_data cpu_stq_data_mips64 +#define cpu_stw_be_data cpu_stw_be_data_mips64 +#define cpu_stw_le_data cpu_stw_le_data_mips64 +#define cpu_stl_be_data cpu_stl_be_data_mips64 +#define cpu_stl_le_data cpu_stl_le_data_mips64 +#define cpu_stq_be_data cpu_stq_be_data_mips64 +#define cpu_stq_le_data cpu_stq_le_data_mips64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_mips64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mips64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mips64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mips64 #define cpu_ldl_code cpu_ldl_code_mips64 #define cpu_ldq_code cpu_ldq_code_mips64 +#define cpu_interrupt_handler cpu_interrupt_handler_mips64 #define helper_div_i32 helper_div_i32_mips64 #define helper_rem_i32 helper_rem_i32_mips64 #define helper_divu_i32 helper_divu_i32_mips64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips64 #define helper_gvec_sar32i helper_gvec_sar32i_mips64 #define helper_gvec_sar64i helper_gvec_sar64i_mips64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_mips64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_mips64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_mips64 +#define 
helper_gvec_rotl64i helper_gvec_rotl64i_mips64 #define helper_gvec_shl8v helper_gvec_shl8v_mips64 #define helper_gvec_shl16v helper_gvec_shl16v_mips64 #define helper_gvec_shl32v helper_gvec_shl32v_mips64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mips64 #define helper_gvec_sar32v helper_gvec_sar32v_mips64 #define helper_gvec_sar64v helper_gvec_sar64v_mips64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_mips64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_mips64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_mips64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_mips64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_mips64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64 #define helper_gvec_eq8 helper_gvec_eq8_mips64 #define helper_gvec_ne8 helper_gvec_ne8_mips64 #define helper_gvec_lt8 helper_gvec_lt8_mips64 @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mips64 #define helper_cfc1 helper_cfc1_mips64 #define helper_ctc1 helper_ctc1_mips64 -#define ieee_ex_to_mips ieee_ex_to_mips_mips64 #define helper_float_sqrt_d helper_float_sqrt_d_mips64 #define helper_float_sqrt_s helper_float_sqrt_s_mips64 #define helper_float_cvtd_s helper_float_cvtd_s_mips64 @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mips64 #define helper_msa_binsli_df helper_msa_binsli_df_mips64 #define helper_msa_binsri_df helper_msa_binsri_df_mips64 -#define helper_msa_subv_df helper_msa_subv_df_mips64 -#define helper_msa_subs_s_df helper_msa_subs_s_df_mips64 -#define helper_msa_subs_u_df helper_msa_subs_u_df_mips64 -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mips64 -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mips64 -#define helper_msa_mulv_df helper_msa_mulv_df_mips64 -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mips64 -#define helper_msa_dotp_u_df helper_msa_dotp_u_df_mips64 +#define helper_msa_subv_b helper_msa_subv_b_mips64 +#define helper_msa_subv_h helper_msa_subv_h_mips64 +#define helper_msa_subv_w helper_msa_subv_w_mips64 +#define helper_msa_subv_d helper_msa_subv_d_mips64 +#define helper_msa_subs_s_b helper_msa_subs_s_b_mips64 +#define helper_msa_subs_s_h helper_msa_subs_s_h_mips64 +#define helper_msa_subs_s_w helper_msa_subs_s_w_mips64 +#define helper_msa_subs_s_d helper_msa_subs_s_d_mips64 +#define helper_msa_subs_u_b helper_msa_subs_u_b_mips64 +#define helper_msa_subs_u_h helper_msa_subs_u_h_mips64 +#define helper_msa_subs_u_w helper_msa_subs_u_w_mips64 +#define helper_msa_subs_u_d helper_msa_subs_u_d_mips64 +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mips64 +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mips64 +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mips64 +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mips64 +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mips64 +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mips64 +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mips64 +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mips64 +#define helper_msa_mulv_b helper_msa_mulv_b_mips64 +#define helper_msa_mulv_h helper_msa_mulv_h_mips64 +#define helper_msa_mulv_w helper_msa_mulv_w_mips64 +#define helper_msa_mulv_d helper_msa_mulv_d_mips64 +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mips64 +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mips64 +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mips64 +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mips64 
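/*
 * Hedged illustration of the MSA helper renaming above (hypothetical code,
 * not QEMU's actual helpers): a single *_df entry point that dispatched on a
 * data-format argument at run time is replaced by one helper per element
 * width (_b/_h/_w/_d), so the translator can bind the width statically.
 */
#include <stdint.h>

/* Old shape: one function, runtime switch on the element width. */
static int64_t msa_like_subv_df(unsigned df, int64_t a, int64_t b)
{
    switch (df) {
    case 0: return (int8_t)(a - b);    /* byte       */
    case 1: return (int16_t)(a - b);   /* halfword   */
    case 2: return (int32_t)(a - b);   /* word       */
    default: return a - b;             /* doubleword */
    }
}

/* New shape: one function per width, chosen when the opcode is translated. */
static int8_t  msa_like_subv_b(int8_t a,  int8_t b)  { return (int8_t)(a - b); }
static int16_t msa_like_subv_h(int16_t a, int16_t b) { return (int16_t)(a - b); }
static int32_t msa_like_subv_w(int32_t a, int32_t b) { return (int32_t)(a - b); }
static int64_t msa_like_subv_d(int64_t a, int64_t b) { return a - b; }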
+#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mips64 +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mips64 #define helper_msa_mul_q_df helper_msa_mul_q_df_mips64 #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mips64 #define helper_msa_sld_df helper_msa_sld_df_mips64 -#define helper_msa_maddv_df helper_msa_maddv_df_mips64 -#define helper_msa_msubv_df helper_msa_msubv_df_mips64 -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mips64 -#define helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mips64 -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mips64 -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mips64 +#define helper_msa_maddv_b helper_msa_maddv_b_mips64 +#define helper_msa_maddv_h helper_msa_maddv_h_mips64 +#define helper_msa_maddv_w helper_msa_maddv_w_mips64 +#define helper_msa_maddv_d helper_msa_maddv_d_mips64 +#define helper_msa_msubv_b helper_msa_msubv_b_mips64 +#define helper_msa_msubv_h helper_msa_msubv_h_mips64 +#define helper_msa_msubv_w helper_msa_msubv_w_mips64 +#define helper_msa_msubv_d helper_msa_msubv_d_mips64 +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mips64 +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mips64 +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mips64 +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mips64 +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mips64 +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mips64 +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mips64 +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mips64 +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mips64 +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mips64 +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mips64 +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mips64 #define helper_msa_binsl_df helper_msa_binsl_df_mips64 #define helper_msa_binsr_df helper_msa_binsr_df_mips64 #define helper_msa_madd_q_df helper_msa_madd_q_df_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 1c3f8ca26f..d8c1ac16b7 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mips64el #define tcg_gen_shr_i64 tcg_gen_shr_i64_mips64el #define tcg_gen_st_i64 tcg_gen_st_i64_mips64el +#define tcg_gen_add_i64 tcg_gen_add_i64_mips64el +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mips64el #define tcg_gen_xor_i64 tcg_gen_xor_i64_mips64el +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mips64el #define cpu_icount_to_ns cpu_icount_to_ns_mips64el #define cpu_is_stopped cpu_is_stopped_mips64el #define cpu_get_ticks cpu_get_ticks_mips64el @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mips64el #define floatx80_mul floatx80_mul_mips64el #define floatx80_div floatx80_div_mips64el +#define floatx80_modrem floatx80_modrem_mips64el +#define floatx80_mod floatx80_mod_mips64el #define floatx80_rem floatx80_rem_mips64el #define floatx80_sqrt floatx80_sqrt_mips64el #define floatx80_eq floatx80_eq_mips64el @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mips64el #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mips64el #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mips64el +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mips64el #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mips64el #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mips64el #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mips64el @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mips64el #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mips64el #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mips64el 
+#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mips64el +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mips64el #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mips64el +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mips64el +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mips64el #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mips64el #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mips64el #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mips64el +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mips64el #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mips64el #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_mips64el #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_mips64el +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mips64el +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mips64el #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mips64el #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mips64el #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips64el @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mips64el #define tcg_gen_shri_vec tcg_gen_shri_vec_mips64el #define tcg_gen_sari_vec tcg_gen_sari_vec_mips64el +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mips64el +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mips64el #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mips64el #define tcg_gen_add_vec tcg_gen_add_vec_mips64el #define tcg_gen_sub_vec tcg_gen_sub_vec_mips64el @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mips64el #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mips64el #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mips64el +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mips64el +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mips64el #define tcg_gen_shls_vec tcg_gen_shls_vec_mips64el #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mips64el #define tcg_gen_sars_vec tcg_gen_sars_vec_mips64el +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mips64el #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mips64el #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mips64el #define tb_htable_lookup tb_htable_lookup_mips64el @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mips64el #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mips64el #define tlb_init tlb_init_mips64el +#define tlb_destroy tlb_destroy_mips64el #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mips64el #define tlb_flush tlb_flush_mips64el #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mips64el @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mips64el #define get_page_addr_code_hostp get_page_addr_code_hostp_mips64el #define get_page_addr_code get_page_addr_code_mips64el +#define probe_access_flags probe_access_flags_mips64el #define probe_access probe_access_mips64el #define tlb_vaddr_to_host tlb_vaddr_to_host_mips64el #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mips64el @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mips64el #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mips64el #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mips64el -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mips64el -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mips64el -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mips64el -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mips64el +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mips64el +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mips64el +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mips64el +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mips64el +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mips64el +#define 
cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mips64el +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mips64el +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mips64el #define cpu_ldub_data_ra cpu_ldub_data_ra_mips64el #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mips64el -#define cpu_lduw_data_ra cpu_lduw_data_ra_mips64el -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mips64el -#define cpu_ldl_data_ra cpu_ldl_data_ra_mips64el -#define cpu_ldq_data_ra cpu_ldq_data_ra_mips64el +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mips64el +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mips64el +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mips64el +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mips64el +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_mips64el +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mips64el +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mips64el +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mips64el #define cpu_ldub_data cpu_ldub_data_mips64el #define cpu_ldsb_data cpu_ldsb_data_mips64el -#define cpu_lduw_data cpu_lduw_data_mips64el -#define cpu_ldsw_data cpu_ldsw_data_mips64el -#define cpu_ldl_data cpu_ldl_data_mips64el -#define cpu_ldq_data cpu_ldq_data_mips64el +#define cpu_lduw_be_data cpu_lduw_be_data_mips64el +#define cpu_lduw_le_data cpu_lduw_le_data_mips64el +#define cpu_ldsw_be_data cpu_ldsw_be_data_mips64el +#define cpu_ldsw_le_data cpu_ldsw_le_data_mips64el +#define cpu_ldl_be_data cpu_ldl_be_data_mips64el +#define cpu_ldl_le_data cpu_ldl_le_data_mips64el +#define cpu_ldq_le_data cpu_ldq_le_data_mips64el +#define cpu_ldq_be_data cpu_ldq_be_data_mips64el #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64el #define helper_le_stw_mmu helper_le_stw_mmu_mips64el #define helper_be_stw_mmu helper_be_stw_mmu_mips64el @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mips64el #define helper_be_stq_mmu helper_be_stq_mmu_mips64el #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mips64el -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mips64el -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mips64el -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mips64el +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mips64el +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mips64el +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mips64el +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mips64el +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mips64el +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mips64el #define cpu_stb_data_ra cpu_stb_data_ra_mips64el -#define cpu_stw_data_ra cpu_stw_data_ra_mips64el -#define cpu_stl_data_ra cpu_stl_data_ra_mips64el -#define cpu_stq_data_ra cpu_stq_data_ra_mips64el +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mips64el +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_mips64el +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mips64el +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mips64el +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mips64el +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mips64el #define cpu_stb_data cpu_stb_data_mips64el -#define cpu_stw_data cpu_stw_data_mips64el -#define cpu_stl_data cpu_stl_data_mips64el -#define cpu_stq_data cpu_stq_data_mips64el +#define cpu_stw_be_data cpu_stw_be_data_mips64el +#define cpu_stw_le_data cpu_stw_le_data_mips64el +#define cpu_stl_be_data cpu_stl_be_data_mips64el +#define cpu_stl_le_data cpu_stl_le_data_mips64el +#define cpu_stq_be_data cpu_stq_be_data_mips64el +#define cpu_stq_le_data cpu_stq_le_data_mips64el #define helper_atomic_cmpxchgb_mmu 
helper_atomic_cmpxchgb_mmu_mips64el #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mips64el #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mips64el @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mips64el #define cpu_ldl_code cpu_ldl_code_mips64el #define cpu_ldq_code cpu_ldq_code_mips64el +#define cpu_interrupt_handler cpu_interrupt_handler_mips64el #define helper_div_i32 helper_div_i32_mips64el #define helper_rem_i32 helper_rem_i32_mips64el #define helper_divu_i32 helper_divu_i32_mips64el @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips64el #define helper_gvec_sar32i helper_gvec_sar32i_mips64el #define helper_gvec_sar64i helper_gvec_sar64i_mips64el +#define helper_gvec_rotl8i helper_gvec_rotl8i_mips64el +#define helper_gvec_rotl16i helper_gvec_rotl16i_mips64el +#define helper_gvec_rotl32i helper_gvec_rotl32i_mips64el +#define helper_gvec_rotl64i helper_gvec_rotl64i_mips64el #define helper_gvec_shl8v helper_gvec_shl8v_mips64el #define helper_gvec_shl16v helper_gvec_shl16v_mips64el #define helper_gvec_shl32v helper_gvec_shl32v_mips64el @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mips64el #define helper_gvec_sar32v helper_gvec_sar32v_mips64el #define helper_gvec_sar64v helper_gvec_sar64v_mips64el +#define helper_gvec_rotl8v helper_gvec_rotl8v_mips64el +#define helper_gvec_rotl16v helper_gvec_rotl16v_mips64el +#define helper_gvec_rotl32v helper_gvec_rotl32v_mips64el +#define helper_gvec_rotl64v helper_gvec_rotl64v_mips64el +#define helper_gvec_rotr8v helper_gvec_rotr8v_mips64el +#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64el +#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64el +#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64el #define helper_gvec_eq8 helper_gvec_eq8_mips64el #define helper_gvec_ne8 helper_gvec_ne8_mips64el #define helper_gvec_lt8 helper_gvec_lt8_mips64el @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mips64el #define helper_cfc1 helper_cfc1_mips64el #define helper_ctc1 helper_ctc1_mips64el -#define ieee_ex_to_mips ieee_ex_to_mips_mips64el #define helper_float_sqrt_d helper_float_sqrt_d_mips64el #define helper_float_sqrt_s helper_float_sqrt_s_mips64el #define helper_float_cvtd_s helper_float_cvtd_s_mips64el @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mips64el #define helper_msa_binsli_df helper_msa_binsli_df_mips64el #define helper_msa_binsri_df helper_msa_binsri_df_mips64el -#define helper_msa_subv_df helper_msa_subv_df_mips64el -#define helper_msa_subs_s_df helper_msa_subs_s_df_mips64el -#define helper_msa_subs_u_df helper_msa_subs_u_df_mips64el -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mips64el -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mips64el -#define helper_msa_mulv_df helper_msa_mulv_df_mips64el -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mips64el -#define helper_msa_dotp_u_df helper_msa_dotp_u_df_mips64el +#define helper_msa_subv_b helper_msa_subv_b_mips64el +#define helper_msa_subv_h helper_msa_subv_h_mips64el +#define helper_msa_subv_w helper_msa_subv_w_mips64el +#define helper_msa_subv_d helper_msa_subv_d_mips64el +#define helper_msa_subs_s_b helper_msa_subs_s_b_mips64el +#define helper_msa_subs_s_h helper_msa_subs_s_h_mips64el +#define helper_msa_subs_s_w helper_msa_subs_s_w_mips64el +#define helper_msa_subs_s_d helper_msa_subs_s_d_mips64el +#define helper_msa_subs_u_b helper_msa_subs_u_b_mips64el +#define helper_msa_subs_u_h helper_msa_subs_u_h_mips64el +#define helper_msa_subs_u_w 
helper_msa_subs_u_w_mips64el +#define helper_msa_subs_u_d helper_msa_subs_u_d_mips64el +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mips64el +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mips64el +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mips64el +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mips64el +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mips64el +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mips64el +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mips64el +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mips64el +#define helper_msa_mulv_b helper_msa_mulv_b_mips64el +#define helper_msa_mulv_h helper_msa_mulv_h_mips64el +#define helper_msa_mulv_w helper_msa_mulv_w_mips64el +#define helper_msa_mulv_d helper_msa_mulv_d_mips64el +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mips64el +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mips64el +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mips64el +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mips64el +#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mips64el +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mips64el #define helper_msa_mul_q_df helper_msa_mul_q_df_mips64el #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mips64el #define helper_msa_sld_df helper_msa_sld_df_mips64el -#define helper_msa_maddv_df helper_msa_maddv_df_mips64el -#define helper_msa_msubv_df helper_msa_msubv_df_mips64el -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mips64el -#define helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mips64el -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mips64el -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mips64el +#define helper_msa_maddv_b helper_msa_maddv_b_mips64el +#define helper_msa_maddv_h helper_msa_maddv_h_mips64el +#define helper_msa_maddv_w helper_msa_maddv_w_mips64el +#define helper_msa_maddv_d helper_msa_maddv_d_mips64el +#define helper_msa_msubv_b helper_msa_msubv_b_mips64el +#define helper_msa_msubv_h helper_msa_msubv_h_mips64el +#define helper_msa_msubv_w helper_msa_msubv_w_mips64el +#define helper_msa_msubv_d helper_msa_msubv_d_mips64el +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mips64el +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mips64el +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mips64el +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mips64el +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mips64el +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mips64el +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mips64el +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mips64el +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mips64el +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mips64el +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mips64el +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mips64el #define helper_msa_binsl_df helper_msa_binsl_df_mips64el #define helper_msa_binsr_df helper_msa_binsr_df_mips64el #define helper_msa_madd_q_df helper_msa_madd_q_df_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index 511cfcfb0c..5e47f6184d 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_mipsel #define tcg_gen_shr_i64 tcg_gen_shr_i64_mipsel #define tcg_gen_st_i64 tcg_gen_st_i64_mipsel +#define tcg_gen_add_i64 tcg_gen_add_i64_mipsel +#define tcg_gen_sub_i64 tcg_gen_sub_i64_mipsel #define tcg_gen_xor_i64 tcg_gen_xor_i64_mipsel +#define tcg_gen_neg_i64 tcg_gen_neg_i64_mipsel #define cpu_icount_to_ns cpu_icount_to_ns_mipsel #define 
cpu_is_stopped cpu_is_stopped_mipsel #define cpu_get_ticks cpu_get_ticks_mipsel @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_mipsel #define floatx80_mul floatx80_mul_mipsel #define floatx80_div floatx80_div_mipsel +#define floatx80_modrem floatx80_modrem_mipsel +#define floatx80_mod floatx80_mod_mipsel #define floatx80_rem floatx80_rem_mipsel #define floatx80_sqrt floatx80_sqrt_mipsel #define floatx80_eq floatx80_eq_mipsel @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_mipsel #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_mipsel #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_mipsel +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_mipsel #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_mipsel #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_mipsel #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_mipsel @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_mipsel #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_mipsel #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_mipsel +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_mipsel +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_mipsel #define tcg_gen_gvec_sari tcg_gen_gvec_sari_mipsel +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_mipsel +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_mipsel #define tcg_gen_gvec_shls tcg_gen_gvec_shls_mipsel #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_mipsel #define tcg_gen_gvec_sars tcg_gen_gvec_sars_mipsel +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_mipsel #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_mipsel #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_mipsel #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_mipsel +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_mipsel +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_mipsel #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_mipsel #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_mipsel #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mipsel @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_mipsel #define tcg_gen_shri_vec tcg_gen_shri_vec_mipsel #define tcg_gen_sari_vec tcg_gen_sari_vec_mipsel +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_mipsel +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_mipsel #define tcg_gen_cmp_vec tcg_gen_cmp_vec_mipsel #define tcg_gen_add_vec tcg_gen_add_vec_mipsel #define tcg_gen_sub_vec tcg_gen_sub_vec_mipsel @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_mipsel #define tcg_gen_shrv_vec tcg_gen_shrv_vec_mipsel #define tcg_gen_sarv_vec tcg_gen_sarv_vec_mipsel +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_mipsel +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_mipsel #define tcg_gen_shls_vec tcg_gen_shls_vec_mipsel #define tcg_gen_shrs_vec tcg_gen_shrs_vec_mipsel #define tcg_gen_sars_vec tcg_gen_sars_vec_mipsel +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_mipsel #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_mipsel #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_mipsel #define tb_htable_lookup tb_htable_lookup_mipsel @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_mipsel #define cpu_loop_exit_atomic cpu_loop_exit_atomic_mipsel #define tlb_init tlb_init_mipsel +#define tlb_destroy tlb_destroy_mipsel #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_mipsel #define tlb_flush tlb_flush_mipsel #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_mipsel @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_mipsel #define get_page_addr_code_hostp get_page_addr_code_hostp_mipsel #define get_page_addr_code get_page_addr_code_mipsel 
+#define probe_access_flags probe_access_flags_mipsel #define probe_access probe_access_mipsel #define tlb_vaddr_to_host tlb_vaddr_to_host_mipsel #define helper_ret_ldub_mmu helper_ret_ldub_mmu_mipsel @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_mipsel #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_mipsel #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_mipsel -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_mipsel -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_mipsel -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_mipsel -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_mipsel +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_mipsel +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_mipsel +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_mipsel +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_mipsel +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_mipsel +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_mipsel +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_mipsel +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_mipsel #define cpu_ldub_data_ra cpu_ldub_data_ra_mipsel #define cpu_ldsb_data_ra cpu_ldsb_data_ra_mipsel -#define cpu_lduw_data_ra cpu_lduw_data_ra_mipsel -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_mipsel -#define cpu_ldl_data_ra cpu_ldl_data_ra_mipsel -#define cpu_ldq_data_ra cpu_ldq_data_ra_mipsel +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_mipsel +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_mipsel +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_mipsel +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_mipsel +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_mipsel +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_mipsel +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_mipsel +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_mipsel #define cpu_ldub_data cpu_ldub_data_mipsel #define cpu_ldsb_data cpu_ldsb_data_mipsel -#define cpu_lduw_data cpu_lduw_data_mipsel -#define cpu_ldsw_data cpu_ldsw_data_mipsel -#define cpu_ldl_data cpu_ldl_data_mipsel -#define cpu_ldq_data cpu_ldq_data_mipsel +#define cpu_lduw_be_data cpu_lduw_be_data_mipsel +#define cpu_lduw_le_data cpu_lduw_le_data_mipsel +#define cpu_ldsw_be_data cpu_ldsw_be_data_mipsel +#define cpu_ldsw_le_data cpu_ldsw_le_data_mipsel +#define cpu_ldl_be_data cpu_ldl_be_data_mipsel +#define cpu_ldl_le_data cpu_ldl_le_data_mipsel +#define cpu_ldq_le_data cpu_ldq_le_data_mipsel +#define cpu_ldq_be_data cpu_ldq_be_data_mipsel #define helper_ret_stb_mmu helper_ret_stb_mmu_mipsel #define helper_le_stw_mmu helper_le_stw_mmu_mipsel #define helper_be_stw_mmu helper_be_stw_mmu_mipsel @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_mipsel #define helper_be_stq_mmu helper_be_stq_mmu_mipsel #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_mipsel -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_mipsel -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_mipsel -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_mipsel +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_mipsel +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_mipsel +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_mipsel +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_mipsel +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_mipsel +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_mipsel #define cpu_stb_data_ra cpu_stb_data_ra_mipsel -#define cpu_stw_data_ra cpu_stw_data_ra_mipsel -#define cpu_stl_data_ra cpu_stl_data_ra_mipsel -#define cpu_stq_data_ra cpu_stq_data_ra_mipsel +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_mipsel +#define 
cpu_stw_le_data_ra cpu_stw_le_data_ra_mipsel +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_mipsel +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_mipsel +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_mipsel +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_mipsel #define cpu_stb_data cpu_stb_data_mipsel -#define cpu_stw_data cpu_stw_data_mipsel -#define cpu_stl_data cpu_stl_data_mipsel -#define cpu_stq_data cpu_stq_data_mipsel +#define cpu_stw_be_data cpu_stw_be_data_mipsel +#define cpu_stw_le_data cpu_stw_le_data_mipsel +#define cpu_stl_be_data cpu_stl_be_data_mipsel +#define cpu_stl_le_data cpu_stl_le_data_mipsel +#define cpu_stq_be_data cpu_stq_be_data_mipsel +#define cpu_stq_le_data cpu_stq_le_data_mipsel #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_mipsel #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_mipsel #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_mipsel @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_mipsel #define cpu_ldl_code cpu_ldl_code_mipsel #define cpu_ldq_code cpu_ldq_code_mipsel +#define cpu_interrupt_handler cpu_interrupt_handler_mipsel #define helper_div_i32 helper_div_i32_mipsel #define helper_rem_i32 helper_rem_i32_mipsel #define helper_divu_i32 helper_divu_i32_mipsel @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mipsel #define helper_gvec_sar32i helper_gvec_sar32i_mipsel #define helper_gvec_sar64i helper_gvec_sar64i_mipsel +#define helper_gvec_rotl8i helper_gvec_rotl8i_mipsel +#define helper_gvec_rotl16i helper_gvec_rotl16i_mipsel +#define helper_gvec_rotl32i helper_gvec_rotl32i_mipsel +#define helper_gvec_rotl64i helper_gvec_rotl64i_mipsel #define helper_gvec_shl8v helper_gvec_shl8v_mipsel #define helper_gvec_shl16v helper_gvec_shl16v_mipsel #define helper_gvec_shl32v helper_gvec_shl32v_mipsel @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_mipsel #define helper_gvec_sar32v helper_gvec_sar32v_mipsel #define helper_gvec_sar64v helper_gvec_sar64v_mipsel +#define helper_gvec_rotl8v helper_gvec_rotl8v_mipsel +#define helper_gvec_rotl16v helper_gvec_rotl16v_mipsel +#define helper_gvec_rotl32v helper_gvec_rotl32v_mipsel +#define helper_gvec_rotl64v helper_gvec_rotl64v_mipsel +#define helper_gvec_rotr8v helper_gvec_rotr8v_mipsel +#define helper_gvec_rotr16v helper_gvec_rotr16v_mipsel +#define helper_gvec_rotr32v helper_gvec_rotr32v_mipsel +#define helper_gvec_rotr64v helper_gvec_rotr64v_mipsel #define helper_gvec_eq8 helper_gvec_eq8_mipsel #define helper_gvec_ne8 helper_gvec_ne8_mipsel #define helper_gvec_lt8 helper_gvec_lt8_mipsel @@ -1677,7 +1731,6 @@ #define helper_rddsp helper_rddsp_mipsel #define helper_cfc1 helper_cfc1_mipsel #define helper_ctc1 helper_ctc1_mipsel -#define ieee_ex_to_mips ieee_ex_to_mips_mipsel #define helper_float_sqrt_d helper_float_sqrt_d_mipsel #define helper_float_sqrt_s helper_float_sqrt_s_mipsel #define helper_float_cvtd_s helper_float_cvtd_s_mipsel @@ -2232,23 +2285,59 @@ #define helper_msa_srlri_df helper_msa_srlri_df_mipsel #define helper_msa_binsli_df helper_msa_binsli_df_mipsel #define helper_msa_binsri_df helper_msa_binsri_df_mipsel -#define helper_msa_subv_df helper_msa_subv_df_mipsel -#define helper_msa_subs_s_df helper_msa_subs_s_df_mipsel -#define helper_msa_subs_u_df helper_msa_subs_u_df_mipsel -#define helper_msa_subsus_u_df helper_msa_subsus_u_df_mipsel -#define helper_msa_subsuu_s_df helper_msa_subsuu_s_df_mipsel -#define helper_msa_mulv_df helper_msa_mulv_df_mipsel -#define helper_msa_dotp_s_df helper_msa_dotp_s_df_mipsel -#define 
helper_msa_dotp_u_df helper_msa_dotp_u_df_mipsel +#define helper_msa_subv_b helper_msa_subv_b_mipsel +#define helper_msa_subv_h helper_msa_subv_h_mipsel +#define helper_msa_subv_w helper_msa_subv_w_mipsel +#define helper_msa_subv_d helper_msa_subv_d_mipsel +#define helper_msa_subs_s_b helper_msa_subs_s_b_mipsel +#define helper_msa_subs_s_h helper_msa_subs_s_h_mipsel +#define helper_msa_subs_s_w helper_msa_subs_s_w_mipsel +#define helper_msa_subs_s_d helper_msa_subs_s_d_mipsel +#define helper_msa_subs_u_b helper_msa_subs_u_b_mipsel +#define helper_msa_subs_u_h helper_msa_subs_u_h_mipsel +#define helper_msa_subs_u_w helper_msa_subs_u_w_mipsel +#define helper_msa_subs_u_d helper_msa_subs_u_d_mipsel +#define helper_msa_subsus_u_b helper_msa_subsus_u_b_mipsel +#define helper_msa_subsus_u_h helper_msa_subsus_u_h_mipsel +#define helper_msa_subsus_u_w helper_msa_subsus_u_w_mipsel +#define helper_msa_subsus_u_d helper_msa_subsus_u_d_mipsel +#define helper_msa_subsuu_s_b helper_msa_subsuu_s_b_mipsel +#define helper_msa_subsuu_s_h helper_msa_subsuu_s_h_mipsel +#define helper_msa_subsuu_s_w helper_msa_subsuu_s_w_mipsel +#define helper_msa_subsuu_s_d helper_msa_subsuu_s_d_mipsel +#define helper_msa_mulv_b helper_msa_mulv_b_mipsel +#define helper_msa_mulv_h helper_msa_mulv_h_mipsel +#define helper_msa_mulv_w helper_msa_mulv_w_mipsel +#define helper_msa_mulv_d helper_msa_mulv_d_mipsel +#define helper_msa_dotp_s_h helper_msa_dotp_s_h_mipsel +#define helper_msa_dotp_s_w helper_msa_dotp_s_w_mipsel +#define helper_msa_dotp_s_d helper_msa_dotp_s_d_mipsel +#define helper_msa_dotp_u_h helper_msa_dotp_u_h_mipsel +#define helper_msa_dotp_u_w helper_msa_dotp_u_w_mipsel +#define helper_msa_dotp_u_d helper_msa_dotp_u_d_mipsel #define helper_msa_mul_q_df helper_msa_mul_q_df_mipsel #define helper_msa_mulr_q_df helper_msa_mulr_q_df_mipsel #define helper_msa_sld_df helper_msa_sld_df_mipsel -#define helper_msa_maddv_df helper_msa_maddv_df_mipsel -#define helper_msa_msubv_df helper_msa_msubv_df_mipsel -#define helper_msa_dpadd_s_df helper_msa_dpadd_s_df_mipsel -#define helper_msa_dpadd_u_df helper_msa_dpadd_u_df_mipsel -#define helper_msa_dpsub_s_df helper_msa_dpsub_s_df_mipsel -#define helper_msa_dpsub_u_df helper_msa_dpsub_u_df_mipsel +#define helper_msa_maddv_b helper_msa_maddv_b_mipsel +#define helper_msa_maddv_h helper_msa_maddv_h_mipsel +#define helper_msa_maddv_w helper_msa_maddv_w_mipsel +#define helper_msa_maddv_d helper_msa_maddv_d_mipsel +#define helper_msa_msubv_b helper_msa_msubv_b_mipsel +#define helper_msa_msubv_h helper_msa_msubv_h_mipsel +#define helper_msa_msubv_w helper_msa_msubv_w_mipsel +#define helper_msa_msubv_d helper_msa_msubv_d_mipsel +#define helper_msa_dpadd_s_h helper_msa_dpadd_s_h_mipsel +#define helper_msa_dpadd_s_w helper_msa_dpadd_s_w_mipsel +#define helper_msa_dpadd_s_d helper_msa_dpadd_s_d_mipsel +#define helper_msa_dpadd_u_h helper_msa_dpadd_u_h_mipsel +#define helper_msa_dpadd_u_w helper_msa_dpadd_u_w_mipsel +#define helper_msa_dpadd_u_d helper_msa_dpadd_u_d_mipsel +#define helper_msa_dpsub_s_h helper_msa_dpsub_s_h_mipsel +#define helper_msa_dpsub_s_w helper_msa_dpsub_s_w_mipsel +#define helper_msa_dpsub_s_d helper_msa_dpsub_s_d_mipsel +#define helper_msa_dpsub_u_h helper_msa_dpsub_u_h_mipsel +#define helper_msa_dpsub_u_w helper_msa_dpsub_u_w_mipsel +#define helper_msa_dpsub_u_d helper_msa_dpsub_u_d_mipsel #define helper_msa_binsl_df helper_msa_binsl_df_mipsel #define helper_msa_binsr_df helper_msa_binsr_df_mipsel #define helper_msa_madd_q_df helper_msa_madd_q_df_mipsel diff --git 
a/qemu/ppc.h b/qemu/ppc.h index 7fd122913d..28ca9753e2 100644 --- a/qemu/ppc.h +++ b/qemu/ppc.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_ppc #define tcg_gen_shr_i64 tcg_gen_shr_i64_ppc #define tcg_gen_st_i64 tcg_gen_st_i64_ppc +#define tcg_gen_add_i64 tcg_gen_add_i64_ppc +#define tcg_gen_sub_i64 tcg_gen_sub_i64_ppc #define tcg_gen_xor_i64 tcg_gen_xor_i64_ppc +#define tcg_gen_neg_i64 tcg_gen_neg_i64_ppc #define cpu_icount_to_ns cpu_icount_to_ns_ppc #define cpu_is_stopped cpu_is_stopped_ppc #define cpu_get_ticks cpu_get_ticks_ppc @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_ppc #define floatx80_mul floatx80_mul_ppc #define floatx80_div floatx80_div_ppc +#define floatx80_modrem floatx80_modrem_ppc +#define floatx80_mod floatx80_mod_ppc #define floatx80_rem floatx80_rem_ppc #define floatx80_sqrt floatx80_sqrt_ppc #define floatx80_eq floatx80_eq_ppc @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_ppc #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_ppc #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_ppc +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_ppc #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_ppc #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_ppc #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_ppc @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_ppc #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_ppc #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_ppc +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_ppc +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_ppc #define tcg_gen_gvec_sari tcg_gen_gvec_sari_ppc +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_ppc +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_ppc #define tcg_gen_gvec_shls tcg_gen_gvec_shls_ppc #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_ppc #define tcg_gen_gvec_sars tcg_gen_gvec_sars_ppc +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_ppc #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_ppc #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_ppc #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_ppc +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_ppc +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_ppc #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_ppc #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_ppc #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_ppc @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_ppc #define tcg_gen_shri_vec tcg_gen_shri_vec_ppc #define tcg_gen_sari_vec tcg_gen_sari_vec_ppc +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_ppc +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_ppc #define tcg_gen_cmp_vec tcg_gen_cmp_vec_ppc #define tcg_gen_add_vec tcg_gen_add_vec_ppc #define tcg_gen_sub_vec tcg_gen_sub_vec_ppc @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_ppc #define tcg_gen_shrv_vec tcg_gen_shrv_vec_ppc #define tcg_gen_sarv_vec tcg_gen_sarv_vec_ppc +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_ppc +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_ppc #define tcg_gen_shls_vec tcg_gen_shls_vec_ppc #define tcg_gen_shrs_vec tcg_gen_shrs_vec_ppc #define tcg_gen_sars_vec tcg_gen_sars_vec_ppc +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_ppc #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_ppc #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_ppc #define tb_htable_lookup tb_htable_lookup_ppc @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_ppc #define cpu_loop_exit_atomic cpu_loop_exit_atomic_ppc #define tlb_init tlb_init_ppc +#define tlb_destroy tlb_destroy_ppc #define tlb_flush_by_mmuidx 
tlb_flush_by_mmuidx_ppc #define tlb_flush tlb_flush_ppc #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_ppc @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_ppc #define get_page_addr_code_hostp get_page_addr_code_hostp_ppc #define get_page_addr_code get_page_addr_code_ppc +#define probe_access_flags probe_access_flags_ppc #define probe_access probe_access_ppc #define tlb_vaddr_to_host tlb_vaddr_to_host_ppc #define helper_ret_ldub_mmu helper_ret_ldub_mmu_ppc @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_ppc #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_ppc #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_ppc -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_ppc -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_ppc -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_ppc -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_ppc +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_ppc +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_ppc +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_ppc +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_ppc +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_ppc +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_ppc +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_ppc +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_ppc #define cpu_ldub_data_ra cpu_ldub_data_ra_ppc #define cpu_ldsb_data_ra cpu_ldsb_data_ra_ppc -#define cpu_lduw_data_ra cpu_lduw_data_ra_ppc -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_ppc -#define cpu_ldl_data_ra cpu_ldl_data_ra_ppc -#define cpu_ldq_data_ra cpu_ldq_data_ra_ppc +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_ppc +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_ppc +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_ppc +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_ppc +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_ppc +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_ppc +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_ppc +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_ppc #define cpu_ldub_data cpu_ldub_data_ppc #define cpu_ldsb_data cpu_ldsb_data_ppc -#define cpu_lduw_data cpu_lduw_data_ppc -#define cpu_ldsw_data cpu_ldsw_data_ppc -#define cpu_ldl_data cpu_ldl_data_ppc -#define cpu_ldq_data cpu_ldq_data_ppc +#define cpu_lduw_be_data cpu_lduw_be_data_ppc +#define cpu_lduw_le_data cpu_lduw_le_data_ppc +#define cpu_ldsw_be_data cpu_ldsw_be_data_ppc +#define cpu_ldsw_le_data cpu_ldsw_le_data_ppc +#define cpu_ldl_be_data cpu_ldl_be_data_ppc +#define cpu_ldl_le_data cpu_ldl_le_data_ppc +#define cpu_ldq_le_data cpu_ldq_le_data_ppc +#define cpu_ldq_be_data cpu_ldq_be_data_ppc #define helper_ret_stb_mmu helper_ret_stb_mmu_ppc #define helper_le_stw_mmu helper_le_stw_mmu_ppc #define helper_be_stw_mmu helper_be_stw_mmu_ppc @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_ppc #define helper_be_stq_mmu helper_be_stq_mmu_ppc #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_ppc -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_ppc -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_ppc -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_ppc +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_ppc +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_ppc +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_ppc +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_ppc +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_ppc +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_ppc #define cpu_stb_data_ra cpu_stb_data_ra_ppc -#define cpu_stw_data_ra cpu_stw_data_ra_ppc -#define cpu_stl_data_ra cpu_stl_data_ra_ppc -#define cpu_stq_data_ra 
cpu_stq_data_ra_ppc +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_ppc +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_ppc +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_ppc +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_ppc +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_ppc +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_ppc #define cpu_stb_data cpu_stb_data_ppc -#define cpu_stw_data cpu_stw_data_ppc -#define cpu_stl_data cpu_stl_data_ppc -#define cpu_stq_data cpu_stq_data_ppc +#define cpu_stw_be_data cpu_stw_be_data_ppc +#define cpu_stw_le_data cpu_stw_le_data_ppc +#define cpu_stl_be_data cpu_stl_be_data_ppc +#define cpu_stl_le_data cpu_stl_le_data_ppc +#define cpu_stq_be_data cpu_stq_be_data_ppc +#define cpu_stq_le_data cpu_stq_le_data_ppc #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_ppc #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_ppc #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_ppc @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_ppc #define cpu_ldl_code cpu_ldl_code_ppc #define cpu_ldq_code cpu_ldq_code_ppc +#define cpu_interrupt_handler cpu_interrupt_handler_ppc #define helper_div_i32 helper_div_i32_ppc #define helper_rem_i32 helper_rem_i32_ppc #define helper_divu_i32 helper_divu_i32_ppc @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_ppc #define helper_gvec_sar32i helper_gvec_sar32i_ppc #define helper_gvec_sar64i helper_gvec_sar64i_ppc +#define helper_gvec_rotl8i helper_gvec_rotl8i_ppc +#define helper_gvec_rotl16i helper_gvec_rotl16i_ppc +#define helper_gvec_rotl32i helper_gvec_rotl32i_ppc +#define helper_gvec_rotl64i helper_gvec_rotl64i_ppc #define helper_gvec_shl8v helper_gvec_shl8v_ppc #define helper_gvec_shl16v helper_gvec_shl16v_ppc #define helper_gvec_shl32v helper_gvec_shl32v_ppc @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_ppc #define helper_gvec_sar32v helper_gvec_sar32v_ppc #define helper_gvec_sar64v helper_gvec_sar64v_ppc +#define helper_gvec_rotl8v helper_gvec_rotl8v_ppc +#define helper_gvec_rotl16v helper_gvec_rotl16v_ppc +#define helper_gvec_rotl32v helper_gvec_rotl32v_ppc +#define helper_gvec_rotl64v helper_gvec_rotl64v_ppc +#define helper_gvec_rotr8v helper_gvec_rotr8v_ppc +#define helper_gvec_rotr16v helper_gvec_rotr16v_ppc +#define helper_gvec_rotr32v helper_gvec_rotr32v_ppc +#define helper_gvec_rotr64v helper_gvec_rotr64v_ppc #define helper_gvec_eq8 helper_gvec_eq8_ppc #define helper_gvec_ne8 helper_gvec_ne8_ppc #define helper_gvec_lt8 helper_gvec_lt8_ppc @@ -1575,6 +1629,33 @@ #define helper_tbegin helper_tbegin_ppc #define helper_load_dump_spr helper_load_dump_spr_ppc #define helper_store_dump_spr helper_store_dump_spr_ppc +#define store_fpscr store_fpscr_ppc +#define helper_store_fpscr helper_store_fpscr_ppc +#define helper_float_check_status helper_float_check_status_ppc +#define helper_reset_fpstatus helper_reset_fpstatus_ppc +#define helper_fadd helper_fadd_ppc +#define helper_fsub helper_fsub_ppc +#define helper_fmul helper_fmul_ppc +#define helper_fdiv helper_fdiv_ppc +#define helper_fctiw helper_fctiw_ppc +#define helper_fctiwz helper_fctiwz_ppc +#define helper_fctiwuz helper_fctiwuz_ppc +#define helper_fctid helper_fctid_ppc +#define helper_fctidz helper_fctidz_ppc +#define helper_fctidu helper_fctidu_ppc +#define helper_fctiduz helper_fctiduz_ppc +#define helper_fcfid helper_fcfid_ppc +#define helper_fcfids helper_fcfids_ppc +#define helper_fcfidu helper_fcfidu_ppc +#define helper_fcfidus helper_fcfidus_ppc +#define helper_frin helper_frin_ppc +#define 
helper_friz helper_friz_ppc +#define helper_frip helper_frip_ppc +#define helper_frim helper_frim_ppc +#define helper_fmadd helper_fmadd_ppc +#define helper_fnmadd helper_fnmadd_ppc +#define helper_fmsub helper_fmsub_ppc +#define helper_fnmsub helper_fnmsub_ppc #define helper_hfscr_facility_check helper_hfscr_facility_check_ppc #define helper_fscr_facility_check helper_fscr_facility_check_ppc #define helper_msr_facility_check helper_msr_facility_check_ppc @@ -1726,6 +1807,243 @@ #define ppc_hash32_handle_mmu_fault ppc_hash32_handle_mmu_fault_ppc #define gen_helper_store_booke_tsr gen_helper_store_booke_tsr_ppc #define gen_helper_store_booke_tcr gen_helper_store_booke_tcr_ppc +#define gen_helper_store_fpscr gen_helper_store_fpscr_ppc #define store_booke_tcr store_booke_tcr_ppc #define ppc_hash32_get_phys_page_debug ppc_hash32_get_phys_page_debug_ppc +#define helper_compute_fprf_float128 helper_compute_fprf_float128_ppc +#define helper_compute_fprf_float16 helper_compute_fprf_float16_ppc +#define helper_compute_fprf_float32 helper_compute_fprf_float32_ppc +#define helper_compute_fprf_float64 helper_compute_fprf_float64_ppc +#define helper_efdadd helper_efdadd_ppc +#define helper_efdcfs helper_efdcfs_ppc +#define helper_efdcfsf helper_efdcfsf_ppc +#define helper_efdcfsi helper_efdcfsi_ppc +#define helper_efdcfsid helper_efdcfsid_ppc +#define helper_efdcfuf helper_efdcfuf_ppc +#define helper_efdcfui helper_efdcfui_ppc +#define helper_efdcfuid helper_efdcfuid_ppc +#define helper_efdcmpeq helper_efdcmpeq_ppc +#define helper_efdcmpgt helper_efdcmpgt_ppc +#define helper_efdcmplt helper_efdcmplt_ppc +#define helper_efdctsf helper_efdctsf_ppc +#define helper_efdctsi helper_efdctsi_ppc +#define helper_efdctsidz helper_efdctsidz_ppc +#define helper_efdctsiz helper_efdctsiz_ppc +#define helper_efdctuf helper_efdctuf_ppc +#define helper_efdctui helper_efdctui_ppc +#define helper_efdctuidz helper_efdctuidz_ppc +#define helper_efdctuiz helper_efdctuiz_ppc +#define helper_efddiv helper_efddiv_ppc +#define helper_efdmul helper_efdmul_ppc +#define helper_efdsub helper_efdsub_ppc +#define helper_efdtsteq helper_efdtsteq_ppc +#define helper_efdtstgt helper_efdtstgt_ppc +#define helper_efdtstlt helper_efdtstlt_ppc +#define helper_efsadd helper_efsadd_ppc +#define helper_efscfd helper_efscfd_ppc +#define helper_efscfsf helper_efscfsf_ppc +#define helper_efscfsi helper_efscfsi_ppc +#define helper_efscfuf helper_efscfuf_ppc +#define helper_efscfui helper_efscfui_ppc +#define helper_efscmpeq helper_efscmpeq_ppc +#define helper_efscmpgt helper_efscmpgt_ppc +#define helper_efscmplt helper_efscmplt_ppc +#define helper_efsctsf helper_efsctsf_ppc +#define helper_efsctsi helper_efsctsi_ppc +#define helper_efsctsiz helper_efsctsiz_ppc +#define helper_efsctuf helper_efsctuf_ppc +#define helper_efsctui helper_efsctui_ppc +#define helper_efsctuiz helper_efsctuiz_ppc +#define helper_efsdiv helper_efsdiv_ppc +#define helper_efsmul helper_efsmul_ppc +#define helper_efssub helper_efssub_ppc +#define helper_efststeq helper_efststeq_ppc +#define helper_efststgt helper_efststgt_ppc +#define helper_efststlt helper_efststlt_ppc +#define helper_evfsadd helper_evfsadd_ppc +#define helper_evfscfsf helper_evfscfsf_ppc +#define helper_evfscfsi helper_evfscfsi_ppc +#define helper_evfscfuf helper_evfscfuf_ppc +#define helper_evfscfui helper_evfscfui_ppc +#define helper_evfscmpeq helper_evfscmpeq_ppc +#define helper_evfscmpgt helper_evfscmpgt_ppc +#define helper_evfscmplt helper_evfscmplt_ppc +#define helper_evfsctsf helper_evfsctsf_ppc 
+#define helper_evfsctsi helper_evfsctsi_ppc +#define helper_evfsctsiz helper_evfsctsiz_ppc +#define helper_evfsctuf helper_evfsctuf_ppc +#define helper_evfsctui helper_evfsctui_ppc +#define helper_evfsctuiz helper_evfsctuiz_ppc +#define helper_evfsdiv helper_evfsdiv_ppc +#define helper_evfsmul helper_evfsmul_ppc +#define helper_evfssub helper_evfssub_ppc +#define helper_evfststeq helper_evfststeq_ppc +#define helper_evfststgt helper_evfststgt_ppc +#define helper_evfststlt helper_evfststlt_ppc +#define helper_fcmpo helper_fcmpo_ppc +#define helper_fcmpu helper_fcmpu_ppc +#define helper_fctiwu helper_fctiwu_ppc +#define helper_fpscr_clrbit helper_fpscr_clrbit_ppc +#define helper_fpscr_setbit helper_fpscr_setbit_ppc +#define helper_fre helper_fre_ppc +#define helper_fres helper_fres_ppc +#define helper_frsp helper_frsp_ppc +#define helper_frsqrte helper_frsqrte_ppc +#define helper_fsel helper_fsel_ppc +#define helper_fsqrt helper_fsqrt_ppc +#define helper_ftdiv helper_ftdiv_ppc +#define helper_ftsqrt helper_ftsqrt_ppc +#define helper_todouble helper_todouble_ppc +#define helper_tosingle helper_tosingle_ppc +#define helper_xsadddp helper_xsadddp_ppc +#define helper_xsaddqp helper_xsaddqp_ppc +#define helper_xsaddsp helper_xsaddsp_ppc +#define helper_xscmpeqdp helper_xscmpeqdp_ppc +#define helper_xscmpexpdp helper_xscmpexpdp_ppc +#define helper_xscmpexpqp helper_xscmpexpqp_ppc +#define helper_xscmpgedp helper_xscmpgedp_ppc +#define helper_xscmpgtdp helper_xscmpgtdp_ppc +#define helper_xscmpnedp helper_xscmpnedp_ppc +#define helper_xscmpodp helper_xscmpodp_ppc +#define helper_xscmpoqp helper_xscmpoqp_ppc +#define helper_xscmpudp helper_xscmpudp_ppc +#define helper_xscmpuqp helper_xscmpuqp_ppc +#define helper_xscvdphp helper_xscvdphp_ppc +#define helper_xscvdpqp helper_xscvdpqp_ppc +#define helper_xscvdpsp helper_xscvdpsp_ppc +#define helper_xscvdpspn helper_xscvdpspn_ppc +#define helper_xscvdpsxds helper_xscvdpsxds_ppc +#define helper_xscvdpsxws helper_xscvdpsxws_ppc +#define helper_xscvdpuxds helper_xscvdpuxds_ppc +#define helper_xscvdpuxws helper_xscvdpuxws_ppc +#define helper_xscvhpdp helper_xscvhpdp_ppc +#define helper_xscvqpdp helper_xscvqpdp_ppc +#define helper_xscvqpsdz helper_xscvqpsdz_ppc +#define helper_xscvqpswz helper_xscvqpswz_ppc +#define helper_xscvqpudz helper_xscvqpudz_ppc +#define helper_xscvqpuwz helper_xscvqpuwz_ppc +#define helper_xscvsdqp helper_xscvsdqp_ppc +#define helper_xscvspdp helper_xscvspdp_ppc +#define helper_xscvspdpn helper_xscvspdpn_ppc +#define helper_xscvsxddp helper_xscvsxddp_ppc +#define helper_xscvsxdsp helper_xscvsxdsp_ppc +#define helper_xscvudqp helper_xscvudqp_ppc +#define helper_xscvuxddp helper_xscvuxddp_ppc +#define helper_xscvuxdsp helper_xscvuxdsp_ppc +#define helper_xsdivdp helper_xsdivdp_ppc +#define helper_xsdivqp helper_xsdivqp_ppc +#define helper_xsdivsp helper_xsdivsp_ppc +#define helper_xsmadddp helper_xsmadddp_ppc +#define helper_xsmaddsp helper_xsmaddsp_ppc +#define helper_xsmaxcdp helper_xsmaxcdp_ppc +#define helper_xsmaxdp helper_xsmaxdp_ppc +#define helper_xsmaxjdp helper_xsmaxjdp_ppc +#define helper_xsmincdp helper_xsmincdp_ppc +#define helper_xsmindp helper_xsmindp_ppc +#define helper_xsminjdp helper_xsminjdp_ppc +#define helper_xsmsubdp helper_xsmsubdp_ppc +#define helper_xsmsubsp helper_xsmsubsp_ppc +#define helper_xsmuldp helper_xsmuldp_ppc +#define helper_xsmulqp helper_xsmulqp_ppc +#define helper_xsmulsp helper_xsmulsp_ppc +#define helper_xsnmadddp helper_xsnmadddp_ppc +#define helper_xsnmaddsp helper_xsnmaddsp_ppc +#define 
helper_xsnmsubdp helper_xsnmsubdp_ppc +#define helper_xsnmsubsp helper_xsnmsubsp_ppc +#define helper_xsrdpi helper_xsrdpi_ppc +#define helper_xsrdpic helper_xsrdpic_ppc +#define helper_xsrdpim helper_xsrdpim_ppc +#define helper_xsrdpip helper_xsrdpip_ppc +#define helper_xsrdpiz helper_xsrdpiz_ppc +#define helper_xsredp helper_xsredp_ppc +#define helper_xsresp helper_xsresp_ppc +#define helper_xsrqpi helper_xsrqpi_ppc +#define helper_xsrqpxp helper_xsrqpxp_ppc +#define helper_xsrsp helper_xsrsp_ppc +#define helper_xsrsqrtedp helper_xsrsqrtedp_ppc +#define helper_xsrsqrtesp helper_xsrsqrtesp_ppc +#define helper_xssqrtdp helper_xssqrtdp_ppc +#define helper_xssqrtqp helper_xssqrtqp_ppc +#define helper_xssqrtsp helper_xssqrtsp_ppc +#define helper_xssubdp helper_xssubdp_ppc +#define helper_xssubqp helper_xssubqp_ppc +#define helper_xssubsp helper_xssubsp_ppc +#define helper_xstdivdp helper_xstdivdp_ppc +#define helper_xstsqrtdp helper_xstsqrtdp_ppc +#define helper_xststdcdp helper_xststdcdp_ppc +#define helper_xststdcqp helper_xststdcqp_ppc +#define helper_xststdcsp helper_xststdcsp_ppc +#define helper_xvadddp helper_xvadddp_ppc +#define helper_xvaddsp helper_xvaddsp_ppc +#define helper_xvcmpeqdp helper_xvcmpeqdp_ppc +#define helper_xvcmpeqsp helper_xvcmpeqsp_ppc +#define helper_xvcmpgedp helper_xvcmpgedp_ppc +#define helper_xvcmpgesp helper_xvcmpgesp_ppc +#define helper_xvcmpgtdp helper_xvcmpgtdp_ppc +#define helper_xvcmpgtsp helper_xvcmpgtsp_ppc +#define helper_xvcmpnedp helper_xvcmpnedp_ppc +#define helper_xvcmpnesp helper_xvcmpnesp_ppc +#define helper_xvcvdpsp helper_xvcvdpsp_ppc +#define helper_xvcvdpsxds helper_xvcvdpsxds_ppc +#define helper_xvcvdpsxws helper_xvcvdpsxws_ppc +#define helper_xvcvdpuxds helper_xvcvdpuxds_ppc +#define helper_xvcvdpuxws helper_xvcvdpuxws_ppc +#define helper_xvcvhpsp helper_xvcvhpsp_ppc +#define helper_xvcvspdp helper_xvcvspdp_ppc +#define helper_xvcvsphp helper_xvcvsphp_ppc +#define helper_xvcvspsxds helper_xvcvspsxds_ppc +#define helper_xvcvspsxws helper_xvcvspsxws_ppc +#define helper_xvcvspuxds helper_xvcvspuxds_ppc +#define helper_xvcvspuxws helper_xvcvspuxws_ppc +#define helper_xvcvsxddp helper_xvcvsxddp_ppc +#define helper_xvcvsxdsp helper_xvcvsxdsp_ppc +#define helper_xvcvsxwdp helper_xvcvsxwdp_ppc +#define helper_xvcvsxwsp helper_xvcvsxwsp_ppc +#define helper_xvcvuxddp helper_xvcvuxddp_ppc +#define helper_xvcvuxdsp helper_xvcvuxdsp_ppc +#define helper_xvcvuxwdp helper_xvcvuxwdp_ppc +#define helper_xvcvuxwsp helper_xvcvuxwsp_ppc +#define helper_xvdivdp helper_xvdivdp_ppc +#define helper_xvdivsp helper_xvdivsp_ppc +#define helper_xvmadddp helper_xvmadddp_ppc +#define helper_xvmaddsp helper_xvmaddsp_ppc +#define helper_xvmaxdp helper_xvmaxdp_ppc +#define helper_xvmaxsp helper_xvmaxsp_ppc +#define helper_xvmindp helper_xvmindp_ppc +#define helper_xvminsp helper_xvminsp_ppc +#define helper_xvmsubdp helper_xvmsubdp_ppc +#define helper_xvmsubsp helper_xvmsubsp_ppc +#define helper_xvmuldp helper_xvmuldp_ppc +#define helper_xvmulsp helper_xvmulsp_ppc +#define helper_xvnmadddp helper_xvnmadddp_ppc +#define helper_xvnmaddsp helper_xvnmaddsp_ppc +#define helper_xvnmsubdp helper_xvnmsubdp_ppc +#define helper_xvnmsubsp helper_xvnmsubsp_ppc +#define helper_xvrdpi helper_xvrdpi_ppc +#define helper_xvrdpic helper_xvrdpic_ppc +#define helper_xvrdpim helper_xvrdpim_ppc +#define helper_xvrdpip helper_xvrdpip_ppc +#define helper_xvrdpiz helper_xvrdpiz_ppc +#define helper_xvredp helper_xvredp_ppc +#define helper_xvresp helper_xvresp_ppc +#define helper_xvrspi 
helper_xvrspi_ppc +#define helper_xvrspic helper_xvrspic_ppc +#define helper_xvrspim helper_xvrspim_ppc +#define helper_xvrspip helper_xvrspip_ppc +#define helper_xvrspiz helper_xvrspiz_ppc +#define helper_xvrsqrtedp helper_xvrsqrtedp_ppc +#define helper_xvrsqrtesp helper_xvrsqrtesp_ppc +#define helper_xvsqrtdp helper_xvsqrtdp_ppc +#define helper_xvsqrtsp helper_xvsqrtsp_ppc +#define helper_xvsubdp helper_xvsubdp_ppc +#define helper_xvsubsp helper_xvsubsp_ppc +#define helper_xvtdivdp helper_xvtdivdp_ppc +#define helper_xvtdivsp helper_xvtdivsp_ppc +#define helper_xvtsqrtdp helper_xvtsqrtdp_ppc +#define helper_xvtsqrtsp helper_xvtsqrtsp_ppc +#define helper_xvtstdcdp helper_xvtstdcdp_ppc +#define helper_xvtstdcsp helper_xvtstdcsp_ppc +#define helper_xvxsigsp helper_xvxsigsp_ppc +#define helper_xxperm helper_xxperm_ppc +#define helper_xxpermr helper_xxpermr_ppc #endif diff --git a/qemu/ppc64.h b/qemu/ppc64.h index 9e4d79ed11..4944950a82 100644 --- a/qemu/ppc64.h +++ b/qemu/ppc64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_ppc64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_ppc64 #define tcg_gen_st_i64 tcg_gen_st_i64_ppc64 +#define tcg_gen_add_i64 tcg_gen_add_i64_ppc64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_ppc64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_ppc64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_ppc64 #define cpu_icount_to_ns cpu_icount_to_ns_ppc64 #define cpu_is_stopped cpu_is_stopped_ppc64 #define cpu_get_ticks cpu_get_ticks_ppc64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_ppc64 #define floatx80_mul floatx80_mul_ppc64 #define floatx80_div floatx80_div_ppc64 +#define floatx80_modrem floatx80_modrem_ppc64 +#define floatx80_mod floatx80_mod_ppc64 #define floatx80_rem floatx80_rem_ppc64 #define floatx80_sqrt floatx80_sqrt_ppc64 #define floatx80_eq floatx80_eq_ppc64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_ppc64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_ppc64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_ppc64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_ppc64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_ppc64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_ppc64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_ppc64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_ppc64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_ppc64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_ppc64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_ppc64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_ppc64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_ppc64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_ppc64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_ppc64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_ppc64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_ppc64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_ppc64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_ppc64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_ppc64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_ppc64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_ppc64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_ppc64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_ppc64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_ppc64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_ppc64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_ppc64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_ppc64 #define tcg_gen_shri_vec tcg_gen_shri_vec_ppc64 #define tcg_gen_sari_vec tcg_gen_sari_vec_ppc64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_ppc64 +#define 
tcg_gen_rotri_vec tcg_gen_rotri_vec_ppc64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_ppc64 #define tcg_gen_add_vec tcg_gen_add_vec_ppc64 #define tcg_gen_sub_vec tcg_gen_sub_vec_ppc64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_ppc64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_ppc64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_ppc64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_ppc64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_ppc64 #define tcg_gen_shls_vec tcg_gen_shls_vec_ppc64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_ppc64 #define tcg_gen_sars_vec tcg_gen_sars_vec_ppc64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_ppc64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_ppc64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_ppc64 #define tb_htable_lookup tb_htable_lookup_ppc64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_ppc64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_ppc64 #define tlb_init tlb_init_ppc64 +#define tlb_destroy tlb_destroy_ppc64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_ppc64 #define tlb_flush tlb_flush_ppc64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_ppc64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_ppc64 #define get_page_addr_code_hostp get_page_addr_code_hostp_ppc64 #define get_page_addr_code get_page_addr_code_ppc64 +#define probe_access_flags probe_access_flags_ppc64 #define probe_access probe_access_ppc64 #define tlb_vaddr_to_host tlb_vaddr_to_host_ppc64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_ppc64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_ppc64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_ppc64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_ppc64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_ppc64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_ppc64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_ppc64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_ppc64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_ppc64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_ppc64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_ppc64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_ppc64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_ppc64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_ppc64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_ppc64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_ppc64 #define cpu_ldub_data_ra cpu_ldub_data_ra_ppc64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_ppc64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_ppc64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_ppc64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_ppc64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_ppc64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_ppc64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_ppc64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_ppc64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_ppc64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_ppc64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_ppc64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_ppc64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_ppc64 #define cpu_ldub_data cpu_ldub_data_ppc64 #define cpu_ldsb_data cpu_ldsb_data_ppc64 -#define cpu_lduw_data cpu_lduw_data_ppc64 -#define cpu_ldsw_data cpu_ldsw_data_ppc64 -#define cpu_ldl_data cpu_ldl_data_ppc64 -#define cpu_ldq_data cpu_ldq_data_ppc64 +#define cpu_lduw_be_data cpu_lduw_be_data_ppc64 +#define cpu_lduw_le_data cpu_lduw_le_data_ppc64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_ppc64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_ppc64 
+#define cpu_ldl_be_data cpu_ldl_be_data_ppc64 +#define cpu_ldl_le_data cpu_ldl_le_data_ppc64 +#define cpu_ldq_le_data cpu_ldq_le_data_ppc64 +#define cpu_ldq_be_data cpu_ldq_be_data_ppc64 #define helper_ret_stb_mmu helper_ret_stb_mmu_ppc64 #define helper_le_stw_mmu helper_le_stw_mmu_ppc64 #define helper_be_stw_mmu helper_be_stw_mmu_ppc64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_ppc64 #define helper_be_stq_mmu helper_be_stq_mmu_ppc64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_ppc64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_ppc64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_ppc64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_ppc64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_ppc64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_ppc64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_ppc64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_ppc64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_ppc64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_ppc64 #define cpu_stb_data_ra cpu_stb_data_ra_ppc64 -#define cpu_stw_data_ra cpu_stw_data_ra_ppc64 -#define cpu_stl_data_ra cpu_stl_data_ra_ppc64 -#define cpu_stq_data_ra cpu_stq_data_ra_ppc64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_ppc64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_ppc64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_ppc64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_ppc64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_ppc64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_ppc64 #define cpu_stb_data cpu_stb_data_ppc64 -#define cpu_stw_data cpu_stw_data_ppc64 -#define cpu_stl_data cpu_stl_data_ppc64 -#define cpu_stq_data cpu_stq_data_ppc64 +#define cpu_stw_be_data cpu_stw_be_data_ppc64 +#define cpu_stw_le_data cpu_stw_le_data_ppc64 +#define cpu_stl_be_data cpu_stl_be_data_ppc64 +#define cpu_stl_le_data cpu_stl_le_data_ppc64 +#define cpu_stq_be_data cpu_stq_be_data_ppc64 +#define cpu_stq_le_data cpu_stq_le_data_ppc64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_ppc64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_ppc64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_ppc64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_ppc64 #define cpu_ldl_code cpu_ldl_code_ppc64 #define cpu_ldq_code cpu_ldq_code_ppc64 +#define cpu_interrupt_handler cpu_interrupt_handler_ppc64 #define helper_div_i32 helper_div_i32_ppc64 #define helper_rem_i32 helper_rem_i32_ppc64 #define helper_divu_i32 helper_divu_i32_ppc64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_ppc64 #define helper_gvec_sar32i helper_gvec_sar32i_ppc64 #define helper_gvec_sar64i helper_gvec_sar64i_ppc64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_ppc64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_ppc64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_ppc64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_ppc64 #define helper_gvec_shl8v helper_gvec_shl8v_ppc64 #define helper_gvec_shl16v helper_gvec_shl16v_ppc64 #define helper_gvec_shl32v helper_gvec_shl32v_ppc64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_ppc64 #define helper_gvec_sar32v helper_gvec_sar32v_ppc64 #define helper_gvec_sar64v helper_gvec_sar64v_ppc64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_ppc64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_ppc64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_ppc64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_ppc64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_ppc64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_ppc64 
+#define helper_gvec_rotr32v helper_gvec_rotr32v_ppc64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_ppc64 #define helper_gvec_eq8 helper_gvec_eq8_ppc64 #define helper_gvec_ne8 helper_gvec_ne8_ppc64 #define helper_gvec_lt8 helper_gvec_lt8_ppc64 @@ -1575,6 +1629,33 @@ #define helper_tbegin helper_tbegin_ppc64 #define helper_load_dump_spr helper_load_dump_spr_ppc64 #define helper_store_dump_spr helper_store_dump_spr_ppc64 +#define store_fpscr store_fpscr_ppc64 +#define helper_store_fpscr helper_store_fpscr_ppc64 +#define helper_float_check_status helper_float_check_status_ppc64 +#define helper_reset_fpstatus helper_reset_fpstatus_ppc64 +#define helper_fadd helper_fadd_ppc64 +#define helper_fsub helper_fsub_ppc64 +#define helper_fmul helper_fmul_ppc64 +#define helper_fdiv helper_fdiv_ppc64 +#define helper_fctiw helper_fctiw_ppc64 +#define helper_fctiwz helper_fctiwz_ppc64 +#define helper_fctiwuz helper_fctiwuz_ppc64 +#define helper_fctid helper_fctid_ppc64 +#define helper_fctidz helper_fctidz_ppc64 +#define helper_fctidu helper_fctidu_ppc64 +#define helper_fctiduz helper_fctiduz_ppc64 +#define helper_fcfid helper_fcfid_ppc64 +#define helper_fcfids helper_fcfids_ppc64 +#define helper_fcfidu helper_fcfidu_ppc64 +#define helper_fcfidus helper_fcfidus_ppc64 +#define helper_frin helper_frin_ppc64 +#define helper_friz helper_friz_ppc64 +#define helper_frip helper_frip_ppc64 +#define helper_frim helper_frim_ppc64 +#define helper_fmadd helper_fmadd_ppc64 +#define helper_fnmadd helper_fnmadd_ppc64 +#define helper_fmsub helper_fmsub_ppc64 +#define helper_fnmsub helper_fnmsub_ppc64 #define helper_hfscr_facility_check helper_hfscr_facility_check_ppc64 #define helper_fscr_facility_check helper_fscr_facility_check_ppc64 #define helper_msr_facility_check helper_msr_facility_check_ppc64 @@ -1726,6 +1807,243 @@ #define ppc_hash32_handle_mmu_fault ppc_hash32_handle_mmu_fault_ppc64 #define gen_helper_store_booke_tsr gen_helper_store_booke_tsr_ppc64 #define gen_helper_store_booke_tcr gen_helper_store_booke_tcr_ppc64 +#define gen_helper_store_fpscr gen_helper_store_fpscr_ppc64 #define store_booke_tcr store_booke_tcr_ppc64 #define ppc_hash32_get_phys_page_debug ppc_hash32_get_phys_page_debug_ppc64 +#define helper_compute_fprf_float128 helper_compute_fprf_float128_ppc64 +#define helper_compute_fprf_float16 helper_compute_fprf_float16_ppc64 +#define helper_compute_fprf_float32 helper_compute_fprf_float32_ppc64 +#define helper_compute_fprf_float64 helper_compute_fprf_float64_ppc64 +#define helper_efdadd helper_efdadd_ppc64 +#define helper_efdcfs helper_efdcfs_ppc64 +#define helper_efdcfsf helper_efdcfsf_ppc64 +#define helper_efdcfsi helper_efdcfsi_ppc64 +#define helper_efdcfsid helper_efdcfsid_ppc64 +#define helper_efdcfuf helper_efdcfuf_ppc64 +#define helper_efdcfui helper_efdcfui_ppc64 +#define helper_efdcfuid helper_efdcfuid_ppc64 +#define helper_efdcmpeq helper_efdcmpeq_ppc64 +#define helper_efdcmpgt helper_efdcmpgt_ppc64 +#define helper_efdcmplt helper_efdcmplt_ppc64 +#define helper_efdctsf helper_efdctsf_ppc64 +#define helper_efdctsi helper_efdctsi_ppc64 +#define helper_efdctsidz helper_efdctsidz_ppc64 +#define helper_efdctsiz helper_efdctsiz_ppc64 +#define helper_efdctuf helper_efdctuf_ppc64 +#define helper_efdctui helper_efdctui_ppc64 +#define helper_efdctuidz helper_efdctuidz_ppc64 +#define helper_efdctuiz helper_efdctuiz_ppc64 +#define helper_efddiv helper_efddiv_ppc64 +#define helper_efdmul helper_efdmul_ppc64 +#define helper_efdsub helper_efdsub_ppc64 +#define helper_efdtsteq helper_efdtsteq_ppc64 
+#define helper_efdtstgt helper_efdtstgt_ppc64 +#define helper_efdtstlt helper_efdtstlt_ppc64 +#define helper_efsadd helper_efsadd_ppc64 +#define helper_efscfd helper_efscfd_ppc64 +#define helper_efscfsf helper_efscfsf_ppc64 +#define helper_efscfsi helper_efscfsi_ppc64 +#define helper_efscfuf helper_efscfuf_ppc64 +#define helper_efscfui helper_efscfui_ppc64 +#define helper_efscmpeq helper_efscmpeq_ppc64 +#define helper_efscmpgt helper_efscmpgt_ppc64 +#define helper_efscmplt helper_efscmplt_ppc64 +#define helper_efsctsf helper_efsctsf_ppc64 +#define helper_efsctsi helper_efsctsi_ppc64 +#define helper_efsctsiz helper_efsctsiz_ppc64 +#define helper_efsctuf helper_efsctuf_ppc64 +#define helper_efsctui helper_efsctui_ppc64 +#define helper_efsctuiz helper_efsctuiz_ppc64 +#define helper_efsdiv helper_efsdiv_ppc64 +#define helper_efsmul helper_efsmul_ppc64 +#define helper_efssub helper_efssub_ppc64 +#define helper_efststeq helper_efststeq_ppc64 +#define helper_efststgt helper_efststgt_ppc64 +#define helper_efststlt helper_efststlt_ppc64 +#define helper_evfsadd helper_evfsadd_ppc64 +#define helper_evfscfsf helper_evfscfsf_ppc64 +#define helper_evfscfsi helper_evfscfsi_ppc64 +#define helper_evfscfuf helper_evfscfuf_ppc64 +#define helper_evfscfui helper_evfscfui_ppc64 +#define helper_evfscmpeq helper_evfscmpeq_ppc64 +#define helper_evfscmpgt helper_evfscmpgt_ppc64 +#define helper_evfscmplt helper_evfscmplt_ppc64 +#define helper_evfsctsf helper_evfsctsf_ppc64 +#define helper_evfsctsi helper_evfsctsi_ppc64 +#define helper_evfsctsiz helper_evfsctsiz_ppc64 +#define helper_evfsctuf helper_evfsctuf_ppc64 +#define helper_evfsctui helper_evfsctui_ppc64 +#define helper_evfsctuiz helper_evfsctuiz_ppc64 +#define helper_evfsdiv helper_evfsdiv_ppc64 +#define helper_evfsmul helper_evfsmul_ppc64 +#define helper_evfssub helper_evfssub_ppc64 +#define helper_evfststeq helper_evfststeq_ppc64 +#define helper_evfststgt helper_evfststgt_ppc64 +#define helper_evfststlt helper_evfststlt_ppc64 +#define helper_fcmpo helper_fcmpo_ppc64 +#define helper_fcmpu helper_fcmpu_ppc64 +#define helper_fctiwu helper_fctiwu_ppc64 +#define helper_fpscr_clrbit helper_fpscr_clrbit_ppc64 +#define helper_fpscr_setbit helper_fpscr_setbit_ppc64 +#define helper_fre helper_fre_ppc64 +#define helper_fres helper_fres_ppc64 +#define helper_frsp helper_frsp_ppc64 +#define helper_frsqrte helper_frsqrte_ppc64 +#define helper_fsel helper_fsel_ppc64 +#define helper_fsqrt helper_fsqrt_ppc64 +#define helper_ftdiv helper_ftdiv_ppc64 +#define helper_ftsqrt helper_ftsqrt_ppc64 +#define helper_todouble helper_todouble_ppc64 +#define helper_tosingle helper_tosingle_ppc64 +#define helper_xsadddp helper_xsadddp_ppc64 +#define helper_xsaddqp helper_xsaddqp_ppc64 +#define helper_xsaddsp helper_xsaddsp_ppc64 +#define helper_xscmpeqdp helper_xscmpeqdp_ppc64 +#define helper_xscmpexpdp helper_xscmpexpdp_ppc64 +#define helper_xscmpexpqp helper_xscmpexpqp_ppc64 +#define helper_xscmpgedp helper_xscmpgedp_ppc64 +#define helper_xscmpgtdp helper_xscmpgtdp_ppc64 +#define helper_xscmpnedp helper_xscmpnedp_ppc64 +#define helper_xscmpodp helper_xscmpodp_ppc64 +#define helper_xscmpoqp helper_xscmpoqp_ppc64 +#define helper_xscmpudp helper_xscmpudp_ppc64 +#define helper_xscmpuqp helper_xscmpuqp_ppc64 +#define helper_xscvdphp helper_xscvdphp_ppc64 +#define helper_xscvdpqp helper_xscvdpqp_ppc64 +#define helper_xscvdpsp helper_xscvdpsp_ppc64 +#define helper_xscvdpspn helper_xscvdpspn_ppc64 +#define helper_xscvdpsxds helper_xscvdpsxds_ppc64 +#define helper_xscvdpsxws 
helper_xscvdpsxws_ppc64 +#define helper_xscvdpuxds helper_xscvdpuxds_ppc64 +#define helper_xscvdpuxws helper_xscvdpuxws_ppc64 +#define helper_xscvhpdp helper_xscvhpdp_ppc64 +#define helper_xscvqpdp helper_xscvqpdp_ppc64 +#define helper_xscvqpsdz helper_xscvqpsdz_ppc64 +#define helper_xscvqpswz helper_xscvqpswz_ppc64 +#define helper_xscvqpudz helper_xscvqpudz_ppc64 +#define helper_xscvqpuwz helper_xscvqpuwz_ppc64 +#define helper_xscvsdqp helper_xscvsdqp_ppc64 +#define helper_xscvspdp helper_xscvspdp_ppc64 +#define helper_xscvspdpn helper_xscvspdpn_ppc64 +#define helper_xscvsxddp helper_xscvsxddp_ppc64 +#define helper_xscvsxdsp helper_xscvsxdsp_ppc64 +#define helper_xscvudqp helper_xscvudqp_ppc64 +#define helper_xscvuxddp helper_xscvuxddp_ppc64 +#define helper_xscvuxdsp helper_xscvuxdsp_ppc64 +#define helper_xsdivdp helper_xsdivdp_ppc64 +#define helper_xsdivqp helper_xsdivqp_ppc64 +#define helper_xsdivsp helper_xsdivsp_ppc64 +#define helper_xsmadddp helper_xsmadddp_ppc64 +#define helper_xsmaddsp helper_xsmaddsp_ppc64 +#define helper_xsmaxcdp helper_xsmaxcdp_ppc64 +#define helper_xsmaxdp helper_xsmaxdp_ppc64 +#define helper_xsmaxjdp helper_xsmaxjdp_ppc64 +#define helper_xsmincdp helper_xsmincdp_ppc64 +#define helper_xsmindp helper_xsmindp_ppc64 +#define helper_xsminjdp helper_xsminjdp_ppc64 +#define helper_xsmsubdp helper_xsmsubdp_ppc64 +#define helper_xsmsubsp helper_xsmsubsp_ppc64 +#define helper_xsmuldp helper_xsmuldp_ppc64 +#define helper_xsmulqp helper_xsmulqp_ppc64 +#define helper_xsmulsp helper_xsmulsp_ppc64 +#define helper_xsnmadddp helper_xsnmadddp_ppc64 +#define helper_xsnmaddsp helper_xsnmaddsp_ppc64 +#define helper_xsnmsubdp helper_xsnmsubdp_ppc64 +#define helper_xsnmsubsp helper_xsnmsubsp_ppc64 +#define helper_xsrdpi helper_xsrdpi_ppc64 +#define helper_xsrdpic helper_xsrdpic_ppc64 +#define helper_xsrdpim helper_xsrdpim_ppc64 +#define helper_xsrdpip helper_xsrdpip_ppc64 +#define helper_xsrdpiz helper_xsrdpiz_ppc64 +#define helper_xsredp helper_xsredp_ppc64 +#define helper_xsresp helper_xsresp_ppc64 +#define helper_xsrqpi helper_xsrqpi_ppc64 +#define helper_xsrqpxp helper_xsrqpxp_ppc64 +#define helper_xsrsp helper_xsrsp_ppc64 +#define helper_xsrsqrtedp helper_xsrsqrtedp_ppc64 +#define helper_xsrsqrtesp helper_xsrsqrtesp_ppc64 +#define helper_xssqrtdp helper_xssqrtdp_ppc64 +#define helper_xssqrtqp helper_xssqrtqp_ppc64 +#define helper_xssqrtsp helper_xssqrtsp_ppc64 +#define helper_xssubdp helper_xssubdp_ppc64 +#define helper_xssubqp helper_xssubqp_ppc64 +#define helper_xssubsp helper_xssubsp_ppc64 +#define helper_xstdivdp helper_xstdivdp_ppc64 +#define helper_xstsqrtdp helper_xstsqrtdp_ppc64 +#define helper_xststdcdp helper_xststdcdp_ppc64 +#define helper_xststdcqp helper_xststdcqp_ppc64 +#define helper_xststdcsp helper_xststdcsp_ppc64 +#define helper_xvadddp helper_xvadddp_ppc64 +#define helper_xvaddsp helper_xvaddsp_ppc64 +#define helper_xvcmpeqdp helper_xvcmpeqdp_ppc64 +#define helper_xvcmpeqsp helper_xvcmpeqsp_ppc64 +#define helper_xvcmpgedp helper_xvcmpgedp_ppc64 +#define helper_xvcmpgesp helper_xvcmpgesp_ppc64 +#define helper_xvcmpgtdp helper_xvcmpgtdp_ppc64 +#define helper_xvcmpgtsp helper_xvcmpgtsp_ppc64 +#define helper_xvcmpnedp helper_xvcmpnedp_ppc64 +#define helper_xvcmpnesp helper_xvcmpnesp_ppc64 +#define helper_xvcvdpsp helper_xvcvdpsp_ppc64 +#define helper_xvcvdpsxds helper_xvcvdpsxds_ppc64 +#define helper_xvcvdpsxws helper_xvcvdpsxws_ppc64 +#define helper_xvcvdpuxds helper_xvcvdpuxds_ppc64 +#define helper_xvcvdpuxws helper_xvcvdpuxws_ppc64 +#define helper_xvcvhpsp 
helper_xvcvhpsp_ppc64 +#define helper_xvcvspdp helper_xvcvspdp_ppc64 +#define helper_xvcvsphp helper_xvcvsphp_ppc64 +#define helper_xvcvspsxds helper_xvcvspsxds_ppc64 +#define helper_xvcvspsxws helper_xvcvspsxws_ppc64 +#define helper_xvcvspuxds helper_xvcvspuxds_ppc64 +#define helper_xvcvspuxws helper_xvcvspuxws_ppc64 +#define helper_xvcvsxddp helper_xvcvsxddp_ppc64 +#define helper_xvcvsxdsp helper_xvcvsxdsp_ppc64 +#define helper_xvcvsxwdp helper_xvcvsxwdp_ppc64 +#define helper_xvcvsxwsp helper_xvcvsxwsp_ppc64 +#define helper_xvcvuxddp helper_xvcvuxddp_ppc64 +#define helper_xvcvuxdsp helper_xvcvuxdsp_ppc64 +#define helper_xvcvuxwdp helper_xvcvuxwdp_ppc64 +#define helper_xvcvuxwsp helper_xvcvuxwsp_ppc64 +#define helper_xvdivdp helper_xvdivdp_ppc64 +#define helper_xvdivsp helper_xvdivsp_ppc64 +#define helper_xvmadddp helper_xvmadddp_ppc64 +#define helper_xvmaddsp helper_xvmaddsp_ppc64 +#define helper_xvmaxdp helper_xvmaxdp_ppc64 +#define helper_xvmaxsp helper_xvmaxsp_ppc64 +#define helper_xvmindp helper_xvmindp_ppc64 +#define helper_xvminsp helper_xvminsp_ppc64 +#define helper_xvmsubdp helper_xvmsubdp_ppc64 +#define helper_xvmsubsp helper_xvmsubsp_ppc64 +#define helper_xvmuldp helper_xvmuldp_ppc64 +#define helper_xvmulsp helper_xvmulsp_ppc64 +#define helper_xvnmadddp helper_xvnmadddp_ppc64 +#define helper_xvnmaddsp helper_xvnmaddsp_ppc64 +#define helper_xvnmsubdp helper_xvnmsubdp_ppc64 +#define helper_xvnmsubsp helper_xvnmsubsp_ppc64 +#define helper_xvrdpi helper_xvrdpi_ppc64 +#define helper_xvrdpic helper_xvrdpic_ppc64 +#define helper_xvrdpim helper_xvrdpim_ppc64 +#define helper_xvrdpip helper_xvrdpip_ppc64 +#define helper_xvrdpiz helper_xvrdpiz_ppc64 +#define helper_xvredp helper_xvredp_ppc64 +#define helper_xvresp helper_xvresp_ppc64 +#define helper_xvrspi helper_xvrspi_ppc64 +#define helper_xvrspic helper_xvrspic_ppc64 +#define helper_xvrspim helper_xvrspim_ppc64 +#define helper_xvrspip helper_xvrspip_ppc64 +#define helper_xvrspiz helper_xvrspiz_ppc64 +#define helper_xvrsqrtedp helper_xvrsqrtedp_ppc64 +#define helper_xvrsqrtesp helper_xvrsqrtesp_ppc64 +#define helper_xvsqrtdp helper_xvsqrtdp_ppc64 +#define helper_xvsqrtsp helper_xvsqrtsp_ppc64 +#define helper_xvsubdp helper_xvsubdp_ppc64 +#define helper_xvsubsp helper_xvsubsp_ppc64 +#define helper_xvtdivdp helper_xvtdivdp_ppc64 +#define helper_xvtdivsp helper_xvtdivsp_ppc64 +#define helper_xvtsqrtdp helper_xvtsqrtdp_ppc64 +#define helper_xvtsqrtsp helper_xvtsqrtsp_ppc64 +#define helper_xvtstdcdp helper_xvtstdcdp_ppc64 +#define helper_xvtstdcsp helper_xvtstdcsp_ppc64 +#define helper_xvxsigsp helper_xvxsigsp_ppc64 +#define helper_xxperm helper_xxperm_ppc64 +#define helper_xxpermr helper_xxpermr_ppc64 #endif diff --git a/qemu/riscv32.h b/qemu/riscv32.h index 90889da546..edc897463b 100644 --- a/qemu/riscv32.h +++ b/qemu/riscv32.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_riscv32 #define tcg_gen_shr_i64 tcg_gen_shr_i64_riscv32 #define tcg_gen_st_i64 tcg_gen_st_i64_riscv32 +#define tcg_gen_add_i64 tcg_gen_add_i64_riscv32 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_riscv32 #define tcg_gen_xor_i64 tcg_gen_xor_i64_riscv32 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_riscv32 #define cpu_icount_to_ns cpu_icount_to_ns_riscv32 #define cpu_is_stopped cpu_is_stopped_riscv32 #define cpu_get_ticks cpu_get_ticks_riscv32 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_riscv32 #define floatx80_mul floatx80_mul_riscv32 #define floatx80_div floatx80_div_riscv32 +#define floatx80_modrem floatx80_modrem_riscv32 +#define floatx80_mod 
floatx80_mod_riscv32 #define floatx80_rem floatx80_rem_riscv32 #define floatx80_sqrt floatx80_sqrt_riscv32 #define floatx80_eq floatx80_eq_riscv32 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_riscv32 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_riscv32 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_riscv32 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_riscv32 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_riscv32 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_riscv32 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_riscv32 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_riscv32 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_riscv32 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_riscv32 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_riscv32 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_riscv32 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_riscv32 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_riscv32 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_riscv32 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_riscv32 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_riscv32 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_riscv32 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_riscv32 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_riscv32 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_riscv32 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_riscv32 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_riscv32 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_riscv32 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_riscv32 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_riscv32 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_riscv32 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_riscv32 #define tcg_gen_shri_vec tcg_gen_shri_vec_riscv32 #define tcg_gen_sari_vec tcg_gen_sari_vec_riscv32 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_riscv32 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_riscv32 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_riscv32 #define tcg_gen_add_vec tcg_gen_add_vec_riscv32 #define tcg_gen_sub_vec tcg_gen_sub_vec_riscv32 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_riscv32 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_riscv32 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_riscv32 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_riscv32 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_riscv32 #define tcg_gen_shls_vec tcg_gen_shls_vec_riscv32 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_riscv32 #define tcg_gen_sars_vec tcg_gen_sars_vec_riscv32 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_riscv32 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_riscv32 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_riscv32 #define tb_htable_lookup tb_htable_lookup_riscv32 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_riscv32 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_riscv32 #define tlb_init tlb_init_riscv32 +#define tlb_destroy tlb_destroy_riscv32 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_riscv32 #define tlb_flush tlb_flush_riscv32 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_riscv32 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_riscv32 #define get_page_addr_code_hostp get_page_addr_code_hostp_riscv32 #define get_page_addr_code get_page_addr_code_riscv32 +#define probe_access_flags probe_access_flags_riscv32 #define probe_access probe_access_riscv32 #define tlb_vaddr_to_host tlb_vaddr_to_host_riscv32 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_riscv32 @@ -810,22 +830,34 @@ #define 
helper_be_ldsl_mmu helper_be_ldsl_mmu_riscv32 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_riscv32 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_riscv32 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_riscv32 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_riscv32 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_riscv32 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_riscv32 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_riscv32 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_riscv32 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_riscv32 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_riscv32 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_riscv32 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_riscv32 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_riscv32 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_riscv32 #define cpu_ldub_data_ra cpu_ldub_data_ra_riscv32 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_riscv32 -#define cpu_lduw_data_ra cpu_lduw_data_ra_riscv32 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_riscv32 -#define cpu_ldl_data_ra cpu_ldl_data_ra_riscv32 -#define cpu_ldq_data_ra cpu_ldq_data_ra_riscv32 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_riscv32 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_riscv32 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_riscv32 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_riscv32 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_riscv32 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_riscv32 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_riscv32 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_riscv32 #define cpu_ldub_data cpu_ldub_data_riscv32 #define cpu_ldsb_data cpu_ldsb_data_riscv32 -#define cpu_lduw_data cpu_lduw_data_riscv32 -#define cpu_ldsw_data cpu_ldsw_data_riscv32 -#define cpu_ldl_data cpu_ldl_data_riscv32 -#define cpu_ldq_data cpu_ldq_data_riscv32 +#define cpu_lduw_be_data cpu_lduw_be_data_riscv32 +#define cpu_lduw_le_data cpu_lduw_le_data_riscv32 +#define cpu_ldsw_be_data cpu_ldsw_be_data_riscv32 +#define cpu_ldsw_le_data cpu_ldsw_le_data_riscv32 +#define cpu_ldl_be_data cpu_ldl_be_data_riscv32 +#define cpu_ldl_le_data cpu_ldl_le_data_riscv32 +#define cpu_ldq_le_data cpu_ldq_le_data_riscv32 +#define cpu_ldq_be_data cpu_ldq_be_data_riscv32 #define helper_ret_stb_mmu helper_ret_stb_mmu_riscv32 #define helper_le_stw_mmu helper_le_stw_mmu_riscv32 #define helper_be_stw_mmu helper_be_stw_mmu_riscv32 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_riscv32 #define helper_be_stq_mmu helper_be_stq_mmu_riscv32 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_riscv32 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_riscv32 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_riscv32 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_riscv32 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_riscv32 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_riscv32 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_riscv32 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_riscv32 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_riscv32 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_riscv32 #define cpu_stb_data_ra cpu_stb_data_ra_riscv32 -#define cpu_stw_data_ra cpu_stw_data_ra_riscv32 -#define cpu_stl_data_ra cpu_stl_data_ra_riscv32 -#define cpu_stq_data_ra cpu_stq_data_ra_riscv32 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_riscv32 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_riscv32 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_riscv32 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_riscv32 +#define cpu_stq_be_data_ra 
cpu_stq_be_data_ra_riscv32 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_riscv32 #define cpu_stb_data cpu_stb_data_riscv32 -#define cpu_stw_data cpu_stw_data_riscv32 -#define cpu_stl_data cpu_stl_data_riscv32 -#define cpu_stq_data cpu_stq_data_riscv32 +#define cpu_stw_be_data cpu_stw_be_data_riscv32 +#define cpu_stw_le_data cpu_stw_le_data_riscv32 +#define cpu_stl_be_data cpu_stl_be_data_riscv32 +#define cpu_stl_le_data cpu_stl_le_data_riscv32 +#define cpu_stq_be_data cpu_stq_be_data_riscv32 +#define cpu_stq_le_data cpu_stq_le_data_riscv32 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_riscv32 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_riscv32 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_riscv32 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_riscv32 #define cpu_ldl_code cpu_ldl_code_riscv32 #define cpu_ldq_code cpu_ldq_code_riscv32 +#define cpu_interrupt_handler cpu_interrupt_handler_riscv32 #define helper_div_i32 helper_div_i32_riscv32 #define helper_rem_i32 helper_rem_i32_riscv32 #define helper_divu_i32 helper_divu_i32_riscv32 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_riscv32 #define helper_gvec_sar32i helper_gvec_sar32i_riscv32 #define helper_gvec_sar64i helper_gvec_sar64i_riscv32 +#define helper_gvec_rotl8i helper_gvec_rotl8i_riscv32 +#define helper_gvec_rotl16i helper_gvec_rotl16i_riscv32 +#define helper_gvec_rotl32i helper_gvec_rotl32i_riscv32 +#define helper_gvec_rotl64i helper_gvec_rotl64i_riscv32 #define helper_gvec_shl8v helper_gvec_shl8v_riscv32 #define helper_gvec_shl16v helper_gvec_shl16v_riscv32 #define helper_gvec_shl32v helper_gvec_shl32v_riscv32 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_riscv32 #define helper_gvec_sar32v helper_gvec_sar32v_riscv32 #define helper_gvec_sar64v helper_gvec_sar64v_riscv32 +#define helper_gvec_rotl8v helper_gvec_rotl8v_riscv32 +#define helper_gvec_rotl16v helper_gvec_rotl16v_riscv32 +#define helper_gvec_rotl32v helper_gvec_rotl32v_riscv32 +#define helper_gvec_rotl64v helper_gvec_rotl64v_riscv32 +#define helper_gvec_rotr8v helper_gvec_rotr8v_riscv32 +#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv32 +#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv32 +#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv32 #define helper_gvec_eq8 helper_gvec_eq8_riscv32 #define helper_gvec_ne8 helper_gvec_ne8_riscv32 #define helper_gvec_lt8 helper_gvec_lt8_riscv32 @@ -1366,6 +1420,7 @@ #define helper_mret helper_mret_riscv32 #define helper_wfi helper_wfi_riscv32 #define helper_tlb_flush helper_tlb_flush_riscv32 +#define helper_hyp_tlb_flush helper_hyp_tlb_flush_riscv32 #define pmp_hart_has_privs pmp_hart_has_privs_riscv32 #define pmpcfg_csr_write pmpcfg_csr_write_riscv32 #define pmpcfg_csr_read pmpcfg_csr_read_riscv32 @@ -1386,4 +1441,1006 @@ #define gen_helper_tlb_flush gen_helper_tlb_flush_riscv32 #define riscv_fpr_regnames riscv_fpr_regnames_riscv32 #define riscv_int_regnames riscv_int_regnames_riscv32 +#define fclass_d fclass_d_riscv32 +#define fclass_h fclass_h_riscv32 +#define fclass_s fclass_s_riscv32 +#define helper_vaadd_vv_b helper_vaadd_vv_b_riscv32 +#define helper_vaadd_vv_d helper_vaadd_vv_d_riscv32 +#define helper_vaadd_vv_h helper_vaadd_vv_h_riscv32 +#define helper_vaadd_vv_w helper_vaadd_vv_w_riscv32 +#define helper_vaadd_vx_b helper_vaadd_vx_b_riscv32 +#define helper_vaadd_vx_d helper_vaadd_vx_d_riscv32 +#define helper_vaadd_vx_h helper_vaadd_vx_h_riscv32 +#define helper_vaadd_vx_w helper_vaadd_vx_w_riscv32 +#define 
helper_vadc_vvm_b helper_vadc_vvm_b_riscv32 +#define helper_vadc_vvm_d helper_vadc_vvm_d_riscv32 +#define helper_vadc_vvm_h helper_vadc_vvm_h_riscv32 +#define helper_vadc_vvm_w helper_vadc_vvm_w_riscv32 +#define helper_vadc_vxm_b helper_vadc_vxm_b_riscv32 +#define helper_vadc_vxm_d helper_vadc_vxm_d_riscv32 +#define helper_vadc_vxm_h helper_vadc_vxm_h_riscv32 +#define helper_vadc_vxm_w helper_vadc_vxm_w_riscv32 +#define helper_vadd_vv_b helper_vadd_vv_b_riscv32 +#define helper_vadd_vv_d helper_vadd_vv_d_riscv32 +#define helper_vadd_vv_h helper_vadd_vv_h_riscv32 +#define helper_vadd_vv_w helper_vadd_vv_w_riscv32 +#define helper_vadd_vx_b helper_vadd_vx_b_riscv32 +#define helper_vadd_vx_d helper_vadd_vx_d_riscv32 +#define helper_vadd_vx_h helper_vadd_vx_h_riscv32 +#define helper_vadd_vx_w helper_vadd_vx_w_riscv32 +#define helper_vamoaddw_v_w helper_vamoaddw_v_w_riscv32 +#define helper_vamoandw_v_w helper_vamoandw_v_w_riscv32 +#define helper_vamomaxuw_v_w helper_vamomaxuw_v_w_riscv32 +#define helper_vamomaxw_v_w helper_vamomaxw_v_w_riscv32 +#define helper_vamominuw_v_w helper_vamominuw_v_w_riscv32 +#define helper_vamominw_v_w helper_vamominw_v_w_riscv32 +#define helper_vamoorw_v_w helper_vamoorw_v_w_riscv32 +#define helper_vamoswapw_v_w helper_vamoswapw_v_w_riscv32 +#define helper_vamoxorw_v_w helper_vamoxorw_v_w_riscv32 +#define helper_vand_vv_b helper_vand_vv_b_riscv32 +#define helper_vand_vv_d helper_vand_vv_d_riscv32 +#define helper_vand_vv_h helper_vand_vv_h_riscv32 +#define helper_vand_vv_w helper_vand_vv_w_riscv32 +#define helper_vand_vx_b helper_vand_vx_b_riscv32 +#define helper_vand_vx_d helper_vand_vx_d_riscv32 +#define helper_vand_vx_h helper_vand_vx_h_riscv32 +#define helper_vand_vx_w helper_vand_vx_w_riscv32 +#define helper_vasub_vv_b helper_vasub_vv_b_riscv32 +#define helper_vasub_vv_d helper_vasub_vv_d_riscv32 +#define helper_vasub_vv_h helper_vasub_vv_h_riscv32 +#define helper_vasub_vv_w helper_vasub_vv_w_riscv32 +#define helper_vasub_vx_b helper_vasub_vx_b_riscv32 +#define helper_vasub_vx_d helper_vasub_vx_d_riscv32 +#define helper_vasub_vx_h helper_vasub_vx_h_riscv32 +#define helper_vasub_vx_w helper_vasub_vx_w_riscv32 +#define helper_vcompress_vm_b helper_vcompress_vm_b_riscv32 +#define helper_vcompress_vm_d helper_vcompress_vm_d_riscv32 +#define helper_vcompress_vm_h helper_vcompress_vm_h_riscv32 +#define helper_vcompress_vm_w helper_vcompress_vm_w_riscv32 +#define helper_vdiv_vv_b helper_vdiv_vv_b_riscv32 +#define helper_vdiv_vv_d helper_vdiv_vv_d_riscv32 +#define helper_vdiv_vv_h helper_vdiv_vv_h_riscv32 +#define helper_vdiv_vv_w helper_vdiv_vv_w_riscv32 +#define helper_vdiv_vx_b helper_vdiv_vx_b_riscv32 +#define helper_vdiv_vx_d helper_vdiv_vx_d_riscv32 +#define helper_vdiv_vx_h helper_vdiv_vx_h_riscv32 +#define helper_vdiv_vx_w helper_vdiv_vx_w_riscv32 +#define helper_vdivu_vv_b helper_vdivu_vv_b_riscv32 +#define helper_vdivu_vv_d helper_vdivu_vv_d_riscv32 +#define helper_vdivu_vv_h helper_vdivu_vv_h_riscv32 +#define helper_vdivu_vv_w helper_vdivu_vv_w_riscv32 +#define helper_vdivu_vx_b helper_vdivu_vx_b_riscv32 +#define helper_vdivu_vx_d helper_vdivu_vx_d_riscv32 +#define helper_vdivu_vx_h helper_vdivu_vx_h_riscv32 +#define helper_vdivu_vx_w helper_vdivu_vx_w_riscv32 +#define helper_vec_rsubs16 helper_vec_rsubs16_riscv32 +#define helper_vec_rsubs32 helper_vec_rsubs32_riscv32 +#define helper_vec_rsubs64 helper_vec_rsubs64_riscv32 +#define helper_vec_rsubs8 helper_vec_rsubs8_riscv32 +#define helper_vfadd_vf_d helper_vfadd_vf_d_riscv32 +#define helper_vfadd_vf_h 
helper_vfadd_vf_h_riscv32 +#define helper_vfadd_vf_w helper_vfadd_vf_w_riscv32 +#define helper_vfadd_vv_d helper_vfadd_vv_d_riscv32 +#define helper_vfadd_vv_h helper_vfadd_vv_h_riscv32 +#define helper_vfadd_vv_w helper_vfadd_vv_w_riscv32 +#define helper_vfclass_v_d helper_vfclass_v_d_riscv32 +#define helper_vfclass_v_h helper_vfclass_v_h_riscv32 +#define helper_vfclass_v_w helper_vfclass_v_w_riscv32 +#define helper_vfcvt_f_x_v_d helper_vfcvt_f_x_v_d_riscv32 +#define helper_vfcvt_f_x_v_h helper_vfcvt_f_x_v_h_riscv32 +#define helper_vfcvt_f_x_v_w helper_vfcvt_f_x_v_w_riscv32 +#define helper_vfcvt_f_xu_v_d helper_vfcvt_f_xu_v_d_riscv32 +#define helper_vfcvt_f_xu_v_h helper_vfcvt_f_xu_v_h_riscv32 +#define helper_vfcvt_f_xu_v_w helper_vfcvt_f_xu_v_w_riscv32 +#define helper_vfcvt_x_f_v_d helper_vfcvt_x_f_v_d_riscv32 +#define helper_vfcvt_x_f_v_h helper_vfcvt_x_f_v_h_riscv32 +#define helper_vfcvt_x_f_v_w helper_vfcvt_x_f_v_w_riscv32 +#define helper_vfcvt_xu_f_v_d helper_vfcvt_xu_f_v_d_riscv32 +#define helper_vfcvt_xu_f_v_h helper_vfcvt_xu_f_v_h_riscv32 +#define helper_vfcvt_xu_f_v_w helper_vfcvt_xu_f_v_w_riscv32 +#define helper_vfdiv_vf_d helper_vfdiv_vf_d_riscv32 +#define helper_vfdiv_vf_h helper_vfdiv_vf_h_riscv32 +#define helper_vfdiv_vf_w helper_vfdiv_vf_w_riscv32 +#define helper_vfdiv_vv_d helper_vfdiv_vv_d_riscv32 +#define helper_vfdiv_vv_h helper_vfdiv_vv_h_riscv32 +#define helper_vfdiv_vv_w helper_vfdiv_vv_w_riscv32 +#define helper_vfmacc_vf_d helper_vfmacc_vf_d_riscv32 +#define helper_vfmacc_vf_h helper_vfmacc_vf_h_riscv32 +#define helper_vfmacc_vf_w helper_vfmacc_vf_w_riscv32 +#define helper_vfmacc_vv_d helper_vfmacc_vv_d_riscv32 +#define helper_vfmacc_vv_h helper_vfmacc_vv_h_riscv32 +#define helper_vfmacc_vv_w helper_vfmacc_vv_w_riscv32 +#define helper_vfmadd_vf_d helper_vfmadd_vf_d_riscv32 +#define helper_vfmadd_vf_h helper_vfmadd_vf_h_riscv32 +#define helper_vfmadd_vf_w helper_vfmadd_vf_w_riscv32 +#define helper_vfmadd_vv_d helper_vfmadd_vv_d_riscv32 +#define helper_vfmadd_vv_h helper_vfmadd_vv_h_riscv32 +#define helper_vfmadd_vv_w helper_vfmadd_vv_w_riscv32 +#define helper_vfmax_vf_d helper_vfmax_vf_d_riscv32 +#define helper_vfmax_vf_h helper_vfmax_vf_h_riscv32 +#define helper_vfmax_vf_w helper_vfmax_vf_w_riscv32 +#define helper_vfmax_vv_d helper_vfmax_vv_d_riscv32 +#define helper_vfmax_vv_h helper_vfmax_vv_h_riscv32 +#define helper_vfmax_vv_w helper_vfmax_vv_w_riscv32 +#define helper_vfmerge_vfm_d helper_vfmerge_vfm_d_riscv32 +#define helper_vfmerge_vfm_h helper_vfmerge_vfm_h_riscv32 +#define helper_vfmerge_vfm_w helper_vfmerge_vfm_w_riscv32 +#define helper_vfmin_vf_d helper_vfmin_vf_d_riscv32 +#define helper_vfmin_vf_h helper_vfmin_vf_h_riscv32 +#define helper_vfmin_vf_w helper_vfmin_vf_w_riscv32 +#define helper_vfmin_vv_d helper_vfmin_vv_d_riscv32 +#define helper_vfmin_vv_h helper_vfmin_vv_h_riscv32 +#define helper_vfmin_vv_w helper_vfmin_vv_w_riscv32 +#define helper_vfmsac_vf_d helper_vfmsac_vf_d_riscv32 +#define helper_vfmsac_vf_h helper_vfmsac_vf_h_riscv32 +#define helper_vfmsac_vf_w helper_vfmsac_vf_w_riscv32 +#define helper_vfmsac_vv_d helper_vfmsac_vv_d_riscv32 +#define helper_vfmsac_vv_h helper_vfmsac_vv_h_riscv32 +#define helper_vfmsac_vv_w helper_vfmsac_vv_w_riscv32 +#define helper_vfmsub_vf_d helper_vfmsub_vf_d_riscv32 +#define helper_vfmsub_vf_h helper_vfmsub_vf_h_riscv32 +#define helper_vfmsub_vf_w helper_vfmsub_vf_w_riscv32 +#define helper_vfmsub_vv_d helper_vfmsub_vv_d_riscv32 +#define helper_vfmsub_vv_h helper_vfmsub_vv_h_riscv32 +#define helper_vfmsub_vv_w 
helper_vfmsub_vv_w_riscv32 +#define helper_vfmul_vf_d helper_vfmul_vf_d_riscv32 +#define helper_vfmul_vf_h helper_vfmul_vf_h_riscv32 +#define helper_vfmul_vf_w helper_vfmul_vf_w_riscv32 +#define helper_vfmul_vv_d helper_vfmul_vv_d_riscv32 +#define helper_vfmul_vv_h helper_vfmul_vv_h_riscv32 +#define helper_vfmul_vv_w helper_vfmul_vv_w_riscv32 +#define helper_vfncvt_f_f_v_h helper_vfncvt_f_f_v_h_riscv32 +#define helper_vfncvt_f_f_v_w helper_vfncvt_f_f_v_w_riscv32 +#define helper_vfncvt_f_x_v_h helper_vfncvt_f_x_v_h_riscv32 +#define helper_vfncvt_f_x_v_w helper_vfncvt_f_x_v_w_riscv32 +#define helper_vfncvt_f_xu_v_h helper_vfncvt_f_xu_v_h_riscv32 +#define helper_vfncvt_f_xu_v_w helper_vfncvt_f_xu_v_w_riscv32 +#define helper_vfncvt_x_f_v_h helper_vfncvt_x_f_v_h_riscv32 +#define helper_vfncvt_x_f_v_w helper_vfncvt_x_f_v_w_riscv32 +#define helper_vfncvt_xu_f_v_h helper_vfncvt_xu_f_v_h_riscv32 +#define helper_vfncvt_xu_f_v_w helper_vfncvt_xu_f_v_w_riscv32 +#define helper_vfnmacc_vf_d helper_vfnmacc_vf_d_riscv32 +#define helper_vfnmacc_vf_h helper_vfnmacc_vf_h_riscv32 +#define helper_vfnmacc_vf_w helper_vfnmacc_vf_w_riscv32 +#define helper_vfnmacc_vv_d helper_vfnmacc_vv_d_riscv32 +#define helper_vfnmacc_vv_h helper_vfnmacc_vv_h_riscv32 +#define helper_vfnmacc_vv_w helper_vfnmacc_vv_w_riscv32 +#define helper_vfnmadd_vf_d helper_vfnmadd_vf_d_riscv32 +#define helper_vfnmadd_vf_h helper_vfnmadd_vf_h_riscv32 +#define helper_vfnmadd_vf_w helper_vfnmadd_vf_w_riscv32 +#define helper_vfnmadd_vv_d helper_vfnmadd_vv_d_riscv32 +#define helper_vfnmadd_vv_h helper_vfnmadd_vv_h_riscv32 +#define helper_vfnmadd_vv_w helper_vfnmadd_vv_w_riscv32 +#define helper_vfnmsac_vf_d helper_vfnmsac_vf_d_riscv32 +#define helper_vfnmsac_vf_h helper_vfnmsac_vf_h_riscv32 +#define helper_vfnmsac_vf_w helper_vfnmsac_vf_w_riscv32 +#define helper_vfnmsac_vv_d helper_vfnmsac_vv_d_riscv32 +#define helper_vfnmsac_vv_h helper_vfnmsac_vv_h_riscv32 +#define helper_vfnmsac_vv_w helper_vfnmsac_vv_w_riscv32 +#define helper_vfnmsub_vf_d helper_vfnmsub_vf_d_riscv32 +#define helper_vfnmsub_vf_h helper_vfnmsub_vf_h_riscv32 +#define helper_vfnmsub_vf_w helper_vfnmsub_vf_w_riscv32 +#define helper_vfnmsub_vv_d helper_vfnmsub_vv_d_riscv32 +#define helper_vfnmsub_vv_h helper_vfnmsub_vv_h_riscv32 +#define helper_vfnmsub_vv_w helper_vfnmsub_vv_w_riscv32 +#define helper_vfrdiv_vf_d helper_vfrdiv_vf_d_riscv32 +#define helper_vfrdiv_vf_h helper_vfrdiv_vf_h_riscv32 +#define helper_vfrdiv_vf_w helper_vfrdiv_vf_w_riscv32 +#define helper_vfredmax_vs_d helper_vfredmax_vs_d_riscv32 +#define helper_vfredmax_vs_h helper_vfredmax_vs_h_riscv32 +#define helper_vfredmax_vs_w helper_vfredmax_vs_w_riscv32 +#define helper_vfredmin_vs_d helper_vfredmin_vs_d_riscv32 +#define helper_vfredmin_vs_h helper_vfredmin_vs_h_riscv32 +#define helper_vfredmin_vs_w helper_vfredmin_vs_w_riscv32 +#define helper_vfredsum_vs_d helper_vfredsum_vs_d_riscv32 +#define helper_vfredsum_vs_h helper_vfredsum_vs_h_riscv32 +#define helper_vfredsum_vs_w helper_vfredsum_vs_w_riscv32 +#define helper_vfrsub_vf_d helper_vfrsub_vf_d_riscv32 +#define helper_vfrsub_vf_h helper_vfrsub_vf_h_riscv32 +#define helper_vfrsub_vf_w helper_vfrsub_vf_w_riscv32 +#define helper_vfsgnj_vf_d helper_vfsgnj_vf_d_riscv32 +#define helper_vfsgnj_vf_h helper_vfsgnj_vf_h_riscv32 +#define helper_vfsgnj_vf_w helper_vfsgnj_vf_w_riscv32 +#define helper_vfsgnj_vv_d helper_vfsgnj_vv_d_riscv32 +#define helper_vfsgnj_vv_h helper_vfsgnj_vv_h_riscv32 +#define helper_vfsgnj_vv_w helper_vfsgnj_vv_w_riscv32 +#define helper_vfsgnjn_vf_d 
helper_vfsgnjn_vf_d_riscv32 +#define helper_vfsgnjn_vf_h helper_vfsgnjn_vf_h_riscv32 +#define helper_vfsgnjn_vf_w helper_vfsgnjn_vf_w_riscv32 +#define helper_vfsgnjn_vv_d helper_vfsgnjn_vv_d_riscv32 +#define helper_vfsgnjn_vv_h helper_vfsgnjn_vv_h_riscv32 +#define helper_vfsgnjn_vv_w helper_vfsgnjn_vv_w_riscv32 +#define helper_vfsgnjx_vf_d helper_vfsgnjx_vf_d_riscv32 +#define helper_vfsgnjx_vf_h helper_vfsgnjx_vf_h_riscv32 +#define helper_vfsgnjx_vf_w helper_vfsgnjx_vf_w_riscv32 +#define helper_vfsgnjx_vv_d helper_vfsgnjx_vv_d_riscv32 +#define helper_vfsgnjx_vv_h helper_vfsgnjx_vv_h_riscv32 +#define helper_vfsgnjx_vv_w helper_vfsgnjx_vv_w_riscv32 +#define helper_vfsqrt_v_d helper_vfsqrt_v_d_riscv32 +#define helper_vfsqrt_v_h helper_vfsqrt_v_h_riscv32 +#define helper_vfsqrt_v_w helper_vfsqrt_v_w_riscv32 +#define helper_vfsub_vf_d helper_vfsub_vf_d_riscv32 +#define helper_vfsub_vf_h helper_vfsub_vf_h_riscv32 +#define helper_vfsub_vf_w helper_vfsub_vf_w_riscv32 +#define helper_vfsub_vv_d helper_vfsub_vv_d_riscv32 +#define helper_vfsub_vv_h helper_vfsub_vv_h_riscv32 +#define helper_vfsub_vv_w helper_vfsub_vv_w_riscv32 +#define helper_vfwadd_vf_h helper_vfwadd_vf_h_riscv32 +#define helper_vfwadd_vf_w helper_vfwadd_vf_w_riscv32 +#define helper_vfwadd_vv_h helper_vfwadd_vv_h_riscv32 +#define helper_vfwadd_vv_w helper_vfwadd_vv_w_riscv32 +#define helper_vfwadd_wf_h helper_vfwadd_wf_h_riscv32 +#define helper_vfwadd_wf_w helper_vfwadd_wf_w_riscv32 +#define helper_vfwadd_wv_h helper_vfwadd_wv_h_riscv32 +#define helper_vfwadd_wv_w helper_vfwadd_wv_w_riscv32 +#define helper_vfwcvt_f_f_v_h helper_vfwcvt_f_f_v_h_riscv32 +#define helper_vfwcvt_f_f_v_w helper_vfwcvt_f_f_v_w_riscv32 +#define helper_vfwcvt_f_x_v_h helper_vfwcvt_f_x_v_h_riscv32 +#define helper_vfwcvt_f_x_v_w helper_vfwcvt_f_x_v_w_riscv32 +#define helper_vfwcvt_f_xu_v_h helper_vfwcvt_f_xu_v_h_riscv32 +#define helper_vfwcvt_f_xu_v_w helper_vfwcvt_f_xu_v_w_riscv32 +#define helper_vfwcvt_x_f_v_h helper_vfwcvt_x_f_v_h_riscv32 +#define helper_vfwcvt_x_f_v_w helper_vfwcvt_x_f_v_w_riscv32 +#define helper_vfwcvt_xu_f_v_h helper_vfwcvt_xu_f_v_h_riscv32 +#define helper_vfwcvt_xu_f_v_w helper_vfwcvt_xu_f_v_w_riscv32 +#define helper_vfwmacc_vf_h helper_vfwmacc_vf_h_riscv32 +#define helper_vfwmacc_vf_w helper_vfwmacc_vf_w_riscv32 +#define helper_vfwmacc_vv_h helper_vfwmacc_vv_h_riscv32 +#define helper_vfwmacc_vv_w helper_vfwmacc_vv_w_riscv32 +#define helper_vfwmsac_vf_h helper_vfwmsac_vf_h_riscv32 +#define helper_vfwmsac_vf_w helper_vfwmsac_vf_w_riscv32 +#define helper_vfwmsac_vv_h helper_vfwmsac_vv_h_riscv32 +#define helper_vfwmsac_vv_w helper_vfwmsac_vv_w_riscv32 +#define helper_vfwmul_vf_h helper_vfwmul_vf_h_riscv32 +#define helper_vfwmul_vf_w helper_vfwmul_vf_w_riscv32 +#define helper_vfwmul_vv_h helper_vfwmul_vv_h_riscv32 +#define helper_vfwmul_vv_w helper_vfwmul_vv_w_riscv32 +#define helper_vfwnmacc_vf_h helper_vfwnmacc_vf_h_riscv32 +#define helper_vfwnmacc_vf_w helper_vfwnmacc_vf_w_riscv32 +#define helper_vfwnmacc_vv_h helper_vfwnmacc_vv_h_riscv32 +#define helper_vfwnmacc_vv_w helper_vfwnmacc_vv_w_riscv32 +#define helper_vfwnmsac_vf_h helper_vfwnmsac_vf_h_riscv32 +#define helper_vfwnmsac_vf_w helper_vfwnmsac_vf_w_riscv32 +#define helper_vfwnmsac_vv_h helper_vfwnmsac_vv_h_riscv32 +#define helper_vfwnmsac_vv_w helper_vfwnmsac_vv_w_riscv32 +#define helper_vfwredsum_vs_h helper_vfwredsum_vs_h_riscv32 +#define helper_vfwredsum_vs_w helper_vfwredsum_vs_w_riscv32 +#define helper_vfwsub_vf_h helper_vfwsub_vf_h_riscv32 +#define helper_vfwsub_vf_w 
helper_vfwsub_vf_w_riscv32 +#define helper_vfwsub_vv_h helper_vfwsub_vv_h_riscv32 +#define helper_vfwsub_vv_w helper_vfwsub_vv_w_riscv32 +#define helper_vfwsub_wf_h helper_vfwsub_wf_h_riscv32 +#define helper_vfwsub_wf_w helper_vfwsub_wf_w_riscv32 +#define helper_vfwsub_wv_h helper_vfwsub_wv_h_riscv32 +#define helper_vfwsub_wv_w helper_vfwsub_wv_w_riscv32 +#define helper_vid_v_b helper_vid_v_b_riscv32 +#define helper_vid_v_d helper_vid_v_d_riscv32 +#define helper_vid_v_h helper_vid_v_h_riscv32 +#define helper_vid_v_w helper_vid_v_w_riscv32 +#define helper_viota_m_b helper_viota_m_b_riscv32 +#define helper_viota_m_d helper_viota_m_d_riscv32 +#define helper_viota_m_h helper_viota_m_h_riscv32 +#define helper_viota_m_w helper_viota_m_w_riscv32 +#define helper_vlb_v_b helper_vlb_v_b_riscv32 +#define helper_vlb_v_b_mask helper_vlb_v_b_mask_riscv32 +#define helper_vlb_v_d helper_vlb_v_d_riscv32 +#define helper_vlb_v_d_mask helper_vlb_v_d_mask_riscv32 +#define helper_vlb_v_h helper_vlb_v_h_riscv32 +#define helper_vlb_v_h_mask helper_vlb_v_h_mask_riscv32 +#define helper_vlb_v_w helper_vlb_v_w_riscv32 +#define helper_vlb_v_w_mask helper_vlb_v_w_mask_riscv32 +#define helper_vlbff_v_b helper_vlbff_v_b_riscv32 +#define helper_vlbff_v_d helper_vlbff_v_d_riscv32 +#define helper_vlbff_v_h helper_vlbff_v_h_riscv32 +#define helper_vlbff_v_w helper_vlbff_v_w_riscv32 +#define helper_vlbu_v_b helper_vlbu_v_b_riscv32 +#define helper_vlbu_v_b_mask helper_vlbu_v_b_mask_riscv32 +#define helper_vlbu_v_d helper_vlbu_v_d_riscv32 +#define helper_vlbu_v_d_mask helper_vlbu_v_d_mask_riscv32 +#define helper_vlbu_v_h helper_vlbu_v_h_riscv32 +#define helper_vlbu_v_h_mask helper_vlbu_v_h_mask_riscv32 +#define helper_vlbu_v_w helper_vlbu_v_w_riscv32 +#define helper_vlbu_v_w_mask helper_vlbu_v_w_mask_riscv32 +#define helper_vlbuff_v_b helper_vlbuff_v_b_riscv32 +#define helper_vlbuff_v_d helper_vlbuff_v_d_riscv32 +#define helper_vlbuff_v_h helper_vlbuff_v_h_riscv32 +#define helper_vlbuff_v_w helper_vlbuff_v_w_riscv32 +#define helper_vle_v_b helper_vle_v_b_riscv32 +#define helper_vle_v_b_mask helper_vle_v_b_mask_riscv32 +#define helper_vle_v_d helper_vle_v_d_riscv32 +#define helper_vle_v_d_mask helper_vle_v_d_mask_riscv32 +#define helper_vle_v_h helper_vle_v_h_riscv32 +#define helper_vle_v_h_mask helper_vle_v_h_mask_riscv32 +#define helper_vle_v_w helper_vle_v_w_riscv32 +#define helper_vle_v_w_mask helper_vle_v_w_mask_riscv32 +#define helper_vleff_v_b helper_vleff_v_b_riscv32 +#define helper_vleff_v_d helper_vleff_v_d_riscv32 +#define helper_vleff_v_h helper_vleff_v_h_riscv32 +#define helper_vleff_v_w helper_vleff_v_w_riscv32 +#define helper_vlh_v_d helper_vlh_v_d_riscv32 +#define helper_vlh_v_d_mask helper_vlh_v_d_mask_riscv32 +#define helper_vlh_v_h helper_vlh_v_h_riscv32 +#define helper_vlh_v_h_mask helper_vlh_v_h_mask_riscv32 +#define helper_vlh_v_w helper_vlh_v_w_riscv32 +#define helper_vlh_v_w_mask helper_vlh_v_w_mask_riscv32 +#define helper_vlhff_v_d helper_vlhff_v_d_riscv32 +#define helper_vlhff_v_h helper_vlhff_v_h_riscv32 +#define helper_vlhff_v_w helper_vlhff_v_w_riscv32 +#define helper_vlhu_v_d helper_vlhu_v_d_riscv32 +#define helper_vlhu_v_d_mask helper_vlhu_v_d_mask_riscv32 +#define helper_vlhu_v_h helper_vlhu_v_h_riscv32 +#define helper_vlhu_v_h_mask helper_vlhu_v_h_mask_riscv32 +#define helper_vlhu_v_w helper_vlhu_v_w_riscv32 +#define helper_vlhu_v_w_mask helper_vlhu_v_w_mask_riscv32 +#define helper_vlhuff_v_d helper_vlhuff_v_d_riscv32 +#define helper_vlhuff_v_h helper_vlhuff_v_h_riscv32 +#define helper_vlhuff_v_w 
helper_vlhuff_v_w_riscv32 +#define helper_vlsb_v_b helper_vlsb_v_b_riscv32 +#define helper_vlsb_v_d helper_vlsb_v_d_riscv32 +#define helper_vlsb_v_h helper_vlsb_v_h_riscv32 +#define helper_vlsb_v_w helper_vlsb_v_w_riscv32 +#define helper_vlsbu_v_b helper_vlsbu_v_b_riscv32 +#define helper_vlsbu_v_d helper_vlsbu_v_d_riscv32 +#define helper_vlsbu_v_h helper_vlsbu_v_h_riscv32 +#define helper_vlsbu_v_w helper_vlsbu_v_w_riscv32 +#define helper_vlse_v_b helper_vlse_v_b_riscv32 +#define helper_vlse_v_d helper_vlse_v_d_riscv32 +#define helper_vlse_v_h helper_vlse_v_h_riscv32 +#define helper_vlse_v_w helper_vlse_v_w_riscv32 +#define helper_vlsh_v_d helper_vlsh_v_d_riscv32 +#define helper_vlsh_v_h helper_vlsh_v_h_riscv32 +#define helper_vlsh_v_w helper_vlsh_v_w_riscv32 +#define helper_vlshu_v_d helper_vlshu_v_d_riscv32 +#define helper_vlshu_v_h helper_vlshu_v_h_riscv32 +#define helper_vlshu_v_w helper_vlshu_v_w_riscv32 +#define helper_vlsw_v_d helper_vlsw_v_d_riscv32 +#define helper_vlsw_v_w helper_vlsw_v_w_riscv32 +#define helper_vlswu_v_d helper_vlswu_v_d_riscv32 +#define helper_vlswu_v_w helper_vlswu_v_w_riscv32 +#define helper_vlw_v_d helper_vlw_v_d_riscv32 +#define helper_vlw_v_d_mask helper_vlw_v_d_mask_riscv32 +#define helper_vlw_v_w helper_vlw_v_w_riscv32 +#define helper_vlw_v_w_mask helper_vlw_v_w_mask_riscv32 +#define helper_vlwff_v_d helper_vlwff_v_d_riscv32 +#define helper_vlwff_v_w helper_vlwff_v_w_riscv32 +#define helper_vlwu_v_d helper_vlwu_v_d_riscv32 +#define helper_vlwu_v_d_mask helper_vlwu_v_d_mask_riscv32 +#define helper_vlwu_v_w helper_vlwu_v_w_riscv32 +#define helper_vlwu_v_w_mask helper_vlwu_v_w_mask_riscv32 +#define helper_vlwuff_v_d helper_vlwuff_v_d_riscv32 +#define helper_vlwuff_v_w helper_vlwuff_v_w_riscv32 +#define helper_vlxb_v_b helper_vlxb_v_b_riscv32 +#define helper_vlxb_v_d helper_vlxb_v_d_riscv32 +#define helper_vlxb_v_h helper_vlxb_v_h_riscv32 +#define helper_vlxb_v_w helper_vlxb_v_w_riscv32 +#define helper_vlxbu_v_b helper_vlxbu_v_b_riscv32 +#define helper_vlxbu_v_d helper_vlxbu_v_d_riscv32 +#define helper_vlxbu_v_h helper_vlxbu_v_h_riscv32 +#define helper_vlxbu_v_w helper_vlxbu_v_w_riscv32 +#define helper_vlxe_v_b helper_vlxe_v_b_riscv32 +#define helper_vlxe_v_d helper_vlxe_v_d_riscv32 +#define helper_vlxe_v_h helper_vlxe_v_h_riscv32 +#define helper_vlxe_v_w helper_vlxe_v_w_riscv32 +#define helper_vlxh_v_d helper_vlxh_v_d_riscv32 +#define helper_vlxh_v_h helper_vlxh_v_h_riscv32 +#define helper_vlxh_v_w helper_vlxh_v_w_riscv32 +#define helper_vlxhu_v_d helper_vlxhu_v_d_riscv32 +#define helper_vlxhu_v_h helper_vlxhu_v_h_riscv32 +#define helper_vlxhu_v_w helper_vlxhu_v_w_riscv32 +#define helper_vlxw_v_d helper_vlxw_v_d_riscv32 +#define helper_vlxw_v_w helper_vlxw_v_w_riscv32 +#define helper_vlxwu_v_d helper_vlxwu_v_d_riscv32 +#define helper_vlxwu_v_w helper_vlxwu_v_w_riscv32 +#define helper_vmacc_vv_b helper_vmacc_vv_b_riscv32 +#define helper_vmacc_vv_d helper_vmacc_vv_d_riscv32 +#define helper_vmacc_vv_h helper_vmacc_vv_h_riscv32 +#define helper_vmacc_vv_w helper_vmacc_vv_w_riscv32 +#define helper_vmacc_vx_b helper_vmacc_vx_b_riscv32 +#define helper_vmacc_vx_d helper_vmacc_vx_d_riscv32 +#define helper_vmacc_vx_h helper_vmacc_vx_h_riscv32 +#define helper_vmacc_vx_w helper_vmacc_vx_w_riscv32 +#define helper_vmadc_vvm_b helper_vmadc_vvm_b_riscv32 +#define helper_vmadc_vvm_d helper_vmadc_vvm_d_riscv32 +#define helper_vmadc_vvm_h helper_vmadc_vvm_h_riscv32 +#define helper_vmadc_vvm_w helper_vmadc_vvm_w_riscv32 +#define helper_vmadc_vxm_b helper_vmadc_vxm_b_riscv32 
+#define helper_vmadc_vxm_d helper_vmadc_vxm_d_riscv32 +#define helper_vmadc_vxm_h helper_vmadc_vxm_h_riscv32 +#define helper_vmadc_vxm_w helper_vmadc_vxm_w_riscv32 +#define helper_vmadd_vv_b helper_vmadd_vv_b_riscv32 +#define helper_vmadd_vv_d helper_vmadd_vv_d_riscv32 +#define helper_vmadd_vv_h helper_vmadd_vv_h_riscv32 +#define helper_vmadd_vv_w helper_vmadd_vv_w_riscv32 +#define helper_vmadd_vx_b helper_vmadd_vx_b_riscv32 +#define helper_vmadd_vx_d helper_vmadd_vx_d_riscv32 +#define helper_vmadd_vx_h helper_vmadd_vx_h_riscv32 +#define helper_vmadd_vx_w helper_vmadd_vx_w_riscv32 +#define helper_vmand_mm helper_vmand_mm_riscv32 +#define helper_vmandnot_mm helper_vmandnot_mm_riscv32 +#define helper_vmax_vv_b helper_vmax_vv_b_riscv32 +#define helper_vmax_vv_d helper_vmax_vv_d_riscv32 +#define helper_vmax_vv_h helper_vmax_vv_h_riscv32 +#define helper_vmax_vv_w helper_vmax_vv_w_riscv32 +#define helper_vmax_vx_b helper_vmax_vx_b_riscv32 +#define helper_vmax_vx_d helper_vmax_vx_d_riscv32 +#define helper_vmax_vx_h helper_vmax_vx_h_riscv32 +#define helper_vmax_vx_w helper_vmax_vx_w_riscv32 +#define helper_vmaxu_vv_b helper_vmaxu_vv_b_riscv32 +#define helper_vmaxu_vv_d helper_vmaxu_vv_d_riscv32 +#define helper_vmaxu_vv_h helper_vmaxu_vv_h_riscv32 +#define helper_vmaxu_vv_w helper_vmaxu_vv_w_riscv32 +#define helper_vmaxu_vx_b helper_vmaxu_vx_b_riscv32 +#define helper_vmaxu_vx_d helper_vmaxu_vx_d_riscv32 +#define helper_vmaxu_vx_h helper_vmaxu_vx_h_riscv32 +#define helper_vmaxu_vx_w helper_vmaxu_vx_w_riscv32 +#define helper_vmerge_vvm_b helper_vmerge_vvm_b_riscv32 +#define helper_vmerge_vvm_d helper_vmerge_vvm_d_riscv32 +#define helper_vmerge_vvm_h helper_vmerge_vvm_h_riscv32 +#define helper_vmerge_vvm_w helper_vmerge_vvm_w_riscv32 +#define helper_vmerge_vxm_b helper_vmerge_vxm_b_riscv32 +#define helper_vmerge_vxm_d helper_vmerge_vxm_d_riscv32 +#define helper_vmerge_vxm_h helper_vmerge_vxm_h_riscv32 +#define helper_vmerge_vxm_w helper_vmerge_vxm_w_riscv32 +#define helper_vmfeq_vf_d helper_vmfeq_vf_d_riscv32 +#define helper_vmfeq_vf_h helper_vmfeq_vf_h_riscv32 +#define helper_vmfeq_vf_w helper_vmfeq_vf_w_riscv32 +#define helper_vmfeq_vv_d helper_vmfeq_vv_d_riscv32 +#define helper_vmfeq_vv_h helper_vmfeq_vv_h_riscv32 +#define helper_vmfeq_vv_w helper_vmfeq_vv_w_riscv32 +#define helper_vmfge_vf_d helper_vmfge_vf_d_riscv32 +#define helper_vmfge_vf_h helper_vmfge_vf_h_riscv32 +#define helper_vmfge_vf_w helper_vmfge_vf_w_riscv32 +#define helper_vmfgt_vf_d helper_vmfgt_vf_d_riscv32 +#define helper_vmfgt_vf_h helper_vmfgt_vf_h_riscv32 +#define helper_vmfgt_vf_w helper_vmfgt_vf_w_riscv32 +#define helper_vmfirst_m helper_vmfirst_m_riscv32 +#define helper_vmfle_vf_d helper_vmfle_vf_d_riscv32 +#define helper_vmfle_vf_h helper_vmfle_vf_h_riscv32 +#define helper_vmfle_vf_w helper_vmfle_vf_w_riscv32 +#define helper_vmfle_vv_d helper_vmfle_vv_d_riscv32 +#define helper_vmfle_vv_h helper_vmfle_vv_h_riscv32 +#define helper_vmfle_vv_w helper_vmfle_vv_w_riscv32 +#define helper_vmflt_vf_d helper_vmflt_vf_d_riscv32 +#define helper_vmflt_vf_h helper_vmflt_vf_h_riscv32 +#define helper_vmflt_vf_w helper_vmflt_vf_w_riscv32 +#define helper_vmflt_vv_d helper_vmflt_vv_d_riscv32 +#define helper_vmflt_vv_h helper_vmflt_vv_h_riscv32 +#define helper_vmflt_vv_w helper_vmflt_vv_w_riscv32 +#define helper_vmfne_vf_d helper_vmfne_vf_d_riscv32 +#define helper_vmfne_vf_h helper_vmfne_vf_h_riscv32 +#define helper_vmfne_vf_w helper_vmfne_vf_w_riscv32 +#define helper_vmfne_vv_d helper_vmfne_vv_d_riscv32 +#define helper_vmfne_vv_h 
helper_vmfne_vv_h_riscv32 +#define helper_vmfne_vv_w helper_vmfne_vv_w_riscv32 +#define helper_vmford_vf_d helper_vmford_vf_d_riscv32 +#define helper_vmford_vf_h helper_vmford_vf_h_riscv32 +#define helper_vmford_vf_w helper_vmford_vf_w_riscv32 +#define helper_vmford_vv_d helper_vmford_vv_d_riscv32 +#define helper_vmford_vv_h helper_vmford_vv_h_riscv32 +#define helper_vmford_vv_w helper_vmford_vv_w_riscv32 +#define helper_vmin_vv_b helper_vmin_vv_b_riscv32 +#define helper_vmin_vv_d helper_vmin_vv_d_riscv32 +#define helper_vmin_vv_h helper_vmin_vv_h_riscv32 +#define helper_vmin_vv_w helper_vmin_vv_w_riscv32 +#define helper_vmin_vx_b helper_vmin_vx_b_riscv32 +#define helper_vmin_vx_d helper_vmin_vx_d_riscv32 +#define helper_vmin_vx_h helper_vmin_vx_h_riscv32 +#define helper_vmin_vx_w helper_vmin_vx_w_riscv32 +#define helper_vminu_vv_b helper_vminu_vv_b_riscv32 +#define helper_vminu_vv_d helper_vminu_vv_d_riscv32 +#define helper_vminu_vv_h helper_vminu_vv_h_riscv32 +#define helper_vminu_vv_w helper_vminu_vv_w_riscv32 +#define helper_vminu_vx_b helper_vminu_vx_b_riscv32 +#define helper_vminu_vx_d helper_vminu_vx_d_riscv32 +#define helper_vminu_vx_h helper_vminu_vx_h_riscv32 +#define helper_vminu_vx_w helper_vminu_vx_w_riscv32 +#define helper_vmnand_mm helper_vmnand_mm_riscv32 +#define helper_vmnor_mm helper_vmnor_mm_riscv32 +#define helper_vmor_mm helper_vmor_mm_riscv32 +#define helper_vmornot_mm helper_vmornot_mm_riscv32 +#define helper_vmpopc_m helper_vmpopc_m_riscv32 +#define helper_vmsbc_vvm_b helper_vmsbc_vvm_b_riscv32 +#define helper_vmsbc_vvm_d helper_vmsbc_vvm_d_riscv32 +#define helper_vmsbc_vvm_h helper_vmsbc_vvm_h_riscv32 +#define helper_vmsbc_vvm_w helper_vmsbc_vvm_w_riscv32 +#define helper_vmsbc_vxm_b helper_vmsbc_vxm_b_riscv32 +#define helper_vmsbc_vxm_d helper_vmsbc_vxm_d_riscv32 +#define helper_vmsbc_vxm_h helper_vmsbc_vxm_h_riscv32 +#define helper_vmsbc_vxm_w helper_vmsbc_vxm_w_riscv32 +#define helper_vmsbf_m helper_vmsbf_m_riscv32 +#define helper_vmseq_vv_b helper_vmseq_vv_b_riscv32 +#define helper_vmseq_vv_d helper_vmseq_vv_d_riscv32 +#define helper_vmseq_vv_h helper_vmseq_vv_h_riscv32 +#define helper_vmseq_vv_w helper_vmseq_vv_w_riscv32 +#define helper_vmseq_vx_b helper_vmseq_vx_b_riscv32 +#define helper_vmseq_vx_d helper_vmseq_vx_d_riscv32 +#define helper_vmseq_vx_h helper_vmseq_vx_h_riscv32 +#define helper_vmseq_vx_w helper_vmseq_vx_w_riscv32 +#define helper_vmsgt_vx_b helper_vmsgt_vx_b_riscv32 +#define helper_vmsgt_vx_d helper_vmsgt_vx_d_riscv32 +#define helper_vmsgt_vx_h helper_vmsgt_vx_h_riscv32 +#define helper_vmsgt_vx_w helper_vmsgt_vx_w_riscv32 +#define helper_vmsgtu_vx_b helper_vmsgtu_vx_b_riscv32 +#define helper_vmsgtu_vx_d helper_vmsgtu_vx_d_riscv32 +#define helper_vmsgtu_vx_h helper_vmsgtu_vx_h_riscv32 +#define helper_vmsgtu_vx_w helper_vmsgtu_vx_w_riscv32 +#define helper_vmsif_m helper_vmsif_m_riscv32 +#define helper_vmsle_vv_b helper_vmsle_vv_b_riscv32 +#define helper_vmsle_vv_d helper_vmsle_vv_d_riscv32 +#define helper_vmsle_vv_h helper_vmsle_vv_h_riscv32 +#define helper_vmsle_vv_w helper_vmsle_vv_w_riscv32 +#define helper_vmsle_vx_b helper_vmsle_vx_b_riscv32 +#define helper_vmsle_vx_d helper_vmsle_vx_d_riscv32 +#define helper_vmsle_vx_h helper_vmsle_vx_h_riscv32 +#define helper_vmsle_vx_w helper_vmsle_vx_w_riscv32 +#define helper_vmsleu_vv_b helper_vmsleu_vv_b_riscv32 +#define helper_vmsleu_vv_d helper_vmsleu_vv_d_riscv32 +#define helper_vmsleu_vv_h helper_vmsleu_vv_h_riscv32 +#define helper_vmsleu_vv_w helper_vmsleu_vv_w_riscv32 +#define helper_vmsleu_vx_b 
helper_vmsleu_vx_b_riscv32 +#define helper_vmsleu_vx_d helper_vmsleu_vx_d_riscv32 +#define helper_vmsleu_vx_h helper_vmsleu_vx_h_riscv32 +#define helper_vmsleu_vx_w helper_vmsleu_vx_w_riscv32 +#define helper_vmslt_vv_b helper_vmslt_vv_b_riscv32 +#define helper_vmslt_vv_d helper_vmslt_vv_d_riscv32 +#define helper_vmslt_vv_h helper_vmslt_vv_h_riscv32 +#define helper_vmslt_vv_w helper_vmslt_vv_w_riscv32 +#define helper_vmslt_vx_b helper_vmslt_vx_b_riscv32 +#define helper_vmslt_vx_d helper_vmslt_vx_d_riscv32 +#define helper_vmslt_vx_h helper_vmslt_vx_h_riscv32 +#define helper_vmslt_vx_w helper_vmslt_vx_w_riscv32 +#define helper_vmsltu_vv_b helper_vmsltu_vv_b_riscv32 +#define helper_vmsltu_vv_d helper_vmsltu_vv_d_riscv32 +#define helper_vmsltu_vv_h helper_vmsltu_vv_h_riscv32 +#define helper_vmsltu_vv_w helper_vmsltu_vv_w_riscv32 +#define helper_vmsltu_vx_b helper_vmsltu_vx_b_riscv32 +#define helper_vmsltu_vx_d helper_vmsltu_vx_d_riscv32 +#define helper_vmsltu_vx_h helper_vmsltu_vx_h_riscv32 +#define helper_vmsltu_vx_w helper_vmsltu_vx_w_riscv32 +#define helper_vmsne_vv_b helper_vmsne_vv_b_riscv32 +#define helper_vmsne_vv_d helper_vmsne_vv_d_riscv32 +#define helper_vmsne_vv_h helper_vmsne_vv_h_riscv32 +#define helper_vmsne_vv_w helper_vmsne_vv_w_riscv32 +#define helper_vmsne_vx_b helper_vmsne_vx_b_riscv32 +#define helper_vmsne_vx_d helper_vmsne_vx_d_riscv32 +#define helper_vmsne_vx_h helper_vmsne_vx_h_riscv32 +#define helper_vmsne_vx_w helper_vmsne_vx_w_riscv32 +#define helper_vmsof_m helper_vmsof_m_riscv32 +#define helper_vmul_vv_b helper_vmul_vv_b_riscv32 +#define helper_vmul_vv_d helper_vmul_vv_d_riscv32 +#define helper_vmul_vv_h helper_vmul_vv_h_riscv32 +#define helper_vmul_vv_w helper_vmul_vv_w_riscv32 +#define helper_vmul_vx_b helper_vmul_vx_b_riscv32 +#define helper_vmul_vx_d helper_vmul_vx_d_riscv32 +#define helper_vmul_vx_h helper_vmul_vx_h_riscv32 +#define helper_vmul_vx_w helper_vmul_vx_w_riscv32 +#define helper_vmulh_vv_b helper_vmulh_vv_b_riscv32 +#define helper_vmulh_vv_d helper_vmulh_vv_d_riscv32 +#define helper_vmulh_vv_h helper_vmulh_vv_h_riscv32 +#define helper_vmulh_vv_w helper_vmulh_vv_w_riscv32 +#define helper_vmulh_vx_b helper_vmulh_vx_b_riscv32 +#define helper_vmulh_vx_d helper_vmulh_vx_d_riscv32 +#define helper_vmulh_vx_h helper_vmulh_vx_h_riscv32 +#define helper_vmulh_vx_w helper_vmulh_vx_w_riscv32 +#define helper_vmulhsu_vv_b helper_vmulhsu_vv_b_riscv32 +#define helper_vmulhsu_vv_d helper_vmulhsu_vv_d_riscv32 +#define helper_vmulhsu_vv_h helper_vmulhsu_vv_h_riscv32 +#define helper_vmulhsu_vv_w helper_vmulhsu_vv_w_riscv32 +#define helper_vmulhsu_vx_b helper_vmulhsu_vx_b_riscv32 +#define helper_vmulhsu_vx_d helper_vmulhsu_vx_d_riscv32 +#define helper_vmulhsu_vx_h helper_vmulhsu_vx_h_riscv32 +#define helper_vmulhsu_vx_w helper_vmulhsu_vx_w_riscv32 +#define helper_vmulhu_vv_b helper_vmulhu_vv_b_riscv32 +#define helper_vmulhu_vv_d helper_vmulhu_vv_d_riscv32 +#define helper_vmulhu_vv_h helper_vmulhu_vv_h_riscv32 +#define helper_vmulhu_vv_w helper_vmulhu_vv_w_riscv32 +#define helper_vmulhu_vx_b helper_vmulhu_vx_b_riscv32 +#define helper_vmulhu_vx_d helper_vmulhu_vx_d_riscv32 +#define helper_vmulhu_vx_h helper_vmulhu_vx_h_riscv32 +#define helper_vmulhu_vx_w helper_vmulhu_vx_w_riscv32 +#define helper_vmv_v_v_b helper_vmv_v_v_b_riscv32 +#define helper_vmv_v_v_d helper_vmv_v_v_d_riscv32 +#define helper_vmv_v_v_h helper_vmv_v_v_h_riscv32 +#define helper_vmv_v_v_w helper_vmv_v_v_w_riscv32 +#define helper_vmv_v_x_b helper_vmv_v_x_b_riscv32 +#define helper_vmv_v_x_d 
helper_vmv_v_x_d_riscv32 +#define helper_vmv_v_x_h helper_vmv_v_x_h_riscv32 +#define helper_vmv_v_x_w helper_vmv_v_x_w_riscv32 +#define helper_vmxnor_mm helper_vmxnor_mm_riscv32 +#define helper_vmxor_mm helper_vmxor_mm_riscv32 +#define helper_vnclip_vv_b helper_vnclip_vv_b_riscv32 +#define helper_vnclip_vv_h helper_vnclip_vv_h_riscv32 +#define helper_vnclip_vv_w helper_vnclip_vv_w_riscv32 +#define helper_vnclip_vx_b helper_vnclip_vx_b_riscv32 +#define helper_vnclip_vx_h helper_vnclip_vx_h_riscv32 +#define helper_vnclip_vx_w helper_vnclip_vx_w_riscv32 +#define helper_vnclipu_vv_b helper_vnclipu_vv_b_riscv32 +#define helper_vnclipu_vv_h helper_vnclipu_vv_h_riscv32 +#define helper_vnclipu_vv_w helper_vnclipu_vv_w_riscv32 +#define helper_vnclipu_vx_b helper_vnclipu_vx_b_riscv32 +#define helper_vnclipu_vx_h helper_vnclipu_vx_h_riscv32 +#define helper_vnclipu_vx_w helper_vnclipu_vx_w_riscv32 +#define helper_vnmsac_vv_b helper_vnmsac_vv_b_riscv32 +#define helper_vnmsac_vv_d helper_vnmsac_vv_d_riscv32 +#define helper_vnmsac_vv_h helper_vnmsac_vv_h_riscv32 +#define helper_vnmsac_vv_w helper_vnmsac_vv_w_riscv32 +#define helper_vnmsac_vx_b helper_vnmsac_vx_b_riscv32 +#define helper_vnmsac_vx_d helper_vnmsac_vx_d_riscv32 +#define helper_vnmsac_vx_h helper_vnmsac_vx_h_riscv32 +#define helper_vnmsac_vx_w helper_vnmsac_vx_w_riscv32 +#define helper_vnmsub_vv_b helper_vnmsub_vv_b_riscv32 +#define helper_vnmsub_vv_d helper_vnmsub_vv_d_riscv32 +#define helper_vnmsub_vv_h helper_vnmsub_vv_h_riscv32 +#define helper_vnmsub_vv_w helper_vnmsub_vv_w_riscv32 +#define helper_vnmsub_vx_b helper_vnmsub_vx_b_riscv32 +#define helper_vnmsub_vx_d helper_vnmsub_vx_d_riscv32 +#define helper_vnmsub_vx_h helper_vnmsub_vx_h_riscv32 +#define helper_vnmsub_vx_w helper_vnmsub_vx_w_riscv32 +#define helper_vnsra_vv_b helper_vnsra_vv_b_riscv32 +#define helper_vnsra_vv_h helper_vnsra_vv_h_riscv32 +#define helper_vnsra_vv_w helper_vnsra_vv_w_riscv32 +#define helper_vnsra_vx_b helper_vnsra_vx_b_riscv32 +#define helper_vnsra_vx_h helper_vnsra_vx_h_riscv32 +#define helper_vnsra_vx_w helper_vnsra_vx_w_riscv32 +#define helper_vnsrl_vv_b helper_vnsrl_vv_b_riscv32 +#define helper_vnsrl_vv_h helper_vnsrl_vv_h_riscv32 +#define helper_vnsrl_vv_w helper_vnsrl_vv_w_riscv32 +#define helper_vnsrl_vx_b helper_vnsrl_vx_b_riscv32 +#define helper_vnsrl_vx_h helper_vnsrl_vx_h_riscv32 +#define helper_vnsrl_vx_w helper_vnsrl_vx_w_riscv32 +#define helper_vor_vv_b helper_vor_vv_b_riscv32 +#define helper_vor_vv_d helper_vor_vv_d_riscv32 +#define helper_vor_vv_h helper_vor_vv_h_riscv32 +#define helper_vor_vv_w helper_vor_vv_w_riscv32 +#define helper_vor_vx_b helper_vor_vx_b_riscv32 +#define helper_vor_vx_d helper_vor_vx_d_riscv32 +#define helper_vor_vx_h helper_vor_vx_h_riscv32 +#define helper_vor_vx_w helper_vor_vx_w_riscv32 +#define helper_vredand_vs_b helper_vredand_vs_b_riscv32 +#define helper_vredand_vs_d helper_vredand_vs_d_riscv32 +#define helper_vredand_vs_h helper_vredand_vs_h_riscv32 +#define helper_vredand_vs_w helper_vredand_vs_w_riscv32 +#define helper_vredmax_vs_b helper_vredmax_vs_b_riscv32 +#define helper_vredmax_vs_d helper_vredmax_vs_d_riscv32 +#define helper_vredmax_vs_h helper_vredmax_vs_h_riscv32 +#define helper_vredmax_vs_w helper_vredmax_vs_w_riscv32 +#define helper_vredmaxu_vs_b helper_vredmaxu_vs_b_riscv32 +#define helper_vredmaxu_vs_d helper_vredmaxu_vs_d_riscv32 +#define helper_vredmaxu_vs_h helper_vredmaxu_vs_h_riscv32 +#define helper_vredmaxu_vs_w helper_vredmaxu_vs_w_riscv32 +#define helper_vredmin_vs_b 
helper_vredmin_vs_b_riscv32 +#define helper_vredmin_vs_d helper_vredmin_vs_d_riscv32 +#define helper_vredmin_vs_h helper_vredmin_vs_h_riscv32 +#define helper_vredmin_vs_w helper_vredmin_vs_w_riscv32 +#define helper_vredminu_vs_b helper_vredminu_vs_b_riscv32 +#define helper_vredminu_vs_d helper_vredminu_vs_d_riscv32 +#define helper_vredminu_vs_h helper_vredminu_vs_h_riscv32 +#define helper_vredminu_vs_w helper_vredminu_vs_w_riscv32 +#define helper_vredor_vs_b helper_vredor_vs_b_riscv32 +#define helper_vredor_vs_d helper_vredor_vs_d_riscv32 +#define helper_vredor_vs_h helper_vredor_vs_h_riscv32 +#define helper_vredor_vs_w helper_vredor_vs_w_riscv32 +#define helper_vredsum_vs_b helper_vredsum_vs_b_riscv32 +#define helper_vredsum_vs_d helper_vredsum_vs_d_riscv32 +#define helper_vredsum_vs_h helper_vredsum_vs_h_riscv32 +#define helper_vredsum_vs_w helper_vredsum_vs_w_riscv32 +#define helper_vredxor_vs_b helper_vredxor_vs_b_riscv32 +#define helper_vredxor_vs_d helper_vredxor_vs_d_riscv32 +#define helper_vredxor_vs_h helper_vredxor_vs_h_riscv32 +#define helper_vredxor_vs_w helper_vredxor_vs_w_riscv32 +#define helper_vrem_vv_b helper_vrem_vv_b_riscv32 +#define helper_vrem_vv_d helper_vrem_vv_d_riscv32 +#define helper_vrem_vv_h helper_vrem_vv_h_riscv32 +#define helper_vrem_vv_w helper_vrem_vv_w_riscv32 +#define helper_vrem_vx_b helper_vrem_vx_b_riscv32 +#define helper_vrem_vx_d helper_vrem_vx_d_riscv32 +#define helper_vrem_vx_h helper_vrem_vx_h_riscv32 +#define helper_vrem_vx_w helper_vrem_vx_w_riscv32 +#define helper_vremu_vv_b helper_vremu_vv_b_riscv32 +#define helper_vremu_vv_d helper_vremu_vv_d_riscv32 +#define helper_vremu_vv_h helper_vremu_vv_h_riscv32 +#define helper_vremu_vv_w helper_vremu_vv_w_riscv32 +#define helper_vremu_vx_b helper_vremu_vx_b_riscv32 +#define helper_vremu_vx_d helper_vremu_vx_d_riscv32 +#define helper_vremu_vx_h helper_vremu_vx_h_riscv32 +#define helper_vremu_vx_w helper_vremu_vx_w_riscv32 +#define helper_vrgather_vv_b helper_vrgather_vv_b_riscv32 +#define helper_vrgather_vv_d helper_vrgather_vv_d_riscv32 +#define helper_vrgather_vv_h helper_vrgather_vv_h_riscv32 +#define helper_vrgather_vv_w helper_vrgather_vv_w_riscv32 +#define helper_vrgather_vx_b helper_vrgather_vx_b_riscv32 +#define helper_vrgather_vx_d helper_vrgather_vx_d_riscv32 +#define helper_vrgather_vx_h helper_vrgather_vx_h_riscv32 +#define helper_vrgather_vx_w helper_vrgather_vx_w_riscv32 +#define helper_vrsub_vx_b helper_vrsub_vx_b_riscv32 +#define helper_vrsub_vx_d helper_vrsub_vx_d_riscv32 +#define helper_vrsub_vx_h helper_vrsub_vx_h_riscv32 +#define helper_vrsub_vx_w helper_vrsub_vx_w_riscv32 +#define helper_vsadd_vv_b helper_vsadd_vv_b_riscv32 +#define helper_vsadd_vv_d helper_vsadd_vv_d_riscv32 +#define helper_vsadd_vv_h helper_vsadd_vv_h_riscv32 +#define helper_vsadd_vv_w helper_vsadd_vv_w_riscv32 +#define helper_vsadd_vx_b helper_vsadd_vx_b_riscv32 +#define helper_vsadd_vx_d helper_vsadd_vx_d_riscv32 +#define helper_vsadd_vx_h helper_vsadd_vx_h_riscv32 +#define helper_vsadd_vx_w helper_vsadd_vx_w_riscv32 +#define helper_vsaddu_vv_b helper_vsaddu_vv_b_riscv32 +#define helper_vsaddu_vv_d helper_vsaddu_vv_d_riscv32 +#define helper_vsaddu_vv_h helper_vsaddu_vv_h_riscv32 +#define helper_vsaddu_vv_w helper_vsaddu_vv_w_riscv32 +#define helper_vsaddu_vx_b helper_vsaddu_vx_b_riscv32 +#define helper_vsaddu_vx_d helper_vsaddu_vx_d_riscv32 +#define helper_vsaddu_vx_h helper_vsaddu_vx_h_riscv32 +#define helper_vsaddu_vx_w helper_vsaddu_vx_w_riscv32 +#define helper_vsb_v_b helper_vsb_v_b_riscv32 +#define 
helper_vsb_v_b_mask helper_vsb_v_b_mask_riscv32 +#define helper_vsb_v_d helper_vsb_v_d_riscv32 +#define helper_vsb_v_d_mask helper_vsb_v_d_mask_riscv32 +#define helper_vsb_v_h helper_vsb_v_h_riscv32 +#define helper_vsb_v_h_mask helper_vsb_v_h_mask_riscv32 +#define helper_vsb_v_w helper_vsb_v_w_riscv32 +#define helper_vsb_v_w_mask helper_vsb_v_w_mask_riscv32 +#define helper_vsbc_vvm_b helper_vsbc_vvm_b_riscv32 +#define helper_vsbc_vvm_d helper_vsbc_vvm_d_riscv32 +#define helper_vsbc_vvm_h helper_vsbc_vvm_h_riscv32 +#define helper_vsbc_vvm_w helper_vsbc_vvm_w_riscv32 +#define helper_vsbc_vxm_b helper_vsbc_vxm_b_riscv32 +#define helper_vsbc_vxm_d helper_vsbc_vxm_d_riscv32 +#define helper_vsbc_vxm_h helper_vsbc_vxm_h_riscv32 +#define helper_vsbc_vxm_w helper_vsbc_vxm_w_riscv32 +#define helper_vse_v_b helper_vse_v_b_riscv32 +#define helper_vse_v_b_mask helper_vse_v_b_mask_riscv32 +#define helper_vse_v_d helper_vse_v_d_riscv32 +#define helper_vse_v_d_mask helper_vse_v_d_mask_riscv32 +#define helper_vse_v_h helper_vse_v_h_riscv32 +#define helper_vse_v_h_mask helper_vse_v_h_mask_riscv32 +#define helper_vse_v_w helper_vse_v_w_riscv32 +#define helper_vse_v_w_mask helper_vse_v_w_mask_riscv32 +#define helper_vsetvl helper_vsetvl_riscv32 +#define helper_vsh_v_d helper_vsh_v_d_riscv32 +#define helper_vsh_v_d_mask helper_vsh_v_d_mask_riscv32 +#define helper_vsh_v_h helper_vsh_v_h_riscv32 +#define helper_vsh_v_h_mask helper_vsh_v_h_mask_riscv32 +#define helper_vsh_v_w helper_vsh_v_w_riscv32 +#define helper_vsh_v_w_mask helper_vsh_v_w_mask_riscv32 +#define helper_vslide1down_vx_b helper_vslide1down_vx_b_riscv32 +#define helper_vslide1down_vx_d helper_vslide1down_vx_d_riscv32 +#define helper_vslide1down_vx_h helper_vslide1down_vx_h_riscv32 +#define helper_vslide1down_vx_w helper_vslide1down_vx_w_riscv32 +#define helper_vslide1up_vx_b helper_vslide1up_vx_b_riscv32 +#define helper_vslide1up_vx_d helper_vslide1up_vx_d_riscv32 +#define helper_vslide1up_vx_h helper_vslide1up_vx_h_riscv32 +#define helper_vslide1up_vx_w helper_vslide1up_vx_w_riscv32 +#define helper_vslidedown_vx_b helper_vslidedown_vx_b_riscv32 +#define helper_vslidedown_vx_d helper_vslidedown_vx_d_riscv32 +#define helper_vslidedown_vx_h helper_vslidedown_vx_h_riscv32 +#define helper_vslidedown_vx_w helper_vslidedown_vx_w_riscv32 +#define helper_vslideup_vx_b helper_vslideup_vx_b_riscv32 +#define helper_vslideup_vx_d helper_vslideup_vx_d_riscv32 +#define helper_vslideup_vx_h helper_vslideup_vx_h_riscv32 +#define helper_vslideup_vx_w helper_vslideup_vx_w_riscv32 +#define helper_vsll_vv_b helper_vsll_vv_b_riscv32 +#define helper_vsll_vv_d helper_vsll_vv_d_riscv32 +#define helper_vsll_vv_h helper_vsll_vv_h_riscv32 +#define helper_vsll_vv_w helper_vsll_vv_w_riscv32 +#define helper_vsll_vx_b helper_vsll_vx_b_riscv32 +#define helper_vsll_vx_d helper_vsll_vx_d_riscv32 +#define helper_vsll_vx_h helper_vsll_vx_h_riscv32 +#define helper_vsll_vx_w helper_vsll_vx_w_riscv32 +#define helper_vsmul_vv_b helper_vsmul_vv_b_riscv32 +#define helper_vsmul_vv_d helper_vsmul_vv_d_riscv32 +#define helper_vsmul_vv_h helper_vsmul_vv_h_riscv32 +#define helper_vsmul_vv_w helper_vsmul_vv_w_riscv32 +#define helper_vsmul_vx_b helper_vsmul_vx_b_riscv32 +#define helper_vsmul_vx_d helper_vsmul_vx_d_riscv32 +#define helper_vsmul_vx_h helper_vsmul_vx_h_riscv32 +#define helper_vsmul_vx_w helper_vsmul_vx_w_riscv32 +#define helper_vsra_vv_b helper_vsra_vv_b_riscv32 +#define helper_vsra_vv_d helper_vsra_vv_d_riscv32 +#define helper_vsra_vv_h helper_vsra_vv_h_riscv32 +#define 
helper_vsra_vv_w helper_vsra_vv_w_riscv32 +#define helper_vsra_vx_b helper_vsra_vx_b_riscv32 +#define helper_vsra_vx_d helper_vsra_vx_d_riscv32 +#define helper_vsra_vx_h helper_vsra_vx_h_riscv32 +#define helper_vsra_vx_w helper_vsra_vx_w_riscv32 +#define helper_vsrl_vv_b helper_vsrl_vv_b_riscv32 +#define helper_vsrl_vv_d helper_vsrl_vv_d_riscv32 +#define helper_vsrl_vv_h helper_vsrl_vv_h_riscv32 +#define helper_vsrl_vv_w helper_vsrl_vv_w_riscv32 +#define helper_vsrl_vx_b helper_vsrl_vx_b_riscv32 +#define helper_vsrl_vx_d helper_vsrl_vx_d_riscv32 +#define helper_vsrl_vx_h helper_vsrl_vx_h_riscv32 +#define helper_vsrl_vx_w helper_vsrl_vx_w_riscv32 +#define helper_vssb_v_b helper_vssb_v_b_riscv32 +#define helper_vssb_v_d helper_vssb_v_d_riscv32 +#define helper_vssb_v_h helper_vssb_v_h_riscv32 +#define helper_vssb_v_w helper_vssb_v_w_riscv32 +#define helper_vsse_v_b helper_vsse_v_b_riscv32 +#define helper_vsse_v_d helper_vsse_v_d_riscv32 +#define helper_vsse_v_h helper_vsse_v_h_riscv32 +#define helper_vsse_v_w helper_vsse_v_w_riscv32 +#define helper_vssh_v_d helper_vssh_v_d_riscv32 +#define helper_vssh_v_h helper_vssh_v_h_riscv32 +#define helper_vssh_v_w helper_vssh_v_w_riscv32 +#define helper_vssra_vv_b helper_vssra_vv_b_riscv32 +#define helper_vssra_vv_d helper_vssra_vv_d_riscv32 +#define helper_vssra_vv_h helper_vssra_vv_h_riscv32 +#define helper_vssra_vv_w helper_vssra_vv_w_riscv32 +#define helper_vssra_vx_b helper_vssra_vx_b_riscv32 +#define helper_vssra_vx_d helper_vssra_vx_d_riscv32 +#define helper_vssra_vx_h helper_vssra_vx_h_riscv32 +#define helper_vssra_vx_w helper_vssra_vx_w_riscv32 +#define helper_vssrl_vv_b helper_vssrl_vv_b_riscv32 +#define helper_vssrl_vv_d helper_vssrl_vv_d_riscv32 +#define helper_vssrl_vv_h helper_vssrl_vv_h_riscv32 +#define helper_vssrl_vv_w helper_vssrl_vv_w_riscv32 +#define helper_vssrl_vx_b helper_vssrl_vx_b_riscv32 +#define helper_vssrl_vx_d helper_vssrl_vx_d_riscv32 +#define helper_vssrl_vx_h helper_vssrl_vx_h_riscv32 +#define helper_vssrl_vx_w helper_vssrl_vx_w_riscv32 +#define helper_vssub_vv_b helper_vssub_vv_b_riscv32 +#define helper_vssub_vv_d helper_vssub_vv_d_riscv32 +#define helper_vssub_vv_h helper_vssub_vv_h_riscv32 +#define helper_vssub_vv_w helper_vssub_vv_w_riscv32 +#define helper_vssub_vx_b helper_vssub_vx_b_riscv32 +#define helper_vssub_vx_d helper_vssub_vx_d_riscv32 +#define helper_vssub_vx_h helper_vssub_vx_h_riscv32 +#define helper_vssub_vx_w helper_vssub_vx_w_riscv32 +#define helper_vssubu_vv_b helper_vssubu_vv_b_riscv32 +#define helper_vssubu_vv_d helper_vssubu_vv_d_riscv32 +#define helper_vssubu_vv_h helper_vssubu_vv_h_riscv32 +#define helper_vssubu_vv_w helper_vssubu_vv_w_riscv32 +#define helper_vssubu_vx_b helper_vssubu_vx_b_riscv32 +#define helper_vssubu_vx_d helper_vssubu_vx_d_riscv32 +#define helper_vssubu_vx_h helper_vssubu_vx_h_riscv32 +#define helper_vssubu_vx_w helper_vssubu_vx_w_riscv32 +#define helper_vssw_v_d helper_vssw_v_d_riscv32 +#define helper_vssw_v_w helper_vssw_v_w_riscv32 +#define helper_vsub_vv_b helper_vsub_vv_b_riscv32 +#define helper_vsub_vv_d helper_vsub_vv_d_riscv32 +#define helper_vsub_vv_h helper_vsub_vv_h_riscv32 +#define helper_vsub_vv_w helper_vsub_vv_w_riscv32 +#define helper_vsub_vx_b helper_vsub_vx_b_riscv32 +#define helper_vsub_vx_d helper_vsub_vx_d_riscv32 +#define helper_vsub_vx_h helper_vsub_vx_h_riscv32 +#define helper_vsub_vx_w helper_vsub_vx_w_riscv32 +#define helper_vsw_v_d helper_vsw_v_d_riscv32 +#define helper_vsw_v_d_mask helper_vsw_v_d_mask_riscv32 +#define helper_vsw_v_w 
helper_vsw_v_w_riscv32 +#define helper_vsw_v_w_mask helper_vsw_v_w_mask_riscv32 +#define helper_vsxb_v_b helper_vsxb_v_b_riscv32 +#define helper_vsxb_v_d helper_vsxb_v_d_riscv32 +#define helper_vsxb_v_h helper_vsxb_v_h_riscv32 +#define helper_vsxb_v_w helper_vsxb_v_w_riscv32 +#define helper_vsxe_v_b helper_vsxe_v_b_riscv32 +#define helper_vsxe_v_d helper_vsxe_v_d_riscv32 +#define helper_vsxe_v_h helper_vsxe_v_h_riscv32 +#define helper_vsxe_v_w helper_vsxe_v_w_riscv32 +#define helper_vsxh_v_d helper_vsxh_v_d_riscv32 +#define helper_vsxh_v_h helper_vsxh_v_h_riscv32 +#define helper_vsxh_v_w helper_vsxh_v_w_riscv32 +#define helper_vsxw_v_d helper_vsxw_v_d_riscv32 +#define helper_vsxw_v_w helper_vsxw_v_w_riscv32 +#define helper_vwadd_vv_b helper_vwadd_vv_b_riscv32 +#define helper_vwadd_vv_h helper_vwadd_vv_h_riscv32 +#define helper_vwadd_vv_w helper_vwadd_vv_w_riscv32 +#define helper_vwadd_vx_b helper_vwadd_vx_b_riscv32 +#define helper_vwadd_vx_h helper_vwadd_vx_h_riscv32 +#define helper_vwadd_vx_w helper_vwadd_vx_w_riscv32 +#define helper_vwadd_wv_b helper_vwadd_wv_b_riscv32 +#define helper_vwadd_wv_h helper_vwadd_wv_h_riscv32 +#define helper_vwadd_wv_w helper_vwadd_wv_w_riscv32 +#define helper_vwadd_wx_b helper_vwadd_wx_b_riscv32 +#define helper_vwadd_wx_h helper_vwadd_wx_h_riscv32 +#define helper_vwadd_wx_w helper_vwadd_wx_w_riscv32 +#define helper_vwaddu_vv_b helper_vwaddu_vv_b_riscv32 +#define helper_vwaddu_vv_h helper_vwaddu_vv_h_riscv32 +#define helper_vwaddu_vv_w helper_vwaddu_vv_w_riscv32 +#define helper_vwaddu_vx_b helper_vwaddu_vx_b_riscv32 +#define helper_vwaddu_vx_h helper_vwaddu_vx_h_riscv32 +#define helper_vwaddu_vx_w helper_vwaddu_vx_w_riscv32 +#define helper_vwaddu_wv_b helper_vwaddu_wv_b_riscv32 +#define helper_vwaddu_wv_h helper_vwaddu_wv_h_riscv32 +#define helper_vwaddu_wv_w helper_vwaddu_wv_w_riscv32 +#define helper_vwaddu_wx_b helper_vwaddu_wx_b_riscv32 +#define helper_vwaddu_wx_h helper_vwaddu_wx_h_riscv32 +#define helper_vwaddu_wx_w helper_vwaddu_wx_w_riscv32 +#define helper_vwmacc_vv_b helper_vwmacc_vv_b_riscv32 +#define helper_vwmacc_vv_h helper_vwmacc_vv_h_riscv32 +#define helper_vwmacc_vv_w helper_vwmacc_vv_w_riscv32 +#define helper_vwmacc_vx_b helper_vwmacc_vx_b_riscv32 +#define helper_vwmacc_vx_h helper_vwmacc_vx_h_riscv32 +#define helper_vwmacc_vx_w helper_vwmacc_vx_w_riscv32 +#define helper_vwmaccsu_vv_b helper_vwmaccsu_vv_b_riscv32 +#define helper_vwmaccsu_vv_h helper_vwmaccsu_vv_h_riscv32 +#define helper_vwmaccsu_vv_w helper_vwmaccsu_vv_w_riscv32 +#define helper_vwmaccsu_vx_b helper_vwmaccsu_vx_b_riscv32 +#define helper_vwmaccsu_vx_h helper_vwmaccsu_vx_h_riscv32 +#define helper_vwmaccsu_vx_w helper_vwmaccsu_vx_w_riscv32 +#define helper_vwmaccu_vv_b helper_vwmaccu_vv_b_riscv32 +#define helper_vwmaccu_vv_h helper_vwmaccu_vv_h_riscv32 +#define helper_vwmaccu_vv_w helper_vwmaccu_vv_w_riscv32 +#define helper_vwmaccu_vx_b helper_vwmaccu_vx_b_riscv32 +#define helper_vwmaccu_vx_h helper_vwmaccu_vx_h_riscv32 +#define helper_vwmaccu_vx_w helper_vwmaccu_vx_w_riscv32 +#define helper_vwmaccus_vx_b helper_vwmaccus_vx_b_riscv32 +#define helper_vwmaccus_vx_h helper_vwmaccus_vx_h_riscv32 +#define helper_vwmaccus_vx_w helper_vwmaccus_vx_w_riscv32 +#define helper_vwmul_vv_b helper_vwmul_vv_b_riscv32 +#define helper_vwmul_vv_h helper_vwmul_vv_h_riscv32 +#define helper_vwmul_vv_w helper_vwmul_vv_w_riscv32 +#define helper_vwmul_vx_b helper_vwmul_vx_b_riscv32 +#define helper_vwmul_vx_h helper_vwmul_vx_h_riscv32 +#define helper_vwmul_vx_w helper_vwmul_vx_w_riscv32 +#define 
helper_vwmulsu_vv_b helper_vwmulsu_vv_b_riscv32 +#define helper_vwmulsu_vv_h helper_vwmulsu_vv_h_riscv32 +#define helper_vwmulsu_vv_w helper_vwmulsu_vv_w_riscv32 +#define helper_vwmulsu_vx_b helper_vwmulsu_vx_b_riscv32 +#define helper_vwmulsu_vx_h helper_vwmulsu_vx_h_riscv32 +#define helper_vwmulsu_vx_w helper_vwmulsu_vx_w_riscv32 +#define helper_vwmulu_vv_b helper_vwmulu_vv_b_riscv32 +#define helper_vwmulu_vv_h helper_vwmulu_vv_h_riscv32 +#define helper_vwmulu_vv_w helper_vwmulu_vv_w_riscv32 +#define helper_vwmulu_vx_b helper_vwmulu_vx_b_riscv32 +#define helper_vwmulu_vx_h helper_vwmulu_vx_h_riscv32 +#define helper_vwmulu_vx_w helper_vwmulu_vx_w_riscv32 +#define helper_vwredsum_vs_b helper_vwredsum_vs_b_riscv32 +#define helper_vwredsum_vs_h helper_vwredsum_vs_h_riscv32 +#define helper_vwredsum_vs_w helper_vwredsum_vs_w_riscv32 +#define helper_vwredsumu_vs_b helper_vwredsumu_vs_b_riscv32 +#define helper_vwredsumu_vs_h helper_vwredsumu_vs_h_riscv32 +#define helper_vwredsumu_vs_w helper_vwredsumu_vs_w_riscv32 +#define helper_vwsmacc_vv_b helper_vwsmacc_vv_b_riscv32 +#define helper_vwsmacc_vv_h helper_vwsmacc_vv_h_riscv32 +#define helper_vwsmacc_vv_w helper_vwsmacc_vv_w_riscv32 +#define helper_vwsmacc_vx_b helper_vwsmacc_vx_b_riscv32 +#define helper_vwsmacc_vx_h helper_vwsmacc_vx_h_riscv32 +#define helper_vwsmacc_vx_w helper_vwsmacc_vx_w_riscv32 +#define helper_vwsmaccsu_vv_b helper_vwsmaccsu_vv_b_riscv32 +#define helper_vwsmaccsu_vv_h helper_vwsmaccsu_vv_h_riscv32 +#define helper_vwsmaccsu_vv_w helper_vwsmaccsu_vv_w_riscv32 +#define helper_vwsmaccsu_vx_b helper_vwsmaccsu_vx_b_riscv32 +#define helper_vwsmaccsu_vx_h helper_vwsmaccsu_vx_h_riscv32 +#define helper_vwsmaccsu_vx_w helper_vwsmaccsu_vx_w_riscv32 +#define helper_vwsmaccu_vv_b helper_vwsmaccu_vv_b_riscv32 +#define helper_vwsmaccu_vv_h helper_vwsmaccu_vv_h_riscv32 +#define helper_vwsmaccu_vv_w helper_vwsmaccu_vv_w_riscv32 +#define helper_vwsmaccu_vx_b helper_vwsmaccu_vx_b_riscv32 +#define helper_vwsmaccu_vx_h helper_vwsmaccu_vx_h_riscv32 +#define helper_vwsmaccu_vx_w helper_vwsmaccu_vx_w_riscv32 +#define helper_vwsmaccus_vx_b helper_vwsmaccus_vx_b_riscv32 +#define helper_vwsmaccus_vx_h helper_vwsmaccus_vx_h_riscv32 +#define helper_vwsmaccus_vx_w helper_vwsmaccus_vx_w_riscv32 +#define helper_vwsub_vv_b helper_vwsub_vv_b_riscv32 +#define helper_vwsub_vv_h helper_vwsub_vv_h_riscv32 +#define helper_vwsub_vv_w helper_vwsub_vv_w_riscv32 +#define helper_vwsub_vx_b helper_vwsub_vx_b_riscv32 +#define helper_vwsub_vx_h helper_vwsub_vx_h_riscv32 +#define helper_vwsub_vx_w helper_vwsub_vx_w_riscv32 +#define helper_vwsub_wv_b helper_vwsub_wv_b_riscv32 +#define helper_vwsub_wv_h helper_vwsub_wv_h_riscv32 +#define helper_vwsub_wv_w helper_vwsub_wv_w_riscv32 +#define helper_vwsub_wx_b helper_vwsub_wx_b_riscv32 +#define helper_vwsub_wx_h helper_vwsub_wx_h_riscv32 +#define helper_vwsub_wx_w helper_vwsub_wx_w_riscv32 +#define helper_vwsubu_vv_b helper_vwsubu_vv_b_riscv32 +#define helper_vwsubu_vv_h helper_vwsubu_vv_h_riscv32 +#define helper_vwsubu_vv_w helper_vwsubu_vv_w_riscv32 +#define helper_vwsubu_vx_b helper_vwsubu_vx_b_riscv32 +#define helper_vwsubu_vx_h helper_vwsubu_vx_h_riscv32 +#define helper_vwsubu_vx_w helper_vwsubu_vx_w_riscv32 +#define helper_vwsubu_wv_b helper_vwsubu_wv_b_riscv32 +#define helper_vwsubu_wv_h helper_vwsubu_wv_h_riscv32 +#define helper_vwsubu_wv_w helper_vwsubu_wv_w_riscv32 +#define helper_vwsubu_wx_b helper_vwsubu_wx_b_riscv32 +#define helper_vwsubu_wx_h helper_vwsubu_wx_h_riscv32 +#define helper_vwsubu_wx_w 
helper_vwsubu_wx_w_riscv32 +#define helper_vxor_vv_b helper_vxor_vv_b_riscv32 +#define helper_vxor_vv_d helper_vxor_vv_d_riscv32 +#define helper_vxor_vv_h helper_vxor_vv_h_riscv32 +#define helper_vxor_vv_w helper_vxor_vv_w_riscv32 +#define helper_vxor_vx_b helper_vxor_vx_b_riscv32 +#define helper_vxor_vx_d helper_vxor_vx_d_riscv32 +#define helper_vxor_vx_h helper_vxor_vx_h_riscv32 +#define helper_vxor_vx_w helper_vxor_vx_w_riscv32 #endif diff --git a/qemu/riscv64.h b/qemu/riscv64.h index 1bb119334e..2f0870dc20 100644 --- a/qemu/riscv64.h +++ b/qemu/riscv64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_riscv64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_riscv64 #define tcg_gen_st_i64 tcg_gen_st_i64_riscv64 +#define tcg_gen_add_i64 tcg_gen_add_i64_riscv64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_riscv64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_riscv64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_riscv64 #define cpu_icount_to_ns cpu_icount_to_ns_riscv64 #define cpu_is_stopped cpu_is_stopped_riscv64 #define cpu_get_ticks cpu_get_ticks_riscv64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_riscv64 #define floatx80_mul floatx80_mul_riscv64 #define floatx80_div floatx80_div_riscv64 +#define floatx80_modrem floatx80_modrem_riscv64 +#define floatx80_mod floatx80_mod_riscv64 #define floatx80_rem floatx80_rem_riscv64 #define floatx80_sqrt floatx80_sqrt_riscv64 #define floatx80_eq floatx80_eq_riscv64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_riscv64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_riscv64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_riscv64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_riscv64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_riscv64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_riscv64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_riscv64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_riscv64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_riscv64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_riscv64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_riscv64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_riscv64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_riscv64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_riscv64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_riscv64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_riscv64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_riscv64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_riscv64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_riscv64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_riscv64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_riscv64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_riscv64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_riscv64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_riscv64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_riscv64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_riscv64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_riscv64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_riscv64 #define tcg_gen_shri_vec tcg_gen_shri_vec_riscv64 #define tcg_gen_sari_vec tcg_gen_sari_vec_riscv64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_riscv64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_riscv64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_riscv64 #define tcg_gen_add_vec tcg_gen_add_vec_riscv64 #define tcg_gen_sub_vec tcg_gen_sub_vec_riscv64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_riscv64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_riscv64 #define tcg_gen_sarv_vec 
tcg_gen_sarv_vec_riscv64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_riscv64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_riscv64 #define tcg_gen_shls_vec tcg_gen_shls_vec_riscv64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_riscv64 #define tcg_gen_sars_vec tcg_gen_sars_vec_riscv64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_riscv64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_riscv64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_riscv64 #define tb_htable_lookup tb_htable_lookup_riscv64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_riscv64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_riscv64 #define tlb_init tlb_init_riscv64 +#define tlb_destroy tlb_destroy_riscv64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_riscv64 #define tlb_flush tlb_flush_riscv64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_riscv64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_riscv64 #define get_page_addr_code_hostp get_page_addr_code_hostp_riscv64 #define get_page_addr_code get_page_addr_code_riscv64 +#define probe_access_flags probe_access_flags_riscv64 #define probe_access probe_access_riscv64 #define tlb_vaddr_to_host tlb_vaddr_to_host_riscv64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_riscv64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_riscv64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_riscv64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_riscv64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_riscv64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_riscv64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_riscv64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_riscv64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_riscv64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_riscv64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_riscv64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_riscv64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_riscv64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_riscv64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_riscv64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_riscv64 #define cpu_ldub_data_ra cpu_ldub_data_ra_riscv64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_riscv64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_riscv64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_riscv64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_riscv64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_riscv64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_riscv64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_riscv64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_riscv64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_riscv64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_riscv64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_riscv64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_riscv64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_riscv64 #define cpu_ldub_data cpu_ldub_data_riscv64 #define cpu_ldsb_data cpu_ldsb_data_riscv64 -#define cpu_lduw_data cpu_lduw_data_riscv64 -#define cpu_ldsw_data cpu_ldsw_data_riscv64 -#define cpu_ldl_data cpu_ldl_data_riscv64 -#define cpu_ldq_data cpu_ldq_data_riscv64 +#define cpu_lduw_be_data cpu_lduw_be_data_riscv64 +#define cpu_lduw_le_data cpu_lduw_le_data_riscv64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_riscv64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_riscv64 +#define cpu_ldl_be_data cpu_ldl_be_data_riscv64 +#define cpu_ldl_le_data cpu_ldl_le_data_riscv64 +#define cpu_ldq_le_data cpu_ldq_le_data_riscv64 +#define cpu_ldq_be_data cpu_ldq_be_data_riscv64 
 #define helper_ret_stb_mmu helper_ret_stb_mmu_riscv64
 #define helper_le_stw_mmu helper_le_stw_mmu_riscv64
 #define helper_be_stw_mmu helper_be_stw_mmu_riscv64
@@ -834,17 +866,26 @@
 #define helper_le_stq_mmu helper_le_stq_mmu_riscv64
 #define helper_be_stq_mmu helper_be_stq_mmu_riscv64
 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_riscv64
-#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_riscv64
-#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_riscv64
-#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_riscv64
+#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_riscv64
+#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_riscv64
+#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_riscv64
+#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_riscv64
+#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_riscv64
+#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_riscv64
 #define cpu_stb_data_ra cpu_stb_data_ra_riscv64
-#define cpu_stw_data_ra cpu_stw_data_ra_riscv64
-#define cpu_stl_data_ra cpu_stl_data_ra_riscv64
-#define cpu_stq_data_ra cpu_stq_data_ra_riscv64
+#define cpu_stw_be_data_ra cpu_stw_be_data_ra_riscv64
+#define cpu_stw_le_data_ra cpu_stw_le_data_ra_riscv64
+#define cpu_stl_be_data_ra cpu_stl_be_data_ra_riscv64
+#define cpu_stl_le_data_ra cpu_stl_le_data_ra_riscv64
+#define cpu_stq_be_data_ra cpu_stq_be_data_ra_riscv64
+#define cpu_stq_le_data_ra cpu_stq_le_data_ra_riscv64
 #define cpu_stb_data cpu_stb_data_riscv64
-#define cpu_stw_data cpu_stw_data_riscv64
-#define cpu_stl_data cpu_stl_data_riscv64
-#define cpu_stq_data cpu_stq_data_riscv64
+#define cpu_stw_be_data cpu_stw_be_data_riscv64
+#define cpu_stw_le_data cpu_stw_le_data_riscv64
+#define cpu_stl_be_data cpu_stl_be_data_riscv64
+#define cpu_stl_le_data cpu_stl_le_data_riscv64
+#define cpu_stq_be_data cpu_stq_be_data_riscv64
+#define cpu_stq_le_data cpu_stq_le_data_riscv64
 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_riscv64
 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_riscv64
 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_riscv64
@@ -1101,6 +1142,7 @@
 #define cpu_lduw_code cpu_lduw_code_riscv64
 #define cpu_ldl_code cpu_ldl_code_riscv64
 #define cpu_ldq_code cpu_ldq_code_riscv64
+#define cpu_interrupt_handler cpu_interrupt_handler_riscv64
 #define helper_div_i32 helper_div_i32_riscv64
 #define helper_rem_i32 helper_rem_i32_riscv64
 #define helper_divu_i32 helper_divu_i32_riscv64
@@ -1185,6 +1227,10 @@
 #define helper_gvec_sar16i helper_gvec_sar16i_riscv64
 #define helper_gvec_sar32i helper_gvec_sar32i_riscv64
 #define helper_gvec_sar64i helper_gvec_sar64i_riscv64
+#define helper_gvec_rotl8i helper_gvec_rotl8i_riscv64
+#define helper_gvec_rotl16i helper_gvec_rotl16i_riscv64
+#define helper_gvec_rotl32i helper_gvec_rotl32i_riscv64
+#define helper_gvec_rotl64i helper_gvec_rotl64i_riscv64
 #define helper_gvec_shl8v helper_gvec_shl8v_riscv64
 #define helper_gvec_shl16v helper_gvec_shl16v_riscv64
 #define helper_gvec_shl32v helper_gvec_shl32v_riscv64
@@ -1197,6 +1243,14 @@
 #define helper_gvec_sar16v helper_gvec_sar16v_riscv64
 #define helper_gvec_sar32v helper_gvec_sar32v_riscv64
 #define helper_gvec_sar64v helper_gvec_sar64v_riscv64
+#define helper_gvec_rotl8v helper_gvec_rotl8v_riscv64
+#define helper_gvec_rotl16v helper_gvec_rotl16v_riscv64
+#define helper_gvec_rotl32v helper_gvec_rotl32v_riscv64
+#define helper_gvec_rotl64v helper_gvec_rotl64v_riscv64
+#define helper_gvec_rotr8v helper_gvec_rotr8v_riscv64
+#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv64
+#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv64
+#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv64 #define helper_gvec_eq8 helper_gvec_eq8_riscv64 #define helper_gvec_ne8 helper_gvec_ne8_riscv64 #define helper_gvec_lt8 helper_gvec_lt8_riscv64 @@ -1366,6 +1420,7 @@ #define helper_mret helper_mret_riscv64 #define helper_wfi helper_wfi_riscv64 #define helper_tlb_flush helper_tlb_flush_riscv64 +#define helper_hyp_tlb_flush helper_hyp_tlb_flush_riscv64 #define pmp_hart_has_privs pmp_hart_has_privs_riscv64 #define pmpcfg_csr_write pmpcfg_csr_write_riscv64 #define pmpcfg_csr_read pmpcfg_csr_read_riscv64 @@ -1386,4 +1441,1006 @@ #define gen_helper_tlb_flush gen_helper_tlb_flush_riscv64 #define riscv_fpr_regnames riscv_fpr_regnames_riscv64 #define riscv_int_regnames riscv_int_regnames_riscv64 +#define fclass_d fclass_d_riscv64 +#define fclass_h fclass_h_riscv64 +#define fclass_s fclass_s_riscv64 +#define helper_vaadd_vv_b helper_vaadd_vv_b_riscv64 +#define helper_vaadd_vv_d helper_vaadd_vv_d_riscv64 +#define helper_vaadd_vv_h helper_vaadd_vv_h_riscv64 +#define helper_vaadd_vv_w helper_vaadd_vv_w_riscv64 +#define helper_vaadd_vx_b helper_vaadd_vx_b_riscv64 +#define helper_vaadd_vx_d helper_vaadd_vx_d_riscv64 +#define helper_vaadd_vx_h helper_vaadd_vx_h_riscv64 +#define helper_vaadd_vx_w helper_vaadd_vx_w_riscv64 +#define helper_vadc_vvm_b helper_vadc_vvm_b_riscv64 +#define helper_vadc_vvm_d helper_vadc_vvm_d_riscv64 +#define helper_vadc_vvm_h helper_vadc_vvm_h_riscv64 +#define helper_vadc_vvm_w helper_vadc_vvm_w_riscv64 +#define helper_vadc_vxm_b helper_vadc_vxm_b_riscv64 +#define helper_vadc_vxm_d helper_vadc_vxm_d_riscv64 +#define helper_vadc_vxm_h helper_vadc_vxm_h_riscv64 +#define helper_vadc_vxm_w helper_vadc_vxm_w_riscv64 +#define helper_vadd_vv_b helper_vadd_vv_b_riscv64 +#define helper_vadd_vv_d helper_vadd_vv_d_riscv64 +#define helper_vadd_vv_h helper_vadd_vv_h_riscv64 +#define helper_vadd_vv_w helper_vadd_vv_w_riscv64 +#define helper_vadd_vx_b helper_vadd_vx_b_riscv64 +#define helper_vadd_vx_d helper_vadd_vx_d_riscv64 +#define helper_vadd_vx_h helper_vadd_vx_h_riscv64 +#define helper_vadd_vx_w helper_vadd_vx_w_riscv64 +#define helper_vamoaddw_v_w helper_vamoaddw_v_w_riscv64 +#define helper_vamoandw_v_w helper_vamoandw_v_w_riscv64 +#define helper_vamomaxuw_v_w helper_vamomaxuw_v_w_riscv64 +#define helper_vamomaxw_v_w helper_vamomaxw_v_w_riscv64 +#define helper_vamominuw_v_w helper_vamominuw_v_w_riscv64 +#define helper_vamominw_v_w helper_vamominw_v_w_riscv64 +#define helper_vamoorw_v_w helper_vamoorw_v_w_riscv64 +#define helper_vamoswapw_v_w helper_vamoswapw_v_w_riscv64 +#define helper_vamoxorw_v_w helper_vamoxorw_v_w_riscv64 +#define helper_vand_vv_b helper_vand_vv_b_riscv64 +#define helper_vand_vv_d helper_vand_vv_d_riscv64 +#define helper_vand_vv_h helper_vand_vv_h_riscv64 +#define helper_vand_vv_w helper_vand_vv_w_riscv64 +#define helper_vand_vx_b helper_vand_vx_b_riscv64 +#define helper_vand_vx_d helper_vand_vx_d_riscv64 +#define helper_vand_vx_h helper_vand_vx_h_riscv64 +#define helper_vand_vx_w helper_vand_vx_w_riscv64 +#define helper_vasub_vv_b helper_vasub_vv_b_riscv64 +#define helper_vasub_vv_d helper_vasub_vv_d_riscv64 +#define helper_vasub_vv_h helper_vasub_vv_h_riscv64 +#define helper_vasub_vv_w helper_vasub_vv_w_riscv64 +#define helper_vasub_vx_b helper_vasub_vx_b_riscv64 +#define helper_vasub_vx_d helper_vasub_vx_d_riscv64 +#define helper_vasub_vx_h helper_vasub_vx_h_riscv64 +#define helper_vasub_vx_w helper_vasub_vx_w_riscv64 +#define helper_vcompress_vm_b helper_vcompress_vm_b_riscv64 +#define helper_vcompress_vm_d 
helper_vcompress_vm_d_riscv64 +#define helper_vcompress_vm_h helper_vcompress_vm_h_riscv64 +#define helper_vcompress_vm_w helper_vcompress_vm_w_riscv64 +#define helper_vdiv_vv_b helper_vdiv_vv_b_riscv64 +#define helper_vdiv_vv_d helper_vdiv_vv_d_riscv64 +#define helper_vdiv_vv_h helper_vdiv_vv_h_riscv64 +#define helper_vdiv_vv_w helper_vdiv_vv_w_riscv64 +#define helper_vdiv_vx_b helper_vdiv_vx_b_riscv64 +#define helper_vdiv_vx_d helper_vdiv_vx_d_riscv64 +#define helper_vdiv_vx_h helper_vdiv_vx_h_riscv64 +#define helper_vdiv_vx_w helper_vdiv_vx_w_riscv64 +#define helper_vdivu_vv_b helper_vdivu_vv_b_riscv64 +#define helper_vdivu_vv_d helper_vdivu_vv_d_riscv64 +#define helper_vdivu_vv_h helper_vdivu_vv_h_riscv64 +#define helper_vdivu_vv_w helper_vdivu_vv_w_riscv64 +#define helper_vdivu_vx_b helper_vdivu_vx_b_riscv64 +#define helper_vdivu_vx_d helper_vdivu_vx_d_riscv64 +#define helper_vdivu_vx_h helper_vdivu_vx_h_riscv64 +#define helper_vdivu_vx_w helper_vdivu_vx_w_riscv64 +#define helper_vec_rsubs16 helper_vec_rsubs16_riscv64 +#define helper_vec_rsubs32 helper_vec_rsubs32_riscv64 +#define helper_vec_rsubs64 helper_vec_rsubs64_riscv64 +#define helper_vec_rsubs8 helper_vec_rsubs8_riscv64 +#define helper_vfadd_vf_d helper_vfadd_vf_d_riscv64 +#define helper_vfadd_vf_h helper_vfadd_vf_h_riscv64 +#define helper_vfadd_vf_w helper_vfadd_vf_w_riscv64 +#define helper_vfadd_vv_d helper_vfadd_vv_d_riscv64 +#define helper_vfadd_vv_h helper_vfadd_vv_h_riscv64 +#define helper_vfadd_vv_w helper_vfadd_vv_w_riscv64 +#define helper_vfclass_v_d helper_vfclass_v_d_riscv64 +#define helper_vfclass_v_h helper_vfclass_v_h_riscv64 +#define helper_vfclass_v_w helper_vfclass_v_w_riscv64 +#define helper_vfcvt_f_x_v_d helper_vfcvt_f_x_v_d_riscv64 +#define helper_vfcvt_f_x_v_h helper_vfcvt_f_x_v_h_riscv64 +#define helper_vfcvt_f_x_v_w helper_vfcvt_f_x_v_w_riscv64 +#define helper_vfcvt_f_xu_v_d helper_vfcvt_f_xu_v_d_riscv64 +#define helper_vfcvt_f_xu_v_h helper_vfcvt_f_xu_v_h_riscv64 +#define helper_vfcvt_f_xu_v_w helper_vfcvt_f_xu_v_w_riscv64 +#define helper_vfcvt_x_f_v_d helper_vfcvt_x_f_v_d_riscv64 +#define helper_vfcvt_x_f_v_h helper_vfcvt_x_f_v_h_riscv64 +#define helper_vfcvt_x_f_v_w helper_vfcvt_x_f_v_w_riscv64 +#define helper_vfcvt_xu_f_v_d helper_vfcvt_xu_f_v_d_riscv64 +#define helper_vfcvt_xu_f_v_h helper_vfcvt_xu_f_v_h_riscv64 +#define helper_vfcvt_xu_f_v_w helper_vfcvt_xu_f_v_w_riscv64 +#define helper_vfdiv_vf_d helper_vfdiv_vf_d_riscv64 +#define helper_vfdiv_vf_h helper_vfdiv_vf_h_riscv64 +#define helper_vfdiv_vf_w helper_vfdiv_vf_w_riscv64 +#define helper_vfdiv_vv_d helper_vfdiv_vv_d_riscv64 +#define helper_vfdiv_vv_h helper_vfdiv_vv_h_riscv64 +#define helper_vfdiv_vv_w helper_vfdiv_vv_w_riscv64 +#define helper_vfmacc_vf_d helper_vfmacc_vf_d_riscv64 +#define helper_vfmacc_vf_h helper_vfmacc_vf_h_riscv64 +#define helper_vfmacc_vf_w helper_vfmacc_vf_w_riscv64 +#define helper_vfmacc_vv_d helper_vfmacc_vv_d_riscv64 +#define helper_vfmacc_vv_h helper_vfmacc_vv_h_riscv64 +#define helper_vfmacc_vv_w helper_vfmacc_vv_w_riscv64 +#define helper_vfmadd_vf_d helper_vfmadd_vf_d_riscv64 +#define helper_vfmadd_vf_h helper_vfmadd_vf_h_riscv64 +#define helper_vfmadd_vf_w helper_vfmadd_vf_w_riscv64 +#define helper_vfmadd_vv_d helper_vfmadd_vv_d_riscv64 +#define helper_vfmadd_vv_h helper_vfmadd_vv_h_riscv64 +#define helper_vfmadd_vv_w helper_vfmadd_vv_w_riscv64 +#define helper_vfmax_vf_d helper_vfmax_vf_d_riscv64 +#define helper_vfmax_vf_h helper_vfmax_vf_h_riscv64 +#define helper_vfmax_vf_w helper_vfmax_vf_w_riscv64 +#define 
helper_vfmax_vv_d helper_vfmax_vv_d_riscv64 +#define helper_vfmax_vv_h helper_vfmax_vv_h_riscv64 +#define helper_vfmax_vv_w helper_vfmax_vv_w_riscv64 +#define helper_vfmerge_vfm_d helper_vfmerge_vfm_d_riscv64 +#define helper_vfmerge_vfm_h helper_vfmerge_vfm_h_riscv64 +#define helper_vfmerge_vfm_w helper_vfmerge_vfm_w_riscv64 +#define helper_vfmin_vf_d helper_vfmin_vf_d_riscv64 +#define helper_vfmin_vf_h helper_vfmin_vf_h_riscv64 +#define helper_vfmin_vf_w helper_vfmin_vf_w_riscv64 +#define helper_vfmin_vv_d helper_vfmin_vv_d_riscv64 +#define helper_vfmin_vv_h helper_vfmin_vv_h_riscv64 +#define helper_vfmin_vv_w helper_vfmin_vv_w_riscv64 +#define helper_vfmsac_vf_d helper_vfmsac_vf_d_riscv64 +#define helper_vfmsac_vf_h helper_vfmsac_vf_h_riscv64 +#define helper_vfmsac_vf_w helper_vfmsac_vf_w_riscv64 +#define helper_vfmsac_vv_d helper_vfmsac_vv_d_riscv64 +#define helper_vfmsac_vv_h helper_vfmsac_vv_h_riscv64 +#define helper_vfmsac_vv_w helper_vfmsac_vv_w_riscv64 +#define helper_vfmsub_vf_d helper_vfmsub_vf_d_riscv64 +#define helper_vfmsub_vf_h helper_vfmsub_vf_h_riscv64 +#define helper_vfmsub_vf_w helper_vfmsub_vf_w_riscv64 +#define helper_vfmsub_vv_d helper_vfmsub_vv_d_riscv64 +#define helper_vfmsub_vv_h helper_vfmsub_vv_h_riscv64 +#define helper_vfmsub_vv_w helper_vfmsub_vv_w_riscv64 +#define helper_vfmul_vf_d helper_vfmul_vf_d_riscv64 +#define helper_vfmul_vf_h helper_vfmul_vf_h_riscv64 +#define helper_vfmul_vf_w helper_vfmul_vf_w_riscv64 +#define helper_vfmul_vv_d helper_vfmul_vv_d_riscv64 +#define helper_vfmul_vv_h helper_vfmul_vv_h_riscv64 +#define helper_vfmul_vv_w helper_vfmul_vv_w_riscv64 +#define helper_vfncvt_f_f_v_h helper_vfncvt_f_f_v_h_riscv64 +#define helper_vfncvt_f_f_v_w helper_vfncvt_f_f_v_w_riscv64 +#define helper_vfncvt_f_x_v_h helper_vfncvt_f_x_v_h_riscv64 +#define helper_vfncvt_f_x_v_w helper_vfncvt_f_x_v_w_riscv64 +#define helper_vfncvt_f_xu_v_h helper_vfncvt_f_xu_v_h_riscv64 +#define helper_vfncvt_f_xu_v_w helper_vfncvt_f_xu_v_w_riscv64 +#define helper_vfncvt_x_f_v_h helper_vfncvt_x_f_v_h_riscv64 +#define helper_vfncvt_x_f_v_w helper_vfncvt_x_f_v_w_riscv64 +#define helper_vfncvt_xu_f_v_h helper_vfncvt_xu_f_v_h_riscv64 +#define helper_vfncvt_xu_f_v_w helper_vfncvt_xu_f_v_w_riscv64 +#define helper_vfnmacc_vf_d helper_vfnmacc_vf_d_riscv64 +#define helper_vfnmacc_vf_h helper_vfnmacc_vf_h_riscv64 +#define helper_vfnmacc_vf_w helper_vfnmacc_vf_w_riscv64 +#define helper_vfnmacc_vv_d helper_vfnmacc_vv_d_riscv64 +#define helper_vfnmacc_vv_h helper_vfnmacc_vv_h_riscv64 +#define helper_vfnmacc_vv_w helper_vfnmacc_vv_w_riscv64 +#define helper_vfnmadd_vf_d helper_vfnmadd_vf_d_riscv64 +#define helper_vfnmadd_vf_h helper_vfnmadd_vf_h_riscv64 +#define helper_vfnmadd_vf_w helper_vfnmadd_vf_w_riscv64 +#define helper_vfnmadd_vv_d helper_vfnmadd_vv_d_riscv64 +#define helper_vfnmadd_vv_h helper_vfnmadd_vv_h_riscv64 +#define helper_vfnmadd_vv_w helper_vfnmadd_vv_w_riscv64 +#define helper_vfnmsac_vf_d helper_vfnmsac_vf_d_riscv64 +#define helper_vfnmsac_vf_h helper_vfnmsac_vf_h_riscv64 +#define helper_vfnmsac_vf_w helper_vfnmsac_vf_w_riscv64 +#define helper_vfnmsac_vv_d helper_vfnmsac_vv_d_riscv64 +#define helper_vfnmsac_vv_h helper_vfnmsac_vv_h_riscv64 +#define helper_vfnmsac_vv_w helper_vfnmsac_vv_w_riscv64 +#define helper_vfnmsub_vf_d helper_vfnmsub_vf_d_riscv64 +#define helper_vfnmsub_vf_h helper_vfnmsub_vf_h_riscv64 +#define helper_vfnmsub_vf_w helper_vfnmsub_vf_w_riscv64 +#define helper_vfnmsub_vv_d helper_vfnmsub_vv_d_riscv64 +#define helper_vfnmsub_vv_h helper_vfnmsub_vv_h_riscv64 
+#define helper_vfnmsub_vv_w helper_vfnmsub_vv_w_riscv64 +#define helper_vfrdiv_vf_d helper_vfrdiv_vf_d_riscv64 +#define helper_vfrdiv_vf_h helper_vfrdiv_vf_h_riscv64 +#define helper_vfrdiv_vf_w helper_vfrdiv_vf_w_riscv64 +#define helper_vfredmax_vs_d helper_vfredmax_vs_d_riscv64 +#define helper_vfredmax_vs_h helper_vfredmax_vs_h_riscv64 +#define helper_vfredmax_vs_w helper_vfredmax_vs_w_riscv64 +#define helper_vfredmin_vs_d helper_vfredmin_vs_d_riscv64 +#define helper_vfredmin_vs_h helper_vfredmin_vs_h_riscv64 +#define helper_vfredmin_vs_w helper_vfredmin_vs_w_riscv64 +#define helper_vfredsum_vs_d helper_vfredsum_vs_d_riscv64 +#define helper_vfredsum_vs_h helper_vfredsum_vs_h_riscv64 +#define helper_vfredsum_vs_w helper_vfredsum_vs_w_riscv64 +#define helper_vfrsub_vf_d helper_vfrsub_vf_d_riscv64 +#define helper_vfrsub_vf_h helper_vfrsub_vf_h_riscv64 +#define helper_vfrsub_vf_w helper_vfrsub_vf_w_riscv64 +#define helper_vfsgnj_vf_d helper_vfsgnj_vf_d_riscv64 +#define helper_vfsgnj_vf_h helper_vfsgnj_vf_h_riscv64 +#define helper_vfsgnj_vf_w helper_vfsgnj_vf_w_riscv64 +#define helper_vfsgnj_vv_d helper_vfsgnj_vv_d_riscv64 +#define helper_vfsgnj_vv_h helper_vfsgnj_vv_h_riscv64 +#define helper_vfsgnj_vv_w helper_vfsgnj_vv_w_riscv64 +#define helper_vfsgnjn_vf_d helper_vfsgnjn_vf_d_riscv64 +#define helper_vfsgnjn_vf_h helper_vfsgnjn_vf_h_riscv64 +#define helper_vfsgnjn_vf_w helper_vfsgnjn_vf_w_riscv64 +#define helper_vfsgnjn_vv_d helper_vfsgnjn_vv_d_riscv64 +#define helper_vfsgnjn_vv_h helper_vfsgnjn_vv_h_riscv64 +#define helper_vfsgnjn_vv_w helper_vfsgnjn_vv_w_riscv64 +#define helper_vfsgnjx_vf_d helper_vfsgnjx_vf_d_riscv64 +#define helper_vfsgnjx_vf_h helper_vfsgnjx_vf_h_riscv64 +#define helper_vfsgnjx_vf_w helper_vfsgnjx_vf_w_riscv64 +#define helper_vfsgnjx_vv_d helper_vfsgnjx_vv_d_riscv64 +#define helper_vfsgnjx_vv_h helper_vfsgnjx_vv_h_riscv64 +#define helper_vfsgnjx_vv_w helper_vfsgnjx_vv_w_riscv64 +#define helper_vfsqrt_v_d helper_vfsqrt_v_d_riscv64 +#define helper_vfsqrt_v_h helper_vfsqrt_v_h_riscv64 +#define helper_vfsqrt_v_w helper_vfsqrt_v_w_riscv64 +#define helper_vfsub_vf_d helper_vfsub_vf_d_riscv64 +#define helper_vfsub_vf_h helper_vfsub_vf_h_riscv64 +#define helper_vfsub_vf_w helper_vfsub_vf_w_riscv64 +#define helper_vfsub_vv_d helper_vfsub_vv_d_riscv64 +#define helper_vfsub_vv_h helper_vfsub_vv_h_riscv64 +#define helper_vfsub_vv_w helper_vfsub_vv_w_riscv64 +#define helper_vfwadd_vf_h helper_vfwadd_vf_h_riscv64 +#define helper_vfwadd_vf_w helper_vfwadd_vf_w_riscv64 +#define helper_vfwadd_vv_h helper_vfwadd_vv_h_riscv64 +#define helper_vfwadd_vv_w helper_vfwadd_vv_w_riscv64 +#define helper_vfwadd_wf_h helper_vfwadd_wf_h_riscv64 +#define helper_vfwadd_wf_w helper_vfwadd_wf_w_riscv64 +#define helper_vfwadd_wv_h helper_vfwadd_wv_h_riscv64 +#define helper_vfwadd_wv_w helper_vfwadd_wv_w_riscv64 +#define helper_vfwcvt_f_f_v_h helper_vfwcvt_f_f_v_h_riscv64 +#define helper_vfwcvt_f_f_v_w helper_vfwcvt_f_f_v_w_riscv64 +#define helper_vfwcvt_f_x_v_h helper_vfwcvt_f_x_v_h_riscv64 +#define helper_vfwcvt_f_x_v_w helper_vfwcvt_f_x_v_w_riscv64 +#define helper_vfwcvt_f_xu_v_h helper_vfwcvt_f_xu_v_h_riscv64 +#define helper_vfwcvt_f_xu_v_w helper_vfwcvt_f_xu_v_w_riscv64 +#define helper_vfwcvt_x_f_v_h helper_vfwcvt_x_f_v_h_riscv64 +#define helper_vfwcvt_x_f_v_w helper_vfwcvt_x_f_v_w_riscv64 +#define helper_vfwcvt_xu_f_v_h helper_vfwcvt_xu_f_v_h_riscv64 +#define helper_vfwcvt_xu_f_v_w helper_vfwcvt_xu_f_v_w_riscv64 +#define helper_vfwmacc_vf_h helper_vfwmacc_vf_h_riscv64 +#define helper_vfwmacc_vf_w 
helper_vfwmacc_vf_w_riscv64 +#define helper_vfwmacc_vv_h helper_vfwmacc_vv_h_riscv64 +#define helper_vfwmacc_vv_w helper_vfwmacc_vv_w_riscv64 +#define helper_vfwmsac_vf_h helper_vfwmsac_vf_h_riscv64 +#define helper_vfwmsac_vf_w helper_vfwmsac_vf_w_riscv64 +#define helper_vfwmsac_vv_h helper_vfwmsac_vv_h_riscv64 +#define helper_vfwmsac_vv_w helper_vfwmsac_vv_w_riscv64 +#define helper_vfwmul_vf_h helper_vfwmul_vf_h_riscv64 +#define helper_vfwmul_vf_w helper_vfwmul_vf_w_riscv64 +#define helper_vfwmul_vv_h helper_vfwmul_vv_h_riscv64 +#define helper_vfwmul_vv_w helper_vfwmul_vv_w_riscv64 +#define helper_vfwnmacc_vf_h helper_vfwnmacc_vf_h_riscv64 +#define helper_vfwnmacc_vf_w helper_vfwnmacc_vf_w_riscv64 +#define helper_vfwnmacc_vv_h helper_vfwnmacc_vv_h_riscv64 +#define helper_vfwnmacc_vv_w helper_vfwnmacc_vv_w_riscv64 +#define helper_vfwnmsac_vf_h helper_vfwnmsac_vf_h_riscv64 +#define helper_vfwnmsac_vf_w helper_vfwnmsac_vf_w_riscv64 +#define helper_vfwnmsac_vv_h helper_vfwnmsac_vv_h_riscv64 +#define helper_vfwnmsac_vv_w helper_vfwnmsac_vv_w_riscv64 +#define helper_vfwredsum_vs_h helper_vfwredsum_vs_h_riscv64 +#define helper_vfwredsum_vs_w helper_vfwredsum_vs_w_riscv64 +#define helper_vfwsub_vf_h helper_vfwsub_vf_h_riscv64 +#define helper_vfwsub_vf_w helper_vfwsub_vf_w_riscv64 +#define helper_vfwsub_vv_h helper_vfwsub_vv_h_riscv64 +#define helper_vfwsub_vv_w helper_vfwsub_vv_w_riscv64 +#define helper_vfwsub_wf_h helper_vfwsub_wf_h_riscv64 +#define helper_vfwsub_wf_w helper_vfwsub_wf_w_riscv64 +#define helper_vfwsub_wv_h helper_vfwsub_wv_h_riscv64 +#define helper_vfwsub_wv_w helper_vfwsub_wv_w_riscv64 +#define helper_vid_v_b helper_vid_v_b_riscv64 +#define helper_vid_v_d helper_vid_v_d_riscv64 +#define helper_vid_v_h helper_vid_v_h_riscv64 +#define helper_vid_v_w helper_vid_v_w_riscv64 +#define helper_viota_m_b helper_viota_m_b_riscv64 +#define helper_viota_m_d helper_viota_m_d_riscv64 +#define helper_viota_m_h helper_viota_m_h_riscv64 +#define helper_viota_m_w helper_viota_m_w_riscv64 +#define helper_vlb_v_b helper_vlb_v_b_riscv64 +#define helper_vlb_v_b_mask helper_vlb_v_b_mask_riscv64 +#define helper_vlb_v_d helper_vlb_v_d_riscv64 +#define helper_vlb_v_d_mask helper_vlb_v_d_mask_riscv64 +#define helper_vlb_v_h helper_vlb_v_h_riscv64 +#define helper_vlb_v_h_mask helper_vlb_v_h_mask_riscv64 +#define helper_vlb_v_w helper_vlb_v_w_riscv64 +#define helper_vlb_v_w_mask helper_vlb_v_w_mask_riscv64 +#define helper_vlbff_v_b helper_vlbff_v_b_riscv64 +#define helper_vlbff_v_d helper_vlbff_v_d_riscv64 +#define helper_vlbff_v_h helper_vlbff_v_h_riscv64 +#define helper_vlbff_v_w helper_vlbff_v_w_riscv64 +#define helper_vlbu_v_b helper_vlbu_v_b_riscv64 +#define helper_vlbu_v_b_mask helper_vlbu_v_b_mask_riscv64 +#define helper_vlbu_v_d helper_vlbu_v_d_riscv64 +#define helper_vlbu_v_d_mask helper_vlbu_v_d_mask_riscv64 +#define helper_vlbu_v_h helper_vlbu_v_h_riscv64 +#define helper_vlbu_v_h_mask helper_vlbu_v_h_mask_riscv64 +#define helper_vlbu_v_w helper_vlbu_v_w_riscv64 +#define helper_vlbu_v_w_mask helper_vlbu_v_w_mask_riscv64 +#define helper_vlbuff_v_b helper_vlbuff_v_b_riscv64 +#define helper_vlbuff_v_d helper_vlbuff_v_d_riscv64 +#define helper_vlbuff_v_h helper_vlbuff_v_h_riscv64 +#define helper_vlbuff_v_w helper_vlbuff_v_w_riscv64 +#define helper_vle_v_b helper_vle_v_b_riscv64 +#define helper_vle_v_b_mask helper_vle_v_b_mask_riscv64 +#define helper_vle_v_d helper_vle_v_d_riscv64 +#define helper_vle_v_d_mask helper_vle_v_d_mask_riscv64 +#define helper_vle_v_h helper_vle_v_h_riscv64 +#define 
helper_vle_v_h_mask helper_vle_v_h_mask_riscv64 +#define helper_vle_v_w helper_vle_v_w_riscv64 +#define helper_vle_v_w_mask helper_vle_v_w_mask_riscv64 +#define helper_vleff_v_b helper_vleff_v_b_riscv64 +#define helper_vleff_v_d helper_vleff_v_d_riscv64 +#define helper_vleff_v_h helper_vleff_v_h_riscv64 +#define helper_vleff_v_w helper_vleff_v_w_riscv64 +#define helper_vlh_v_d helper_vlh_v_d_riscv64 +#define helper_vlh_v_d_mask helper_vlh_v_d_mask_riscv64 +#define helper_vlh_v_h helper_vlh_v_h_riscv64 +#define helper_vlh_v_h_mask helper_vlh_v_h_mask_riscv64 +#define helper_vlh_v_w helper_vlh_v_w_riscv64 +#define helper_vlh_v_w_mask helper_vlh_v_w_mask_riscv64 +#define helper_vlhff_v_d helper_vlhff_v_d_riscv64 +#define helper_vlhff_v_h helper_vlhff_v_h_riscv64 +#define helper_vlhff_v_w helper_vlhff_v_w_riscv64 +#define helper_vlhu_v_d helper_vlhu_v_d_riscv64 +#define helper_vlhu_v_d_mask helper_vlhu_v_d_mask_riscv64 +#define helper_vlhu_v_h helper_vlhu_v_h_riscv64 +#define helper_vlhu_v_h_mask helper_vlhu_v_h_mask_riscv64 +#define helper_vlhu_v_w helper_vlhu_v_w_riscv64 +#define helper_vlhu_v_w_mask helper_vlhu_v_w_mask_riscv64 +#define helper_vlhuff_v_d helper_vlhuff_v_d_riscv64 +#define helper_vlhuff_v_h helper_vlhuff_v_h_riscv64 +#define helper_vlhuff_v_w helper_vlhuff_v_w_riscv64 +#define helper_vlsb_v_b helper_vlsb_v_b_riscv64 +#define helper_vlsb_v_d helper_vlsb_v_d_riscv64 +#define helper_vlsb_v_h helper_vlsb_v_h_riscv64 +#define helper_vlsb_v_w helper_vlsb_v_w_riscv64 +#define helper_vlsbu_v_b helper_vlsbu_v_b_riscv64 +#define helper_vlsbu_v_d helper_vlsbu_v_d_riscv64 +#define helper_vlsbu_v_h helper_vlsbu_v_h_riscv64 +#define helper_vlsbu_v_w helper_vlsbu_v_w_riscv64 +#define helper_vlse_v_b helper_vlse_v_b_riscv64 +#define helper_vlse_v_d helper_vlse_v_d_riscv64 +#define helper_vlse_v_h helper_vlse_v_h_riscv64 +#define helper_vlse_v_w helper_vlse_v_w_riscv64 +#define helper_vlsh_v_d helper_vlsh_v_d_riscv64 +#define helper_vlsh_v_h helper_vlsh_v_h_riscv64 +#define helper_vlsh_v_w helper_vlsh_v_w_riscv64 +#define helper_vlshu_v_d helper_vlshu_v_d_riscv64 +#define helper_vlshu_v_h helper_vlshu_v_h_riscv64 +#define helper_vlshu_v_w helper_vlshu_v_w_riscv64 +#define helper_vlsw_v_d helper_vlsw_v_d_riscv64 +#define helper_vlsw_v_w helper_vlsw_v_w_riscv64 +#define helper_vlswu_v_d helper_vlswu_v_d_riscv64 +#define helper_vlswu_v_w helper_vlswu_v_w_riscv64 +#define helper_vlw_v_d helper_vlw_v_d_riscv64 +#define helper_vlw_v_d_mask helper_vlw_v_d_mask_riscv64 +#define helper_vlw_v_w helper_vlw_v_w_riscv64 +#define helper_vlw_v_w_mask helper_vlw_v_w_mask_riscv64 +#define helper_vlwff_v_d helper_vlwff_v_d_riscv64 +#define helper_vlwff_v_w helper_vlwff_v_w_riscv64 +#define helper_vlwu_v_d helper_vlwu_v_d_riscv64 +#define helper_vlwu_v_d_mask helper_vlwu_v_d_mask_riscv64 +#define helper_vlwu_v_w helper_vlwu_v_w_riscv64 +#define helper_vlwu_v_w_mask helper_vlwu_v_w_mask_riscv64 +#define helper_vlwuff_v_d helper_vlwuff_v_d_riscv64 +#define helper_vlwuff_v_w helper_vlwuff_v_w_riscv64 +#define helper_vlxb_v_b helper_vlxb_v_b_riscv64 +#define helper_vlxb_v_d helper_vlxb_v_d_riscv64 +#define helper_vlxb_v_h helper_vlxb_v_h_riscv64 +#define helper_vlxb_v_w helper_vlxb_v_w_riscv64 +#define helper_vlxbu_v_b helper_vlxbu_v_b_riscv64 +#define helper_vlxbu_v_d helper_vlxbu_v_d_riscv64 +#define helper_vlxbu_v_h helper_vlxbu_v_h_riscv64 +#define helper_vlxbu_v_w helper_vlxbu_v_w_riscv64 +#define helper_vlxe_v_b helper_vlxe_v_b_riscv64 +#define helper_vlxe_v_d helper_vlxe_v_d_riscv64 +#define helper_vlxe_v_h 
helper_vlxe_v_h_riscv64 +#define helper_vlxe_v_w helper_vlxe_v_w_riscv64 +#define helper_vlxh_v_d helper_vlxh_v_d_riscv64 +#define helper_vlxh_v_h helper_vlxh_v_h_riscv64 +#define helper_vlxh_v_w helper_vlxh_v_w_riscv64 +#define helper_vlxhu_v_d helper_vlxhu_v_d_riscv64 +#define helper_vlxhu_v_h helper_vlxhu_v_h_riscv64 +#define helper_vlxhu_v_w helper_vlxhu_v_w_riscv64 +#define helper_vlxw_v_d helper_vlxw_v_d_riscv64 +#define helper_vlxw_v_w helper_vlxw_v_w_riscv64 +#define helper_vlxwu_v_d helper_vlxwu_v_d_riscv64 +#define helper_vlxwu_v_w helper_vlxwu_v_w_riscv64 +#define helper_vmacc_vv_b helper_vmacc_vv_b_riscv64 +#define helper_vmacc_vv_d helper_vmacc_vv_d_riscv64 +#define helper_vmacc_vv_h helper_vmacc_vv_h_riscv64 +#define helper_vmacc_vv_w helper_vmacc_vv_w_riscv64 +#define helper_vmacc_vx_b helper_vmacc_vx_b_riscv64 +#define helper_vmacc_vx_d helper_vmacc_vx_d_riscv64 +#define helper_vmacc_vx_h helper_vmacc_vx_h_riscv64 +#define helper_vmacc_vx_w helper_vmacc_vx_w_riscv64 +#define helper_vmadc_vvm_b helper_vmadc_vvm_b_riscv64 +#define helper_vmadc_vvm_d helper_vmadc_vvm_d_riscv64 +#define helper_vmadc_vvm_h helper_vmadc_vvm_h_riscv64 +#define helper_vmadc_vvm_w helper_vmadc_vvm_w_riscv64 +#define helper_vmadc_vxm_b helper_vmadc_vxm_b_riscv64 +#define helper_vmadc_vxm_d helper_vmadc_vxm_d_riscv64 +#define helper_vmadc_vxm_h helper_vmadc_vxm_h_riscv64 +#define helper_vmadc_vxm_w helper_vmadc_vxm_w_riscv64 +#define helper_vmadd_vv_b helper_vmadd_vv_b_riscv64 +#define helper_vmadd_vv_d helper_vmadd_vv_d_riscv64 +#define helper_vmadd_vv_h helper_vmadd_vv_h_riscv64 +#define helper_vmadd_vv_w helper_vmadd_vv_w_riscv64 +#define helper_vmadd_vx_b helper_vmadd_vx_b_riscv64 +#define helper_vmadd_vx_d helper_vmadd_vx_d_riscv64 +#define helper_vmadd_vx_h helper_vmadd_vx_h_riscv64 +#define helper_vmadd_vx_w helper_vmadd_vx_w_riscv64 +#define helper_vmand_mm helper_vmand_mm_riscv64 +#define helper_vmandnot_mm helper_vmandnot_mm_riscv64 +#define helper_vmax_vv_b helper_vmax_vv_b_riscv64 +#define helper_vmax_vv_d helper_vmax_vv_d_riscv64 +#define helper_vmax_vv_h helper_vmax_vv_h_riscv64 +#define helper_vmax_vv_w helper_vmax_vv_w_riscv64 +#define helper_vmax_vx_b helper_vmax_vx_b_riscv64 +#define helper_vmax_vx_d helper_vmax_vx_d_riscv64 +#define helper_vmax_vx_h helper_vmax_vx_h_riscv64 +#define helper_vmax_vx_w helper_vmax_vx_w_riscv64 +#define helper_vmaxu_vv_b helper_vmaxu_vv_b_riscv64 +#define helper_vmaxu_vv_d helper_vmaxu_vv_d_riscv64 +#define helper_vmaxu_vv_h helper_vmaxu_vv_h_riscv64 +#define helper_vmaxu_vv_w helper_vmaxu_vv_w_riscv64 +#define helper_vmaxu_vx_b helper_vmaxu_vx_b_riscv64 +#define helper_vmaxu_vx_d helper_vmaxu_vx_d_riscv64 +#define helper_vmaxu_vx_h helper_vmaxu_vx_h_riscv64 +#define helper_vmaxu_vx_w helper_vmaxu_vx_w_riscv64 +#define helper_vmerge_vvm_b helper_vmerge_vvm_b_riscv64 +#define helper_vmerge_vvm_d helper_vmerge_vvm_d_riscv64 +#define helper_vmerge_vvm_h helper_vmerge_vvm_h_riscv64 +#define helper_vmerge_vvm_w helper_vmerge_vvm_w_riscv64 +#define helper_vmerge_vxm_b helper_vmerge_vxm_b_riscv64 +#define helper_vmerge_vxm_d helper_vmerge_vxm_d_riscv64 +#define helper_vmerge_vxm_h helper_vmerge_vxm_h_riscv64 +#define helper_vmerge_vxm_w helper_vmerge_vxm_w_riscv64 +#define helper_vmfeq_vf_d helper_vmfeq_vf_d_riscv64 +#define helper_vmfeq_vf_h helper_vmfeq_vf_h_riscv64 +#define helper_vmfeq_vf_w helper_vmfeq_vf_w_riscv64 +#define helper_vmfeq_vv_d helper_vmfeq_vv_d_riscv64 +#define helper_vmfeq_vv_h helper_vmfeq_vv_h_riscv64 +#define helper_vmfeq_vv_w 
helper_vmfeq_vv_w_riscv64 +#define helper_vmfge_vf_d helper_vmfge_vf_d_riscv64 +#define helper_vmfge_vf_h helper_vmfge_vf_h_riscv64 +#define helper_vmfge_vf_w helper_vmfge_vf_w_riscv64 +#define helper_vmfgt_vf_d helper_vmfgt_vf_d_riscv64 +#define helper_vmfgt_vf_h helper_vmfgt_vf_h_riscv64 +#define helper_vmfgt_vf_w helper_vmfgt_vf_w_riscv64 +#define helper_vmfirst_m helper_vmfirst_m_riscv64 +#define helper_vmfle_vf_d helper_vmfle_vf_d_riscv64 +#define helper_vmfle_vf_h helper_vmfle_vf_h_riscv64 +#define helper_vmfle_vf_w helper_vmfle_vf_w_riscv64 +#define helper_vmfle_vv_d helper_vmfle_vv_d_riscv64 +#define helper_vmfle_vv_h helper_vmfle_vv_h_riscv64 +#define helper_vmfle_vv_w helper_vmfle_vv_w_riscv64 +#define helper_vmflt_vf_d helper_vmflt_vf_d_riscv64 +#define helper_vmflt_vf_h helper_vmflt_vf_h_riscv64 +#define helper_vmflt_vf_w helper_vmflt_vf_w_riscv64 +#define helper_vmflt_vv_d helper_vmflt_vv_d_riscv64 +#define helper_vmflt_vv_h helper_vmflt_vv_h_riscv64 +#define helper_vmflt_vv_w helper_vmflt_vv_w_riscv64 +#define helper_vmfne_vf_d helper_vmfne_vf_d_riscv64 +#define helper_vmfne_vf_h helper_vmfne_vf_h_riscv64 +#define helper_vmfne_vf_w helper_vmfne_vf_w_riscv64 +#define helper_vmfne_vv_d helper_vmfne_vv_d_riscv64 +#define helper_vmfne_vv_h helper_vmfne_vv_h_riscv64 +#define helper_vmfne_vv_w helper_vmfne_vv_w_riscv64 +#define helper_vmford_vf_d helper_vmford_vf_d_riscv64 +#define helper_vmford_vf_h helper_vmford_vf_h_riscv64 +#define helper_vmford_vf_w helper_vmford_vf_w_riscv64 +#define helper_vmford_vv_d helper_vmford_vv_d_riscv64 +#define helper_vmford_vv_h helper_vmford_vv_h_riscv64 +#define helper_vmford_vv_w helper_vmford_vv_w_riscv64 +#define helper_vmin_vv_b helper_vmin_vv_b_riscv64 +#define helper_vmin_vv_d helper_vmin_vv_d_riscv64 +#define helper_vmin_vv_h helper_vmin_vv_h_riscv64 +#define helper_vmin_vv_w helper_vmin_vv_w_riscv64 +#define helper_vmin_vx_b helper_vmin_vx_b_riscv64 +#define helper_vmin_vx_d helper_vmin_vx_d_riscv64 +#define helper_vmin_vx_h helper_vmin_vx_h_riscv64 +#define helper_vmin_vx_w helper_vmin_vx_w_riscv64 +#define helper_vminu_vv_b helper_vminu_vv_b_riscv64 +#define helper_vminu_vv_d helper_vminu_vv_d_riscv64 +#define helper_vminu_vv_h helper_vminu_vv_h_riscv64 +#define helper_vminu_vv_w helper_vminu_vv_w_riscv64 +#define helper_vminu_vx_b helper_vminu_vx_b_riscv64 +#define helper_vminu_vx_d helper_vminu_vx_d_riscv64 +#define helper_vminu_vx_h helper_vminu_vx_h_riscv64 +#define helper_vminu_vx_w helper_vminu_vx_w_riscv64 +#define helper_vmnand_mm helper_vmnand_mm_riscv64 +#define helper_vmnor_mm helper_vmnor_mm_riscv64 +#define helper_vmor_mm helper_vmor_mm_riscv64 +#define helper_vmornot_mm helper_vmornot_mm_riscv64 +#define helper_vmpopc_m helper_vmpopc_m_riscv64 +#define helper_vmsbc_vvm_b helper_vmsbc_vvm_b_riscv64 +#define helper_vmsbc_vvm_d helper_vmsbc_vvm_d_riscv64 +#define helper_vmsbc_vvm_h helper_vmsbc_vvm_h_riscv64 +#define helper_vmsbc_vvm_w helper_vmsbc_vvm_w_riscv64 +#define helper_vmsbc_vxm_b helper_vmsbc_vxm_b_riscv64 +#define helper_vmsbc_vxm_d helper_vmsbc_vxm_d_riscv64 +#define helper_vmsbc_vxm_h helper_vmsbc_vxm_h_riscv64 +#define helper_vmsbc_vxm_w helper_vmsbc_vxm_w_riscv64 +#define helper_vmsbf_m helper_vmsbf_m_riscv64 +#define helper_vmseq_vv_b helper_vmseq_vv_b_riscv64 +#define helper_vmseq_vv_d helper_vmseq_vv_d_riscv64 +#define helper_vmseq_vv_h helper_vmseq_vv_h_riscv64 +#define helper_vmseq_vv_w helper_vmseq_vv_w_riscv64 +#define helper_vmseq_vx_b helper_vmseq_vx_b_riscv64 +#define helper_vmseq_vx_d 
helper_vmseq_vx_d_riscv64 +#define helper_vmseq_vx_h helper_vmseq_vx_h_riscv64 +#define helper_vmseq_vx_w helper_vmseq_vx_w_riscv64 +#define helper_vmsgt_vx_b helper_vmsgt_vx_b_riscv64 +#define helper_vmsgt_vx_d helper_vmsgt_vx_d_riscv64 +#define helper_vmsgt_vx_h helper_vmsgt_vx_h_riscv64 +#define helper_vmsgt_vx_w helper_vmsgt_vx_w_riscv64 +#define helper_vmsgtu_vx_b helper_vmsgtu_vx_b_riscv64 +#define helper_vmsgtu_vx_d helper_vmsgtu_vx_d_riscv64 +#define helper_vmsgtu_vx_h helper_vmsgtu_vx_h_riscv64 +#define helper_vmsgtu_vx_w helper_vmsgtu_vx_w_riscv64 +#define helper_vmsif_m helper_vmsif_m_riscv64 +#define helper_vmsle_vv_b helper_vmsle_vv_b_riscv64 +#define helper_vmsle_vv_d helper_vmsle_vv_d_riscv64 +#define helper_vmsle_vv_h helper_vmsle_vv_h_riscv64 +#define helper_vmsle_vv_w helper_vmsle_vv_w_riscv64 +#define helper_vmsle_vx_b helper_vmsle_vx_b_riscv64 +#define helper_vmsle_vx_d helper_vmsle_vx_d_riscv64 +#define helper_vmsle_vx_h helper_vmsle_vx_h_riscv64 +#define helper_vmsle_vx_w helper_vmsle_vx_w_riscv64 +#define helper_vmsleu_vv_b helper_vmsleu_vv_b_riscv64 +#define helper_vmsleu_vv_d helper_vmsleu_vv_d_riscv64 +#define helper_vmsleu_vv_h helper_vmsleu_vv_h_riscv64 +#define helper_vmsleu_vv_w helper_vmsleu_vv_w_riscv64 +#define helper_vmsleu_vx_b helper_vmsleu_vx_b_riscv64 +#define helper_vmsleu_vx_d helper_vmsleu_vx_d_riscv64 +#define helper_vmsleu_vx_h helper_vmsleu_vx_h_riscv64 +#define helper_vmsleu_vx_w helper_vmsleu_vx_w_riscv64 +#define helper_vmslt_vv_b helper_vmslt_vv_b_riscv64 +#define helper_vmslt_vv_d helper_vmslt_vv_d_riscv64 +#define helper_vmslt_vv_h helper_vmslt_vv_h_riscv64 +#define helper_vmslt_vv_w helper_vmslt_vv_w_riscv64 +#define helper_vmslt_vx_b helper_vmslt_vx_b_riscv64 +#define helper_vmslt_vx_d helper_vmslt_vx_d_riscv64 +#define helper_vmslt_vx_h helper_vmslt_vx_h_riscv64 +#define helper_vmslt_vx_w helper_vmslt_vx_w_riscv64 +#define helper_vmsltu_vv_b helper_vmsltu_vv_b_riscv64 +#define helper_vmsltu_vv_d helper_vmsltu_vv_d_riscv64 +#define helper_vmsltu_vv_h helper_vmsltu_vv_h_riscv64 +#define helper_vmsltu_vv_w helper_vmsltu_vv_w_riscv64 +#define helper_vmsltu_vx_b helper_vmsltu_vx_b_riscv64 +#define helper_vmsltu_vx_d helper_vmsltu_vx_d_riscv64 +#define helper_vmsltu_vx_h helper_vmsltu_vx_h_riscv64 +#define helper_vmsltu_vx_w helper_vmsltu_vx_w_riscv64 +#define helper_vmsne_vv_b helper_vmsne_vv_b_riscv64 +#define helper_vmsne_vv_d helper_vmsne_vv_d_riscv64 +#define helper_vmsne_vv_h helper_vmsne_vv_h_riscv64 +#define helper_vmsne_vv_w helper_vmsne_vv_w_riscv64 +#define helper_vmsne_vx_b helper_vmsne_vx_b_riscv64 +#define helper_vmsne_vx_d helper_vmsne_vx_d_riscv64 +#define helper_vmsne_vx_h helper_vmsne_vx_h_riscv64 +#define helper_vmsne_vx_w helper_vmsne_vx_w_riscv64 +#define helper_vmsof_m helper_vmsof_m_riscv64 +#define helper_vmul_vv_b helper_vmul_vv_b_riscv64 +#define helper_vmul_vv_d helper_vmul_vv_d_riscv64 +#define helper_vmul_vv_h helper_vmul_vv_h_riscv64 +#define helper_vmul_vv_w helper_vmul_vv_w_riscv64 +#define helper_vmul_vx_b helper_vmul_vx_b_riscv64 +#define helper_vmul_vx_d helper_vmul_vx_d_riscv64 +#define helper_vmul_vx_h helper_vmul_vx_h_riscv64 +#define helper_vmul_vx_w helper_vmul_vx_w_riscv64 +#define helper_vmulh_vv_b helper_vmulh_vv_b_riscv64 +#define helper_vmulh_vv_d helper_vmulh_vv_d_riscv64 +#define helper_vmulh_vv_h helper_vmulh_vv_h_riscv64 +#define helper_vmulh_vv_w helper_vmulh_vv_w_riscv64 +#define helper_vmulh_vx_b helper_vmulh_vx_b_riscv64 +#define helper_vmulh_vx_d helper_vmulh_vx_d_riscv64 +#define 
helper_vmulh_vx_h helper_vmulh_vx_h_riscv64 +#define helper_vmulh_vx_w helper_vmulh_vx_w_riscv64 +#define helper_vmulhsu_vv_b helper_vmulhsu_vv_b_riscv64 +#define helper_vmulhsu_vv_d helper_vmulhsu_vv_d_riscv64 +#define helper_vmulhsu_vv_h helper_vmulhsu_vv_h_riscv64 +#define helper_vmulhsu_vv_w helper_vmulhsu_vv_w_riscv64 +#define helper_vmulhsu_vx_b helper_vmulhsu_vx_b_riscv64 +#define helper_vmulhsu_vx_d helper_vmulhsu_vx_d_riscv64 +#define helper_vmulhsu_vx_h helper_vmulhsu_vx_h_riscv64 +#define helper_vmulhsu_vx_w helper_vmulhsu_vx_w_riscv64 +#define helper_vmulhu_vv_b helper_vmulhu_vv_b_riscv64 +#define helper_vmulhu_vv_d helper_vmulhu_vv_d_riscv64 +#define helper_vmulhu_vv_h helper_vmulhu_vv_h_riscv64 +#define helper_vmulhu_vv_w helper_vmulhu_vv_w_riscv64 +#define helper_vmulhu_vx_b helper_vmulhu_vx_b_riscv64 +#define helper_vmulhu_vx_d helper_vmulhu_vx_d_riscv64 +#define helper_vmulhu_vx_h helper_vmulhu_vx_h_riscv64 +#define helper_vmulhu_vx_w helper_vmulhu_vx_w_riscv64 +#define helper_vmv_v_v_b helper_vmv_v_v_b_riscv64 +#define helper_vmv_v_v_d helper_vmv_v_v_d_riscv64 +#define helper_vmv_v_v_h helper_vmv_v_v_h_riscv64 +#define helper_vmv_v_v_w helper_vmv_v_v_w_riscv64 +#define helper_vmv_v_x_b helper_vmv_v_x_b_riscv64 +#define helper_vmv_v_x_d helper_vmv_v_x_d_riscv64 +#define helper_vmv_v_x_h helper_vmv_v_x_h_riscv64 +#define helper_vmv_v_x_w helper_vmv_v_x_w_riscv64 +#define helper_vmxnor_mm helper_vmxnor_mm_riscv64 +#define helper_vmxor_mm helper_vmxor_mm_riscv64 +#define helper_vnclip_vv_b helper_vnclip_vv_b_riscv64 +#define helper_vnclip_vv_h helper_vnclip_vv_h_riscv64 +#define helper_vnclip_vv_w helper_vnclip_vv_w_riscv64 +#define helper_vnclip_vx_b helper_vnclip_vx_b_riscv64 +#define helper_vnclip_vx_h helper_vnclip_vx_h_riscv64 +#define helper_vnclip_vx_w helper_vnclip_vx_w_riscv64 +#define helper_vnclipu_vv_b helper_vnclipu_vv_b_riscv64 +#define helper_vnclipu_vv_h helper_vnclipu_vv_h_riscv64 +#define helper_vnclipu_vv_w helper_vnclipu_vv_w_riscv64 +#define helper_vnclipu_vx_b helper_vnclipu_vx_b_riscv64 +#define helper_vnclipu_vx_h helper_vnclipu_vx_h_riscv64 +#define helper_vnclipu_vx_w helper_vnclipu_vx_w_riscv64 +#define helper_vnmsac_vv_b helper_vnmsac_vv_b_riscv64 +#define helper_vnmsac_vv_d helper_vnmsac_vv_d_riscv64 +#define helper_vnmsac_vv_h helper_vnmsac_vv_h_riscv64 +#define helper_vnmsac_vv_w helper_vnmsac_vv_w_riscv64 +#define helper_vnmsac_vx_b helper_vnmsac_vx_b_riscv64 +#define helper_vnmsac_vx_d helper_vnmsac_vx_d_riscv64 +#define helper_vnmsac_vx_h helper_vnmsac_vx_h_riscv64 +#define helper_vnmsac_vx_w helper_vnmsac_vx_w_riscv64 +#define helper_vnmsub_vv_b helper_vnmsub_vv_b_riscv64 +#define helper_vnmsub_vv_d helper_vnmsub_vv_d_riscv64 +#define helper_vnmsub_vv_h helper_vnmsub_vv_h_riscv64 +#define helper_vnmsub_vv_w helper_vnmsub_vv_w_riscv64 +#define helper_vnmsub_vx_b helper_vnmsub_vx_b_riscv64 +#define helper_vnmsub_vx_d helper_vnmsub_vx_d_riscv64 +#define helper_vnmsub_vx_h helper_vnmsub_vx_h_riscv64 +#define helper_vnmsub_vx_w helper_vnmsub_vx_w_riscv64 +#define helper_vnsra_vv_b helper_vnsra_vv_b_riscv64 +#define helper_vnsra_vv_h helper_vnsra_vv_h_riscv64 +#define helper_vnsra_vv_w helper_vnsra_vv_w_riscv64 +#define helper_vnsra_vx_b helper_vnsra_vx_b_riscv64 +#define helper_vnsra_vx_h helper_vnsra_vx_h_riscv64 +#define helper_vnsra_vx_w helper_vnsra_vx_w_riscv64 +#define helper_vnsrl_vv_b helper_vnsrl_vv_b_riscv64 +#define helper_vnsrl_vv_h helper_vnsrl_vv_h_riscv64 +#define helper_vnsrl_vv_w helper_vnsrl_vv_w_riscv64 +#define 
helper_vnsrl_vx_b helper_vnsrl_vx_b_riscv64 +#define helper_vnsrl_vx_h helper_vnsrl_vx_h_riscv64 +#define helper_vnsrl_vx_w helper_vnsrl_vx_w_riscv64 +#define helper_vor_vv_b helper_vor_vv_b_riscv64 +#define helper_vor_vv_d helper_vor_vv_d_riscv64 +#define helper_vor_vv_h helper_vor_vv_h_riscv64 +#define helper_vor_vv_w helper_vor_vv_w_riscv64 +#define helper_vor_vx_b helper_vor_vx_b_riscv64 +#define helper_vor_vx_d helper_vor_vx_d_riscv64 +#define helper_vor_vx_h helper_vor_vx_h_riscv64 +#define helper_vor_vx_w helper_vor_vx_w_riscv64 +#define helper_vredand_vs_b helper_vredand_vs_b_riscv64 +#define helper_vredand_vs_d helper_vredand_vs_d_riscv64 +#define helper_vredand_vs_h helper_vredand_vs_h_riscv64 +#define helper_vredand_vs_w helper_vredand_vs_w_riscv64 +#define helper_vredmax_vs_b helper_vredmax_vs_b_riscv64 +#define helper_vredmax_vs_d helper_vredmax_vs_d_riscv64 +#define helper_vredmax_vs_h helper_vredmax_vs_h_riscv64 +#define helper_vredmax_vs_w helper_vredmax_vs_w_riscv64 +#define helper_vredmaxu_vs_b helper_vredmaxu_vs_b_riscv64 +#define helper_vredmaxu_vs_d helper_vredmaxu_vs_d_riscv64 +#define helper_vredmaxu_vs_h helper_vredmaxu_vs_h_riscv64 +#define helper_vredmaxu_vs_w helper_vredmaxu_vs_w_riscv64 +#define helper_vredmin_vs_b helper_vredmin_vs_b_riscv64 +#define helper_vredmin_vs_d helper_vredmin_vs_d_riscv64 +#define helper_vredmin_vs_h helper_vredmin_vs_h_riscv64 +#define helper_vredmin_vs_w helper_vredmin_vs_w_riscv64 +#define helper_vredminu_vs_b helper_vredminu_vs_b_riscv64 +#define helper_vredminu_vs_d helper_vredminu_vs_d_riscv64 +#define helper_vredminu_vs_h helper_vredminu_vs_h_riscv64 +#define helper_vredminu_vs_w helper_vredminu_vs_w_riscv64 +#define helper_vredor_vs_b helper_vredor_vs_b_riscv64 +#define helper_vredor_vs_d helper_vredor_vs_d_riscv64 +#define helper_vredor_vs_h helper_vredor_vs_h_riscv64 +#define helper_vredor_vs_w helper_vredor_vs_w_riscv64 +#define helper_vredsum_vs_b helper_vredsum_vs_b_riscv64 +#define helper_vredsum_vs_d helper_vredsum_vs_d_riscv64 +#define helper_vredsum_vs_h helper_vredsum_vs_h_riscv64 +#define helper_vredsum_vs_w helper_vredsum_vs_w_riscv64 +#define helper_vredxor_vs_b helper_vredxor_vs_b_riscv64 +#define helper_vredxor_vs_d helper_vredxor_vs_d_riscv64 +#define helper_vredxor_vs_h helper_vredxor_vs_h_riscv64 +#define helper_vredxor_vs_w helper_vredxor_vs_w_riscv64 +#define helper_vrem_vv_b helper_vrem_vv_b_riscv64 +#define helper_vrem_vv_d helper_vrem_vv_d_riscv64 +#define helper_vrem_vv_h helper_vrem_vv_h_riscv64 +#define helper_vrem_vv_w helper_vrem_vv_w_riscv64 +#define helper_vrem_vx_b helper_vrem_vx_b_riscv64 +#define helper_vrem_vx_d helper_vrem_vx_d_riscv64 +#define helper_vrem_vx_h helper_vrem_vx_h_riscv64 +#define helper_vrem_vx_w helper_vrem_vx_w_riscv64 +#define helper_vremu_vv_b helper_vremu_vv_b_riscv64 +#define helper_vremu_vv_d helper_vremu_vv_d_riscv64 +#define helper_vremu_vv_h helper_vremu_vv_h_riscv64 +#define helper_vremu_vv_w helper_vremu_vv_w_riscv64 +#define helper_vremu_vx_b helper_vremu_vx_b_riscv64 +#define helper_vremu_vx_d helper_vremu_vx_d_riscv64 +#define helper_vremu_vx_h helper_vremu_vx_h_riscv64 +#define helper_vremu_vx_w helper_vremu_vx_w_riscv64 +#define helper_vrgather_vv_b helper_vrgather_vv_b_riscv64 +#define helper_vrgather_vv_d helper_vrgather_vv_d_riscv64 +#define helper_vrgather_vv_h helper_vrgather_vv_h_riscv64 +#define helper_vrgather_vv_w helper_vrgather_vv_w_riscv64 +#define helper_vrgather_vx_b helper_vrgather_vx_b_riscv64 +#define helper_vrgather_vx_d 
helper_vrgather_vx_d_riscv64 +#define helper_vrgather_vx_h helper_vrgather_vx_h_riscv64 +#define helper_vrgather_vx_w helper_vrgather_vx_w_riscv64 +#define helper_vrsub_vx_b helper_vrsub_vx_b_riscv64 +#define helper_vrsub_vx_d helper_vrsub_vx_d_riscv64 +#define helper_vrsub_vx_h helper_vrsub_vx_h_riscv64 +#define helper_vrsub_vx_w helper_vrsub_vx_w_riscv64 +#define helper_vsadd_vv_b helper_vsadd_vv_b_riscv64 +#define helper_vsadd_vv_d helper_vsadd_vv_d_riscv64 +#define helper_vsadd_vv_h helper_vsadd_vv_h_riscv64 +#define helper_vsadd_vv_w helper_vsadd_vv_w_riscv64 +#define helper_vsadd_vx_b helper_vsadd_vx_b_riscv64 +#define helper_vsadd_vx_d helper_vsadd_vx_d_riscv64 +#define helper_vsadd_vx_h helper_vsadd_vx_h_riscv64 +#define helper_vsadd_vx_w helper_vsadd_vx_w_riscv64 +#define helper_vsaddu_vv_b helper_vsaddu_vv_b_riscv64 +#define helper_vsaddu_vv_d helper_vsaddu_vv_d_riscv64 +#define helper_vsaddu_vv_h helper_vsaddu_vv_h_riscv64 +#define helper_vsaddu_vv_w helper_vsaddu_vv_w_riscv64 +#define helper_vsaddu_vx_b helper_vsaddu_vx_b_riscv64 +#define helper_vsaddu_vx_d helper_vsaddu_vx_d_riscv64 +#define helper_vsaddu_vx_h helper_vsaddu_vx_h_riscv64 +#define helper_vsaddu_vx_w helper_vsaddu_vx_w_riscv64 +#define helper_vsb_v_b helper_vsb_v_b_riscv64 +#define helper_vsb_v_b_mask helper_vsb_v_b_mask_riscv64 +#define helper_vsb_v_d helper_vsb_v_d_riscv64 +#define helper_vsb_v_d_mask helper_vsb_v_d_mask_riscv64 +#define helper_vsb_v_h helper_vsb_v_h_riscv64 +#define helper_vsb_v_h_mask helper_vsb_v_h_mask_riscv64 +#define helper_vsb_v_w helper_vsb_v_w_riscv64 +#define helper_vsb_v_w_mask helper_vsb_v_w_mask_riscv64 +#define helper_vsbc_vvm_b helper_vsbc_vvm_b_riscv64 +#define helper_vsbc_vvm_d helper_vsbc_vvm_d_riscv64 +#define helper_vsbc_vvm_h helper_vsbc_vvm_h_riscv64 +#define helper_vsbc_vvm_w helper_vsbc_vvm_w_riscv64 +#define helper_vsbc_vxm_b helper_vsbc_vxm_b_riscv64 +#define helper_vsbc_vxm_d helper_vsbc_vxm_d_riscv64 +#define helper_vsbc_vxm_h helper_vsbc_vxm_h_riscv64 +#define helper_vsbc_vxm_w helper_vsbc_vxm_w_riscv64 +#define helper_vse_v_b helper_vse_v_b_riscv64 +#define helper_vse_v_b_mask helper_vse_v_b_mask_riscv64 +#define helper_vse_v_d helper_vse_v_d_riscv64 +#define helper_vse_v_d_mask helper_vse_v_d_mask_riscv64 +#define helper_vse_v_h helper_vse_v_h_riscv64 +#define helper_vse_v_h_mask helper_vse_v_h_mask_riscv64 +#define helper_vse_v_w helper_vse_v_w_riscv64 +#define helper_vse_v_w_mask helper_vse_v_w_mask_riscv64 +#define helper_vsetvl helper_vsetvl_riscv64 +#define helper_vsh_v_d helper_vsh_v_d_riscv64 +#define helper_vsh_v_d_mask helper_vsh_v_d_mask_riscv64 +#define helper_vsh_v_h helper_vsh_v_h_riscv64 +#define helper_vsh_v_h_mask helper_vsh_v_h_mask_riscv64 +#define helper_vsh_v_w helper_vsh_v_w_riscv64 +#define helper_vsh_v_w_mask helper_vsh_v_w_mask_riscv64 +#define helper_vslide1down_vx_b helper_vslide1down_vx_b_riscv64 +#define helper_vslide1down_vx_d helper_vslide1down_vx_d_riscv64 +#define helper_vslide1down_vx_h helper_vslide1down_vx_h_riscv64 +#define helper_vslide1down_vx_w helper_vslide1down_vx_w_riscv64 +#define helper_vslide1up_vx_b helper_vslide1up_vx_b_riscv64 +#define helper_vslide1up_vx_d helper_vslide1up_vx_d_riscv64 +#define helper_vslide1up_vx_h helper_vslide1up_vx_h_riscv64 +#define helper_vslide1up_vx_w helper_vslide1up_vx_w_riscv64 +#define helper_vslidedown_vx_b helper_vslidedown_vx_b_riscv64 +#define helper_vslidedown_vx_d helper_vslidedown_vx_d_riscv64 +#define helper_vslidedown_vx_h helper_vslidedown_vx_h_riscv64 +#define 
helper_vslidedown_vx_w helper_vslidedown_vx_w_riscv64 +#define helper_vslideup_vx_b helper_vslideup_vx_b_riscv64 +#define helper_vslideup_vx_d helper_vslideup_vx_d_riscv64 +#define helper_vslideup_vx_h helper_vslideup_vx_h_riscv64 +#define helper_vslideup_vx_w helper_vslideup_vx_w_riscv64 +#define helper_vsll_vv_b helper_vsll_vv_b_riscv64 +#define helper_vsll_vv_d helper_vsll_vv_d_riscv64 +#define helper_vsll_vv_h helper_vsll_vv_h_riscv64 +#define helper_vsll_vv_w helper_vsll_vv_w_riscv64 +#define helper_vsll_vx_b helper_vsll_vx_b_riscv64 +#define helper_vsll_vx_d helper_vsll_vx_d_riscv64 +#define helper_vsll_vx_h helper_vsll_vx_h_riscv64 +#define helper_vsll_vx_w helper_vsll_vx_w_riscv64 +#define helper_vsmul_vv_b helper_vsmul_vv_b_riscv64 +#define helper_vsmul_vv_d helper_vsmul_vv_d_riscv64 +#define helper_vsmul_vv_h helper_vsmul_vv_h_riscv64 +#define helper_vsmul_vv_w helper_vsmul_vv_w_riscv64 +#define helper_vsmul_vx_b helper_vsmul_vx_b_riscv64 +#define helper_vsmul_vx_d helper_vsmul_vx_d_riscv64 +#define helper_vsmul_vx_h helper_vsmul_vx_h_riscv64 +#define helper_vsmul_vx_w helper_vsmul_vx_w_riscv64 +#define helper_vsra_vv_b helper_vsra_vv_b_riscv64 +#define helper_vsra_vv_d helper_vsra_vv_d_riscv64 +#define helper_vsra_vv_h helper_vsra_vv_h_riscv64 +#define helper_vsra_vv_w helper_vsra_vv_w_riscv64 +#define helper_vsra_vx_b helper_vsra_vx_b_riscv64 +#define helper_vsra_vx_d helper_vsra_vx_d_riscv64 +#define helper_vsra_vx_h helper_vsra_vx_h_riscv64 +#define helper_vsra_vx_w helper_vsra_vx_w_riscv64 +#define helper_vsrl_vv_b helper_vsrl_vv_b_riscv64 +#define helper_vsrl_vv_d helper_vsrl_vv_d_riscv64 +#define helper_vsrl_vv_h helper_vsrl_vv_h_riscv64 +#define helper_vsrl_vv_w helper_vsrl_vv_w_riscv64 +#define helper_vsrl_vx_b helper_vsrl_vx_b_riscv64 +#define helper_vsrl_vx_d helper_vsrl_vx_d_riscv64 +#define helper_vsrl_vx_h helper_vsrl_vx_h_riscv64 +#define helper_vsrl_vx_w helper_vsrl_vx_w_riscv64 +#define helper_vssb_v_b helper_vssb_v_b_riscv64 +#define helper_vssb_v_d helper_vssb_v_d_riscv64 +#define helper_vssb_v_h helper_vssb_v_h_riscv64 +#define helper_vssb_v_w helper_vssb_v_w_riscv64 +#define helper_vsse_v_b helper_vsse_v_b_riscv64 +#define helper_vsse_v_d helper_vsse_v_d_riscv64 +#define helper_vsse_v_h helper_vsse_v_h_riscv64 +#define helper_vsse_v_w helper_vsse_v_w_riscv64 +#define helper_vssh_v_d helper_vssh_v_d_riscv64 +#define helper_vssh_v_h helper_vssh_v_h_riscv64 +#define helper_vssh_v_w helper_vssh_v_w_riscv64 +#define helper_vssra_vv_b helper_vssra_vv_b_riscv64 +#define helper_vssra_vv_d helper_vssra_vv_d_riscv64 +#define helper_vssra_vv_h helper_vssra_vv_h_riscv64 +#define helper_vssra_vv_w helper_vssra_vv_w_riscv64 +#define helper_vssra_vx_b helper_vssra_vx_b_riscv64 +#define helper_vssra_vx_d helper_vssra_vx_d_riscv64 +#define helper_vssra_vx_h helper_vssra_vx_h_riscv64 +#define helper_vssra_vx_w helper_vssra_vx_w_riscv64 +#define helper_vssrl_vv_b helper_vssrl_vv_b_riscv64 +#define helper_vssrl_vv_d helper_vssrl_vv_d_riscv64 +#define helper_vssrl_vv_h helper_vssrl_vv_h_riscv64 +#define helper_vssrl_vv_w helper_vssrl_vv_w_riscv64 +#define helper_vssrl_vx_b helper_vssrl_vx_b_riscv64 +#define helper_vssrl_vx_d helper_vssrl_vx_d_riscv64 +#define helper_vssrl_vx_h helper_vssrl_vx_h_riscv64 +#define helper_vssrl_vx_w helper_vssrl_vx_w_riscv64 +#define helper_vssub_vv_b helper_vssub_vv_b_riscv64 +#define helper_vssub_vv_d helper_vssub_vv_d_riscv64 +#define helper_vssub_vv_h helper_vssub_vv_h_riscv64 +#define helper_vssub_vv_w helper_vssub_vv_w_riscv64 +#define 
helper_vssub_vx_b helper_vssub_vx_b_riscv64 +#define helper_vssub_vx_d helper_vssub_vx_d_riscv64 +#define helper_vssub_vx_h helper_vssub_vx_h_riscv64 +#define helper_vssub_vx_w helper_vssub_vx_w_riscv64 +#define helper_vssubu_vv_b helper_vssubu_vv_b_riscv64 +#define helper_vssubu_vv_d helper_vssubu_vv_d_riscv64 +#define helper_vssubu_vv_h helper_vssubu_vv_h_riscv64 +#define helper_vssubu_vv_w helper_vssubu_vv_w_riscv64 +#define helper_vssubu_vx_b helper_vssubu_vx_b_riscv64 +#define helper_vssubu_vx_d helper_vssubu_vx_d_riscv64 +#define helper_vssubu_vx_h helper_vssubu_vx_h_riscv64 +#define helper_vssubu_vx_w helper_vssubu_vx_w_riscv64 +#define helper_vssw_v_d helper_vssw_v_d_riscv64 +#define helper_vssw_v_w helper_vssw_v_w_riscv64 +#define helper_vsub_vv_b helper_vsub_vv_b_riscv64 +#define helper_vsub_vv_d helper_vsub_vv_d_riscv64 +#define helper_vsub_vv_h helper_vsub_vv_h_riscv64 +#define helper_vsub_vv_w helper_vsub_vv_w_riscv64 +#define helper_vsub_vx_b helper_vsub_vx_b_riscv64 +#define helper_vsub_vx_d helper_vsub_vx_d_riscv64 +#define helper_vsub_vx_h helper_vsub_vx_h_riscv64 +#define helper_vsub_vx_w helper_vsub_vx_w_riscv64 +#define helper_vsw_v_d helper_vsw_v_d_riscv64 +#define helper_vsw_v_d_mask helper_vsw_v_d_mask_riscv64 +#define helper_vsw_v_w helper_vsw_v_w_riscv64 +#define helper_vsw_v_w_mask helper_vsw_v_w_mask_riscv64 +#define helper_vsxb_v_b helper_vsxb_v_b_riscv64 +#define helper_vsxb_v_d helper_vsxb_v_d_riscv64 +#define helper_vsxb_v_h helper_vsxb_v_h_riscv64 +#define helper_vsxb_v_w helper_vsxb_v_w_riscv64 +#define helper_vsxe_v_b helper_vsxe_v_b_riscv64 +#define helper_vsxe_v_d helper_vsxe_v_d_riscv64 +#define helper_vsxe_v_h helper_vsxe_v_h_riscv64 +#define helper_vsxe_v_w helper_vsxe_v_w_riscv64 +#define helper_vsxh_v_d helper_vsxh_v_d_riscv64 +#define helper_vsxh_v_h helper_vsxh_v_h_riscv64 +#define helper_vsxh_v_w helper_vsxh_v_w_riscv64 +#define helper_vsxw_v_d helper_vsxw_v_d_riscv64 +#define helper_vsxw_v_w helper_vsxw_v_w_riscv64 +#define helper_vwadd_vv_b helper_vwadd_vv_b_riscv64 +#define helper_vwadd_vv_h helper_vwadd_vv_h_riscv64 +#define helper_vwadd_vv_w helper_vwadd_vv_w_riscv64 +#define helper_vwadd_vx_b helper_vwadd_vx_b_riscv64 +#define helper_vwadd_vx_h helper_vwadd_vx_h_riscv64 +#define helper_vwadd_vx_w helper_vwadd_vx_w_riscv64 +#define helper_vwadd_wv_b helper_vwadd_wv_b_riscv64 +#define helper_vwadd_wv_h helper_vwadd_wv_h_riscv64 +#define helper_vwadd_wv_w helper_vwadd_wv_w_riscv64 +#define helper_vwadd_wx_b helper_vwadd_wx_b_riscv64 +#define helper_vwadd_wx_h helper_vwadd_wx_h_riscv64 +#define helper_vwadd_wx_w helper_vwadd_wx_w_riscv64 +#define helper_vwaddu_vv_b helper_vwaddu_vv_b_riscv64 +#define helper_vwaddu_vv_h helper_vwaddu_vv_h_riscv64 +#define helper_vwaddu_vv_w helper_vwaddu_vv_w_riscv64 +#define helper_vwaddu_vx_b helper_vwaddu_vx_b_riscv64 +#define helper_vwaddu_vx_h helper_vwaddu_vx_h_riscv64 +#define helper_vwaddu_vx_w helper_vwaddu_vx_w_riscv64 +#define helper_vwaddu_wv_b helper_vwaddu_wv_b_riscv64 +#define helper_vwaddu_wv_h helper_vwaddu_wv_h_riscv64 +#define helper_vwaddu_wv_w helper_vwaddu_wv_w_riscv64 +#define helper_vwaddu_wx_b helper_vwaddu_wx_b_riscv64 +#define helper_vwaddu_wx_h helper_vwaddu_wx_h_riscv64 +#define helper_vwaddu_wx_w helper_vwaddu_wx_w_riscv64 +#define helper_vwmacc_vv_b helper_vwmacc_vv_b_riscv64 +#define helper_vwmacc_vv_h helper_vwmacc_vv_h_riscv64 +#define helper_vwmacc_vv_w helper_vwmacc_vv_w_riscv64 +#define helper_vwmacc_vx_b helper_vwmacc_vx_b_riscv64 +#define helper_vwmacc_vx_h 
helper_vwmacc_vx_h_riscv64 +#define helper_vwmacc_vx_w helper_vwmacc_vx_w_riscv64 +#define helper_vwmaccsu_vv_b helper_vwmaccsu_vv_b_riscv64 +#define helper_vwmaccsu_vv_h helper_vwmaccsu_vv_h_riscv64 +#define helper_vwmaccsu_vv_w helper_vwmaccsu_vv_w_riscv64 +#define helper_vwmaccsu_vx_b helper_vwmaccsu_vx_b_riscv64 +#define helper_vwmaccsu_vx_h helper_vwmaccsu_vx_h_riscv64 +#define helper_vwmaccsu_vx_w helper_vwmaccsu_vx_w_riscv64 +#define helper_vwmaccu_vv_b helper_vwmaccu_vv_b_riscv64 +#define helper_vwmaccu_vv_h helper_vwmaccu_vv_h_riscv64 +#define helper_vwmaccu_vv_w helper_vwmaccu_vv_w_riscv64 +#define helper_vwmaccu_vx_b helper_vwmaccu_vx_b_riscv64 +#define helper_vwmaccu_vx_h helper_vwmaccu_vx_h_riscv64 +#define helper_vwmaccu_vx_w helper_vwmaccu_vx_w_riscv64 +#define helper_vwmaccus_vx_b helper_vwmaccus_vx_b_riscv64 +#define helper_vwmaccus_vx_h helper_vwmaccus_vx_h_riscv64 +#define helper_vwmaccus_vx_w helper_vwmaccus_vx_w_riscv64 +#define helper_vwmul_vv_b helper_vwmul_vv_b_riscv64 +#define helper_vwmul_vv_h helper_vwmul_vv_h_riscv64 +#define helper_vwmul_vv_w helper_vwmul_vv_w_riscv64 +#define helper_vwmul_vx_b helper_vwmul_vx_b_riscv64 +#define helper_vwmul_vx_h helper_vwmul_vx_h_riscv64 +#define helper_vwmul_vx_w helper_vwmul_vx_w_riscv64 +#define helper_vwmulsu_vv_b helper_vwmulsu_vv_b_riscv64 +#define helper_vwmulsu_vv_h helper_vwmulsu_vv_h_riscv64 +#define helper_vwmulsu_vv_w helper_vwmulsu_vv_w_riscv64 +#define helper_vwmulsu_vx_b helper_vwmulsu_vx_b_riscv64 +#define helper_vwmulsu_vx_h helper_vwmulsu_vx_h_riscv64 +#define helper_vwmulsu_vx_w helper_vwmulsu_vx_w_riscv64 +#define helper_vwmulu_vv_b helper_vwmulu_vv_b_riscv64 +#define helper_vwmulu_vv_h helper_vwmulu_vv_h_riscv64 +#define helper_vwmulu_vv_w helper_vwmulu_vv_w_riscv64 +#define helper_vwmulu_vx_b helper_vwmulu_vx_b_riscv64 +#define helper_vwmulu_vx_h helper_vwmulu_vx_h_riscv64 +#define helper_vwmulu_vx_w helper_vwmulu_vx_w_riscv64 +#define helper_vwredsum_vs_b helper_vwredsum_vs_b_riscv64 +#define helper_vwredsum_vs_h helper_vwredsum_vs_h_riscv64 +#define helper_vwredsum_vs_w helper_vwredsum_vs_w_riscv64 +#define helper_vwredsumu_vs_b helper_vwredsumu_vs_b_riscv64 +#define helper_vwredsumu_vs_h helper_vwredsumu_vs_h_riscv64 +#define helper_vwredsumu_vs_w helper_vwredsumu_vs_w_riscv64 +#define helper_vwsmacc_vv_b helper_vwsmacc_vv_b_riscv64 +#define helper_vwsmacc_vv_h helper_vwsmacc_vv_h_riscv64 +#define helper_vwsmacc_vv_w helper_vwsmacc_vv_w_riscv64 +#define helper_vwsmacc_vx_b helper_vwsmacc_vx_b_riscv64 +#define helper_vwsmacc_vx_h helper_vwsmacc_vx_h_riscv64 +#define helper_vwsmacc_vx_w helper_vwsmacc_vx_w_riscv64 +#define helper_vwsmaccsu_vv_b helper_vwsmaccsu_vv_b_riscv64 +#define helper_vwsmaccsu_vv_h helper_vwsmaccsu_vv_h_riscv64 +#define helper_vwsmaccsu_vv_w helper_vwsmaccsu_vv_w_riscv64 +#define helper_vwsmaccsu_vx_b helper_vwsmaccsu_vx_b_riscv64 +#define helper_vwsmaccsu_vx_h helper_vwsmaccsu_vx_h_riscv64 +#define helper_vwsmaccsu_vx_w helper_vwsmaccsu_vx_w_riscv64 +#define helper_vwsmaccu_vv_b helper_vwsmaccu_vv_b_riscv64 +#define helper_vwsmaccu_vv_h helper_vwsmaccu_vv_h_riscv64 +#define helper_vwsmaccu_vv_w helper_vwsmaccu_vv_w_riscv64 +#define helper_vwsmaccu_vx_b helper_vwsmaccu_vx_b_riscv64 +#define helper_vwsmaccu_vx_h helper_vwsmaccu_vx_h_riscv64 +#define helper_vwsmaccu_vx_w helper_vwsmaccu_vx_w_riscv64 +#define helper_vwsmaccus_vx_b helper_vwsmaccus_vx_b_riscv64 +#define helper_vwsmaccus_vx_h helper_vwsmaccus_vx_h_riscv64 +#define helper_vwsmaccus_vx_w helper_vwsmaccus_vx_w_riscv64 
+#define helper_vwsub_vv_b helper_vwsub_vv_b_riscv64 +#define helper_vwsub_vv_h helper_vwsub_vv_h_riscv64 +#define helper_vwsub_vv_w helper_vwsub_vv_w_riscv64 +#define helper_vwsub_vx_b helper_vwsub_vx_b_riscv64 +#define helper_vwsub_vx_h helper_vwsub_vx_h_riscv64 +#define helper_vwsub_vx_w helper_vwsub_vx_w_riscv64 +#define helper_vwsub_wv_b helper_vwsub_wv_b_riscv64 +#define helper_vwsub_wv_h helper_vwsub_wv_h_riscv64 +#define helper_vwsub_wv_w helper_vwsub_wv_w_riscv64 +#define helper_vwsub_wx_b helper_vwsub_wx_b_riscv64 +#define helper_vwsub_wx_h helper_vwsub_wx_h_riscv64 +#define helper_vwsub_wx_w helper_vwsub_wx_w_riscv64 +#define helper_vwsubu_vv_b helper_vwsubu_vv_b_riscv64 +#define helper_vwsubu_vv_h helper_vwsubu_vv_h_riscv64 +#define helper_vwsubu_vv_w helper_vwsubu_vv_w_riscv64 +#define helper_vwsubu_vx_b helper_vwsubu_vx_b_riscv64 +#define helper_vwsubu_vx_h helper_vwsubu_vx_h_riscv64 +#define helper_vwsubu_vx_w helper_vwsubu_vx_w_riscv64 +#define helper_vwsubu_wv_b helper_vwsubu_wv_b_riscv64 +#define helper_vwsubu_wv_h helper_vwsubu_wv_h_riscv64 +#define helper_vwsubu_wv_w helper_vwsubu_wv_w_riscv64 +#define helper_vwsubu_wx_b helper_vwsubu_wx_b_riscv64 +#define helper_vwsubu_wx_h helper_vwsubu_wx_h_riscv64 +#define helper_vwsubu_wx_w helper_vwsubu_wx_w_riscv64 +#define helper_vxor_vv_b helper_vxor_vv_b_riscv64 +#define helper_vxor_vv_d helper_vxor_vv_d_riscv64 +#define helper_vxor_vv_h helper_vxor_vv_h_riscv64 +#define helper_vxor_vv_w helper_vxor_vv_w_riscv64 +#define helper_vxor_vx_b helper_vxor_vx_b_riscv64 +#define helper_vxor_vx_d helper_vxor_vx_d_riscv64 +#define helper_vxor_vx_h helper_vxor_vx_h_riscv64 +#define helper_vxor_vx_w helper_vxor_vx_w_riscv64 #endif diff --git a/qemu/s390x.h b/qemu/s390x.h index 1906872bc6..d777300ed6 100644 --- a/qemu/s390x.h +++ b/qemu/s390x.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_s390x #define tcg_gen_shr_i64 tcg_gen_shr_i64_s390x #define tcg_gen_st_i64 tcg_gen_st_i64_s390x +#define tcg_gen_add_i64 tcg_gen_add_i64_s390x +#define tcg_gen_sub_i64 tcg_gen_sub_i64_s390x #define tcg_gen_xor_i64 tcg_gen_xor_i64_s390x +#define tcg_gen_neg_i64 tcg_gen_neg_i64_s390x #define cpu_icount_to_ns cpu_icount_to_ns_s390x #define cpu_is_stopped cpu_is_stopped_s390x #define cpu_get_ticks cpu_get_ticks_s390x @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_s390x #define floatx80_mul floatx80_mul_s390x #define floatx80_div floatx80_div_s390x +#define floatx80_modrem floatx80_modrem_s390x +#define floatx80_mod floatx80_mod_s390x #define floatx80_rem floatx80_rem_s390x #define floatx80_sqrt floatx80_sqrt_s390x #define floatx80_eq floatx80_eq_s390x @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_s390x #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_s390x #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_s390x +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_s390x #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_s390x #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_s390x #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_s390x @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_s390x #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_s390x #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_s390x +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_s390x +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_s390x #define tcg_gen_gvec_sari tcg_gen_gvec_sari_s390x +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_s390x +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_s390x #define tcg_gen_gvec_shls 
tcg_gen_gvec_shls_s390x #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_s390x #define tcg_gen_gvec_sars tcg_gen_gvec_sars_s390x +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_s390x #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_s390x #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_s390x #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_s390x +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_s390x +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_s390x #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_s390x #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_s390x #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_s390x @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_s390x #define tcg_gen_shri_vec tcg_gen_shri_vec_s390x #define tcg_gen_sari_vec tcg_gen_sari_vec_s390x +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_s390x +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_s390x #define tcg_gen_cmp_vec tcg_gen_cmp_vec_s390x #define tcg_gen_add_vec tcg_gen_add_vec_s390x #define tcg_gen_sub_vec tcg_gen_sub_vec_s390x @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_s390x #define tcg_gen_shrv_vec tcg_gen_shrv_vec_s390x #define tcg_gen_sarv_vec tcg_gen_sarv_vec_s390x +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_s390x +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_s390x #define tcg_gen_shls_vec tcg_gen_shls_vec_s390x #define tcg_gen_shrs_vec tcg_gen_shrs_vec_s390x #define tcg_gen_sars_vec tcg_gen_sars_vec_s390x +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_s390x #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_s390x #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_s390x #define tb_htable_lookup tb_htable_lookup_s390x @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_s390x #define cpu_loop_exit_atomic cpu_loop_exit_atomic_s390x #define tlb_init tlb_init_s390x +#define tlb_destroy tlb_destroy_s390x #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_s390x #define tlb_flush tlb_flush_s390x #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_s390x @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_s390x #define get_page_addr_code_hostp get_page_addr_code_hostp_s390x #define get_page_addr_code get_page_addr_code_s390x +#define probe_access_flags probe_access_flags_s390x #define probe_access probe_access_s390x #define tlb_vaddr_to_host tlb_vaddr_to_host_s390x #define helper_ret_ldub_mmu helper_ret_ldub_mmu_s390x @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_s390x #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_s390x #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_s390x -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_s390x -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_s390x -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_s390x -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_s390x +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_s390x +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_s390x +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_s390x +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_s390x +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_s390x +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_s390x +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_s390x +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_s390x #define cpu_ldub_data_ra cpu_ldub_data_ra_s390x #define cpu_ldsb_data_ra cpu_ldsb_data_ra_s390x -#define cpu_lduw_data_ra cpu_lduw_data_ra_s390x -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_s390x -#define cpu_ldl_data_ra cpu_ldl_data_ra_s390x -#define cpu_ldq_data_ra cpu_ldq_data_ra_s390x +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_s390x +#define 
cpu_lduw_le_data_ra cpu_lduw_le_data_ra_s390x +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_s390x +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_s390x +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_s390x +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_s390x +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_s390x +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_s390x #define cpu_ldub_data cpu_ldub_data_s390x #define cpu_ldsb_data cpu_ldsb_data_s390x -#define cpu_lduw_data cpu_lduw_data_s390x -#define cpu_ldsw_data cpu_ldsw_data_s390x -#define cpu_ldl_data cpu_ldl_data_s390x -#define cpu_ldq_data cpu_ldq_data_s390x +#define cpu_lduw_be_data cpu_lduw_be_data_s390x +#define cpu_lduw_le_data cpu_lduw_le_data_s390x +#define cpu_ldsw_be_data cpu_ldsw_be_data_s390x +#define cpu_ldsw_le_data cpu_ldsw_le_data_s390x +#define cpu_ldl_be_data cpu_ldl_be_data_s390x +#define cpu_ldl_le_data cpu_ldl_le_data_s390x +#define cpu_ldq_le_data cpu_ldq_le_data_s390x +#define cpu_ldq_be_data cpu_ldq_be_data_s390x #define helper_ret_stb_mmu helper_ret_stb_mmu_s390x #define helper_le_stw_mmu helper_le_stw_mmu_s390x #define helper_be_stw_mmu helper_be_stw_mmu_s390x @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_s390x #define helper_be_stq_mmu helper_be_stq_mmu_s390x #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_s390x -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_s390x -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_s390x -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_s390x +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_s390x +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_s390x +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_s390x +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_s390x +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_s390x +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_s390x #define cpu_stb_data_ra cpu_stb_data_ra_s390x -#define cpu_stw_data_ra cpu_stw_data_ra_s390x -#define cpu_stl_data_ra cpu_stl_data_ra_s390x -#define cpu_stq_data_ra cpu_stq_data_ra_s390x +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_s390x +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_s390x +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_s390x +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_s390x +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_s390x +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_s390x #define cpu_stb_data cpu_stb_data_s390x -#define cpu_stw_data cpu_stw_data_s390x -#define cpu_stl_data cpu_stl_data_s390x -#define cpu_stq_data cpu_stq_data_s390x +#define cpu_stw_be_data cpu_stw_be_data_s390x +#define cpu_stw_le_data cpu_stw_le_data_s390x +#define cpu_stl_be_data cpu_stl_be_data_s390x +#define cpu_stl_le_data cpu_stl_le_data_s390x +#define cpu_stq_be_data cpu_stq_be_data_s390x +#define cpu_stq_le_data cpu_stq_le_data_s390x #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_s390x #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_s390x #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_s390x @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_s390x #define cpu_ldl_code cpu_ldl_code_s390x #define cpu_ldq_code cpu_ldq_code_s390x +#define cpu_interrupt_handler cpu_interrupt_handler_s390x #define helper_div_i32 helper_div_i32_s390x #define helper_rem_i32 helper_rem_i32_s390x #define helper_divu_i32 helper_divu_i32_s390x @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_s390x #define helper_gvec_sar32i helper_gvec_sar32i_s390x #define helper_gvec_sar64i helper_gvec_sar64i_s390x +#define helper_gvec_rotl8i helper_gvec_rotl8i_s390x +#define 
helper_gvec_rotl16i helper_gvec_rotl16i_s390x +#define helper_gvec_rotl32i helper_gvec_rotl32i_s390x +#define helper_gvec_rotl64i helper_gvec_rotl64i_s390x #define helper_gvec_shl8v helper_gvec_shl8v_s390x #define helper_gvec_shl16v helper_gvec_shl16v_s390x #define helper_gvec_shl32v helper_gvec_shl32v_s390x @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_s390x #define helper_gvec_sar32v helper_gvec_sar32v_s390x #define helper_gvec_sar64v helper_gvec_sar64v_s390x +#define helper_gvec_rotl8v helper_gvec_rotl8v_s390x +#define helper_gvec_rotl16v helper_gvec_rotl16v_s390x +#define helper_gvec_rotl32v helper_gvec_rotl32v_s390x +#define helper_gvec_rotl64v helper_gvec_rotl64v_s390x +#define helper_gvec_rotr8v helper_gvec_rotr8v_s390x +#define helper_gvec_rotr16v helper_gvec_rotr16v_s390x +#define helper_gvec_rotr32v helper_gvec_rotr32v_s390x +#define helper_gvec_rotr64v helper_gvec_rotr64v_s390x #define helper_gvec_eq8 helper_gvec_eq8_s390x #define helper_gvec_ne8 helper_gvec_ne8_s390x #define helper_gvec_lt8 helper_gvec_lt8_s390x diff --git a/qemu/sparc.h b/qemu/sparc.h index 32be40ab68..aeee045594 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_sparc #define tcg_gen_shr_i64 tcg_gen_shr_i64_sparc #define tcg_gen_st_i64 tcg_gen_st_i64_sparc +#define tcg_gen_add_i64 tcg_gen_add_i64_sparc +#define tcg_gen_sub_i64 tcg_gen_sub_i64_sparc #define tcg_gen_xor_i64 tcg_gen_xor_i64_sparc +#define tcg_gen_neg_i64 tcg_gen_neg_i64_sparc #define cpu_icount_to_ns cpu_icount_to_ns_sparc #define cpu_is_stopped cpu_is_stopped_sparc #define cpu_get_ticks cpu_get_ticks_sparc @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_sparc #define floatx80_mul floatx80_mul_sparc #define floatx80_div floatx80_div_sparc +#define floatx80_modrem floatx80_modrem_sparc +#define floatx80_mod floatx80_mod_sparc #define floatx80_rem floatx80_rem_sparc #define floatx80_sqrt floatx80_sqrt_sparc #define floatx80_eq floatx80_eq_sparc @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_sparc #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_sparc #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_sparc +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_sparc #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_sparc #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_sparc #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_sparc @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_sparc #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_sparc #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_sparc +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_sparc +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_sparc #define tcg_gen_gvec_sari tcg_gen_gvec_sari_sparc +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_sparc +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_sparc #define tcg_gen_gvec_shls tcg_gen_gvec_shls_sparc #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_sparc #define tcg_gen_gvec_sars tcg_gen_gvec_sars_sparc +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_sparc #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_sparc #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_sparc #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_sparc +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_sparc +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_sparc #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_sparc #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_sparc #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_sparc @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec 
tcg_gen_shli_vec_sparc #define tcg_gen_shri_vec tcg_gen_shri_vec_sparc #define tcg_gen_sari_vec tcg_gen_sari_vec_sparc +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_sparc +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_sparc #define tcg_gen_cmp_vec tcg_gen_cmp_vec_sparc #define tcg_gen_add_vec tcg_gen_add_vec_sparc #define tcg_gen_sub_vec tcg_gen_sub_vec_sparc @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_sparc #define tcg_gen_shrv_vec tcg_gen_shrv_vec_sparc #define tcg_gen_sarv_vec tcg_gen_sarv_vec_sparc +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_sparc +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_sparc #define tcg_gen_shls_vec tcg_gen_shls_vec_sparc #define tcg_gen_shrs_vec tcg_gen_shrs_vec_sparc #define tcg_gen_sars_vec tcg_gen_sars_vec_sparc +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_sparc #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_sparc #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_sparc #define tb_htable_lookup tb_htable_lookup_sparc @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_sparc #define cpu_loop_exit_atomic cpu_loop_exit_atomic_sparc #define tlb_init tlb_init_sparc +#define tlb_destroy tlb_destroy_sparc #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_sparc #define tlb_flush tlb_flush_sparc #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_sparc @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_sparc #define get_page_addr_code_hostp get_page_addr_code_hostp_sparc #define get_page_addr_code get_page_addr_code_sparc +#define probe_access_flags probe_access_flags_sparc #define probe_access probe_access_sparc #define tlb_vaddr_to_host tlb_vaddr_to_host_sparc #define helper_ret_ldub_mmu helper_ret_ldub_mmu_sparc @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_sparc #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_sparc #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_sparc -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_sparc -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_sparc -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_sparc -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_sparc +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_sparc +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_sparc +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_sparc +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_sparc +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_sparc +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_sparc +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_sparc +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_sparc #define cpu_ldub_data_ra cpu_ldub_data_ra_sparc #define cpu_ldsb_data_ra cpu_ldsb_data_ra_sparc -#define cpu_lduw_data_ra cpu_lduw_data_ra_sparc -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_sparc -#define cpu_ldl_data_ra cpu_ldl_data_ra_sparc -#define cpu_ldq_data_ra cpu_ldq_data_ra_sparc +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_sparc +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_sparc +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_sparc +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_sparc +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_sparc +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_sparc +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_sparc +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_sparc #define cpu_ldub_data cpu_ldub_data_sparc #define cpu_ldsb_data cpu_ldsb_data_sparc -#define cpu_lduw_data cpu_lduw_data_sparc -#define cpu_ldsw_data cpu_ldsw_data_sparc -#define cpu_ldl_data cpu_ldl_data_sparc -#define cpu_ldq_data cpu_ldq_data_sparc +#define 
cpu_lduw_be_data cpu_lduw_be_data_sparc +#define cpu_lduw_le_data cpu_lduw_le_data_sparc +#define cpu_ldsw_be_data cpu_ldsw_be_data_sparc +#define cpu_ldsw_le_data cpu_ldsw_le_data_sparc +#define cpu_ldl_be_data cpu_ldl_be_data_sparc +#define cpu_ldl_le_data cpu_ldl_le_data_sparc +#define cpu_ldq_le_data cpu_ldq_le_data_sparc +#define cpu_ldq_be_data cpu_ldq_be_data_sparc #define helper_ret_stb_mmu helper_ret_stb_mmu_sparc #define helper_le_stw_mmu helper_le_stw_mmu_sparc #define helper_be_stw_mmu helper_be_stw_mmu_sparc @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_sparc #define helper_be_stq_mmu helper_be_stq_mmu_sparc #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_sparc -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_sparc -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_sparc -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_sparc +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_sparc +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_sparc +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_sparc +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_sparc +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_sparc +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_sparc #define cpu_stb_data_ra cpu_stb_data_ra_sparc -#define cpu_stw_data_ra cpu_stw_data_ra_sparc -#define cpu_stl_data_ra cpu_stl_data_ra_sparc -#define cpu_stq_data_ra cpu_stq_data_ra_sparc +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_sparc +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_sparc +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_sparc +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_sparc +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_sparc +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_sparc #define cpu_stb_data cpu_stb_data_sparc -#define cpu_stw_data cpu_stw_data_sparc -#define cpu_stl_data cpu_stl_data_sparc -#define cpu_stq_data cpu_stq_data_sparc +#define cpu_stw_be_data cpu_stw_be_data_sparc +#define cpu_stw_le_data cpu_stw_le_data_sparc +#define cpu_stl_be_data cpu_stl_be_data_sparc +#define cpu_stl_le_data cpu_stl_le_data_sparc +#define cpu_stq_be_data cpu_stq_be_data_sparc +#define cpu_stq_le_data cpu_stq_le_data_sparc #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_sparc #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_sparc #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_sparc @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_sparc #define cpu_ldl_code cpu_ldl_code_sparc #define cpu_ldq_code cpu_ldq_code_sparc +#define cpu_interrupt_handler cpu_interrupt_handler_sparc #define helper_div_i32 helper_div_i32_sparc #define helper_rem_i32 helper_rem_i32_sparc #define helper_divu_i32 helper_divu_i32_sparc @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_sparc #define helper_gvec_sar32i helper_gvec_sar32i_sparc #define helper_gvec_sar64i helper_gvec_sar64i_sparc +#define helper_gvec_rotl8i helper_gvec_rotl8i_sparc +#define helper_gvec_rotl16i helper_gvec_rotl16i_sparc +#define helper_gvec_rotl32i helper_gvec_rotl32i_sparc +#define helper_gvec_rotl64i helper_gvec_rotl64i_sparc #define helper_gvec_shl8v helper_gvec_shl8v_sparc #define helper_gvec_shl16v helper_gvec_shl16v_sparc #define helper_gvec_shl32v helper_gvec_shl32v_sparc @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_sparc #define helper_gvec_sar32v helper_gvec_sar32v_sparc #define helper_gvec_sar64v helper_gvec_sar64v_sparc +#define helper_gvec_rotl8v helper_gvec_rotl8v_sparc +#define helper_gvec_rotl16v helper_gvec_rotl16v_sparc +#define helper_gvec_rotl32v 
helper_gvec_rotl32v_sparc +#define helper_gvec_rotl64v helper_gvec_rotl64v_sparc +#define helper_gvec_rotr8v helper_gvec_rotr8v_sparc +#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc +#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc +#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc #define helper_gvec_eq8 helper_gvec_eq8_sparc #define helper_gvec_ne8 helper_gvec_ne8_sparc #define helper_gvec_lt8 helper_gvec_lt8_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index c9f6f2fcbd..f12e6380cf 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_sparc64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_sparc64 #define tcg_gen_st_i64 tcg_gen_st_i64_sparc64 +#define tcg_gen_add_i64 tcg_gen_add_i64_sparc64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_sparc64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_sparc64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_sparc64 #define cpu_icount_to_ns cpu_icount_to_ns_sparc64 #define cpu_is_stopped cpu_is_stopped_sparc64 #define cpu_get_ticks cpu_get_ticks_sparc64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_sparc64 #define floatx80_mul floatx80_mul_sparc64 #define floatx80_div floatx80_div_sparc64 +#define floatx80_modrem floatx80_modrem_sparc64 +#define floatx80_mod floatx80_mod_sparc64 #define floatx80_rem floatx80_rem_sparc64 #define floatx80_sqrt floatx80_sqrt_sparc64 #define floatx80_eq floatx80_eq_sparc64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_sparc64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_sparc64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_sparc64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_sparc64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_sparc64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_sparc64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_sparc64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_sparc64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_sparc64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_sparc64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_sparc64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_sparc64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_sparc64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_sparc64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_sparc64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_sparc64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_sparc64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_sparc64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_sparc64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_sparc64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_sparc64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_sparc64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_sparc64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_sparc64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_sparc64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_sparc64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_sparc64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_sparc64 #define tcg_gen_shri_vec tcg_gen_shri_vec_sparc64 #define tcg_gen_sari_vec tcg_gen_sari_vec_sparc64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_sparc64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_sparc64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_sparc64 #define tcg_gen_add_vec tcg_gen_add_vec_sparc64 #define tcg_gen_sub_vec tcg_gen_sub_vec_sparc64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_sparc64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_sparc64 #define tcg_gen_sarv_vec 
tcg_gen_sarv_vec_sparc64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_sparc64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_sparc64 #define tcg_gen_shls_vec tcg_gen_shls_vec_sparc64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_sparc64 #define tcg_gen_sars_vec tcg_gen_sars_vec_sparc64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_sparc64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_sparc64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_sparc64 #define tb_htable_lookup tb_htable_lookup_sparc64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_sparc64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_sparc64 #define tlb_init tlb_init_sparc64 +#define tlb_destroy tlb_destroy_sparc64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_sparc64 #define tlb_flush tlb_flush_sparc64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_sparc64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_sparc64 #define get_page_addr_code_hostp get_page_addr_code_hostp_sparc64 #define get_page_addr_code get_page_addr_code_sparc64 +#define probe_access_flags probe_access_flags_sparc64 #define probe_access probe_access_sparc64 #define tlb_vaddr_to_host tlb_vaddr_to_host_sparc64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_sparc64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_sparc64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_sparc64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_sparc64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_sparc64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_sparc64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_sparc64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_sparc64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_sparc64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_sparc64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_sparc64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_sparc64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_sparc64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_sparc64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_sparc64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_sparc64 #define cpu_ldub_data_ra cpu_ldub_data_ra_sparc64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_sparc64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_sparc64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_sparc64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_sparc64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_sparc64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_sparc64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_sparc64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_sparc64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_sparc64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_sparc64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_sparc64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_sparc64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_sparc64 #define cpu_ldub_data cpu_ldub_data_sparc64 #define cpu_ldsb_data cpu_ldsb_data_sparc64 -#define cpu_lduw_data cpu_lduw_data_sparc64 -#define cpu_ldsw_data cpu_ldsw_data_sparc64 -#define cpu_ldl_data cpu_ldl_data_sparc64 -#define cpu_ldq_data cpu_ldq_data_sparc64 +#define cpu_lduw_be_data cpu_lduw_be_data_sparc64 +#define cpu_lduw_le_data cpu_lduw_le_data_sparc64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_sparc64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_sparc64 +#define cpu_ldl_be_data cpu_ldl_be_data_sparc64 +#define cpu_ldl_le_data cpu_ldl_le_data_sparc64 +#define cpu_ldq_le_data cpu_ldq_le_data_sparc64 +#define cpu_ldq_be_data cpu_ldq_be_data_sparc64 
#define helper_ret_stb_mmu helper_ret_stb_mmu_sparc64 #define helper_le_stw_mmu helper_le_stw_mmu_sparc64 #define helper_be_stw_mmu helper_be_stw_mmu_sparc64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_sparc64 #define helper_be_stq_mmu helper_be_stq_mmu_sparc64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_sparc64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_sparc64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_sparc64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_sparc64 +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_sparc64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_sparc64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_sparc64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_sparc64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_sparc64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_sparc64 #define cpu_stb_data_ra cpu_stb_data_ra_sparc64 -#define cpu_stw_data_ra cpu_stw_data_ra_sparc64 -#define cpu_stl_data_ra cpu_stl_data_ra_sparc64 -#define cpu_stq_data_ra cpu_stq_data_ra_sparc64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_sparc64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_sparc64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_sparc64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_sparc64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_sparc64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_sparc64 #define cpu_stb_data cpu_stb_data_sparc64 -#define cpu_stw_data cpu_stw_data_sparc64 -#define cpu_stl_data cpu_stl_data_sparc64 -#define cpu_stq_data cpu_stq_data_sparc64 +#define cpu_stw_be_data cpu_stw_be_data_sparc64 +#define cpu_stw_le_data cpu_stw_le_data_sparc64 +#define cpu_stl_be_data cpu_stl_be_data_sparc64 +#define cpu_stl_le_data cpu_stl_le_data_sparc64 +#define cpu_stq_be_data cpu_stq_be_data_sparc64 +#define cpu_stq_le_data cpu_stq_le_data_sparc64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_sparc64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_sparc64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_sparc64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_sparc64 #define cpu_ldl_code cpu_ldl_code_sparc64 #define cpu_ldq_code cpu_ldq_code_sparc64 +#define cpu_interrupt_handler cpu_interrupt_handler_sparc64 #define helper_div_i32 helper_div_i32_sparc64 #define helper_rem_i32 helper_rem_i32_sparc64 #define helper_divu_i32 helper_divu_i32_sparc64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_sparc64 #define helper_gvec_sar32i helper_gvec_sar32i_sparc64 #define helper_gvec_sar64i helper_gvec_sar64i_sparc64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_sparc64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_sparc64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_sparc64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_sparc64 #define helper_gvec_shl8v helper_gvec_shl8v_sparc64 #define helper_gvec_shl16v helper_gvec_shl16v_sparc64 #define helper_gvec_shl32v helper_gvec_shl32v_sparc64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_sparc64 #define helper_gvec_sar32v helper_gvec_sar32v_sparc64 #define helper_gvec_sar64v helper_gvec_sar64v_sparc64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_sparc64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_sparc64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_sparc64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_sparc64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_sparc64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc64 
+#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc64 #define helper_gvec_eq8 helper_gvec_eq8_sparc64 #define helper_gvec_ne8 helper_gvec_ne8_sparc64 #define helper_gvec_lt8 helper_gvec_lt8_sparc64 diff --git a/qemu/target/arm/backup.c b/qemu/target/arm/backup.c new file mode 100644 index 0000000000..5c7a651f71 --- /dev/null +++ b/qemu/target/arm/backup.c @@ -0,0 +1,5431 @@ +/* + * ARM SVE Operations + * + * Copyright (c) 2018 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" +#include "fpu/softfloat.h" +#include "tcg/tcg.h" + + +/* Note that vector data is stored in host-endian 64-bit chunks, + so addressing units smaller than that needs a host-endian fixup. */ +#ifdef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H1_2(x) ((x) ^ 6) +#define H1_4(x) ((x) ^ 4) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#else +#define H1(x) (x) +#define H1_2(x) (x) +#define H1_4(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#endif + +/* Return a value for NZCV as per the ARM PredTest pseudofunction. + * + * The return value has bit 31 set if N is set, bit 1 set if Z is clear, + * and bit 0 set if C is set. Compare the definitions of these variables + * within CPUARMState. + */ + +/* For no G bits set, NZCV = C. */ +#define PREDTEST_INIT 1 + +/* This is an iterative function, called for each Pd and Pg word + * moving forward. + */ +static uint32_t iter_predtest_fwd(uint64_t d, uint64_t g, uint32_t flags) +{ + if (likely(g)) { + /* Compute N from first D & G. + Use bit 2 to signal first G bit seen. */ + if (!(flags & 4)) { +#ifdef _MSC_VER + flags |= ((d & (g & (0 - g))) != 0) << 31; +#else + flags |= ((d & (g & -g)) != 0) << 31; +#endif + flags |= 4; + } + + /* Accumulate Z from each D & G. */ + flags |= ((d & g) != 0) << 1; + + /* Compute C from last !(D & G). Replace previous. */ + flags = deposit32(flags, 0, 1, (d & pow2floor(g)) == 0); + } + return flags; +} + +/* This is an iterative function, called for each Pd and Pg word + * moving backward. + */ +static uint32_t iter_predtest_bwd(uint64_t d, uint64_t g, uint32_t flags) +{ + if (likely(g)) { + /* Compute C from first (i.e last) !(D & G). + Use bit 2 to signal first G bit seen. */ + if (!(flags & 4)) { + flags += 4 - 1; /* add bit 2, subtract C from PREDTEST_INIT */ + flags |= (d & pow2floor(g)) == 0; + } + + /* Accumulate Z from each D & G. */ + flags |= ((d & g) != 0) << 1; + + /* Compute N from last (i.e first) D & G. Replace previous. */ +#ifdef _MSC_VER + flags = deposit32(flags, 31, 1, (d & (g & (0 - g))) != 0); +#else + flags = deposit32(flags, 31, 1, (d & (g & -g)) != 0); +#endif + } + return flags; +} + +/* The same for a single word predicate. 
*/ +uint32_t HELPER(sve_predtest1)(uint64_t d, uint64_t g) +{ + return iter_predtest_fwd(d, g, PREDTEST_INIT); +} + +/* The same for a multi-word predicate. */ +uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words) +{ + uint32_t flags = PREDTEST_INIT; + uint64_t *d = vd, *g = vg; + uintptr_t i = 0; + + do { + flags = iter_predtest_fwd(d[i], g[i], flags); + } while (++i < words); + + return flags; +} + +/* Expand active predicate bits to bytes, for byte elements. + * for (i = 0; i < 256; ++i) { + * unsigned long m = 0; + * for (j = 0; j < 8; j++) { + * if ((i >> j) & 1) { + * m |= 0xfful << (j << 3); + * } + * } + * printf("0x%016lx,\n", m); + * } + */ +static inline uint64_t expand_pred_b(uint8_t byte) +{ + static const uint64_t word[256] = { + 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00, + 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff, + 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000, + 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff, + 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00, + 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff, + 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000, + 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff, + 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00, + 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff, + 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000, + 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff, + 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00, + 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff, + 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000, + 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff, + 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00, + 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff, + 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000, + 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff, + 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00, + 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff, + 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000, + 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff, + 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00, + 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff, + 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000, + 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff, + 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, + 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff, + 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000, + 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff, + 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00, + 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff, + 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000, + 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff, + 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00, + 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff, + 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000, + 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff, + 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00, + 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff, + 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000, + 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff, + 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00, 
+ 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff, + 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000, + 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff, + 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00, + 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff, + 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000, + 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff, + 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00, + 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff, + 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000, + 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff, + 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, + 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff, + 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000, + 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff, + 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00, + 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff, + 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000, + 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff, + 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00, + 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff, + 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000, + 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff, + 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00, + 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff, + 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000, + 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff, + 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00, + 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff, + 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000, + 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff, + 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00, + 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff, + 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000, + 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff, + 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00, + 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff, + 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000, + 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff, + 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00, + 0xffffffffffffffff, + }; + return word[byte]; +} + +/* Similarly for half-word elements. + * for (i = 0; i < 256; ++i) { + * unsigned long m = 0; + * if (i & 0xaa) { + * continue; + * } + * for (j = 0; j < 8; j += 2) { + * if ((i >> j) & 1) { + * m |= 0xfffful << (j << 3); + * } + * } + * printf("[0x%x] = 0x%016lx,\n", i, m); + * } + */ +static inline uint64_t expand_pred_h(uint8_t byte) +{ + static const uint64_t word[] = { + [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000, + [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000, + [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000, + [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000, + [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000, + [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000, + [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000, + [0x55] = 0xffffffffffffffff, + }; + return word[byte & 0x55]; +} + +/* Similarly for single word elements. 
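+ * A generator loop in the same style as the ones above would be:
+ * for (i = 0; i < 256; ++i) {
+ *     unsigned long m = 0;
+ *     if (i & 0xee) {
+ *         continue;
+ *     }
+ *     for (j = 0; j < 8; j += 4) {
+ *         if ((i >> j) & 1) {
+ *             m |= 0xfffffffful << (j << 3);
+ *         }
+ *     }
+ *     printf("[0x%x] = 0x%016lx,\n", i, m);
+ * }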
*/ +static inline uint64_t expand_pred_s(uint8_t byte) +{ + static const uint64_t word[] = { + [0x01] = 0x00000000ffffffffull, + [0x10] = 0xffffffff00000000ull, + [0x11] = 0xffffffffffffffffull, + }; + return word[byte & 0x11]; +} + +/* Swap 16-bit words within a 32-bit word. */ +static inline uint32_t hswap32(uint32_t h) +{ + return rol32(h, 16); +} + +/* Swap 16-bit words within a 64-bit word. */ +static inline uint64_t hswap64(uint64_t h) +{ + uint64_t m = 0x0000ffff0000ffffull; + h = rol64(h, 32); + return ((h & m) << 16) | ((h >> 16) & m); +} + +/* Swap 32-bit words within a 64-bit word. */ +static inline uint64_t wswap64(uint64_t h) +{ + return rol64(h, 32); +} + +#define LOGICAL_PPPP(NAME, FUNC) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + uintptr_t opr_sz = simd_oprsz(desc); \ + uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \ + uintptr_t i; \ + for (i = 0; i < opr_sz / 8; ++i) { \ + d[i] = FUNC(n[i], m[i], g[i]); \ + } \ +} + +#define DO_AND(N, M, G) (((N) & (M)) & (G)) +#define DO_BIC(N, M, G) (((N) & ~(M)) & (G)) +#define DO_EOR(N, M, G) (((N) ^ (M)) & (G)) +#define DO_ORR(N, M, G) (((N) | (M)) & (G)) +#define DO_ORN(N, M, G) (((N) | ~(M)) & (G)) +#define DO_NOR(N, M, G) (~((N) | (M)) & (G)) +#define DO_NAND(N, M, G) (~((N) & (M)) & (G)) +#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G))) + +LOGICAL_PPPP(sve_and_pppp, DO_AND) +LOGICAL_PPPP(sve_bic_pppp, DO_BIC) +LOGICAL_PPPP(sve_eor_pppp, DO_EOR) +LOGICAL_PPPP(sve_sel_pppp, DO_SEL) +LOGICAL_PPPP(sve_orr_pppp, DO_ORR) +LOGICAL_PPPP(sve_orn_pppp, DO_ORN) +LOGICAL_PPPP(sve_nor_pppp, DO_NOR) +LOGICAL_PPPP(sve_nand_pppp, DO_NAND) + +#undef DO_AND +#undef DO_BIC +#undef DO_EOR +#undef DO_ORR +#undef DO_ORN +#undef DO_NOR +#undef DO_NAND +#undef DO_SEL +#undef LOGICAL_PPPP + +/* Fully general three-operand expander, controlled by a predicate. + * This is complicated by the host-endian storage of the register file. + */ +/* ??? I don't expect the compiler could ever vectorize this itself. + * With some tables we can convert bit masks to byte masks, and with + * extra care wrt byte/word ordering we could use gcc generic vectors + * and do 16 bytes at a time. + */ +#define DO_ZPZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZZ_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i], mm = m[i]; \ + d[i] = OP(nn, mm); \ + } \ + } \ +} + +#define DO_AND(N, M) (N & M) +#define DO_EOR(N, M) (N ^ M) +#define DO_ORR(N, M) (N | M) +#define DO_BIC(N, M) (N & ~M) +#define DO_ADD(N, M) (N + M) +#define DO_SUB(N, M) (N - M) +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) +#define DO_ABD(N, M) ((N) >= (M) ? 
(N) - (M) : (M) - (N)) +#define DO_MUL(N, M) (N * M) + + +/* + * We must avoid the C undefined behaviour cases: division by + * zero and signed division of INT_MIN by -1. Both of these + * have architecturally defined required results for Arm. + * We special case all signed divisions by -1 to avoid having + * to deduce the minimum integer for the type involved. + */ +#define DO_SDIV(N, M) (unlikely(M == 0) ? 0 : unlikely(M == -1) ? -N : N / M) +#define DO_UDIV(N, M) (unlikely(M == 0) ? 0 : N / M) + +DO_ZPZZ(sve_and_zpzz_b, uint8_t, H1, DO_AND) +DO_ZPZZ(sve_and_zpzz_h, uint16_t, H1_2, DO_AND) +DO_ZPZZ(sve_and_zpzz_s, uint32_t, H1_4, DO_AND) +DO_ZPZZ_D(sve_and_zpzz_d, uint64_t, DO_AND) + +DO_ZPZZ(sve_orr_zpzz_b, uint8_t, H1, DO_ORR) +DO_ZPZZ(sve_orr_zpzz_h, uint16_t, H1_2, DO_ORR) +DO_ZPZZ(sve_orr_zpzz_s, uint32_t, H1_4, DO_ORR) +DO_ZPZZ_D(sve_orr_zpzz_d, uint64_t, DO_ORR) + +DO_ZPZZ(sve_eor_zpzz_b, uint8_t, H1, DO_EOR) +DO_ZPZZ(sve_eor_zpzz_h, uint16_t, H1_2, DO_EOR) +DO_ZPZZ(sve_eor_zpzz_s, uint32_t, H1_4, DO_EOR) +DO_ZPZZ_D(sve_eor_zpzz_d, uint64_t, DO_EOR) + +DO_ZPZZ(sve_bic_zpzz_b, uint8_t, H1, DO_BIC) +DO_ZPZZ(sve_bic_zpzz_h, uint16_t, H1_2, DO_BIC) +DO_ZPZZ(sve_bic_zpzz_s, uint32_t, H1_4, DO_BIC) +DO_ZPZZ_D(sve_bic_zpzz_d, uint64_t, DO_BIC) + +DO_ZPZZ(sve_add_zpzz_b, uint8_t, H1, DO_ADD) +DO_ZPZZ(sve_add_zpzz_h, uint16_t, H1_2, DO_ADD) +DO_ZPZZ(sve_add_zpzz_s, uint32_t, H1_4, DO_ADD) +DO_ZPZZ_D(sve_add_zpzz_d, uint64_t, DO_ADD) + +DO_ZPZZ(sve_sub_zpzz_b, uint8_t, H1, DO_SUB) +DO_ZPZZ(sve_sub_zpzz_h, uint16_t, H1_2, DO_SUB) +DO_ZPZZ(sve_sub_zpzz_s, uint32_t, H1_4, DO_SUB) +DO_ZPZZ_D(sve_sub_zpzz_d, uint64_t, DO_SUB) + +DO_ZPZZ(sve_smax_zpzz_b, int8_t, H1, DO_MAX) +DO_ZPZZ(sve_smax_zpzz_h, int16_t, H1_2, DO_MAX) +DO_ZPZZ(sve_smax_zpzz_s, int32_t, H1_4, DO_MAX) +DO_ZPZZ_D(sve_smax_zpzz_d, int64_t, DO_MAX) + +DO_ZPZZ(sve_umax_zpzz_b, uint8_t, H1, DO_MAX) +DO_ZPZZ(sve_umax_zpzz_h, uint16_t, H1_2, DO_MAX) +DO_ZPZZ(sve_umax_zpzz_s, uint32_t, H1_4, DO_MAX) +DO_ZPZZ_D(sve_umax_zpzz_d, uint64_t, DO_MAX) + +DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN) +DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN) + +DO_ZPZZ(sve_umin_zpzz_b, uint8_t, H1, DO_MIN) +DO_ZPZZ(sve_umin_zpzz_h, uint16_t, H1_2, DO_MIN) +DO_ZPZZ(sve_umin_zpzz_s, uint32_t, H1_4, DO_MIN) +DO_ZPZZ_D(sve_umin_zpzz_d, uint64_t, DO_MIN) + +DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD) +DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD) + +DO_ZPZZ(sve_uabd_zpzz_b, uint8_t, H1, DO_ABD) +DO_ZPZZ(sve_uabd_zpzz_h, uint16_t, H1_2, DO_ABD) +DO_ZPZZ(sve_uabd_zpzz_s, uint32_t, H1_4, DO_ABD) +DO_ZPZZ_D(sve_uabd_zpzz_d, uint64_t, DO_ABD) + +/* Because the computation type is at least twice as large as required, + these work for both signed and unsigned source types. 
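+   E.g. for do_mulh_b, unsigned 0xff * 0xff is evaluated as 255 * 255 = 65025
+   in int32_t and (65025 >> 8) = 0xfe is the correct unsigned high byte,
+   while signed -1 * -1 = 1 gives (1 >> 8) = 0, the correct signed high byte.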
*/ +static inline uint8_t do_mulh_b(int32_t n, int32_t m) +{ + return (n * m) >> 8; +} + +static inline uint16_t do_mulh_h(int32_t n, int32_t m) +{ + return (n * m) >> 16; +} + +static inline uint32_t do_mulh_s(int64_t n, int64_t m) +{ + return (n * m) >> 32; +} + +static inline uint64_t do_smulh_d(uint64_t n, uint64_t m) +{ + uint64_t lo, hi; + muls64(&lo, &hi, n, m); + return hi; +} + +static inline uint64_t do_umulh_d(uint64_t n, uint64_t m) +{ + uint64_t lo, hi; + mulu64(&lo, &hi, n, m); + return hi; +} + +DO_ZPZZ(sve_mul_zpzz_b, uint8_t, H1, DO_MUL) +DO_ZPZZ(sve_mul_zpzz_h, uint16_t, H1_2, DO_MUL) +DO_ZPZZ(sve_mul_zpzz_s, uint32_t, H1_4, DO_MUL) +DO_ZPZZ_D(sve_mul_zpzz_d, uint64_t, DO_MUL) + +DO_ZPZZ(sve_smulh_zpzz_b, int8_t, H1, do_mulh_b) +DO_ZPZZ(sve_smulh_zpzz_h, int16_t, H1_2, do_mulh_h) +DO_ZPZZ(sve_smulh_zpzz_s, int32_t, H1_4, do_mulh_s) +DO_ZPZZ_D(sve_smulh_zpzz_d, uint64_t, do_smulh_d) + +DO_ZPZZ(sve_umulh_zpzz_b, uint8_t, H1, do_mulh_b) +DO_ZPZZ(sve_umulh_zpzz_h, uint16_t, H1_2, do_mulh_h) +DO_ZPZZ(sve_umulh_zpzz_s, uint32_t, H1_4, do_mulh_s) +DO_ZPZZ_D(sve_umulh_zpzz_d, uint64_t, do_umulh_d) + +DO_ZPZZ(sve_sdiv_zpzz_s, int32_t, H1_4, DO_SDIV) +DO_ZPZZ_D(sve_sdiv_zpzz_d, int64_t, DO_SDIV) + +DO_ZPZZ(sve_udiv_zpzz_s, uint32_t, H1_4, DO_UDIV) +DO_ZPZZ_D(sve_udiv_zpzz_d, uint64_t, DO_UDIV) + +/* Note that all bits of the shift are significant + and not modulo the element size. */ +#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1)) +#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0) +#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0) + +DO_ZPZZ(sve_asr_zpzz_b, int8_t, H1, DO_ASR) +DO_ZPZZ(sve_lsr_zpzz_b, uint8_t, H1_2, DO_LSR) +DO_ZPZZ(sve_lsl_zpzz_b, uint8_t, H1_4, DO_LSL) + +DO_ZPZZ(sve_asr_zpzz_h, int16_t, H1, DO_ASR) +DO_ZPZZ(sve_lsr_zpzz_h, uint16_t, H1_2, DO_LSR) +DO_ZPZZ(sve_lsl_zpzz_h, uint16_t, H1_4, DO_LSL) + +DO_ZPZZ(sve_asr_zpzz_s, int32_t, H1, DO_ASR) +DO_ZPZZ(sve_lsr_zpzz_s, uint32_t, H1_2, DO_LSR) +DO_ZPZZ(sve_lsl_zpzz_s, uint32_t, H1_4, DO_LSL) + +DO_ZPZZ_D(sve_asr_zpzz_d, int64_t, DO_ASR) +DO_ZPZZ_D(sve_lsr_zpzz_d, uint64_t, DO_LSR) +DO_ZPZZ_D(sve_lsl_zpzz_d, uint64_t, DO_LSL) + +#undef DO_ZPZZ +#undef DO_ZPZZ_D + +/* Three-operand expander, controlled by a predicate, in which the + * third operand is "wide". That is, for D = N op M, the same 64-bit + * value of M is used with all of the narrower values of N. + */ +#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint8_t pg = *(uint8_t *)((char *)vg + H1(i >> 3)); \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 7); \ + } \ +} + +DO_ZPZW(sve_asr_zpzw_b, int8_t, uint64_t, H1, DO_ASR) +DO_ZPZW(sve_lsr_zpzw_b, uint8_t, uint64_t, H1, DO_LSR) +DO_ZPZW(sve_lsl_zpzw_b, uint8_t, uint64_t, H1, DO_LSL) + +DO_ZPZW(sve_asr_zpzw_h, int16_t, uint64_t, H1_2, DO_ASR) +DO_ZPZW(sve_lsr_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSR) +DO_ZPZW(sve_lsl_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSL) + +DO_ZPZW(sve_asr_zpzw_s, int32_t, uint64_t, H1_4, DO_ASR) +DO_ZPZW(sve_lsr_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSR) +DO_ZPZW(sve_lsl_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSL) + +#undef DO_ZPZW + +/* Fully general two-operand expander, controlled by a predicate. 
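+ * E.g. DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG) below expands to a helper
+ * that negates each active byte of Zn into Zd, leaving the inactive bytes
+ * of Zd unchanged.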
+ */ +#define DO_ZPZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZ_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn); \ + } \ + } \ +} + +#define DO_CLS_B(N) (clrsb32(N) - 24) +#define DO_CLS_H(N) (clrsb32(N) - 16) + +DO_ZPZ(sve_cls_b, int8_t, H1, DO_CLS_B) +DO_ZPZ(sve_cls_h, int16_t, H1_2, DO_CLS_H) +DO_ZPZ(sve_cls_s, int32_t, H1_4, clrsb32) +DO_ZPZ_D(sve_cls_d, int64_t, clrsb64) + +#define DO_CLZ_B(N) (clz32(N) - 24) +#define DO_CLZ_H(N) (clz32(N) - 16) + +DO_ZPZ(sve_clz_b, uint8_t, H1, DO_CLZ_B) +DO_ZPZ(sve_clz_h, uint16_t, H1_2, DO_CLZ_H) +DO_ZPZ(sve_clz_s, uint32_t, H1_4, clz32) +DO_ZPZ_D(sve_clz_d, uint64_t, clz64) + +DO_ZPZ(sve_cnt_zpz_b, uint8_t, H1, ctpop8) +DO_ZPZ(sve_cnt_zpz_h, uint16_t, H1_2, ctpop16) +DO_ZPZ(sve_cnt_zpz_s, uint32_t, H1_4, ctpop32) +DO_ZPZ_D(sve_cnt_zpz_d, uint64_t, ctpop64) + +#define DO_CNOT(N) (N == 0) + +DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT) +DO_ZPZ(sve_cnot_h, uint16_t, H1_2, DO_CNOT) +DO_ZPZ(sve_cnot_s, uint32_t, H1_4, DO_CNOT) +DO_ZPZ_D(sve_cnot_d, uint64_t, DO_CNOT) + +#ifdef _MSC_VER +#define DO_FABS16(N) (N & ((uint16_t)-1 >> 1)) +#define DO_FABS32(N) (N & ((uint32_t)-1 >> 1)) +#define DO_FABS64(N) (N & ((uint64_t)-1 >> 1)) + +DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS16) +DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS32) +DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS64) +#else +#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1)) + +DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) +DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) +DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) +#endif + +#ifdef _MSC_VER +#define DO_FNEG16(N) (N ^ ~((uint16_t)-1 >> 1)) +#define DO_FNEG32(N) (N ^ ~((uint32_t)-1 >> 1)) +#define DO_FNEG64(N) (N ^ ~((uint64_t)-1 >> 1)) + +DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG16) +DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG32) +DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG64) +#else +#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) + +DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) +DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) +DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) +#endif + +#define DO_NOT(N) (~N) + +DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) +DO_ZPZ(sve_not_zpz_h, uint16_t, H1_2, DO_NOT) +DO_ZPZ(sve_not_zpz_s, uint32_t, H1_4, DO_NOT) +DO_ZPZ_D(sve_not_zpz_d, uint64_t, DO_NOT) + +#define DO_SXTB(N) ((int8_t)N) +#define DO_SXTH(N) ((int16_t)N) +#define DO_SXTS(N) ((int32_t)N) +#define DO_UXTB(N) ((uint8_t)N) +#define DO_UXTH(N) ((uint16_t)N) +#define DO_UXTS(N) ((uint32_t)N) + +DO_ZPZ(sve_sxtb_h, uint16_t, H1_2, DO_SXTB) +DO_ZPZ(sve_sxtb_s, uint32_t, H1_4, DO_SXTB) +DO_ZPZ(sve_sxth_s, uint32_t, H1_4, DO_SXTH) +DO_ZPZ_D(sve_sxtb_d, uint64_t, DO_SXTB) +DO_ZPZ_D(sve_sxth_d, uint64_t, DO_SXTH) +DO_ZPZ_D(sve_sxtw_d, uint64_t, DO_SXTS) + +DO_ZPZ(sve_uxtb_h, uint16_t, H1_2, DO_UXTB) +DO_ZPZ(sve_uxtb_s, uint32_t, H1_4, DO_UXTB) +DO_ZPZ(sve_uxth_s, uint32_t, H1_4, DO_UXTH) +DO_ZPZ_D(sve_uxtb_d, uint64_t, DO_UXTB) 
+DO_ZPZ_D(sve_uxth_d, uint64_t, DO_UXTH) +DO_ZPZ_D(sve_uxtw_d, uint64_t, DO_UXTS) + +#ifdef _MSC_VER +#define DO_ABS(N) (N < 0 ? (0 - N) : N) +#else +#define DO_ABS(N) (N < 0 ? -N : N) +#endif + +DO_ZPZ(sve_abs_b, int8_t, H1, DO_ABS) +DO_ZPZ(sve_abs_h, int16_t, H1_2, DO_ABS) +DO_ZPZ(sve_abs_s, int32_t, H1_4, DO_ABS) +DO_ZPZ_D(sve_abs_d, int64_t, DO_ABS) + +#ifdef _MSC_VER +#define DO_NEG(N) (0 - N) +#else +#define DO_NEG(N) (-N) +#endif + +DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG) +DO_ZPZ(sve_neg_h, uint16_t, H1_2, DO_NEG) +DO_ZPZ(sve_neg_s, uint32_t, H1_4, DO_NEG) +DO_ZPZ_D(sve_neg_d, uint64_t, DO_NEG) + +DO_ZPZ(sve_revb_h, uint16_t, H1_2, bswap16) +DO_ZPZ(sve_revb_s, uint32_t, H1_4, bswap32) +DO_ZPZ_D(sve_revb_d, uint64_t, bswap64) + +DO_ZPZ(sve_revh_s, uint32_t, H1_4, hswap32) +DO_ZPZ_D(sve_revh_d, uint64_t, hswap64) + +DO_ZPZ_D(sve_revw_d, uint64_t, wswap64) + +DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8) +DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16) +DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32) +DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64) + +/* Three-operand expander, unpredicated, in which the third operand is "wide". + */ +#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + i += sizeof(TYPE); \ + } while (i & 7); \ + } \ +} + +DO_ZZW(sve_asr_zzw_b, int8_t, uint64_t, H1, DO_ASR) +DO_ZZW(sve_lsr_zzw_b, uint8_t, uint64_t, H1, DO_LSR) +DO_ZZW(sve_lsl_zzw_b, uint8_t, uint64_t, H1, DO_LSL) + +DO_ZZW(sve_asr_zzw_h, int16_t, uint64_t, H1_2, DO_ASR) +DO_ZZW(sve_lsr_zzw_h, uint16_t, uint64_t, H1_2, DO_LSR) +DO_ZZW(sve_lsl_zzw_h, uint16_t, uint64_t, H1_2, DO_LSL) + +DO_ZZW(sve_asr_zzw_s, int32_t, uint64_t, H1_4, DO_ASR) +DO_ZZW(sve_lsr_zzw_s, uint32_t, uint64_t, H1_4, DO_LSR) +DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL) + +#undef DO_ZZW + +#undef DO_CLS_B +#undef DO_CLS_H +#undef DO_CLZ_B +#undef DO_CLZ_H +#undef DO_CNOT +#undef DO_FABS +#undef DO_FNEG +#undef DO_ABS +#undef DO_NEG +#undef DO_ZPZ +#undef DO_ZPZ_D + +/* Two-operand reduction expander, controlled by a predicate. + * The difference between TYPERED and TYPERET has to do with + * sign-extension. E.g. for SMAX, TYPERED must be signed, + * but TYPERET must be unsigned so that e.g. a 32-bit value + * is not sign-extended to the ABI uint64_t return type. + */ +/* ??? If we were to vectorize this by hand the reduction ordering + * would change. For integer operands, this is perfectly fine. 
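+ * (Integer AND/OR/EOR/MIN/MAX and wrapping addition are associative and
+ * commutative, so any evaluation order yields the same result.)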
+ */ +#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \ +uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPERED ret = INIT; \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPEELT nn = *(TYPEELT *)((char *)vn + H(i)); \ + ret = OP(ret, nn); \ + } \ + i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \ + } while (i & 15); \ + } \ + return (TYPERET)ret; \ +} + +#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \ +uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPEE *n = vn; \ + uint8_t *pg = vg; \ + TYPER ret = INIT; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPEE nn = n[i]; \ + ret = OP(ret, nn); \ + } \ + } \ + return ret; \ +} + +DO_VPZ(sve_orv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_ORR) +DO_VPZ(sve_orv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_ORR) +DO_VPZ(sve_orv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_ORR) +DO_VPZ_D(sve_orv_d, uint64_t, uint64_t, 0, DO_ORR) + +DO_VPZ(sve_eorv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_EOR) +DO_VPZ(sve_eorv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_EOR) +DO_VPZ(sve_eorv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_EOR) +DO_VPZ_D(sve_eorv_d, uint64_t, uint64_t, 0, DO_EOR) + +DO_VPZ(sve_andv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_AND) +DO_VPZ(sve_andv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_AND) +DO_VPZ(sve_andv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_AND) +DO_VPZ_D(sve_andv_d, uint64_t, uint64_t, -1, DO_AND) + +DO_VPZ(sve_saddv_b, int8_t, uint64_t, uint64_t, H1, 0, DO_ADD) +DO_VPZ(sve_saddv_h, int16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD) +DO_VPZ(sve_saddv_s, int32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD) + +DO_VPZ(sve_uaddv_b, uint8_t, uint64_t, uint64_t, H1, 0, DO_ADD) +DO_VPZ(sve_uaddv_h, uint16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD) +DO_VPZ(sve_uaddv_s, uint32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD) +DO_VPZ_D(sve_uaddv_d, uint64_t, uint64_t, 0, DO_ADD) + +DO_VPZ(sve_smaxv_b, int8_t, int8_t, uint8_t, H1, INT8_MIN, DO_MAX) +DO_VPZ(sve_smaxv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MIN, DO_MAX) +DO_VPZ(sve_smaxv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MIN, DO_MAX) +DO_VPZ_D(sve_smaxv_d, int64_t, int64_t, INT64_MIN, DO_MAX) + +DO_VPZ(sve_umaxv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_MAX) +DO_VPZ(sve_umaxv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_MAX) +DO_VPZ(sve_umaxv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_MAX) +DO_VPZ_D(sve_umaxv_d, uint64_t, uint64_t, 0, DO_MAX) + +DO_VPZ(sve_sminv_b, int8_t, int8_t, uint8_t, H1, INT8_MAX, DO_MIN) +DO_VPZ(sve_sminv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MAX, DO_MIN) +DO_VPZ(sve_sminv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MAX, DO_MIN) +DO_VPZ_D(sve_sminv_d, int64_t, int64_t, INT64_MAX, DO_MIN) + +DO_VPZ(sve_uminv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_MIN) +DO_VPZ(sve_uminv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_MIN) +DO_VPZ(sve_uminv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_MIN) +DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN) + +#undef DO_VPZ +#undef DO_VPZ_D + +/* Two vector operand, one scalar operand, unpredicated. 
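+ * E.g. DO_ZZI(sve_subri_b, uint8_t, DO_SUBR) below yields a helper that
+ * computes d[i] = s - n[i] (reversed subtraction from the scalar) for
+ * every byte, with no governing predicate.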
*/ +#define DO_ZZI(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \ + TYPE s = s64, *d = vd, *n = vn; \ + for (i = 0; i < opr_sz; ++i) { \ + d[i] = OP(n[i], s); \ + } \ +} + +#define DO_SUBR(X, Y) (Y - X) + +DO_ZZI(sve_subri_b, uint8_t, DO_SUBR) +DO_ZZI(sve_subri_h, uint16_t, DO_SUBR) +DO_ZZI(sve_subri_s, uint32_t, DO_SUBR) +DO_ZZI(sve_subri_d, uint64_t, DO_SUBR) + +DO_ZZI(sve_smaxi_b, int8_t, DO_MAX) +DO_ZZI(sve_smaxi_h, int16_t, DO_MAX) +DO_ZZI(sve_smaxi_s, int32_t, DO_MAX) +DO_ZZI(sve_smaxi_d, int64_t, DO_MAX) + +DO_ZZI(sve_smini_b, int8_t, DO_MIN) +DO_ZZI(sve_smini_h, int16_t, DO_MIN) +DO_ZZI(sve_smini_s, int32_t, DO_MIN) +DO_ZZI(sve_smini_d, int64_t, DO_MIN) + +DO_ZZI(sve_umaxi_b, uint8_t, DO_MAX) +DO_ZZI(sve_umaxi_h, uint16_t, DO_MAX) +DO_ZZI(sve_umaxi_s, uint32_t, DO_MAX) +DO_ZZI(sve_umaxi_d, uint64_t, DO_MAX) + +DO_ZZI(sve_umini_b, uint8_t, DO_MIN) +DO_ZZI(sve_umini_h, uint16_t, DO_MIN) +DO_ZZI(sve_umini_s, uint32_t, DO_MIN) +DO_ZZI(sve_umini_d, uint64_t, DO_MIN) + +#undef DO_ZZI + +#undef DO_AND +#undef DO_ORR +#undef DO_EOR +#undef DO_BIC +#undef DO_ADD +#undef DO_SUB +#undef DO_MAX +#undef DO_MIN +#undef DO_ABD +#undef DO_MUL +#undef DO_DIV +#undef DO_ASR +#undef DO_LSR +#undef DO_LSL +#undef DO_SUBR + +/* Similar to the ARM LastActiveElement pseudocode function, except the + result is multiplied by the element size. This includes the not found + indication; e.g. not found for esz=3 is -8. */ +static intptr_t last_active_element(uint64_t *g, intptr_t words, intptr_t esz) +{ + uint64_t mask = pred_esz_masks[esz]; + intptr_t i = words; + + do { + uint64_t this_g = g[--i] & mask; + if (this_g) { + return i * 64 + (63 - clz64(this_g)); + } + } while (i > 0); + return (intptr_t)-1 << esz; +} + +uint32_t HELPER(sve_pfirst)(void *vd, void *vg, uint32_t words) +{ + uint32_t flags = PREDTEST_INIT; + uint64_t *d = vd, *g = vg; + intptr_t i = 0; + + do { + uint64_t this_d = d[i]; + uint64_t this_g = g[i]; + + if (this_g) { + if (!(flags & 4)) { + /* Set in D the first bit of G. */ +#ifdef _MSC_VER + this_d |= this_g & (0 - this_g); +#else + this_d |= this_g & -this_g; +#endif + d[i] = this_d; + } + flags = iter_predtest_fwd(this_d, this_g, flags); + } + } while (++i < words); + + return flags; +} + +uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc) +{ + intptr_t words = extract32(pred_desc, 0, SIMD_OPRSZ_BITS); + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + uint32_t flags = PREDTEST_INIT; + uint64_t *d = vd, *g = vg, esz_mask; + intptr_t i, next; + + next = last_active_element(vd, words, esz) + (1ULL << esz); + esz_mask = pred_esz_masks[esz]; + + /* Similar to the pseudocode for pnext, but scaled by ESZ + so that we find the correct bit. */ + if (next < words * 64) { + uint64_t mask = -1; + + if (next & 63) { + mask = ~((1ull << (next & 63)) - 1); + next &= -64; + } + do { + uint64_t this_g = g[next / 64] & esz_mask & mask; + if (this_g != 0) { + next = (next & -64) + ctz64(this_g); + break; + } + next += 64; + mask = -1; + } while (next < words * 64); + } + + i = 0; + do { + uint64_t this_d = 0; + if (i == next / 64) { + this_d = 1ull << (next & 63); + } + d[i] = this_d; + flags = iter_predtest_fwd(this_d, g[i] & esz_mask, flags); + } while (++i < words); + + return flags; +} + +/* Store zero into every active element of Zd. We will use this for two + * and three-operand predicated instructions for which logic dictates a + * zero result. 
In particular, logical shift by element size, which is + * otherwise undefined on the host. + * + * For element sizes smaller than uint64_t, we use tables to expand + * the N bits of the controlling predicate to a byte mask, and clear + * those bytes. + */ +void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] &= ~expand_pred_b(pg[H1(i)]); + } +} + +void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] &= ~expand_pred_h(pg[H1(i)]); + } +} + +void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] &= ~expand_pred_s(pg[H1(i)]); + } +} + +void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + if (pg[H1(i)] & 1) { + d[i] = 0; + } + } +} + +/* Copy Zn into Zd, and store zero into inactive elements. */ +void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] & expand_pred_b(pg[H1(i)]); + } +} + +void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] & expand_pred_h(pg[H1(i)]); + } +} + +void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] & expand_pred_s(pg[H1(i)]); + } +} + +void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + for (i = 0; i < opr_sz; i += 1) { +#ifdef _MSC_VER + d[i] = n[i] & ((uint64_t)0 - (uint64_t)(pg[H1(i)] & 1)); +#else + d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1); +#endif + } +} + +/* Three-operand expander, immediate operand, controlled by a predicate. + */ +#define DO_ZPZI(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE imm = simd_data(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, imm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZI_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + TYPE imm = simd_data(desc); \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn, imm); \ + } \ + } \ +} + +#define DO_SHR(N, M) (N >> M) +#define DO_SHL(N, M) (N << M) + +/* Arithmetic shift right for division. This rounds negative numbers + toward zero as per signed division. Therefore before shifting, + when N is negative, add 2**M-1. 
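+   E.g. with N = -7 and M = 2: (-7 + 3) >> 2 = -1, matching -7 / 4 truncated
+   toward zero, whereas a plain -7 >> 2 would give -2.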
*/ +#ifdef _MSC_VER + #define DO_ASRD(N, M) ((N + (N < 0 ? (1 << M) - 1 : 0)) >> M) +#else + #define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) +#endif + +DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR) +DO_ZPZI(sve_asr_zpzi_h, int16_t, H1_2, DO_SHR) +DO_ZPZI(sve_asr_zpzi_s, int32_t, H1_4, DO_SHR) +DO_ZPZI_D(sve_asr_zpzi_d, int64_t, DO_SHR) + +DO_ZPZI(sve_lsr_zpzi_b, uint8_t, H1, DO_SHR) +DO_ZPZI(sve_lsr_zpzi_h, uint16_t, H1_2, DO_SHR) +DO_ZPZI(sve_lsr_zpzi_s, uint32_t, H1_4, DO_SHR) +DO_ZPZI_D(sve_lsr_zpzi_d, uint64_t, DO_SHR) + +DO_ZPZI(sve_lsl_zpzi_b, uint8_t, H1, DO_SHL) +DO_ZPZI(sve_lsl_zpzi_h, uint16_t, H1_2, DO_SHL) +DO_ZPZI(sve_lsl_zpzi_s, uint32_t, H1_4, DO_SHL) +DO_ZPZI_D(sve_lsl_zpzi_d, uint64_t, DO_SHL) + +DO_ZPZI(sve_asrd_b, int8_t, H1, DO_ASRD) +DO_ZPZI(sve_asrd_h, int16_t, H1_2, DO_ASRD) +DO_ZPZI(sve_asrd_s, int32_t, H1_4, DO_ASRD) +DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD) + +#undef DO_SHR +#undef DO_SHL +#undef DO_ASRD +#undef DO_ZPZI +#undef DO_ZPZI_D + +/* Fully general four-operand expander, controlled by a predicate. + */ +#define DO_ZPZZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ + void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + TYPE aa = *(TYPE *)((char *)va + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(aa, nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +/* Similarly, specialized for 64-bit operands. */ +#define DO_ZPZZZ_D(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ + void *vg, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *a = va, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE aa = a[i], nn = n[i], mm = m[i]; \ + d[i] = OP(aa, nn, mm); \ + } \ + } \ +} + +#define DO_MLA(A, N, M) (A + N * M) +#define DO_MLS(A, N, M) (A - N * M) + +DO_ZPZZZ(sve_mla_b, uint8_t, H1, DO_MLA) +DO_ZPZZZ(sve_mls_b, uint8_t, H1, DO_MLS) + +DO_ZPZZZ(sve_mla_h, uint16_t, H1_2, DO_MLA) +DO_ZPZZZ(sve_mls_h, uint16_t, H1_2, DO_MLS) + +DO_ZPZZZ(sve_mla_s, uint32_t, H1_4, DO_MLA) +DO_ZPZZZ(sve_mls_s, uint32_t, H1_4, DO_MLS) + +DO_ZPZZZ_D(sve_mla_d, uint64_t, DO_MLA) +DO_ZPZZZ_D(sve_mls_d, uint64_t, DO_MLS) + +#undef DO_MLA +#undef DO_MLS +#undef DO_ZPZZZ +#undef DO_ZPZZZ_D + +void HELPER(sve_index_b)(void *vd, uint32_t start, + uint32_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint8_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[H1(i)] = start + i * incr; + } +} + +void HELPER(sve_index_h)(void *vd, uint32_t start, + uint32_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 2; + uint16_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[H2(i)] = start + i * incr; + } +} + +void HELPER(sve_index_s)(void *vd, uint32_t start, + uint32_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[H4(i)] = start + i * incr; + } +} + +void HELPER(sve_index_d)(void *vd, uint64_t start, + uint64_t incr, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + for (i = 0; i < opr_sz; i += 1) { + d[i] = start + i * incr; + } +} + +void HELPER(sve_adr_p32)(void *vd, void *vn, void *vm, uint32_t 
desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t sh = simd_data(desc); + uint32_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + (m[i] << sh); + } +} + +void HELPER(sve_adr_p64)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t sh = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + (m[i] << sh); + } +} + +void HELPER(sve_adr_s32)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t sh = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + ((uint64_t)(int32_t)m[i] << sh); + } +} + +void HELPER(sve_adr_u32)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t sh = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + d[i] = n[i] + ((uint64_t)(uint32_t)m[i] << sh); + } +} + +void HELPER(sve_fexpa_h)(void *vd, void *vn, uint32_t desc) +{ + /* These constants are cut-and-paste directly from the ARM pseudocode. */ + static const uint16_t coeff[] = { + 0x0000, 0x0016, 0x002d, 0x0045, 0x005d, 0x0075, 0x008e, 0x00a8, + 0x00c2, 0x00dc, 0x00f8, 0x0114, 0x0130, 0x014d, 0x016b, 0x0189, + 0x01a8, 0x01c8, 0x01e8, 0x0209, 0x022b, 0x024e, 0x0271, 0x0295, + 0x02ba, 0x02e0, 0x0306, 0x032e, 0x0356, 0x037f, 0x03a9, 0x03d4, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / 2; + uint16_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; i++) { + uint16_t nn = n[i]; + intptr_t idx = extract32(nn, 0, 5); + uint16_t exp = extract32(nn, 5, 5); + d[i] = coeff[idx] | (exp << 10); + } +} + +void HELPER(sve_fexpa_s)(void *vd, void *vn, uint32_t desc) +{ + /* These constants are cut-and-paste directly from the ARM pseudocode. */ + static const uint32_t coeff[] = { + 0x000000, 0x0164d2, 0x02cd87, 0x043a29, + 0x05aac3, 0x071f62, 0x08980f, 0x0a14d5, + 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, + 0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d, + 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, + 0x1ef532, 0x20b051, 0x227043, 0x243516, + 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, + 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, + 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b, + 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, + 0x45672a, 0x478d75, 0x49b9be, 0x4bec15, + 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, + 0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5, + 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, + 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, + 0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; i++) { + uint32_t nn = n[i]; + intptr_t idx = extract32(nn, 0, 6); + uint32_t exp = extract32(nn, 6, 8); + d[i] = coeff[idx] | (exp << 23); + } +} + +void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc) +{ + /* These constants are cut-and-paste directly from the ARM pseudocode. 
*/ + static const uint64_t coeff[] = { + 0x0000000000000ull, 0x02C9A3E778061ull, 0x059B0D3158574ull, + 0x0874518759BC8ull, 0x0B5586CF9890Full, 0x0E3EC32D3D1A2ull, + 0x11301D0125B51ull, 0x1429AAEA92DE0ull, 0x172B83C7D517Bull, + 0x1A35BEB6FCB75ull, 0x1D4873168B9AAull, 0x2063B88628CD6ull, + 0x2387A6E756238ull, 0x26B4565E27CDDull, 0x29E9DF51FDEE1ull, + 0x2D285A6E4030Bull, 0x306FE0A31B715ull, 0x33C08B26416FFull, + 0x371A7373AA9CBull, 0x3A7DB34E59FF7ull, 0x3DEA64C123422ull, + 0x4160A21F72E2Aull, 0x44E086061892Dull, 0x486A2B5C13CD0ull, + 0x4BFDAD5362A27ull, 0x4F9B2769D2CA7ull, 0x5342B569D4F82ull, + 0x56F4736B527DAull, 0x5AB07DD485429ull, 0x5E76F15AD2148ull, + 0x6247EB03A5585ull, 0x6623882552225ull, 0x6A09E667F3BCDull, + 0x6DFB23C651A2Full, 0x71F75E8EC5F74ull, 0x75FEB564267C9ull, + 0x7A11473EB0187ull, 0x7E2F336CF4E62ull, 0x82589994CCE13ull, + 0x868D99B4492EDull, 0x8ACE5422AA0DBull, 0x8F1AE99157736ull, + 0x93737B0CDC5E5ull, 0x97D829FDE4E50ull, 0x9C49182A3F090ull, + 0xA0C667B5DE565ull, 0xA5503B23E255Dull, 0xA9E6B5579FDBFull, + 0xAE89F995AD3ADull, 0xB33A2B84F15FBull, 0xB7F76F2FB5E47ull, + 0xBCC1E904BC1D2ull, 0xC199BDD85529Cull, 0xC67F12E57D14Bull, + 0xCB720DCEF9069ull, 0xD072D4A07897Cull, 0xD5818DCFBA487ull, + 0xDA9E603DB3285ull, 0xDFC97337B9B5Full, 0xE502EE78B3FF6ull, + 0xEA4AFA2A490DAull, 0xEFA1BEE615A27ull, 0xF50765B6E4540ull, + 0xFA7C1819E90D8ull, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + + for (i = 0; i < opr_sz; i++) { + uint64_t nn = n[i]; + intptr_t idx = extract32(nn, 0, 6); + uint64_t exp = extract32(nn, 6, 11); + d[i] = coeff[idx] | (exp << 52); + } +} + +void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 2; + uint16_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + uint16_t nn = n[i]; + uint16_t mm = m[i]; + if (mm & 1) { + nn = float16_one; + } + d[i] = nn ^ (mm & 2) << 14; + } +} + +void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + uint32_t nn = n[i]; + uint32_t mm = m[i]; + if (mm & 1) { + nn = float32_one; + } + d[i] = nn ^ (mm & 2) << 30; + } +} + +void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t mm = m[i]; + if (mm & 1) { + nn = float64_one; + } + d[i] = nn ^ (mm & 2) << 62; + } +} + +/* + * Signed saturating addition with scalar operand. 
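+ * E.g. for the byte form, an element holding 100 with b = 100 saturates to
+ * INT8_MAX (127) rather than wrapping to -56.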
+ */ + +void HELPER(sve_sqaddi_b)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int8_t)) { + int r = *(int8_t *)((char *)a + i) + b; + if (r > INT8_MAX) { + r = INT8_MAX; + } else if (r < INT8_MIN) { + r = INT8_MIN; + } + *(int8_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_sqaddi_h)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int16_t)) { + int r = *(int16_t *)((char *)a + i) + b; + if (r > INT16_MAX) { + r = INT16_MAX; + } else if (r < INT16_MIN) { + r = INT16_MIN; + } + *(int16_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_sqaddi_s)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int32_t)) { + int64_t r = *(int32_t *)((char *)a + i) + b; + if (r > INT32_MAX) { + r = INT32_MAX; + } else if (r < INT32_MIN) { + r = INT32_MIN; + } + *(int32_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_sqaddi_d)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(int64_t)) { + int64_t ai = *(int64_t *)((char *)a + i); + int64_t r = ai + b; + if (((r ^ ai) & ~(ai ^ b)) < 0) { + /* Signed overflow. */ + r = (r < 0 ? INT64_MAX : INT64_MIN); + } + *(int64_t *)((char *)d + i) = r; + } +} + +/* + * Unsigned saturating addition with scalar operand. + */ + +void HELPER(sve_uqaddi_b)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + int r = *(uint8_t *)((char *)a + i) + b; + if (r > UINT8_MAX) { + r = UINT8_MAX; + } else if (r < 0) { + r = 0; + } + *(uint8_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqaddi_h)(void *d, void *a, int32_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + int r = *(uint16_t *)((char *)a + i) + b; + if (r > UINT16_MAX) { + r = UINT16_MAX; + } else if (r < 0) { + r = 0; + } + *(uint16_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqaddi_s)(void *d, void *a, int64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + int64_t r = *(uint32_t *)((char *)a + i) + b; + if (r > UINT32_MAX) { + r = UINT32_MAX; + } else if (r < 0) { + r = 0; + } + *(uint32_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqaddi_d)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint64_t r = *(uint64_t *)((char *)a + i) + b; + if (r < b) { + r = UINT64_MAX; + } + *(uint64_t *)((char *)d + i) = r; + } +} + +void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint64_t ai = *(uint64_t *)((char *)a + i); + *(uint64_t *)((char *)d + i) = (ai < b ? 0 : ai - b); + } +} + +/* Two operand predicated copy immediate with merge. All valid immediates + * can fit within 17 signed bits in the simd_data field. 
+ */ +void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + mm = dup_const(MO_8, mm); + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t pp = expand_pred_b(pg[H1(i)]); + d[i] = (mm & pp) | (nn & ~pp); + } +} + +void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + mm = dup_const(MO_16, mm); + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t pp = expand_pred_h(pg[H1(i)]); + d[i] = (mm & pp) | (nn & ~pp); + } +} + +void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + mm = dup_const(MO_32, mm); + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + uint64_t pp = expand_pred_s(pg[H1(i)]); + d[i] = (mm & pp) | (nn & ~pp); + } +} + +void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg, + uint64_t mm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i]; + d[i] = (pg[H1(i)] & 1 ? mm : nn); + } +} + +void HELPER(sve_cpy_z_b)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + val = dup_const(MO_8, val); + for (i = 0; i < opr_sz; i += 1) { + d[i] = val & expand_pred_b(pg[H1(i)]); + } +} + +void HELPER(sve_cpy_z_h)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + val = dup_const(MO_16, val); + for (i = 0; i < opr_sz; i += 1) { + d[i] = val & expand_pred_h(pg[H1(i)]); + } +} + +void HELPER(sve_cpy_z_s)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + val = dup_const(MO_32, val); + for (i = 0; i < opr_sz; i += 1) { + d[i] = val & expand_pred_s(pg[H1(i)]); + } +} + +void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + d[i] = (pg[H1(i)] & 1 ? val : 0); + } +} + +/* Big-endian hosts need to frob the byte indices. If the copy + * happens to be 8-byte aligned, then no frobbing necessary. 
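+ * E.g. a 4-byte-aligned copy below goes through H1_4 so that each 32-bit
+ * unit is read and written at its byte-swizzled position within the host
+ * 64-bit word.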
+ */ +static void swap_memmove(void *vd, void *vs, size_t n) +{ + uintptr_t d = (uintptr_t)vd; + uintptr_t s = (uintptr_t)vs; + uintptr_t o = (d | s | n) & 7; + size_t i; + +#ifndef HOST_WORDS_BIGENDIAN + o = 0; +#endif + switch (o) { + case 0: + memmove(vd, vs, n); + break; + + case 4: + if (d < s || d >= s + n) { + for (i = 0; i < n; i += 4) { + *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); + } + } else { + for (i = n; i > 0; ) { + i -= 4; + *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); + } + } + break; + + case 2: + case 6: + if (d < s || d >= s + n) { + for (i = 0; i < n; i += 2) { + *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); + } + } else { + for (i = n; i > 0; ) { + i -= 2; + *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); + } + } + break; + + default: + if (d < s || d >= s + n) { + for (i = 0; i < n; i++) { + *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); + } + } else { + for (i = n; i > 0; ) { + i -= 1; + *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); + } + } + break; + } +} + +/* Similarly for memset of 0. */ +static void swap_memzero(void *vd, size_t n) +{ + uintptr_t d = (uintptr_t)vd; + uintptr_t o = (d | n) & 7; + size_t i; + + /* Usually, the first bit of a predicate is set, so N is 0. */ + if (likely(n == 0)) { + return; + } + +#ifndef HOST_WORDS_BIGENDIAN + o = 0; +#endif + switch (o) { + case 0: + memset(vd, 0, n); + break; + + case 4: + for (i = 0; i < n; i += 4) { + *(uint32_t *)H1_4(d + i) = 0; + } + break; + + case 2: + case 6: + for (i = 0; i < n; i += 2) { + *(uint16_t *)H1_2(d + i) = 0; + } + break; + + default: + for (i = 0; i < n; i++) { + *(uint8_t *)H1(d + i) = 0; + } + break; + } +} + +void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t opr_sz = simd_oprsz(desc); + size_t n_ofs = simd_data(desc); + size_t n_siz = opr_sz - n_ofs; + + if (vd != vm) { + swap_memmove(vd, (char *)vn + n_ofs, n_siz); + swap_memmove((char *)vd + n_siz, vm, n_ofs); + } else if (vd != vn) { + swap_memmove((char *)vd + n_siz, vd, n_ofs); + swap_memmove(vd, (char *)vn + n_ofs, n_siz); + } else { + /* vd == vn == vm. Need temp space. 
*/ + ARMVectorReg tmp; + swap_memmove(&tmp, vm, n_ofs); + swap_memmove(vd, (char *)vd + n_ofs, n_siz); + memcpy((char *)vd + n_siz, &tmp, n_ofs); + } +} + +#define DO_INSR(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + swap_memmove((char *)vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \ + *(TYPE *)((char *)vd + H(0)) = val; \ +} + +DO_INSR(sve_insr_b, uint8_t, H1) +DO_INSR(sve_insr_h, uint16_t, H1_2) +DO_INSR(sve_insr_s, uint32_t, H1_4) +DO_INSR(sve_insr_d, uint64_t, ) + +#undef DO_INSR + +void HELPER(sve_rev_b)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = bswap64(b); + *(uint64_t *)((char *)vd + j) = bswap64(f); + } +} + +void HELPER(sve_rev_h)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = hswap64(b); + *(uint64_t *)((char *)vd + j) = hswap64(f); + } +} + +void HELPER(sve_rev_s)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = rol64(b, 32); + *(uint64_t *)((char *)vd + j) = rol64(f, 32); + } +} + +void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) { + uint64_t f = *(uint64_t *)((char *)vn + i); + uint64_t b = *(uint64_t *)((char *)vn + j); + *(uint64_t *)((char *)vd + i) = b; + *(uint64_t *)((char *)vd + j) = f; + } +} + +#define DO_TBL(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + uintptr_t elem = opr_sz / sizeof(TYPE); \ + TYPE *d = vd, *n = vn, *m = vm; \ + ARMVectorReg tmp; \ + if (unlikely(vd == vn)) { \ + n = memcpy(&tmp, vn, opr_sz); \ + } \ + for (i = 0; i < elem; i++) { \ + TYPE j = m[H(i)]; \ + d[H(i)] = j < elem ? n[H(j)] : 0; \ + } \ +} + +DO_TBL(sve_tbl_b, uint8_t, H1) +DO_TBL(sve_tbl_h, uint16_t, H2) +DO_TBL(sve_tbl_s, uint32_t, H4) +DO_TBL(sve_tbl_d, uint64_t, ) + +#undef TBL + +#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPED *d = vd; \ + TYPES *n = vn; \ + ARMVectorReg tmp; \ + if (unlikely((char *)vn - (char *)vd < opr_sz)) { \ + n = memcpy(&tmp, n, opr_sz / 2); \ + } \ + for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \ + d[HD(i)] = n[HS(i)]; \ + } \ +} + +DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1) +DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2) +DO_UNPK(sve_sunpk_d, int64_t, int32_t, , H4) + +DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1) +DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2) +DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4) + +#undef DO_UNPK + +/* Mask of bits included in the even numbered predicates of width esz. + * We also use this for expand_bits/compress_bits, and so extend the + * same pattern out to 16-bit units. 
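+ * E.g. entry 0 (0x5555...) selects every other bit, and entry 3
+ * (0x00ff00ff...) selects alternating bytes.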
+ */ +static const uint64_t even_bit_esz_masks[5] = { + 0x5555555555555555ull, + 0x3333333333333333ull, + 0x0f0f0f0f0f0f0f0full, + 0x00ff00ff00ff00ffull, + 0x0000ffff0000ffffull, +}; + +/* Zero-extend units of 2**N bits to units of 2**(N+1) bits. + * For N==0, this corresponds to the operation that in qemu/bitops.h + * we call half_shuffle64; this algorithm is from Hacker's Delight, + * section 7-2 Shuffling Bits. + */ +static uint64_t expand_bits(uint64_t x, int n) +{ + int i; + + x &= 0xffffffffu; + for (i = 4; i >= n; i--) { + int sh = 1 << i; + x = ((x << sh) | x) & even_bit_esz_masks[i]; + } + return x; +} + +/* Compress units of 2**(N+1) bits to units of 2**N bits. + * For N==0, this corresponds to the operation that in qemu/bitops.h + * we call half_unshuffle64; this algorithm is from Hacker's Delight, + * section 7-2 Shuffling Bits, where it is called an inverse half shuffle. + */ +static uint64_t compress_bits(uint64_t x, int n) +{ + int i; + + for (i = n; i <= 4; i++) { + int sh = 1 << i; + x &= even_bit_esz_masks[i]; + x = (x >> sh) | x; + } + return x & 0xffffffffu; +} + +void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1); + uint64_t *d = vd; + intptr_t i; + + if (oprsz <= 8) { + uint64_t nn = *(uint64_t *)vn; + uint64_t mm = *(uint64_t *)vm; + int half = 4 * oprsz; + + nn = extract64(nn, high * half, half); + mm = extract64(mm, high * half, half); + nn = expand_bits(nn, esz); + mm = expand_bits(mm, esz); + d[0] = nn + (mm << (1 << esz)); + } else { + ARMPredicateReg tmp_n, tmp_m; + + /* We produce output faster than we consume input. + Therefore we must be mindful of possible overlap. 
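+ * (Zipping reads only half of each input predicate but writes all of VD,
+ * so VD can overrun not-yet-read input when the registers overlap; hence
+ * the copies into temporaries below.)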
*/ + if (((char *)vn - (char *)vd) < (uintptr_t)oprsz) { + vn = memcpy(&tmp_n, vn, oprsz); + } + if (((char *)vm - (char *)vd) < (uintptr_t)oprsz) { + vm = memcpy(&tmp_m, vm, oprsz); + } + if (high) { + high = oprsz >> 1; + } + + if ((high & 3) == 0) { + uint32_t *n = vn, *m = vm; + high >>= 2; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + uint64_t nn = n[H4(high + i)]; + uint64_t mm = m[H4(high + i)]; + + nn = expand_bits(nn, esz); + mm = expand_bits(mm, esz); + d[i] = nn + (mm << (1 << esz)); + } + } else { + uint8_t *n = vn, *m = vm; + uint16_t *d16 = vd; + + for (i = 0; i < oprsz / 2; i++) { + uint16_t nn = n[H1(high + i)]; + uint16_t mm = m[H1(high + i)]; + + nn = expand_bits(nn, esz); + mm = expand_bits(mm, esz); + d16[H2(i)] = nn + (mm << (1 << esz)); + } + } + } +} + +void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + int odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1) << esz; + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t l, h; + intptr_t i; + + if (oprsz <= 8) { + l = compress_bits(n[0] >> odd, esz); + h = compress_bits(m[0] >> odd, esz); + d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz); + } else { + ARMPredicateReg tmp_m; + intptr_t oprsz_16 = oprsz / 16; + + if (((char *)vm - (char *)vd) < (uintptr_t)oprsz) { + m = memcpy(&tmp_m, vm, oprsz); + } + + for (i = 0; i < oprsz_16; i++) { + l = n[2 * i + 0]; + h = n[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + d[i] = l + (h << 32); + } + + /* For VL which is not a power of 2, the results from M do not + align nicely with the uint64_t for D. Put the aligned results + from M into TMP_M and then copy it into place afterward. */ + if (oprsz & 15) { + d[i] = compress_bits(n[2 * i] >> odd, esz); + + for (i = 0; i < oprsz_16; i++) { + l = m[2 * i + 0]; + h = m[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + tmp_m.p[i] = l + (h << 32); + } + tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz); + + swap_memmove((char *)vd + oprsz / 2, &tmp_m, oprsz / 2); + } else { + for (i = 0; i < oprsz_16; i++) { + l = m[2 * i + 0]; + h = m[2 * i + 1]; + l = compress_bits(l >> odd, esz); + h = compress_bits(h >> odd, esz); + d[oprsz_16 + i] = l + (h << 32); + } + } + } +} + +void HELPER(sve_trn_p)(void *vd, void *vn, void *vm, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + uintptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + bool odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1); + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t mask; + int shr, shl; + intptr_t i; + + shl = 1 << esz; + shr = 0; + mask = even_bit_esz_masks[esz]; + if (odd) { + mask <<= shl; + shr = shl; + shl = 0; + } + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + uint64_t nn = (n[i] & mask) >> shr; + uint64_t mm = (m[i] & mask) << shl; + d[i] = nn + mm; + } +} + +/* Reverse units of 2**N bits. 
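+ * E.g. n == 3 reduces to the initial bswap64, reversing the order of the
+ * eight bytes, while n == 0 reverses all 64 individual bits.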
*/ +static uint64_t reverse_bits_64(uint64_t x, int n) +{ + int i, sh; + + x = bswap64(x); + for (i = 2, sh = 4; i >= n; i--, sh >>= 1) { + uint64_t mask = even_bit_esz_masks[i]; + x = ((x & mask) << sh) | ((x >> sh) & mask); + } + return x; +} + +static uint8_t reverse_bits_8(uint8_t x, int n) +{ + static const uint8_t mask[3] = { 0x55, 0x33, 0x0f }; + int i, sh; + + for (i = 2, sh = 4; i >= n; i--, sh >>= 1) { + x = ((x & mask[i]) << sh) | ((x >> sh) & mask[i]); + } + return x; +} + +void HELPER(sve_rev_p)(void *vd, void *vn, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + intptr_t i, oprsz_2 = oprsz / 2; + + if (oprsz <= 8) { + uint64_t l = *(uint64_t *)vn; + l = reverse_bits_64(l << (64 - 8 * oprsz), esz); + *(uint64_t *)vd = l; + } else if ((oprsz & 15) == 0) { + for (i = 0; i < oprsz_2; i += 8) { + intptr_t ih = oprsz - 8 - i; + uint64_t l = reverse_bits_64(*(uint64_t *)((char *)vn + i), esz); + uint64_t h = reverse_bits_64(*(uint64_t *)((char *)vn + ih), esz); + *(uint64_t *)((char *)vd + i) = h; + *(uint64_t *)((char *)vd + ih) = l; + } + } else { + for (i = 0; i < oprsz_2; i += 1) { + intptr_t il = H1(i); + intptr_t ih = H1(oprsz - 1 - i); + uint8_t l = reverse_bits_8(*(uint8_t *)((char *)vn + il), esz); + uint8_t h = reverse_bits_8(*(uint8_t *)((char *)vn + ih), esz); + *(uint8_t *)((char *)vd + il) = h; + *(uint8_t *)((char *)vd + ih) = l; + } + } +} + +void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1); + uint64_t *d = vd; + intptr_t i; + + if (oprsz <= 8) { + uint64_t nn = *(uint64_t *)vn; + int half = 4 * oprsz; + + nn = extract64(nn, high * half, half); + nn = expand_bits(nn, 0); + d[0] = nn; + } else { + ARMPredicateReg tmp_n; + + /* We produce output faster than we consume input. + Therefore we must be mindful of possible overlap. */ + if (((char *)vn - (char *)vd) < (uintptr_t)oprsz) { + vn = memcpy(&tmp_n, vn, oprsz); + } + if (high) { + high = oprsz >> 1; + } + + if ((high & 3) == 0) { + uint32_t *n = vn; + high >>= 2; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) { + uint64_t nn = n[H4(high + i)]; + d[i] = expand_bits(nn, 0); + } + } else { + uint16_t *d16 = vd; + uint8_t *n = vn; + + for (i = 0; i < oprsz / 2; i++) { + uint16_t nn = n[H1(high + i)]; + d16[H2(i)] = expand_bits(nn, 0); + } + } + } +} + +#define DO_ZIP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t i, oprsz_2 = oprsz / 2; \ + ARMVectorReg tmp_n, tmp_m; \ + /* We produce output faster than we consume input. \ + Therefore we must be mindful of possible overlap. 
*/ \ + if (unlikely(((char *)vn - (char *)vd) < (uintptr_t)oprsz)) { \ + vn = memcpy(&tmp_n, vn, oprsz_2); \ + } \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz_2); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(2 * i + 0)) = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)((char *)vm + H(i)); \ + } \ +} + +DO_ZIP(sve_zip_b, uint8_t, H1) +DO_ZIP(sve_zip_h, uint16_t, H1_2) +DO_ZIP(sve_zip_s, uint32_t, H1_4) +DO_ZIP(sve_zip_d, uint64_t, ) + +#define DO_UZP(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t oprsz_2 = oprsz / 2; \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + ARMVectorReg tmp_m; \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(i)) = *(TYPE *)((char *)vn + H(2 * i + odd_ofs)); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(oprsz_2 + i)) = *(TYPE *)((char *)vm + H(2 * i + odd_ofs)); \ + } \ +} + +DO_UZP(sve_uzp_b, uint8_t, H1) +DO_UZP(sve_uzp_h, uint16_t, H1_2) +DO_UZP(sve_uzp_s, uint32_t, H1_4) +DO_UZP(sve_uzp_d, uint64_t, ) + +#define DO_TRN(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \ + TYPE ae = *(TYPE *)((char *)vn + H(i + odd_ofs)); \ + TYPE be = *(TYPE *)((char *)vm + H(i + odd_ofs)); \ + *(TYPE *)((char *)vd + H(i + 0)) = ae; \ + *(TYPE *)((char *)vd + H(i + sizeof(TYPE))) = be; \ + } \ +} + +DO_TRN(sve_trn_b, uint8_t, H1) +DO_TRN(sve_trn_h, uint16_t, H1_2) +DO_TRN(sve_trn_s, uint32_t, H1_4) +DO_TRN(sve_trn_d, uint64_t, ) + +#undef DO_ZIP +#undef DO_UZP +#undef DO_TRN + +void HELPER(sve_compact_s)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc) / 4; + uint32_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = j = 0; i < opr_sz; i++) { + if (pg[H1(i / 2)] & (i & 1 ? 0x10 : 0x01)) { + d[H4(j)] = n[H4(i)]; + j++; + } + } + for (; j < opr_sz; j++) { + d[H4(j)] = 0; + } +} + +void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = j = 0; i < opr_sz; i++) { + if (pg[H1(i)] & 1) { + d[j] = n[i]; + j++; + } + } + for (; j < opr_sz; j++) { + d[j] = 0; + } +} + +/* Similar to the ARM LastActiveElement pseudocode function, except the + * result is multiplied by the element size. This includes the not found + * indication; e.g. not found for esz=3 is -8. + */ +int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + + return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz); +} + +void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) +{ + intptr_t opr_sz = simd_oprsz(desc) / 8; + int esz = simd_data(desc); + uint64_t pg, first_g, last_g, len, mask = pred_esz_masks[esz]; + intptr_t i, first_i, last_i; + ARMVectorReg tmp; + + first_i = last_i = 0; + first_g = last_g = 0; + + /* Find the extent of the active elements within VG. 
*/ + for (i = QEMU_ALIGN_UP(opr_sz, 8) - 8; i >= 0; i -= 8) { + pg = *(uint64_t *)((char *)vg + i) & mask; + if (pg) { + if (last_g == 0) { + last_g = pg; + last_i = i; + } + first_g = pg; + first_i = i; + } + } + + len = 0; + if (first_g != 0) { + first_i = first_i * 8 + ctz64(first_g); + last_i = last_i * 8 + 63 - clz64(last_g); + len = last_i - first_i + (1ULL << esz); + if (vd == vm) { + vm = memcpy(&tmp, vm, opr_sz * 8); + } + swap_memmove(vd, (char *)vn + first_i, len); + } + swap_memmove((char *)vd + len, vm, opr_sz * 8 - len); +} + +void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + uint64_t pp = expand_pred_b(pg[H1(i)]); + d[i] = (nn & pp) | (mm & ~pp); + } +} + +void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + uint64_t pp = expand_pred_h(pg[H1(i)]); + d[i] = (nn & pp) | (mm & ~pp); + } +} + +void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + uint64_t pp = expand_pred_s(pg[H1(i)]); + d[i] = (nn & pp) | (mm & ~pp); + } +} + +void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn, *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + uint64_t nn = n[i], mm = m[i]; + d[i] = (pg[H1(i)] & 1 ? nn : mm); + } +} + +/* Two operand comparison controlled by a predicate. + * ??? It is very tempting to want to be able to expand this inline + * with x86 instructions, e.g. + * + * vcmpeqw zm, zn, %ymm0 + * vpmovmskb %ymm0, %eax + * and $0x5555, %eax + * and pg, %eax + * + * or even aarch64, e.g. + * + * // mask = 4000 1000 0400 0100 0040 0010 0004 0001 + * cmeq v0.8h, zn, zm + * and v0.8h, v0.8h, mask + * addv h0, v0.8h + * and v0.8b, pg + * + * However, coming up with an abstraction that allows vector inputs and + * a scalar output, and also handles the byte-ordering of sub-uint64_t + * scalar outputs, is tricky. 
+ */ +#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \ +uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ +} + +#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) +#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) +#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) + +DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) +DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==) +DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==) +DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==) + +DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=) +DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=) +DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=) +DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=) + +DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >) +DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >) +DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >) +DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >) + +DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=) +DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=) +DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=) +DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=) + +DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >) +DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >) +DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >) +DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >) + +DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=) +DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=) +DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=) +DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=) + +#undef DO_CMP_PPZZ_B +#undef DO_CMP_PPZZ_H +#undef DO_CMP_PPZZ_S +#undef DO_CMP_PPZZ_D +#undef DO_CMP_PPZZ + +/* Similar, but the second source is "wide". 
*/ +#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \ +uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + TYPEW mm = *(TYPEW *)((char *)vm + i - 8); \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 7); \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ +} + +#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull) +#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull) + +DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==) +DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, int16_t, uint64_t, ==) +DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, int32_t, uint64_t, ==) + +DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=) +DO_CMP_PPZW_H(sve_cmpne_ppzw_h, int16_t, uint64_t, !=) +DO_CMP_PPZW_S(sve_cmpne_ppzw_s, int32_t, uint64_t, !=) + +DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >) +DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >) +DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >) + +DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=) +DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=) +DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=) + +DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >) +DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >) +DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >) + +DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=) +DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=) +DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=) + +DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <) +DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <) +DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <) + +DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=) +DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=) +DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=) + +DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <) +DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <) +DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <) + +DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=) +DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=) +DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=) + +#undef DO_CMP_PPZW_B +#undef DO_CMP_PPZW_H +#undef DO_CMP_PPZW_S +#undef DO_CMP_PPZW + +/* Similar, but the second source is immediate. 
*/ +#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \ +uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ +{ \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + TYPE mm = simd_data(desc); \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ +} + +#define DO_CMP_PPZI_B(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZI_H(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) +#define DO_CMP_PPZI_S(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) +#define DO_CMP_PPZI_D(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) + +DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) +DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==) +DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==) +DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==) + +DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=) +DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=) +DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=) +DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=) + +DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >) +DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >) +DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >) +DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >) + +DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=) +DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=) +DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=) +DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=) + +DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >) +DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >) +DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >) +DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >) + +DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=) +DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=) +DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=) +DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=) + +DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <) +DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <) +DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <) +DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <) + +DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=) +DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=) +DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=) +DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=) + +DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <) +DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <) +DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <) +DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <) + +DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=) +DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=) +DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=) +DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=) + +#undef DO_CMP_PPZI_B +#undef DO_CMP_PPZI_H +#undef DO_CMP_PPZI_S +#undef DO_CMP_PPZI_D +#undef DO_CMP_PPZI + +/* Similar to the ARM LastActive pseudocode function. */ +static bool last_active_pred(void *vd, void *vg, intptr_t oprsz) +{ + intptr_t i; + + for (i = QEMU_ALIGN_UP(oprsz, 8) - 8; i >= 0; i -= 8) { + uint64_t pg = *(uint64_t *)((char *)vg + i); + if (pg) { + return (pow2floor(pg) & *(uint64_t *)((char *)vd + i)) != 0; + } + } + return 0; +} + +/* Compute a mask into RETB that is true for all G, up to and including + * (if after) or excluding (if !after) the first G & N. + * Return true if BRK found. 
+ */ +static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, + bool brk, bool after) +{ + uint64_t b; + + if (brk) { + b = 0; + } else if ((g & n) == 0) { + /* For all G, no N are set; break not found. */ + b = g; + } else { + /* Break somewhere in N. Locate it. */ + b = g & n; /* guard true, pred true */ +#ifdef _MSC_VER + b = b & (0 - b); /* first such */ +#else + b = b & -b; /* first such */ +#endif + if (after) { + b = b | (b - 1); /* break after same */ + } else { + b = b - 1; /* break before same */ + } + brk = true; + } + + *retb = b; + return brk; +} + +/* Compute a zeroing BRK. */ +static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = this_b & this_g; + } +} + +/* Likewise, but also compute flags. */ +static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + uint32_t flags = PREDTEST_INIT; + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_d, this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = this_d = this_b & this_g; + flags = iter_predtest_fwd(this_d, this_g, flags); + } + return flags; +} + +/* Compute a merging BRK. */ +static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = (this_b & this_g) | (d[i] & ~this_g); + } +} + +/* Likewise, but also compute flags. */ +static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g, + intptr_t oprsz, bool after) +{ + uint32_t flags = PREDTEST_INIT; + bool brk = false; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t this_b, this_d = d[i], this_g = g[i]; + + brk = compute_brk(&this_b, n[i], this_g, brk, after); + d[i] = this_d = (this_b & this_g) | (this_d & ~this_g); + flags = iter_predtest_fwd(this_d, this_g, flags); + } + return flags; +} + +static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz) +{ + /* It is quicker to zero the whole predicate than loop on OPRSZ. + * The compiler should turn this into 4 64-bit integer stores.
+ */ + memset(d, 0, sizeof(ARMPredicateReg)); + return PREDTEST_INIT; +} + +void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + compute_brk_z(vd, vm, vg, oprsz, true); + } else { + do_zero(vd, oprsz); + } +} + +uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + return compute_brks_z(vd, vm, vg, oprsz, true); + } else { + return do_zero(vd, oprsz); + } +} + +void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + compute_brk_z(vd, vm, vg, oprsz, false); + } else { + do_zero(vd, oprsz); + } +} + +uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg, + uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + if (last_active_pred(vn, vg, oprsz)) { + return compute_brks_z(vd, vm, vg, oprsz, false); + } else { + return do_zero(vd, oprsz); + } +} + +void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_z(vd, vn, vg, oprsz, true); +} + +uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_z(vd, vn, vg, oprsz, true); +} + +void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_z(vd, vn, vg, oprsz, false); +} + +uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_z(vd, vn, vg, oprsz, false); +} + +void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_m(vd, vn, vg, oprsz, true); +} + +uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_m(vd, vn, vg, oprsz, true); +} + +void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + compute_brk_m(vd, vn, vg, oprsz, false); +} + +uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + return compute_brks_m(vd, vn, vg, oprsz, false); +} + +void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + + if (!last_active_pred(vn, vg, oprsz)) { + do_zero(vd, oprsz); + } +} + +/* As if PredTest(Ones(PL), D, esz). 
*/ +static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz, + uint64_t esz_mask) +{ + uint32_t flags = PREDTEST_INIT; + intptr_t i; + + for (i = 0; i < oprsz / 8; i++) { + flags = iter_predtest_fwd(d->p[i], esz_mask, flags); + } + if (oprsz & 7) { + uint64_t mask = ~(0xffffffffffffffffULL << (8 * (oprsz & 7))); + flags = iter_predtest_fwd(d->p[i], esz_mask & mask, flags); + } + return flags; +} + +uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + + if (last_active_pred(vn, vg, oprsz)) { + return predtest_ones(vd, oprsz, -1); + } else { + return do_zero(vd, oprsz); + } +} + +uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc) +{ + intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz]; + intptr_t i; + + for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) { + uint64_t t = n[i] & g[i] & mask; + sum += ctpop64(t); + } + return sum; +} + +uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc) +{ + uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2; + intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2); + uint64_t esz_mask = pred_esz_masks[esz]; + ARMPredicateReg *d = vd; + uint32_t flags; + intptr_t i; + + /* Begin with a zero predicate register. */ + flags = do_zero(d, oprsz); + if (count == 0) { + return flags; + } + + /* Set all of the requested bits. */ + for (i = 0; i < count / 64; ++i) { + d->p[i] = esz_mask; + } + if (count & 63) { + d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask; + } + + return predtest_ones(d, oprsz, esz_mask); +} + +/* Recursive reduction on a function; + * C.f. the ARM ARM function ReducePredicated. + * + * While it would be possible to write this without the DATA temporary, + * it is much simpler to process the predicate register this way. + * The recursion is bounded to depth 7 (128 fp16 elements), so there's + * little to gain with a more complex non-recursive form. + */ +#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ +static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ +{ \ + if (n == 1) { \ + return *data; \ + } else { \ + uintptr_t half = n / 2; \ + TYPE lo = NAME##_reduce(data, status, half); \ + TYPE hi = NAME##_reduce(data + half, status, half); \ + return TYPE##_##FUNC(lo, hi, status); \ + } \ +} \ +uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ +{ \ + uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ + TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + for (i = 0; i < oprsz; ) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)data + i) = (pg & 1 ? nn : IDENT); \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + for (; i < maxsz; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)data + i) = IDENT; \ + } \ + return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ +} + +DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) +DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) +DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) + +/* Identity is floatN_default_nan, without the function call. 
*/ +DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) +DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) +DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) + +DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) +DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) +DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) + +DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) +DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) +DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) + +DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) +DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) +DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) + +#undef DO_REDUCE + +uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, + void *status, uint32_t desc) +{ + intptr_t i = 0, opr_sz = simd_oprsz(desc); + float16 result = nn; + + do { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + float16 mm = *(float16 *)((char *)vm + H1_2(i)); + result = float16_add(result, mm, status); + } + i += sizeof(float16), pg >>= sizeof(float16); + } while (i & 15); + } while (i < opr_sz); + + return result; +} + +uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, + void *status, uint32_t desc) +{ + intptr_t i = 0, opr_sz = simd_oprsz(desc); + float32 result = nn; + + do { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + float32 mm = *(float32 *)((char *)vm + H1_2(i)); + result = float32_add(result, mm, status); + } + i += sizeof(float32), pg >>= sizeof(float32); + } while (i & 15); + } while (i < opr_sz); + + return result; +} + +uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, + void *status, uint32_t desc) +{ + intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8; + uint64_t *m = vm; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i++) { + if (pg[H1(i)] & 1) { + nn = float64_add(nn, m[i], status); + } + } + + return nn; +} + +/* Fully general three-operand expander, controlled by a predicate, + * With the extra float_status parameter. 
+ */ +#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ +} + +DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) +DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) +DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) + +DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) +DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) +DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) + +DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) +DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) +DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) + +DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) +DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) +DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) + +DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) +DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) +DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) + +DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) +DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) +DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) + +DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) +DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) +DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) + +DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) +DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) +DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) + +static inline float16 abd_h(float16 a, float16 b, float_status *s) +{ + return float16_abs(float16_sub(a, b, s)); +} + +static inline float32 abd_s(float32 a, float32 b, float_status *s) +{ + return float32_abs(float32_sub(a, b, s)); +} + +static inline float64 abd_d(float64 a, float64 b, float_status *s) +{ + return float64_abs(float64_sub(a, b, s)); +} + +DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) +DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) +DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) + +static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) +{ + int b_int = MIN(MAX(b, INT_MIN), INT_MAX); + return float64_scalbn(a, b_int, s); +} + +DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn) +DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn) +DO_ZPZZ_FP(sve_fscalbn_d, int64_t, , scalbn_d) + +DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh) +DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs) +DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) + +#undef DO_ZPZZ_FP + +/* Three-operand expander, with one scalar operand, controlled by + * a predicate, with the extra float_status parameter. 
+ */ +#define DO_ZPZS_FP(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + TYPE mm = scalar; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ +} + +DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add) +DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add) +DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) + +DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub) +DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub) +DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) + +DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul) +DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul) +DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) + +static inline float16 subr_h(float16 a, float16 b, float_status *s) +{ + return float16_sub(b, a, s); +} + +static inline float32 subr_s(float32 a, float32 b, float_status *s) +{ + return float32_sub(b, a, s); +} + +static inline float64 subr_d(float64 a, float64 b, float_status *s) +{ + return float64_sub(b, a, s); +} + +DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h) +DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s) +DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) + +DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum) +DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum) +DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) + +DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum) +DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum) +DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) + +DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max) +DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max) +DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) + +DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) +DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) +DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) + +/* Fully general two-operand expander, controlled by a predicate, + * With the extra float_status parameter. + */ +#define DO_ZPZ_FP(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ +} + +/* SVE fp16 conversions always use IEEE mode. Like AdvSIMD, they ignore + * FZ16. When converting from fp16, this affects flushing input denormals; + * when converting to fp16, this affects flushing output denormals. 
+ */ +static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) +{ + bool save = get_flush_inputs_to_zero(fpst); + float32 ret; + + set_flush_inputs_to_zero(false, fpst); + ret = float16_to_float32(f, true, fpst); + set_flush_inputs_to_zero(save, fpst); + return ret; +} + +static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) +{ + bool save = get_flush_inputs_to_zero(fpst); + float64 ret; + + set_flush_inputs_to_zero(false, fpst); + ret = float16_to_float64(f, true, fpst); + set_flush_inputs_to_zero(save, fpst); + return ret; +} + +static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) +{ + bool save = get_flush_to_zero(fpst); + float16 ret; + + set_flush_to_zero(false, fpst); + ret = float32_to_float16(f, true, fpst); + set_flush_to_zero(save, fpst); + return ret; +} + +static inline float16 sve_f64_to_f16(float64 f, float_status *fpst) +{ + bool save = get_flush_to_zero(fpst); + float16 ret; + + set_flush_to_zero(false, fpst); + ret = float64_to_float16(f, true, fpst); + set_flush_to_zero(save, fpst); + return ret; +} + +static inline int16_t vfp_float16_to_int16_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_int16_round_to_zero(f, s); +} + +static inline int64_t vfp_float16_to_int64_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_int64_round_to_zero(f, s); +} + +static inline int64_t vfp_float32_to_int64_rtz(float32 f, float_status *s) +{ + if (float32_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float32_to_int64_round_to_zero(f, s); +} + +static inline int64_t vfp_float64_to_int64_rtz(float64 f, float_status *s) +{ + if (float64_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float64_to_int64_round_to_zero(f, s); +} + +static inline uint16_t vfp_float16_to_uint16_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_uint16_round_to_zero(f, s); +} + +static inline uint64_t vfp_float16_to_uint64_rtz(float16 f, float_status *s) +{ + if (float16_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float16_to_uint64_round_to_zero(f, s); +} + +static inline uint64_t vfp_float32_to_uint64_rtz(float32 f, float_status *s) +{ + if (float32_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float32_to_uint64_round_to_zero(f, s); +} + +static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s) +{ + if (float64_is_any_nan(f)) { + float_raise(float_flag_invalid, s); + return 0; + } + return float64_to_uint64_round_to_zero(f, s); +} + +DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16) +DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32) +DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) +DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) +DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) +DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) + +DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz) +DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh) +DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs) +DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) +DO_ZPZ_FP(sve_fcvtzs_dd, 
uint64_t, , vfp_float64_to_int64_rtz) + +DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz) +DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh) +DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs) +DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) +DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) + +DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth) +DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints) +DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) + +DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int) +DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int) +DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) + +DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16) +DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32) +DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) + +DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt) +DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt) +DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) + +DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16) +DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16) +DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32) +DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) +DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) +DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) +DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) + +DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16) +DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16) +DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32) +DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) +DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) +DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) +DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) + +#undef DO_ZPZ_FP + +static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint16_t neg1, uint16_t neg3) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 2; + if (likely((pg >> (i & 63)) & 1)) { + float16 e1, e2, e3, r; + + e1 = *(uint16_t *)((char *)vn + H1_2(i)) ^ neg1; + e2 = *(uint16_t *)((char *)vm + H1_2(i)); + e3 = *(uint16_t *)((char *)va + H1_2(i)) ^ neg3; + r = float16_muladd(e1, e2, e3, 0, status); + *(uint16_t *)((char *)vd + H1_2(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); +} + +void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); +} + +void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); +} + +static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint32_t neg1, uint32_t neg3) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { 
+ uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 4; + if (likely((pg >> (i & 63)) & 1)) { + float32 e1, e2, e3, r; + + e1 = *(uint32_t *)((char *)vn + H1_4(i)) ^ neg1; + e2 = *(uint32_t *)((char *)vm + H1_4(i)); + e3 = *(uint32_t *)((char *)va + H1_4(i)) ^ neg3; + r = float32_muladd(e1, e2, e3, 0, status); + *(uint32_t *)((char *)vd + H1_4(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); +} + +void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); +} + +void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); +} + +static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint64_t neg1, uint64_t neg3) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 8; + if (likely((pg >> (i & 63)) & 1)) { + float64 e1, e2, e3, r; + + e1 = *(uint64_t *)((char *)vn + i) ^ neg1; + e2 = *(uint64_t *)((char *)vm + i); + e3 = *(uint64_t *)((char *)va + i) ^ neg3; + r = float64_muladd(e1, e2, e3, 0, status); + *(uint64_t *)((char *)vd + i) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); +} + +void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); +} + +void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); +} + +/* Two operand floating-point comparison controlled by a predicate. + * Unlike the integer version, we are not allowed to optimistically + * compare operands, since the comparison may have side effects wrt + * the FPSR. 
+ */ +#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= OP(TYPE, nn, mm, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ +} + +#define DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZZ_S(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZZ_D(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ + DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ_S(NAME, OP) \ + DO_FPCMP_PPZZ_D(NAME, OP) + +#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 +#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 +#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0 +#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0 +#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 +#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 +#define DO_FCMUO(TYPE, X, Y, ST) \ + TYPE##_compare_quiet(X, Y, ST) == float_relation_unordered +#define DO_FACGE(TYPE, X, Y, ST) \ + TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) <= 0 +#define DO_FACGT(TYPE, X, Y, ST) \ + TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) < 0 + +DO_FPCMP_PPZZ_ALL(sve_fcmge, DO_FCMGE) +DO_FPCMP_PPZZ_ALL(sve_fcmgt, DO_FCMGT) +DO_FPCMP_PPZZ_ALL(sve_fcmeq, DO_FCMEQ) +DO_FPCMP_PPZZ_ALL(sve_fcmne, DO_FCMNE) +DO_FPCMP_PPZZ_ALL(sve_fcmuo, DO_FCMUO) +DO_FPCMP_PPZZ_ALL(sve_facge, DO_FACGE) +DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT) + +#undef DO_FPCMP_PPZZ_ALL +#undef DO_FPCMP_PPZZ_D +#undef DO_FPCMP_PPZZ_S +#undef DO_FPCMP_PPZZ_H +#undef DO_FPCMP_PPZZ + +/* One operand floating-point comparison against zero, controlled + * by a predicate. + */ +#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if ((pg >> (i & 63)) & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= OP(TYPE, nn, 0, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ +} + +#define DO_FPCMP_PPZ0_H(NAME, OP) \ + DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZ0_S(NAME, OP) \ + DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZ0_D(NAME, OP) \ + DO_FPCMP_PPZ0(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZ0_ALL(NAME, OP) \ + DO_FPCMP_PPZ0_H(NAME, OP) \ + DO_FPCMP_PPZ0_S(NAME, OP) \ + DO_FPCMP_PPZ0_D(NAME, OP) + +DO_FPCMP_PPZ0_ALL(sve_fcmge0, DO_FCMGE) +DO_FPCMP_PPZ0_ALL(sve_fcmgt0, DO_FCMGT) +DO_FPCMP_PPZ0_ALL(sve_fcmle0, DO_FCMLE) +DO_FPCMP_PPZ0_ALL(sve_fcmlt0, DO_FCMLT) +DO_FPCMP_PPZ0_ALL(sve_fcmeq0, DO_FCMEQ) +DO_FPCMP_PPZ0_ALL(sve_fcmne0, DO_FCMNE) + +/* FP Trig Multiply-Add. 
*/ + +void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +{ + static const float16 coeff[16] = { + 0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16); + intptr_t x = simd_data(desc); + float16 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { + float16 mm = m[i]; + intptr_t xx = x; + if (float16_is_neg(mm)) { + mm = float16_abs(mm); + xx += 8; + } + d[i] = float16_muladd(n[i], mm, coeff[xx], 0, vs); + } +} + +void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +{ + static const float32 coeff[16] = { + 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, + 0x36369d6d, 0x00000000, 0x00000000, 0x00000000, + 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705, + 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); + intptr_t x = simd_data(desc); + float32 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { + float32 mm = m[i]; + intptr_t xx = x; + if (float32_is_neg(mm)) { + mm = float32_abs(mm); + xx += 8; + } + d[i] = float32_muladd(n[i], mm, coeff[xx], 0, vs); + } +} + +void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) +{ + static const float64 coeff[16] = { + 0x3ff0000000000000ull, 0xbfc5555555555543ull, + 0x3f8111111110f30cull, 0xbf2a01a019b92fc6ull, + 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull, + 0x3de5d8408868552full, 0x0000000000000000ull, + 0x3ff0000000000000ull, 0xbfe0000000000000ull, + 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull, + 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull, + 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, + }; + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); + intptr_t x = simd_data(desc); + float64 *d = vd, *n = vn, *m = vm; + for (i = 0; i < opr_sz; i++) { + float64 mm = m[i]; + intptr_t xx = x; + if (float64_is_neg(mm)) { + mm = float64_abs(mm); + xx += 8; + } + d[i] = float64_muladd(n[i], mm, coeff[xx], 0, vs); + } +} + +/* + * FP Complex Add + */ + +void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, + void *vs, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + uint64_t *g = vg; + float16 neg_imag = float16_set_sign(0, simd_data(desc)); + float16 neg_real = float16_chs(neg_imag); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float16 e0, e1, e2, e3; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float16); + i -= 2 * sizeof(float16); + + e0 = *(float16 *)((char *)vn + H1_2(i)); + e1 = *(float16 *)((char *)vm + H1_2(j)) ^ neg_real; + e2 = *(float16 *)((char *)vn + H1_2(j)); + e3 = *(float16 *)((char *)vm + H1_2(i)) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + *(float16 *)((char *)vd + H1_2(i)) = float16_add(e0, e1, vs); + } + if (likely((pg >> (j & 63)) & 1)) { + *(float16 *)((char *)vd + H1_2(j)) = float16_add(e2, e3, vs); + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, + void *vs, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + uint64_t *g = vg; + float32 neg_imag = float32_set_sign(0, simd_data(desc)); + float32 neg_real = float32_chs(neg_imag); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float32 e0, e1, e2, e3; + + /* I holds the real index; J holds the imag index. 
*/ + j = i - sizeof(float32); + i -= 2 * sizeof(float32); + + e0 = *(float32 *)((char *)vn + H1_2(i)); + e1 = *(float32 *)((char *)vm + H1_2(j)) ^ neg_real; + e2 = *(float32 *)((char *)vn + H1_2(j)); + e3 = *(float32 *)((char *)vm + H1_2(i)) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + *(float32 *)((char *)vd + H1_2(i)) = float32_add(e0, e1, vs); + } + if (likely((pg >> (j & 63)) & 1)) { + *(float32 *)((char *)vd + H1_2(j)) = float32_add(e2, e3, vs); + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, + void *vs, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + uint64_t *g = vg; + float64 neg_imag = float64_set_sign(0, simd_data(desc)); + float64 neg_real = float64_chs(neg_imag); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float64 e0, e1, e2, e3; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float64); + i -= 2 * sizeof(float64); + + e0 = *(float64 *)((char *)vn + H1_2(i)); + e1 = *(float64 *)((char *)vm + H1_2(j)) ^ neg_real; + e2 = *(float64 *)((char *)vn + H1_2(j)); + e3 = *(float64 *)((char *)vm + H1_2(i)) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + *(float64 *)((char *)vd + H1_2(i)) = float64_add(e0, e1, vs); + } + if (likely((pg >> (j & 63)) & 1)) { + *(float64 *)((char *)vd + H1_2(j)) = float64_add(e2, e3, vs); + } + } while (i & 63); + } while (i != 0); +} + +/* + * FP Complex Multiply + */ + +void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + unsigned rot = simd_data(desc); + bool flip = rot & 1; + float16 neg_imag, neg_real; + uint64_t *g = vg; + + neg_imag = float16_set_sign(0, (rot & 2) != 0); + neg_real = float16_set_sign(0, rot == 1 || rot == 2); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float16 e1, e2, e3, e4, nr, ni, mr, mi, d; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float16); + i -= 2 * sizeof(float16); + + nr = *(float16 *)((char *)vn + H1_2(i)); + ni = *(float16 *)((char *)vn + H1_2(j)); + mr = *(float16 *)((char *)vm + H1_2(i)); + mi = *(float16 *)((char *)vm + H1_2(j)); + + e2 = (flip ? ni : nr); + e1 = (flip ? mi : mr) ^ neg_real; + e4 = e2; + e3 = (flip ? mr : mi) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + d = *(float16 *)((char *)va + H1_2(i)); + d = float16_muladd(e2, e1, d, 0, status); + *(float16 *)((char *)vd + H1_2(i)) = d; + } + if (likely((pg >> (j & 63)) & 1)) { + d = *(float16 *)((char *)va + H1_2(j)); + d = float16_muladd(e4, e3, d, 0, status); + *(float16 *)((char *)vd + H1_2(j)) = d; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + unsigned rot = simd_data(desc); + bool flip = rot & 1; + float32 neg_imag, neg_real; + uint64_t *g = vg; + + neg_imag = float32_set_sign(0, (rot & 2) != 0); + neg_real = float32_set_sign(0, rot == 1 || rot == 2); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float32 e1, e2, e3, e4, nr, ni, mr, mi, d; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float32); + i -= 2 * sizeof(float32); + + nr = *(float32 *)((char *)vn + H1_2(i)); + ni = *(float32 *)((char *)vn + H1_2(j)); + mr = *(float32 *)((char *)vm + H1_2(i)); + mi = *(float32 *)((char *)vm + H1_2(j)); + + e2 = (flip ? ni : nr); + e1 = (flip ? mi : mr) ^ neg_real; + e4 = e2; + e3 = (flip ? 
mr : mi) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + d = *(float32 *)((char *)va + H1_2(i)); + d = float32_muladd(e2, e1, d, 0, status); + *(float32 *)((char *)vd + H1_2(i)) = d; + } + if (likely((pg >> (j & 63)) & 1)) { + d = *(float32 *)((char *)va + H1_2(j)); + d = float32_muladd(e4, e3, d, 0, status); + *(float32 *)((char *)vd + H1_2(j)) = d; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, + void *vg, void *status, uint32_t desc) +{ + intptr_t j, i = simd_oprsz(desc); + unsigned rot = simd_data(desc); + bool flip = rot & 1; + float64 neg_imag, neg_real; + uint64_t *g = vg; + + neg_imag = float64_set_sign(0, (rot & 2) != 0); + neg_real = float64_set_sign(0, rot == 1 || rot == 2); + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + float64 e1, e2, e3, e4, nr, ni, mr, mi, d; + + /* I holds the real index; J holds the imag index. */ + j = i - sizeof(float64); + i -= 2 * sizeof(float64); + + nr = *(float64 *)((char *)vn + H1_2(i)); + ni = *(float64 *)((char *)vn + H1_2(j)); + mr = *(float64 *)((char *)vm + H1_2(i)); + mi = *(float64 *)((char *)vm + H1_2(j)); + + e2 = (flip ? ni : nr); + e1 = (flip ? mi : mr) ^ neg_real; + e4 = e2; + e3 = (flip ? mr : mi) ^ neg_imag; + + if (likely((pg >> (i & 63)) & 1)) { + d = *(float64 *)((char *)va + H1_2(i)); + d = float64_muladd(e2, e1, d, 0, status); + *(float64 *)((char *)vd + H1_2(i)) = d; + } + if (likely((pg >> (j & 63)) & 1)) { + d = *(float64 *)((char *)va + H1_2(j)); + d = float64_muladd(e4, e3, d, 0, status); + *(float64 *)((char *)vd + H1_2(j)) = d; + } + } while (i & 63); + } while (i != 0); +} + +/* + * Load contiguous data, protected by a governing predicate. + */ + +/* + * Load one element into @vd + @reg_off from @host. + * The controlling predicate is known to be true. + */ +typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); + +/* + * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). + * The controlling predicate is known to be true. + */ +typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, uintptr_t retaddr); + +/* + * Generate the above primitives. 
+ */ + +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ \ + TYPEM val = HOST(host); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ +} + +#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ HOST(host, (TYPEM)*(TYPEE *)(vd + H(reg_off))); } + +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + *(TYPEE *)(vd + H(reg_off)) = \ + (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ +} + +#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + TLB(env, useronly_clean_ptr(addr), \ + (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ +} + + +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ + DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) + +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) +DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) +DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) + +#define DO_ST_PRIM_1(NAME, H, TE, TM) \ + DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ + DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) + +DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) +DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) +DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) +DO_ST_PRIM_1(bd, , uint64_t, uint8_t) + +#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ + DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ + DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ + DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ + DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) + +#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ + DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ + DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ + DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ + DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) + +DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) +DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) +DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) +DO_LD_PRIM_2(hdu, , uint64_t, uint16_t, lduw) +DO_LD_PRIM_2(hds, , uint64_t, int16_t, lduw) + +DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) +DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) +DO_ST_PRIM_2(hd, , uint64_t, uint16_t, stw) + +DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) +DO_LD_PRIM_2(sdu, , uint64_t, uint32_t, ldl) +DO_LD_PRIM_2(sds, , uint64_t, int32_t, ldl) + +DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) +DO_ST_PRIM_2(sd, , uint64_t, uint32_t, stl) + +DO_LD_PRIM_2(dd, , uint64_t, uint64_t, ldq) +DO_ST_PRIM_2(dd, , uint64_t, uint64_t, stq) + +#undef DO_LD_TLB +#undef DO_ST_TLB +#undef DO_LD_HOST +#undef DO_LD_PRIM_1 +#undef DO_ST_PRIM_1 +#undef DO_LD_PRIM_2 +#undef DO_ST_PRIM_2 + +/* + * Skip through a sequence of inactive elements in the guarding predicate @vg, + * beginning at @reg_off bounded by @reg_max. Return the offset of the active + * element >= @reg_off, or @reg_max if there were no active elements at all. 
+ */ +static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, + intptr_t reg_max, int esz) +{ + uint64_t pg_mask = pred_esz_masks[esz]; + uint64_t pg = (vg[reg_off >> 6] & pg_mask) >> (reg_off & 63); + + /* In normal usage, the first element is active. */ + if (likely(pg & 1)) { + return reg_off; + } + + if (pg == 0) { + reg_off &= -64; + do { + reg_off += 64; + if (unlikely(reg_off >= reg_max)) { + /* The entire predicate was false. */ + return reg_max; + } + pg = vg[reg_off >> 6] & pg_mask; + } while (pg == 0); + } + reg_off += ctz64(pg); + + /* We should never see an out of range predicate bit set. */ + tcg_debug_assert(reg_off < reg_max); + return reg_off; +} + +/* + * Resolve the guest virtual address to info->host and info->flags. + * If @nofault, return false if the page is invalid, otherwise + * exit via page fault exception. + */ + +typedef struct { + void *host; + int flags; + MemTxAttrs attrs; +} SVEHostPage; + +static bool sve_probe_page(SVEHostPage *info, bool nofault, + CPUARMState *env, target_ulong addr, + int mem_off, MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + int flags; + + addr += mem_off; + + /* + * User-only currently always issues with TBI. See the comment + * above useronly_clean_ptr. Usually we clean this top byte away + * during translation, but we can't do that for e.g. vector + imm + * addressing modes. + * + * We currently always enable TBI for user-only, and do not provide + * a way to turn it off. So clean the pointer unconditionally here, + * rather than look it up here, or pass it down from above. + */ + addr = useronly_clean_ptr(addr); + + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, + &info->host, retaddr); + info->flags = flags; + + if (flags & TLB_INVALID_MASK) { + g_assert(nofault); + return false; + } + + /* Ensure that info->host[] is relative to addr, not addr + mem_off. */ + info->host -= mem_off; + + /* + * Find the iotlbentry for addr and return the transaction attributes. + * This *must* be present in the TLB because we just found the mapping. + */ + { + uintptr_t index = tlb_index(env, mmu_idx, addr); + +# ifdef CONFIG_DEBUG_TCG + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong comparator = (access_type == MMU_DATA_LOAD + ? entry->addr_read + : tlb_addr_write(entry)); + g_assert(tlb_hit(comparator, addr)); +# endif + + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + info->attrs = iotlbentry->attrs; + } + + return true; +} + + +/* + * Analyse contiguous data, protected by a governing predicate. + */ + +typedef enum { + FAULT_NO, + FAULT_FIRST, + FAULT_ALL, +} SVEContFault; + +typedef struct { + /* + * First and last element wholly contained within the two pages. + * mem_off_first[0] and reg_off_first[0] are always set >= 0. + * reg_off_last[0] may be < 0 if the first element crosses pages. + * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] + * are set >= 0 only if there are complete elements on a second page. + * + * The reg_off_* offsets are relative to the internal vector register. + * The mem_off_first offset is relative to the memory address; the + * two offsets are different when a load operation extends, a store + * operation truncates, or for multi-register operations. + */ + int16_t mem_off_first[2]; + int16_t reg_off_first[2]; + int16_t reg_off_last[2]; + + /* + * One element that is misaligned and spans both pages, + * or -1 if there is no such active element. 
+ */ + int16_t mem_off_split; + int16_t reg_off_split; + + /* + * The byte offset at which the entire operation crosses a page boundary. + * Set >= 0 if and only if the entire operation spans two pages. + */ + int16_t page_split; + + /* TLB data for the two pages. */ + SVEHostPage page[2]; +} SVEContLdSt; + +/* + * Common helper for all contiguous one-register predicated loads. + */ +static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + ARMVectorReg scratch; + void *host; + intptr_t split, reg_off, mem_off; + + /* Find the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. + */ + split = max_for_page(env->uc, addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + clear_helper_retaddr(); + /* After having taken any fault, zero leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } + + /* + * Perform the predicated read into a temporary, thus ensuring + * if the load of the last element faults, Vd is not modified. + */ + memset(&scratch, 0, reg_max); + goto start; + while (1) { + reg_off = find_next_active(vg, reg_off, reg_max, esz); + if (reg_off >= reg_max) { + break; + } + mem_off = reg_off >> diffsz; + split = max_for_page(env->uc, addr, mem_off, mem_max); + + start: + if (split - mem_off >= (1ULL << msz)) { + /* At least one whole element on this page. */ + host = tlb_vaddr_to_host(env, addr + mem_off, + MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(&scratch, vg, (char *)host - mem_off, + mem_off, split); + reg_off = mem_off << diffsz; + continue; + } + } + + /* + * Perform one normal read. This may fault, longjmping out to the + * main loop in order to raise an exception. It may succeed, and + * as a side-effect load the TLB entry for the next round. Finally, + * in the extremely unlikely case we're performing this operation + * on I/O memory, it may succeed but not bring in the TLB entry. + * But even then we have still made forward progress. 
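+ * Since reg_off advances by at least one element on every path through the
+ * loop body, the enclosing loop is still guaranteed to terminate.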
+ */ + tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr); + reg_off += 1ULL << esz; + } + + clear_helper_retaddr(); + memcpy(vd, &scratch, reg_max); +} + +#define DO_LD1_1(NAME, ESZ) \ +void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ +} + +#define DO_LD1_2(NAME, ESZ, MSZ) \ +void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +} + +DO_LD1_1(ld1bb, 0) +DO_LD1_1(ld1bhu, 1) +DO_LD1_1(ld1bhs, 1) +DO_LD1_1(ld1bsu, 2) +DO_LD1_1(ld1bss, 2) +DO_LD1_1(ld1bdu, 3) +DO_LD1_1(ld1bds, 3) + +DO_LD1_2(ld1hh, 1, 1) +DO_LD1_2(ld1hsu, 2, 1) +DO_LD1_2(ld1hss, 2, 1) +DO_LD1_2(ld1hdu, 3, 1) +DO_LD1_2(ld1hds, 3, 1) + +DO_LD1_2(ld1ss, 2, 2) +DO_LD1_2(ld1sdu, 3, 2) +DO_LD1_2(ld1sds, 3, 2) + +DO_LD1_2(ld1dd, 3, 3) + +#undef DO_LD1_1 +#undef DO_LD1_2 + +/* + * Common helpers for all contiguous 2,3,4-register predicated loads. + */ +static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[2] = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + } + i += size, pg >>= size; + addr += 2 * size; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); +} + +static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[3] = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + } + i += size, pg >>= size; + addr += 3 * size; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. 
*/ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); +} + +static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, int size, uintptr_t ra, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch[4] = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, &scratch[0], i, addr, oi, ra); + tlb_fn(env, &scratch[1], i, addr + size, oi, ra); + tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); + tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra); + } + i += size, pg >>= size; + addr += 4 * size; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); + memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); + memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz); +} + +#define DO_LDN_1(N) \ +void QEMU_FLATTEN HELPER(sve_ld##N##bb_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \ +} + +#define DO_LDN_2(N, SUFF, SIZE) \ +void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_le_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_le_tlb); \ +} \ +void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_be_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ + sve_ld1##SUFF##_be_tlb); \ +} + +DO_LDN_1(2) +DO_LDN_1(3) +DO_LDN_1(4) + +DO_LDN_2(2, hh, 2) +DO_LDN_2(3, hh, 2) +DO_LDN_2(4, hh, 2) + +DO_LDN_2(2, ss, 4) +DO_LDN_2(3, ss, 4) +DO_LDN_2(4, ss, 4) + +DO_LDN_2(2, dd, 8) +DO_LDN_2(3, dd, 8) +DO_LDN_2(4, dd, 8) + +#undef DO_LDN_1 +#undef DO_LDN_2 + +/* + * Load contiguous data, first-fault and no-fault. + * + * For user-only, one could argue that we should hold the mmap_lock during + * the operation so that there is no race between page_check_range and the + * load operation. However, unmapping pages out from under a running thread + * is extraordinarily unlikely. This theoretical race condition also affects + * linux-user/ in its get_user/put_user macros. + * + * TODO: Construct some helpers, written in assembly, that interact with + * handle_cpu_signal to produce memory ops which can properly report errors + * without racing. + */ + +/* Fault on byte I. All bits in FFR from I are cleared. The vector + * result from I is CONSTRAINED UNPREDICTABLE; we choose the MERGE + * option, which leaves subsequent data unchanged. + */ +static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) +{ + uint64_t *ffr = env->vfp.pregs[FFR_PRED_NUM].p; + + if (i & 63) { + ffr[i / 64] &= MAKE_64BIT_MASK(0, i & 63); + i = ROUND_UP(i, 64); + } + for (; i < oprsz; i += 64) { + ffr[i / 64] = 0; + } +} + +/* + * Common helper for all contiguous first-fault loads. 
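+ * The first active element is loaded normally and may fault; later elements
+ * are only loaded via the non-faulting host fast path, and record_fault()
+ * clears the FFR from the first element that could not be loaded that way.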
+ */ +static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, + sve_ld1_host_fn *host_fn, + sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + intptr_t split, reg_off, mem_off; + void *host; + + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } + mem_off = reg_off >> diffsz; + set_helper_retaddr(retaddr); + + /* + * If the (remaining) load is entirely within a single page, then: + * For softmmu, and the tlb hits, then no faults will occur; + * For user-only, either the first load will fault or none will. + * We can thus perform the load directly to the destination and + * Vd will be unmodified on any exception path. + */ + split = max_for_page(env->uc, addr, mem_off, mem_max); + if (likely(split == mem_max)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (test_host_page(host)) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); + tcg_debug_assert(mem_off == mem_max); + clear_helper_retaddr(); + /* After any fault, zero any leading inactive elements. */ + swap_memzero(vd, reg_off); + return; + } + } + + /* + * Perform one normal read, which will fault or not. + * But it is likely to bring the page into the tlb. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr); + + /* After any fault, zero any leading predicated false elts. */ + swap_memzero(vd, reg_off); + mem_off += 1ULL << msz; + reg_off += 1ULL << esz; + + /* Try again to read the balance of the page. */ + split = max_for_page(env->uc, addr, mem_off - 1, mem_max); + if (split >= (1ULL << msz)) { + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + if (host) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } + } + + clear_helper_retaddr(); + record_fault(env, reg_off, reg_max); +} + +/* + * Common helper for all contiguous no-fault loads. + */ +static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const int esz, const int msz, + sve_ld1_host_fn *host_fn) +{ + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + void *vd = &env->vfp.zregs[rd]; + const int diffsz = esz - msz; + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t mem_max = reg_max >> diffsz; + const int mmu_idx = cpu_mmu_index(env, false); + intptr_t split, reg_off, mem_off; + void *host; + + /* There will be no fault, so we may modify in advance. */ + memset(vd, 0, reg_max); + + /* Skip to the first active element. */ + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off == reg_max)) { + /* The entire predicate was false; no load occurs. */ + return; + } + mem_off = reg_off >> diffsz; + + /* + * If the address is not in the TLB, we have no way to bring the + * entry into the TLB without also risking a fault. Note that + * the corollary is that we never load from an address not in RAM. + * + * This last is out of spec, in a weird corner case. 
+ * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory + * must not actually hit the bus -- it returns UNKNOWN data instead. + * But if you map non-RAM with Normal memory attributes and do a NF + * load then it should access the bus. (Nobody ought actually do this + * in the real world, obviously.) + * + * Then there are the annoying special cases with watchpoints... + * TODO: Add a form of non-faulting loads using cc->tlb_fill(probe=true). + */ + host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); + split = max_for_page(env->uc, addr, mem_off, mem_max); + if (host && split >= (1ULL << msz)) { + mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); + reg_off = mem_off << diffsz; + } + + record_fault(env, reg_off, reg_max); +} + +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ +void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ +} + +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ +void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ +} \ +void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ +} + +DO_LDFF1_LDNF1_1(bb, 0) +DO_LDFF1_LDNF1_1(bhu, 1) +DO_LDFF1_LDNF1_1(bhs, 1) +DO_LDFF1_LDNF1_1(bsu, 2) +DO_LDFF1_LDNF1_1(bss, 2) +DO_LDFF1_LDNF1_1(bdu, 3) +DO_LDFF1_LDNF1_1(bds, 3) + +DO_LDFF1_LDNF1_2(hh, 1, 1) +DO_LDFF1_LDNF1_2(hsu, 2, 1) +DO_LDFF1_LDNF1_2(hss, 2, 1) +DO_LDFF1_LDNF1_2(hdu, 3, 1) +DO_LDFF1_LDNF1_2(hds, 3, 1) + +DO_LDFF1_LDNF1_2(ss, 2, 2) +DO_LDFF1_LDNF1_2(sdu, 3, 2) +DO_LDFF1_LDNF1_2(sds, 3, 2) + +DO_LDFF1_LDNF1_2(dd, 3, 3) + +#undef DO_LDFF1_LDNF1_1 +#undef DO_LDFF1_LDNF1_2 + +/* + * Store contiguous data, protected by a governing predicate. 
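+ * Unlike the loads above there is no destination register to protect, so
+ * these helpers store each active element straight to memory; elements
+ * written before a faulting element are simply left in place.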
+ */ + +#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ +static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ +{ \ + TLB(env, addr, *(TYPEM *)((char *)vd + H(reg_off)), oi, ra); \ +} + +DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu) +DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu) + +DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu) +DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu) + +DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu) +DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu) + +DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu) + +DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu) + +DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu) +DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu) + +DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu) + +#undef DO_ST_TLB + +/* + * Common helpers for all contiguous 1,2,3,4-register predicated stores. + */ +static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *vd = &env->vfp.zregs[rd]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, vd, i, addr, oi, ra); + } + i += esize, pg >>= esize; + addr += msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 2 * msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { 
+ tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 3 * msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esize, const int msize, + sve_st1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); + intptr_t i, oprsz = simd_oprsz(desc); + void *d1 = &env->vfp.zregs[rd]; + void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + void *d4 = &env->vfp.zregs[(rd + 3) & 31]; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (pg & 1) { + tlb_fn(env, d1, i, addr, oi, ra); + tlb_fn(env, d2, i, addr + msize, oi, ra); + tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); + tlb_fn(env, d4, i, addr + 3 * msize, oi, ra); + } + i += esize, pg >>= esize; + addr += 4 * msize; + } while (i & 15); + } + clear_helper_retaddr(); +} + +#define DO_STN_1(N, NAME, ESIZE) \ +void QEMU_FLATTEN HELPER(sve_st##N##NAME##_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \ + sve_st1##NAME##_tlb); \ +} + +#define DO_STN_2(N, NAME, ESIZE, MSIZE) \ +void QEMU_FLATTEN HELPER(sve_st##N##NAME##_le_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ + sve_st1##NAME##_le_tlb); \ +} \ +void QEMU_FLATTEN HELPER(sve_st##N##NAME##_be_r) \ + (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ +{ \ + sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ + sve_st1##NAME##_be_tlb); \ +} + +DO_STN_1(1, bb, 1) +DO_STN_1(1, bh, 2) +DO_STN_1(1, bs, 4) +DO_STN_1(1, bd, 8) +DO_STN_1(2, bb, 1) +DO_STN_1(3, bb, 1) +DO_STN_1(4, bb, 1) + +DO_STN_2(1, hh, 2, 2) +DO_STN_2(1, hs, 4, 2) +DO_STN_2(1, hd, 8, 2) +DO_STN_2(2, hh, 2, 2) +DO_STN_2(3, hh, 2, 2) +DO_STN_2(4, hh, 2, 2) + +DO_STN_2(1, ss, 4, 4) +DO_STN_2(1, sd, 8, 4) +DO_STN_2(2, ss, 4, 4) +DO_STN_2(3, ss, 4, 4) +DO_STN_2(4, ss, 4, 4) + +DO_STN_2(1, dd, 8, 8) +DO_STN_2(2, dd, 8, 8) +DO_STN_2(3, dd, 8, 8) +DO_STN_2(4, dd, 8, 8) + +#undef DO_STN_1 +#undef DO_STN_2 + +/* + * Loads with a vector index. + */ + +/* + * Load the element at @reg + @reg_ofs, sign or zero-extend as needed. 
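+ * The off_* routines below implement this for each index form: zsu and zss
+ * yield zero- and sign-extended 32-bit offsets (held in 32-bit or 64-bit
+ * vector elements), while zd yields the full 64-bit offset.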
+ */ +typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs); + +static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs) +{ + return *(uint32_t *)((char *)reg + H1_4(reg_ofs)); +} + +static target_ulong off_zss_s(void *reg, intptr_t reg_ofs) +{ + return *(int32_t *)((char *)reg + H1_4(reg_ofs)); +} + +static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs) +{ + return (uint32_t)*(uint64_t *)((char *)reg + reg_ofs); +} + +static target_ulong off_zss_d(void *reg, intptr_t reg_ofs) +{ + return (int32_t)*(uint64_t *)((char *)reg + reg_ofs); +} + +static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) +{ + return *(uint64_t *)((char *)reg + reg_ofs); +} + +static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc); + ARMVectorReg scratch = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i); + tlb_fn(env, &scratch, i, base + (off << scale), oi, ra); + } + i += 4, pg >>= 4; + } while (i & 15); + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. */ + memcpy(vd, &scratch, oprsz); +} + +static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc) / 8; + ARMVectorReg scratch = { 0 }; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; i++) { + uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i * 8); + tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra); + } + } + clear_helper_retaddr(); + + /* Wait until all exceptions have been raised to write back. 
*/ + memcpy(vd, &scratch, oprsz * 8); +} + +#define DO_LD1_ZPZ_S(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_ld1##MEM##_tlb); \ +} + +#define DO_LD1_ZPZ_D(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_ld1##MEM##_tlb); \ +} + +DO_LD1_ZPZ_S(bsu, zsu) +DO_LD1_ZPZ_S(bsu, zss) +DO_LD1_ZPZ_D(bdu, zsu) +DO_LD1_ZPZ_D(bdu, zss) +DO_LD1_ZPZ_D(bdu, zd) + +DO_LD1_ZPZ_S(bss, zsu) +DO_LD1_ZPZ_S(bss, zss) +DO_LD1_ZPZ_D(bds, zsu) +DO_LD1_ZPZ_D(bds, zss) +DO_LD1_ZPZ_D(bds, zd) + +DO_LD1_ZPZ_S(hsu_le, zsu) +DO_LD1_ZPZ_S(hsu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zsu) +DO_LD1_ZPZ_D(hdu_le, zss) +DO_LD1_ZPZ_D(hdu_le, zd) + +DO_LD1_ZPZ_S(hsu_be, zsu) +DO_LD1_ZPZ_S(hsu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zsu) +DO_LD1_ZPZ_D(hdu_be, zss) +DO_LD1_ZPZ_D(hdu_be, zd) + +DO_LD1_ZPZ_S(hss_le, zsu) +DO_LD1_ZPZ_S(hss_le, zss) +DO_LD1_ZPZ_D(hds_le, zsu) +DO_LD1_ZPZ_D(hds_le, zss) +DO_LD1_ZPZ_D(hds_le, zd) + +DO_LD1_ZPZ_S(hss_be, zsu) +DO_LD1_ZPZ_S(hss_be, zss) +DO_LD1_ZPZ_D(hds_be, zsu) +DO_LD1_ZPZ_D(hds_be, zss) +DO_LD1_ZPZ_D(hds_be, zd) + +DO_LD1_ZPZ_S(ss_le, zsu) +DO_LD1_ZPZ_S(ss_le, zss) +DO_LD1_ZPZ_D(sdu_le, zsu) +DO_LD1_ZPZ_D(sdu_le, zss) +DO_LD1_ZPZ_D(sdu_le, zd) + +DO_LD1_ZPZ_S(ss_be, zsu) +DO_LD1_ZPZ_S(ss_be, zss) +DO_LD1_ZPZ_D(sdu_be, zsu) +DO_LD1_ZPZ_D(sdu_be, zss) +DO_LD1_ZPZ_D(sdu_be, zd) + +DO_LD1_ZPZ_D(sds_le, zsu) +DO_LD1_ZPZ_D(sds_le, zss) +DO_LD1_ZPZ_D(sds_le, zd) + +DO_LD1_ZPZ_D(sds_be, zsu) +DO_LD1_ZPZ_D(sds_be, zss) +DO_LD1_ZPZ_D(sds_be, zd) + +DO_LD1_ZPZ_D(dd_le, zsu) +DO_LD1_ZPZ_D(dd_le, zss) +DO_LD1_ZPZ_D(dd_le, zd) + +DO_LD1_ZPZ_D(dd_be, zsu) +DO_LD1_ZPZ_D(dd_be, zss) +DO_LD1_ZPZ_D(dd_be, zd) + +#undef DO_LD1_ZPZ_S +#undef DO_LD1_ZPZ_D + +/* First fault loads with a vector index. */ + +/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting. + * The controlling predicate is known to be true. Return true if the + * load was successful. 
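+ * "Without faulting" means the element must lie entirely within the current
+ * page and tlb_vaddr_to_host() must resolve that page to host RAM; otherwise
+ * the function returns false and the caller records a fault in the FFR
+ * instead of loading.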
+ */ +typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, int mmu_idx); + +#ifdef _MSC_VER +#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ +static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, int mmu_idx) \ +{ \ + struct uc_struct *uc = env->uc; \ + target_ulong next_page = 0ULL - (addr | TARGET_PAGE_MASK); \ + if (likely(next_page - addr >= sizeof(TYPEM))) { \ + void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ + if (likely(host)) { \ + TYPEM val = HOST(host); \ + *(TYPEE *)((char *)vd + H(reg_off)) = val; \ + return true; \ + } \ + } \ + return false; \ +} +#else +#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ +static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, int mmu_idx) \ +{ \ + struct uc_struct *uc = env->uc; \ + target_ulong next_page = -(addr | TARGET_PAGE_MASK); \ + if (likely(next_page - addr >= sizeof(TYPEM))) { \ + void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ + if (likely(host)) { \ + TYPEM val = HOST(host); \ + *(TYPEE *)((char *)vd + H(reg_off)) = val; \ + return true; \ + } \ + } \ + return false; \ +} +#endif + +DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p) +DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p) +DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p) +DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p) + +DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p) +DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p) +DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p) +DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p) +DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p) +DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p) +DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p) +DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p) + +DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p) +DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p) +DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p) +DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p) +DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p) +DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p) + +DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p) +DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p) + +/* + * Common helper for all gather first-faulting loads. + */ +static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, + sve_ld1_nf_fn *nonfault_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t reg_off, reg_max = simd_oprsz(desc); + target_ulong addr; + + /* Skip to the first true predicate. */ + reg_off = find_next_active(vg, 0, reg_max, MO_32); + if (likely(reg_off < reg_max)) { + /* Perform one normal read, which will fault or not. */ + set_helper_retaddr(ra); + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + tlb_fn(env, vd, reg_off, addr, oi, ra); + + /* The rest of the reads will be non-faulting. */ + clear_helper_retaddr(); + } + + /* After any fault, zero the leading predicated false elements. 
*/ + swap_memzero(vd, reg_off); + + while (likely((reg_off += 4) < reg_max)) { + uint64_t pg = *(uint64_t *)((char *)vg + (reg_off >> 6) * 8); + if (likely((pg >> (reg_off & 63)) & 1)) { + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { + record_fault(env, reg_off, reg_max); + break; + } + } else { + *(uint32_t *)((char *)vd + H1_4(reg_off)) = 0; + } + } +} + +static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, + sve_ld1_nf_fn *nonfault_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int mmu_idx = get_mmuidx(oi); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t reg_off, reg_max = simd_oprsz(desc); + target_ulong addr; + + /* Skip to the first true predicate. */ + reg_off = find_next_active(vg, 0, reg_max, MO_64); + if (likely(reg_off < reg_max)) { + /* Perform one normal read, which will fault or not. */ + set_helper_retaddr(ra); + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + tlb_fn(env, vd, reg_off, addr, oi, ra); + + /* The rest of the reads will be non-faulting. */ + clear_helper_retaddr(); + } + + /* After any fault, zero the leading predicated false elements. */ + swap_memzero(vd, reg_off); + + while (likely((reg_off += 8) < reg_max)) { + uint8_t pg = *(uint8_t *)((char *)vg + H1(reg_off >> 3)); + if (likely(pg & 1)) { + addr = off_fn(vm, reg_off); + addr = base + (addr << scale); + if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { + record_fault(env, reg_off, reg_max); + break; + } + } else { + *(uint64_t *)((char *)vd + reg_off) = 0; + } + } +} + +#define DO_LDFF1_ZPZ_S(MEM, OFS) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ +} + +#define DO_LDFF1_ZPZ_D(MEM, OFS) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ +} + +DO_LDFF1_ZPZ_S(bsu, zsu) +DO_LDFF1_ZPZ_S(bsu, zss) +DO_LDFF1_ZPZ_D(bdu, zsu) +DO_LDFF1_ZPZ_D(bdu, zss) +DO_LDFF1_ZPZ_D(bdu, zd) + +DO_LDFF1_ZPZ_S(bss, zsu) +DO_LDFF1_ZPZ_S(bss, zss) +DO_LDFF1_ZPZ_D(bds, zsu) +DO_LDFF1_ZPZ_D(bds, zss) +DO_LDFF1_ZPZ_D(bds, zd) + +DO_LDFF1_ZPZ_S(hsu_le, zsu) +DO_LDFF1_ZPZ_S(hsu_le, zss) +DO_LDFF1_ZPZ_D(hdu_le, zsu) +DO_LDFF1_ZPZ_D(hdu_le, zss) +DO_LDFF1_ZPZ_D(hdu_le, zd) + +DO_LDFF1_ZPZ_S(hsu_be, zsu) +DO_LDFF1_ZPZ_S(hsu_be, zss) +DO_LDFF1_ZPZ_D(hdu_be, zsu) +DO_LDFF1_ZPZ_D(hdu_be, zss) +DO_LDFF1_ZPZ_D(hdu_be, zd) + +DO_LDFF1_ZPZ_S(hss_le, zsu) +DO_LDFF1_ZPZ_S(hss_le, zss) +DO_LDFF1_ZPZ_D(hds_le, zsu) +DO_LDFF1_ZPZ_D(hds_le, zss) +DO_LDFF1_ZPZ_D(hds_le, zd) + +DO_LDFF1_ZPZ_S(hss_be, zsu) +DO_LDFF1_ZPZ_S(hss_be, zss) +DO_LDFF1_ZPZ_D(hds_be, zsu) +DO_LDFF1_ZPZ_D(hds_be, zss) +DO_LDFF1_ZPZ_D(hds_be, zd) + +DO_LDFF1_ZPZ_S(ss_le, zsu) +DO_LDFF1_ZPZ_S(ss_le, zss) +DO_LDFF1_ZPZ_D(sdu_le, zsu) +DO_LDFF1_ZPZ_D(sdu_le, zss) +DO_LDFF1_ZPZ_D(sdu_le, zd) + +DO_LDFF1_ZPZ_S(ss_be, zsu) +DO_LDFF1_ZPZ_S(ss_be, zss) +DO_LDFF1_ZPZ_D(sdu_be, zsu) +DO_LDFF1_ZPZ_D(sdu_be, zss) +DO_LDFF1_ZPZ_D(sdu_be, zd) + +DO_LDFF1_ZPZ_D(sds_le, zsu) +DO_LDFF1_ZPZ_D(sds_le, zss) 
+DO_LDFF1_ZPZ_D(sds_le, zd) + +DO_LDFF1_ZPZ_D(sds_be, zsu) +DO_LDFF1_ZPZ_D(sds_be, zss) +DO_LDFF1_ZPZ_D(sds_be, zd) + +DO_LDFF1_ZPZ_D(dd_le, zsu) +DO_LDFF1_ZPZ_D(dd_le, zss) +DO_LDFF1_ZPZ_D(dd_le, zd) + +DO_LDFF1_ZPZ_D(dd_be, zsu) +DO_LDFF1_ZPZ_D(dd_be, zss) +DO_LDFF1_ZPZ_D(dd_be, zd) + +/* Stores with a vector index. */ + +static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc); + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + do { + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i); + tlb_fn(env, vd, i, base + (off << scale), oi, ra); + } + i += 4, pg >>= 4; + } while (i & 15); + } + clear_helper_retaddr(); +} + +static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); + const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); + intptr_t i, oprsz = simd_oprsz(desc) / 8; + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; i++) { + uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i * 8); + tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra); + } + } + clear_helper_retaddr(); +} + +#define DO_ST1_ZPZ_S(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_st1##MEM##_tlb); \ +} + +#define DO_ST1_ZPZ_D(MEM, OFS) \ +void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_st1##MEM##_tlb); \ +} + +DO_ST1_ZPZ_S(bs, zsu) +DO_ST1_ZPZ_S(hs_le, zsu) +DO_ST1_ZPZ_S(hs_be, zsu) +DO_ST1_ZPZ_S(ss_le, zsu) +DO_ST1_ZPZ_S(ss_be, zsu) + +DO_ST1_ZPZ_S(bs, zss) +DO_ST1_ZPZ_S(hs_le, zss) +DO_ST1_ZPZ_S(hs_be, zss) +DO_ST1_ZPZ_S(ss_le, zss) +DO_ST1_ZPZ_S(ss_be, zss) + +DO_ST1_ZPZ_D(bd, zsu) +DO_ST1_ZPZ_D(hd_le, zsu) +DO_ST1_ZPZ_D(hd_be, zsu) +DO_ST1_ZPZ_D(sd_le, zsu) +DO_ST1_ZPZ_D(sd_be, zsu) +DO_ST1_ZPZ_D(dd_le, zsu) +DO_ST1_ZPZ_D(dd_be, zsu) + +DO_ST1_ZPZ_D(bd, zss) +DO_ST1_ZPZ_D(hd_le, zss) +DO_ST1_ZPZ_D(hd_be, zss) +DO_ST1_ZPZ_D(sd_le, zss) +DO_ST1_ZPZ_D(sd_be, zss) +DO_ST1_ZPZ_D(dd_le, zss) +DO_ST1_ZPZ_D(dd_be, zss) + +DO_ST1_ZPZ_D(bd, zd) +DO_ST1_ZPZ_D(hd_le, zd) +DO_ST1_ZPZ_D(hd_be, zd) +DO_ST1_ZPZ_D(sd_le, zd) +DO_ST1_ZPZ_D(sd_be, zd) +DO_ST1_ZPZ_D(dd_le, zd) +DO_ST1_ZPZ_D(dd_be, zd) + +#undef DO_ST1_ZPZ_S +#undef DO_ST1_ZPZ_D diff --git a/qemu/target/arm/cpu-param.h b/qemu/target/arm/cpu-param.h index 208858c700..9e7aaea79f 100644 --- a/qemu/target/arm/cpu-param.h +++ b/qemu/target/arm/cpu-param.h @@ -25,6 +25,6 @@ # define TARGET_PAGE_BITS_VARY # define TARGET_PAGE_BITS_MIN 10 -#define NB_MMU_MODES 12 +#define NB_MMU_MODES 11 #endif diff --git a/qemu/target/arm/cpu-qom.h b/qemu/target/arm/cpu-qom.h index 963a628d7c..bf5037d346 100644 --- a/qemu/target/arm/cpu-qom.h +++ b/qemu/target/arm/cpu-qom.h @@ -32,7 +32,14 @@ struct arm_boot_info; #define 
TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU -typedef struct ARMCPUInfo ARMCPUInfo; +typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(struct uc_struct *uc, CPUState *obj); + void (*class_init)(struct uc_struct *uc, CPUClass *oc, void *data); +} ARMCPUInfo; + +void arm_cpu_register(const ARMCPUInfo *info); +void aarch64_cpu_register(const ARMCPUInfo *info); /** * ARMCPUClass: diff --git a/qemu/target/arm/cpu.c b/qemu/target/arm/cpu.c index 7613381980..e0c59cf0f1 100644 --- a/qemu/target/arm/cpu.c +++ b/qemu/target/arm/cpu.c @@ -596,16 +596,6 @@ void arm_cpu_update_vfiq(ARMCPU *cpu) } } -static inline void set_feature(CPUARMState *env, int feature) -{ - env->features |= 1ULL << feature; -} - -static inline void unset_feature(CPUARMState *env, int feature) -{ - env->features &= ~(1ULL << feature); -} - static uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz) { uint32_t Aff1 = idx / clustersz; @@ -2003,6 +1993,7 @@ static void arm_max_initfn(struct uc_struct *uc, CPUState *obj) FIELD_DP32(t, ID_MMFR4, HPDS, 1, t); /* AA32HPD */ FIELD_DP32(t, ID_MMFR4, AC2, 1, t); /* ACTLR2, HACTLR2 */ FIELD_DP32(t, ID_MMFR4, CNP, 1, t); /* TTCNP */ + FIELD_DP32(t, ID_MMFR4, XNX, 1, t); /* TTS2UXN */ cpu->isar.id_mmfr4 = t; } //#endif @@ -2012,12 +2003,6 @@ static void arm_max_initfn(struct uc_struct *uc, CPUState *obj) #endif /* !defined(TARGET_AARCH64) */ -struct ARMCPUInfo { - const char *name; - void (*initfn)(struct uc_struct *uc, CPUState *obj); - void (*class_init)(struct uc_struct *uc, CPUClass *oc, void *data); -}; - #if !defined(TARGET_AARCH64) static struct ARMCPUInfo arm_cpus[] = { { "arm926", arm926_initfn }, diff --git a/qemu/target/arm/cpu.h b/qemu/target/arm/cpu.h index f857850cfc..794c5ab05b 100644 --- a/qemu/target/arm/cpu.h +++ b/qemu/target/arm/cpu.h @@ -480,6 +480,9 @@ typedef struct CPUARMState { uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */ uint64_t vpidr_el2; /* Virtualization Processor ID Register */ uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */ + uint64_t tfsr_el[4]; /* tfsre0_el1 is index 0. */ + uint64_t gcr_el1; + uint64_t rgsr_el1; } cp15; struct { @@ -548,6 +551,8 @@ typedef struct CPUARMState { uint64_t esr; } serror; + uint8_t ext_dabt_raised; /* Tracking/verifying injection of ext DABT */ + /* State of our input IRQ/FIQ/VIRQ/VFIQ lines */ uint32_t irq_line_state; @@ -680,6 +685,16 @@ typedef struct CPUARMState { struct uc_struct *uc; } CPUARMState; +static inline void set_feature(CPUARMState *env, int feature) +{ + env->features |= 1ULL << feature; +} + +static inline void unset_feature(CPUARMState *env, int feature) +{ + env->features &= ~(1ULL << feature); +} + /** * ARMELChangeHookFn: * type of a function which can be registered via arm_register_el_change_hook() @@ -757,6 +772,10 @@ struct ARMCPU { /* MemoryRegion to use for secure physical accesses */ MemoryRegion *secure_memory; + /* MemoryRegion to use for allocation tag accesses */ + MemoryRegion *tag_memory; + MemoryRegion *secure_tag_memory; + /* For v8M, pointer to the IDAU interface provided by board/SoC */ void *idau; @@ -858,7 +877,7 @@ struct ARMCPU { uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; } isar; - uint32_t midr; + uint64_t midr; uint32_t revidr; uint32_t reset_fpsid; uint32_t ctr; @@ -1152,7 +1171,7 @@ void pmu_init(ARMCPU *cpu); #define CACHED_CPSR_BITS (CPSR_T | CPSR_AIF | CPSR_GE | CPSR_IT | CPSR_Q \ | CPSR_NZCV) /* Bits writable in user mode. 
*/ -#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE) +#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE | CPSR_E) /* Execution state bits. MRS read as zero, MSR writes ignored. */ #define CPSR_EXEC (CPSR_T | CPSR_IT | CPSR_J | CPSR_IL) @@ -1204,6 +1223,7 @@ void pmu_init(ARMCPU *cpu); #define PSTATE_SS (1U << 21) #define PSTATE_PAN (1U << 22) #define PSTATE_UAO (1U << 23) +#define PSTATE_TCO (1U << 25) #define PSTATE_V (1U << 28) #define PSTATE_C (1U << 29) #define PSTATE_Z (1U << 30) @@ -2327,7 +2347,7 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) * migration or KVM state synchronization. (Typically this is for "registers" * which are actually used as instructions for cache maintenance and so on.) * IO indicates that this register does I/O and therefore its accesses - * need to be surrounded by gen_io_start()/gen_io_end(). In particular, + * need to be marked with gen_io_start() and also end the TB. In particular, * registers which implement clocks or timers require this. * RAISES_EXC is for when the read or write hook might raise an exception; * the generated code will synchronize the CPU state before calling the hook @@ -2349,7 +2369,9 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) #define ARM_CP_NZCV (ARM_CP_SPECIAL | 0x0300) #define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | 0x0400) #define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500) -#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA +#define ARM_CP_DC_GVA (ARM_CP_SPECIAL | 0x0600) +#define ARM_CP_DC_GZVA (ARM_CP_SPECIAL | 0x0700) +#define ARM_LAST_SPECIAL ARM_CP_DC_GZVA #define ARM_CP_FPU 0x1000 #define ARM_CP_SVE 0x2000 #define ARM_CP_NO_GDB 0x4000 @@ -2804,6 +2826,9 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * handling via the TLB. The only way to do a stage 1 translation without * the immediate stage 2 translation is via the ATS or AT system insns, * which can be slow-pathed and always do a page table walk. + * The only use of stage 2 translations is either as part of an s1+2 + * lookup or when loading the descriptors during a stage 1 page table walk, + * and in both those cases we don't use the TLB. * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" * translation regimes, because they map reasonably well to each other * and they can't both be active at the same time. @@ -2819,15 +2844,15 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * NS EL1 EL1&0 stage 1+2 (aka NS PL1) * NS EL1 EL1&0 stage 1+2 +PAN * NS EL0 EL2&0 + * NS EL2 EL2&0 * NS EL2 EL2&0 +PAN * NS EL2 (aka NS PL2) * S EL0 EL1&0 (aka S PL0) * S EL1 EL1&0 (not used if EL3 is 32 bit) * S EL1 EL1&0 +PAN * S EL3 (aka S PL1) - * NS EL1&0 stage 2 * - * for a total of 12 different mmu_idx. + * for a total of 11 different mmu_idx. * * R profile CPUs have an MPU, but can use the same set of MMU indexes * as A profile. They only need to distinguish NS EL0 and NS EL1 (and @@ -2850,8 +2875,8 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * vs A/R profile) would like to use MMU indexes with different semantics, * but since we don't ever need to use all of those in a single CPU we * can avoid setting NB_MMU_MODES to more than 8. The lower bits of - * ARMMMUIdx are the core TLB mmu index, and the higher bits are always - * the same for any particular CPU. + * can avoid having to set NB_MMU_MODES to "total number of A profile MMU + * modes + total number of M profile MMU modes". The lower bits of * Variables of type ARMMUIdx are always full values, and the core * index values are in variables of type 'int'. 
* @@ -2897,8 +2922,6 @@ typedef enum ARMMMUIdx { ARMMMUIdx_SE10_1_PAN = 9 | ARM_MMU_IDX_A, ARMMMUIdx_SE3 = 10 | ARM_MMU_IDX_A, - ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A, - /* * These are not allocated TLBs and are used only for AT system * instructions or for the first stage of an S12 page table walk. @@ -2906,6 +2929,14 @@ typedef enum ARMMMUIdx { ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, + /* + * Not allocated a TLB: used only for second stage of an S12 page + * table walk, or for descriptor loads during first stage of an S1 + * page table walk. Note that if we ever want to have a TLB for this + * then various TLB flush insns which currently are no-ops or flush + * only stage 1 MMU indexes will need to change to flush stage 2. + */ + ARMMMUIdx_Stage2 = 3 | ARM_MMU_IDX_NOTLB, /* * M-profile. @@ -2939,7 +2970,6 @@ typedef enum ARMMMUIdxBit { TO_CORE_BIT(SE10_1), TO_CORE_BIT(SE10_1_PAN), TO_CORE_BIT(SE3), - TO_CORE_BIT(Stage2), TO_CORE_BIT(MUser), TO_CORE_BIT(MPriv), @@ -2959,6 +2989,8 @@ typedef enum ARMMMUIdxBit { typedef enum ARMASIdx { ARMASIdx_NS = 0, ARMASIdx_S = 1, + ARMASIdx_TagNS = 2, + ARMASIdx_TagS = 3, } ARMASIdx; /* Return the Exception Level targeted by debug exceptions. */ @@ -3145,10 +3177,10 @@ typedef ARMCPU ArchCPU; * | | | TBFLAG_A32 | | * | | +-----+----------+ TBFLAG_AM32 | * | TBFLAG_ANY | |TBFLAG_M32| | - * | | +-+----------+--------------| - * | | | TBFLAG_A64 | - * +--------------+---------+---------------------------+ - * 31 20 15 0 + * | +-----------+----------+--------------| + * | | TBFLAG_A64 | + * +--------------+-------------------------------------+ + * 31 20 0 * * Unless otherwise noted, these bits are cached in env->hflags. */ @@ -3215,6 +3247,10 @@ FIELD(TBFLAG_A64, BT, 9, 1) FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */ FIELD(TBFLAG_A64, TBID, 12, 2) FIELD(TBFLAG_A64, UNPRIV, 14, 1) +FIELD(TBFLAG_A64, ATA, 15, 1) +FIELD(TBFLAG_A64, TCMA, 16, 2) +FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1) +FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) /** * cpu_mmu_index: @@ -3300,6 +3336,20 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) /* Shared between translate-sve.c and sve_helper.c. */ extern const uint64_t pred_esz_masks[4]; +/* Helper for the macros below, validating the argument type. */ +static inline MemTxAttrs *typecheck_memtxattrs(MemTxAttrs *x) +{ + return x; +} + +/* + * Lvalue macros for ARM TLB bits that we must cache in the TCG TLB. + * Using these should be a bit more self-documenting than using the + * generic target bits directly. + */ +#define arm_tlb_bti_gp(x) (typecheck_memtxattrs(x)->target_tlb_bit0) +#define arm_tlb_mte_tagged(x) (typecheck_memtxattrs(x)->target_tlb_bit1) + /* * Naming convention for isar_feature functions: * Functions which test 32-bit ID registers should have _aa32_ in @@ -3539,6 +3589,11 @@ static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id) return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0; } +static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; +} + /* * 64-bit feature tests via id registers. 
*/ @@ -3727,6 +3782,18 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; } +static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0; +} + +static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; +} + + + static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && @@ -3754,6 +3821,11 @@ static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; } +static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; +} + /* * Feature tests for "does this exist in either 32-bit or 64-bit?" */ @@ -3782,6 +3854,11 @@ static inline bool isar_feature_any_ccidx(const ARMISARegisters *id) return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id); } +static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id) +{ + return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id); +} + /* * Forward to the above feature tests given an ARMCPU pointer. */ diff --git a/qemu/target/arm/cpu64.c b/qemu/target/arm/cpu64.c index 3c57a52aee..b012e5d185 100644 --- a/qemu/target/arm/cpu64.c +++ b/qemu/target/arm/cpu64.c @@ -28,12 +28,6 @@ void arm_cpu_post_init(CPUState *obj); void arm_cpu_initfn(struct uc_struct *uc, CPUState *obj); ARMCPU *cpu_arm_init(struct uc_struct *uc); - -static inline void set_feature(CPUARMState *env, int feature) -{ - env->features |= 1ULL << feature; -} - static void aarch64_a57_initfn(struct uc_struct *uc, CPUState *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -262,6 +256,7 @@ static void aarch64_max_initfn(struct uc_struct *uc, CPUState *obj) FIELD_DP64(t, ID_AA64MMFR1, VH, 1, t); FIELD_DP64(t, ID_AA64MMFR1, PAN, 2, t); /* ATS1E1 */ FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2, t); /* VMID16 */ + FIELD_DP64(t, ID_AA64MMFR1, XNX, 1, t); /* TTS2UXN */ cpu->isar.id_aa64mmfr1 = t; t = cpu->isar.id_aa64mmfr2; @@ -295,22 +290,18 @@ static void aarch64_max_initfn(struct uc_struct *uc, CPUState *obj) FIELD_DP32(u, ID_MMFR4, HPDS, 1, u); /* AA32HPD */ FIELD_DP32(u, ID_MMFR4, AC2, 1, u); /* ACTLR2, HACTLR2 */ FIELD_DP32(u, ID_MMFR4, CNP, 1, u); /* TTCNP */ + FIELD_DP32(u, ID_MMFR4, XNX, 1, t); /* TTS2UXN */ cpu->isar.id_mmfr4 = u; - u = cpu->isar.id_aa64dfr0; - FIELD_DP64(u, ID_AA64DFR0, PMUVER, 5, u); /* v8.4-PMU */ - cpu->isar.id_aa64dfr0 = u; + t = cpu->isar.id_aa64dfr0; + FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5, t); /* v8.4-PMU */ + cpu->isar.id_aa64dfr0 = t; u = cpu->isar.id_dfr0; FIELD_DP32(u, ID_DFR0, PERFMON, 5, u); /* v8.4-PMU */ cpu->isar.id_dfr0 = u; } -struct ARMCPUInfo { - const char *name; - void (*initfn)(struct uc_struct *uc, CPUState *obj); -}; - static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, diff --git a/qemu/target/arm/crypto_helper.c b/qemu/target/arm/crypto_helper.c index 117be6f89f..137e776059 100644 --- a/qemu/target/arm/crypto_helper.c +++ b/qemu/target/arm/crypto_helper.c @@ -13,7 +13,9 @@ #include "cpu.h" #include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" #include "crypto/aes.h" +#include "vec_internal.h" union CRYPTO_STATE { uint8_t bytes[16]; @@ -22,25 +24,35 @@ union CRYPTO_STATE { }; 
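+/*
+ * The CR_ST_BYTE/CR_ST_WORD accessors below index this state in
+ * architectural (little-endian) order on both little- and big-endian hosts.
+ */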
#ifdef HOST_WORDS_BIGENDIAN -#define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8]) -#define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2]) +#define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) +#define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) #else -#define CR_ST_BYTE(state, i) (state.bytes[i]) -#define CR_ST_WORD(state, i) (state.words[i]) +#define CR_ST_BYTE(state, i) ((state).bytes[i]) +#define CR_ST_WORD(state, i) ((state).words[i]) #endif -void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) +/* + * The caller has not been converted to full gvec, and so only + * modifies the low 16 bytes of the vector register. + */ +static void clear_tail_16(void *vd, uint32_t desc) +{ + int opr_sz = simd_oprsz(desc); + int max_sz = simd_maxsz(desc); + + assert(opr_sz == 16); + clear_tail(vd, opr_sz, max_sz); +} + +static void do_crypto_aese(uint64_t *rd, uint64_t *rn, + uint64_t *rm, bool decrypt) { static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; - union CRYPTO_STATE st = { .l = { rd[0], rd[1] } }; + union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; int i; - assert(decrypt < 2); - /* xor state vector with round key */ rk.l[0] ^= st.l[0]; rk.l[1] ^= st.l[1]; @@ -54,7 +66,18 @@ void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } -void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) +void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aese((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vn + i), (uint64_t*)((char*)vm + i), decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) { static uint32_t const mc[][256] = { { /* MixColumns lookup table */ @@ -190,13 +213,9 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, } }; - uint64_t *rd = vd; - uint64_t *rm = vm; union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; int i; - assert(decrypt < 2); - for (i = 0; i < 16; i += 4) { CR_ST_WORD(st, i >> 2) = mc[decrypt][CR_ST_BYTE(st, i)] ^ @@ -209,6 +228,17 @@ void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) rd[1] = st.l[1]; } +void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + bool decrypt = simd_data(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_aesmc((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vm + i), decrypt); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + /* * SHA-1 logical functions */ @@ -228,52 +258,78 @@ static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) return (x & y) | ((x | y) & z); } -void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op) +void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc) +{ + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t d0, d1; + + d0 = d[1] ^ d[0] ^ m[0]; + d1 = n[0] ^ d[1] ^ m[1]; + d[0] = d0; + d[1] = d1; + + clear_tail_16(vd, desc); +} + + +static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn, + uint64_t *rm, uint32_t desc, + uint32_t (*fn)(union CRYPTO_STATE *d)) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n 
= { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + int i; - if (op == 3) { /* sha1su0 */ - d.l[0] ^= d.l[1] ^ m.l[0]; - d.l[1] ^= n.l[0] ^ m.l[1]; - } else { - int i; - - for (i = 0; i < 4; i++) { - uint32_t t = 0; - - switch (op) { - case 0: /* sha1c */ - t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 1: /* sha1p */ - t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - case 2: /* sha1m */ - t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); - break; - default: - g_assert_not_reached(); - } - t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) - + CR_ST_WORD(m, i); - - CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); - CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); - CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); - CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); - CR_ST_WORD(d, 0) = t; - } + for (i = 0; i < 4; i++) { + uint32_t t = fn(&d); + + t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) + + CR_ST_WORD(m, i); + + CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); + CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); + CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); + CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); + CR_ST_WORD(d, 0) = t; } rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); +} + +static uint32_t do_sha1c(union CRYPTO_STATE *d) +{ + return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c); +} + +static uint32_t do_sha1p(union CRYPTO_STATE *d) +{ + return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); } -void HELPER(crypto_sha1h)(void *vd, void *vm) +void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p); +} + +static uint32_t do_sha1m(union CRYPTO_STATE *d) +{ + return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3)); +} + +void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc) +{ + crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m); +} + +void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -284,9 +340,11 @@ void HELPER(crypto_sha1h)(void *vd, void *vm) rd[0] = m.l[0]; rd[1] = m.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha1su1)(void *vd, void *vm) +void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -300,6 +358,8 @@ void HELPER(crypto_sha1su1)(void *vd, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* @@ -327,7 +387,7 @@ static uint32_t s1(uint32_t x) return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); } -void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -358,9 +418,11 @@ void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -383,9 +445,11 @@ void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su0)(void *vd, void *vm) +void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rm = vm; @@ -399,9 +463,11 @@ void HELPER(crypto_sha256su0)(void *vd, void *vm) rd[0] = 
d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -417,6 +483,8 @@ void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } /* @@ -453,7 +521,7 @@ static uint64_t s1_512(uint64_t x) return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); } -void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -466,9 +534,11 @@ void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -481,9 +551,11 @@ void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(rd, desc); } -void HELPER(crypto_sha512su0)(void *vd, void *vn) +void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -495,9 +567,11 @@ void HELPER(crypto_sha512su0)(void *vd, void *vn) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -505,9 +579,11 @@ void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) rd[0] += s1_512(rn[0]) + rm[0]; rd[1] += s1_512(rn[1]) + rm[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -531,9 +607,11 @@ void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -551,17 +629,18 @@ void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, - uint32_t opcode) +static inline void QEMU_ALWAYS_INLINE +crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm, + uint32_t desc, uint32_t opcode) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; + uint32_t imm2 = simd_data(desc); uint32_t t = 0; assert(imm2 < 4); @@ -576,7 +655,7 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, /* SM3TT2B */ t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1)); } else { - g_assert_not_reached(); + qemu_build_not_reached(); } t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2); @@ -601,8 +680,21 @@ void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(rd, desc); } +#define DO_SM3TT(NAME, OPCODE) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { crypto_sm3tt(vd, vn, vm, desc, OPCODE); } + +DO_SM3TT(crypto_sm3tt1a, 0) +DO_SM3TT(crypto_sm3tt1b, 1) +DO_SM3TT(crypto_sm3tt2a, 2) 
+DO_SM3TT(crypto_sm3tt2b, 3) + +#undef DO_SM3TT + static uint8_t const sm4_sbox[] = { 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, @@ -638,12 +730,10 @@ static uint8_t const sm4_sbox[] = { 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, }; -void HELPER(crypto_sm4e)(void *vd, void *vn) +static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; - union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; + union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; uint32_t t, i; for (i = 0; i < 4; i++) { @@ -665,11 +755,18 @@ void HELPER(crypto_sm4e)(void *vd, void *vn) rd[1] = d.l[1]; } -void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) +void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4e((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vn + i), (uint64_t*)((char*)vm + i)); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) { - uint64_t *rd = vd; - uint64_t *rn = vn; - uint64_t *rm = vm; union CRYPTO_STATE d; union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; @@ -693,3 +790,24 @@ void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) rd[0] = d.l[0]; rd[1] = d.l[1]; } + +void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + + for (i = 0; i < opr_sz; i += 16) { + do_crypto_sm4ekey((uint64_t*)((char*)vd + i), (uint64_t*)((char*)vn + i), (uint64_t*)((char*)vm + i)); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} + +void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] = n[i] ^ rol64(m[i], 1); + } + clear_tail(vd, opr_sz, simd_maxsz(desc)); +} diff --git a/qemu/target/arm/decode-a32.inc.c b/qemu/target/arm/decode-a32.inc.c index cf6c644a83..a1b30a2caa 100644 --- a/qemu/target/arm/decode-a32.inc.c +++ b/qemu/target/arm/decode-a32.inc.c @@ -21,9 +21,7 @@ typedef struct { } arg_disas_a3226; typedef struct { -#ifdef _MSC_VER - int dummy; -#endif + int : 0; } arg_empty; typedef struct { diff --git a/qemu/target/arm/decode-neon-dp.inc.c b/qemu/target/arm/decode-neon-dp.inc.c new file mode 100644 index 0000000000..c09bfb5e55 --- /dev/null +++ b/qemu/target/arm/decode-neon-dp.inc.c @@ -0,0 +1,2806 @@ +/* This file is autogenerated by scripts/decodetree.py. 
*/ + +typedef struct { + int cmode; + int imm; + int op; + int q; + int vd; +} arg_1reg_imm; + +typedef struct { + int q; + int size; + int vd; + int vm; +} arg_2misc; + +typedef struct { + int q; + int shift; + int size; + int vd; + int vm; +} arg_2reg_shift; + +typedef struct { + int q; + int size; + int vd; + int vm; + int vn; +} arg_2scalar; + +typedef struct { + int size; + int vd; + int vm; + int vn; +} arg_3diff; + +typedef struct { + int q; + int size; + int vd; + int vm; + int vn; +} arg_3same; + +typedef struct { + int imm; + int q; + int vd; + int vm; + int vn; +} arg_disas_neon_dp3; + +typedef struct { + int len; + int op; + int vd; + int vm; + int vn; +} arg_disas_neon_dp4; + +typedef struct { + int index; + int q; + int size; + int vd; + int vm; +} arg_disas_neon_dp5; + +typedef arg_3same arg_VHADD_S_3s; +static bool trans_VHADD_S_3s(DisasContext *ctx, arg_VHADD_S_3s *a); +typedef arg_3same arg_VHADD_U_3s; +static bool trans_VHADD_U_3s(DisasContext *ctx, arg_VHADD_U_3s *a); +typedef arg_3same arg_VQADD_S_3s; +static bool trans_VQADD_S_3s(DisasContext *ctx, arg_VQADD_S_3s *a); +typedef arg_3same arg_VQADD_U_3s; +static bool trans_VQADD_U_3s(DisasContext *ctx, arg_VQADD_U_3s *a); +typedef arg_3same arg_VRHADD_S_3s; +static bool trans_VRHADD_S_3s(DisasContext *ctx, arg_VRHADD_S_3s *a); +typedef arg_3same arg_VRHADD_U_3s; +static bool trans_VRHADD_U_3s(DisasContext *ctx, arg_VRHADD_U_3s *a); +typedef arg_3same arg_VAND_3s; +static bool trans_VAND_3s(DisasContext *ctx, arg_VAND_3s *a); +typedef arg_3same arg_VBIC_3s; +static bool trans_VBIC_3s(DisasContext *ctx, arg_VBIC_3s *a); +typedef arg_3same arg_VORR_3s; +static bool trans_VORR_3s(DisasContext *ctx, arg_VORR_3s *a); +typedef arg_3same arg_VORN_3s; +static bool trans_VORN_3s(DisasContext *ctx, arg_VORN_3s *a); +typedef arg_3same arg_VEOR_3s; +static bool trans_VEOR_3s(DisasContext *ctx, arg_VEOR_3s *a); +typedef arg_3same arg_VBSL_3s; +static bool trans_VBSL_3s(DisasContext *ctx, arg_VBSL_3s *a); +typedef arg_3same arg_VBIT_3s; +static bool trans_VBIT_3s(DisasContext *ctx, arg_VBIT_3s *a); +typedef arg_3same arg_VBIF_3s; +static bool trans_VBIF_3s(DisasContext *ctx, arg_VBIF_3s *a); +typedef arg_3same arg_VHSUB_S_3s; +static bool trans_VHSUB_S_3s(DisasContext *ctx, arg_VHSUB_S_3s *a); +typedef arg_3same arg_VHSUB_U_3s; +static bool trans_VHSUB_U_3s(DisasContext *ctx, arg_VHSUB_U_3s *a); +typedef arg_3same arg_VQSUB_S_3s; +static bool trans_VQSUB_S_3s(DisasContext *ctx, arg_VQSUB_S_3s *a); +typedef arg_3same arg_VQSUB_U_3s; +static bool trans_VQSUB_U_3s(DisasContext *ctx, arg_VQSUB_U_3s *a); +typedef arg_3same arg_VCGT_S_3s; +static bool trans_VCGT_S_3s(DisasContext *ctx, arg_VCGT_S_3s *a); +typedef arg_3same arg_VCGT_U_3s; +static bool trans_VCGT_U_3s(DisasContext *ctx, arg_VCGT_U_3s *a); +typedef arg_3same arg_VCGE_S_3s; +static bool trans_VCGE_S_3s(DisasContext *ctx, arg_VCGE_S_3s *a); +typedef arg_3same arg_VCGE_U_3s; +static bool trans_VCGE_U_3s(DisasContext *ctx, arg_VCGE_U_3s *a); +typedef arg_3same arg_VSHL_S_3s; +static bool trans_VSHL_S_3s(DisasContext *ctx, arg_VSHL_S_3s *a); +typedef arg_3same arg_VSHL_U_3s; +static bool trans_VSHL_U_3s(DisasContext *ctx, arg_VSHL_U_3s *a); +typedef arg_3same arg_VQSHL_S64_3s; +static bool trans_VQSHL_S64_3s(DisasContext *ctx, arg_VQSHL_S64_3s *a); +typedef arg_3same arg_VQSHL_S_3s; +static bool trans_VQSHL_S_3s(DisasContext *ctx, arg_VQSHL_S_3s *a); +typedef arg_3same arg_VQSHL_U64_3s; +static bool trans_VQSHL_U64_3s(DisasContext *ctx, arg_VQSHL_U64_3s *a); +typedef arg_3same 
arg_VQSHL_U_3s; +static bool trans_VQSHL_U_3s(DisasContext *ctx, arg_VQSHL_U_3s *a); +typedef arg_3same arg_VRSHL_S64_3s; +static bool trans_VRSHL_S64_3s(DisasContext *ctx, arg_VRSHL_S64_3s *a); +typedef arg_3same arg_VRSHL_S_3s; +static bool trans_VRSHL_S_3s(DisasContext *ctx, arg_VRSHL_S_3s *a); +typedef arg_3same arg_VRSHL_U64_3s; +static bool trans_VRSHL_U64_3s(DisasContext *ctx, arg_VRSHL_U64_3s *a); +typedef arg_3same arg_VRSHL_U_3s; +static bool trans_VRSHL_U_3s(DisasContext *ctx, arg_VRSHL_U_3s *a); +typedef arg_3same arg_VQRSHL_S64_3s; +static bool trans_VQRSHL_S64_3s(DisasContext *ctx, arg_VQRSHL_S64_3s *a); +typedef arg_3same arg_VQRSHL_S_3s; +static bool trans_VQRSHL_S_3s(DisasContext *ctx, arg_VQRSHL_S_3s *a); +typedef arg_3same arg_VQRSHL_U64_3s; +static bool trans_VQRSHL_U64_3s(DisasContext *ctx, arg_VQRSHL_U64_3s *a); +typedef arg_3same arg_VQRSHL_U_3s; +static bool trans_VQRSHL_U_3s(DisasContext *ctx, arg_VQRSHL_U_3s *a); +typedef arg_3same arg_VMAX_S_3s; +static bool trans_VMAX_S_3s(DisasContext *ctx, arg_VMAX_S_3s *a); +typedef arg_3same arg_VMAX_U_3s; +static bool trans_VMAX_U_3s(DisasContext *ctx, arg_VMAX_U_3s *a); +typedef arg_3same arg_VMIN_S_3s; +static bool trans_VMIN_S_3s(DisasContext *ctx, arg_VMIN_S_3s *a); +typedef arg_3same arg_VMIN_U_3s; +static bool trans_VMIN_U_3s(DisasContext *ctx, arg_VMIN_U_3s *a); +typedef arg_3same arg_VABD_S_3s; +static bool trans_VABD_S_3s(DisasContext *ctx, arg_VABD_S_3s *a); +typedef arg_3same arg_VABD_U_3s; +static bool trans_VABD_U_3s(DisasContext *ctx, arg_VABD_U_3s *a); +typedef arg_3same arg_VABA_S_3s; +static bool trans_VABA_S_3s(DisasContext *ctx, arg_VABA_S_3s *a); +typedef arg_3same arg_VABA_U_3s; +static bool trans_VABA_U_3s(DisasContext *ctx, arg_VABA_U_3s *a); +typedef arg_3same arg_VADD_3s; +static bool trans_VADD_3s(DisasContext *ctx, arg_VADD_3s *a); +typedef arg_3same arg_VSUB_3s; +static bool trans_VSUB_3s(DisasContext *ctx, arg_VSUB_3s *a); +typedef arg_3same arg_VTST_3s; +static bool trans_VTST_3s(DisasContext *ctx, arg_VTST_3s *a); +typedef arg_3same arg_VCEQ_3s; +static bool trans_VCEQ_3s(DisasContext *ctx, arg_VCEQ_3s *a); +typedef arg_3same arg_VMLA_3s; +static bool trans_VMLA_3s(DisasContext *ctx, arg_VMLA_3s *a); +typedef arg_3same arg_VMLS_3s; +static bool trans_VMLS_3s(DisasContext *ctx, arg_VMLS_3s *a); +typedef arg_3same arg_VMUL_3s; +static bool trans_VMUL_3s(DisasContext *ctx, arg_VMUL_3s *a); +typedef arg_3same arg_VMUL_p_3s; +static bool trans_VMUL_p_3s(DisasContext *ctx, arg_VMUL_p_3s *a); +typedef arg_3same arg_VPMAX_S_3s; +static bool trans_VPMAX_S_3s(DisasContext *ctx, arg_VPMAX_S_3s *a); +typedef arg_3same arg_VPMAX_U_3s; +static bool trans_VPMAX_U_3s(DisasContext *ctx, arg_VPMAX_U_3s *a); +typedef arg_3same arg_VPMIN_S_3s; +static bool trans_VPMIN_S_3s(DisasContext *ctx, arg_VPMIN_S_3s *a); +typedef arg_3same arg_VPMIN_U_3s; +static bool trans_VPMIN_U_3s(DisasContext *ctx, arg_VPMIN_U_3s *a); +typedef arg_3same arg_VQDMULH_3s; +static bool trans_VQDMULH_3s(DisasContext *ctx, arg_VQDMULH_3s *a); +typedef arg_3same arg_VQRDMULH_3s; +static bool trans_VQRDMULH_3s(DisasContext *ctx, arg_VQRDMULH_3s *a); +typedef arg_3same arg_VPADD_3s; +static bool trans_VPADD_3s(DisasContext *ctx, arg_VPADD_3s *a); +typedef arg_3same arg_VQRDMLAH_3s; +static bool trans_VQRDMLAH_3s(DisasContext *ctx, arg_VQRDMLAH_3s *a); +typedef arg_3same arg_SHA1C_3s; +static bool trans_SHA1C_3s(DisasContext *ctx, arg_SHA1C_3s *a); +typedef arg_3same arg_SHA1P_3s; +static bool trans_SHA1P_3s(DisasContext *ctx, arg_SHA1P_3s *a); 
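Each typedef/prototype pair above is the decodetree-generated contract for one instruction pattern: the script emits an argument struct plus a `static bool trans_<INSN>(DisasContext *, arg_<INSN> *)` declaration, and the hand-written translator that includes this generated file is expected to supply the body, returning true once the instruction has been handled. A minimal sketch of that contract, with purely hypothetical names and feature check, would look like:

/* Illustrative sketch only -- not part of the generated file.  The real
 * trans_* bodies live in the hand-written Neon translator; "VFOO" and
 * the feature test below are placeholders. */
typedef struct {
    int q, size, vd, vm, vn;
} arg_VFOO_3s;

static bool trans_VFOO_3s(DisasContext *s, arg_VFOO_3s *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;   /* pattern matched but instruction not available */
    }
    /* ... emit TCG ops using a->vd, a->vn, a->vm, a->q, a->size ... */
    return true;        /* decoded and translated */
}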
+typedef arg_3same arg_SHA1M_3s; +static bool trans_SHA1M_3s(DisasContext *ctx, arg_SHA1M_3s *a); +typedef arg_3same arg_SHA1SU0_3s; +static bool trans_SHA1SU0_3s(DisasContext *ctx, arg_SHA1SU0_3s *a); +typedef arg_3same arg_SHA256H_3s; +static bool trans_SHA256H_3s(DisasContext *ctx, arg_SHA256H_3s *a); +typedef arg_3same arg_SHA256H2_3s; +static bool trans_SHA256H2_3s(DisasContext *ctx, arg_SHA256H2_3s *a); +typedef arg_3same arg_SHA256SU1_3s; +static bool trans_SHA256SU1_3s(DisasContext *ctx, arg_SHA256SU1_3s *a); +typedef arg_3same arg_VFMA_fp_3s; +static bool trans_VFMA_fp_3s(DisasContext *ctx, arg_VFMA_fp_3s *a); +typedef arg_3same arg_VFMS_fp_3s; +static bool trans_VFMS_fp_3s(DisasContext *ctx, arg_VFMS_fp_3s *a); +typedef arg_3same arg_VQRDMLSH_3s; +static bool trans_VQRDMLSH_3s(DisasContext *ctx, arg_VQRDMLSH_3s *a); +typedef arg_3same arg_VADD_fp_3s; +static bool trans_VADD_fp_3s(DisasContext *ctx, arg_VADD_fp_3s *a); +typedef arg_3same arg_VSUB_fp_3s; +static bool trans_VSUB_fp_3s(DisasContext *ctx, arg_VSUB_fp_3s *a); +typedef arg_3same arg_VPADD_fp_3s; +static bool trans_VPADD_fp_3s(DisasContext *ctx, arg_VPADD_fp_3s *a); +typedef arg_3same arg_VABD_fp_3s; +static bool trans_VABD_fp_3s(DisasContext *ctx, arg_VABD_fp_3s *a); +typedef arg_3same arg_VMLA_fp_3s; +static bool trans_VMLA_fp_3s(DisasContext *ctx, arg_VMLA_fp_3s *a); +typedef arg_3same arg_VMLS_fp_3s; +static bool trans_VMLS_fp_3s(DisasContext *ctx, arg_VMLS_fp_3s *a); +typedef arg_3same arg_VMUL_fp_3s; +static bool trans_VMUL_fp_3s(DisasContext *ctx, arg_VMUL_fp_3s *a); +typedef arg_3same arg_VCEQ_fp_3s; +static bool trans_VCEQ_fp_3s(DisasContext *ctx, arg_VCEQ_fp_3s *a); +typedef arg_3same arg_VCGE_fp_3s; +static bool trans_VCGE_fp_3s(DisasContext *ctx, arg_VCGE_fp_3s *a); +typedef arg_3same arg_VACGE_fp_3s; +static bool trans_VACGE_fp_3s(DisasContext *ctx, arg_VACGE_fp_3s *a); +typedef arg_3same arg_VCGT_fp_3s; +static bool trans_VCGT_fp_3s(DisasContext *ctx, arg_VCGT_fp_3s *a); +typedef arg_3same arg_VACGT_fp_3s; +static bool trans_VACGT_fp_3s(DisasContext *ctx, arg_VACGT_fp_3s *a); +typedef arg_3same arg_VMAX_fp_3s; +static bool trans_VMAX_fp_3s(DisasContext *ctx, arg_VMAX_fp_3s *a); +typedef arg_3same arg_VMIN_fp_3s; +static bool trans_VMIN_fp_3s(DisasContext *ctx, arg_VMIN_fp_3s *a); +typedef arg_3same arg_VPMAX_fp_3s; +static bool trans_VPMAX_fp_3s(DisasContext *ctx, arg_VPMAX_fp_3s *a); +typedef arg_3same arg_VPMIN_fp_3s; +static bool trans_VPMIN_fp_3s(DisasContext *ctx, arg_VPMIN_fp_3s *a); +typedef arg_3same arg_VRECPS_fp_3s; +static bool trans_VRECPS_fp_3s(DisasContext *ctx, arg_VRECPS_fp_3s *a); +typedef arg_3same arg_VRSQRTS_fp_3s; +static bool trans_VRSQRTS_fp_3s(DisasContext *ctx, arg_VRSQRTS_fp_3s *a); +typedef arg_3same arg_VMAXNM_fp_3s; +static bool trans_VMAXNM_fp_3s(DisasContext *ctx, arg_VMAXNM_fp_3s *a); +typedef arg_3same arg_VMINNM_fp_3s; +static bool trans_VMINNM_fp_3s(DisasContext *ctx, arg_VMINNM_fp_3s *a); +typedef arg_2reg_shift arg_VSHR_S_2sh; +static bool trans_VSHR_S_2sh(DisasContext *ctx, arg_VSHR_S_2sh *a); +typedef arg_2reg_shift arg_VSHR_U_2sh; +static bool trans_VSHR_U_2sh(DisasContext *ctx, arg_VSHR_U_2sh *a); +typedef arg_2reg_shift arg_VSRA_S_2sh; +static bool trans_VSRA_S_2sh(DisasContext *ctx, arg_VSRA_S_2sh *a); +typedef arg_2reg_shift arg_VSRA_U_2sh; +static bool trans_VSRA_U_2sh(DisasContext *ctx, arg_VSRA_U_2sh *a); +typedef arg_2reg_shift arg_VRSHR_S_2sh; +static bool trans_VRSHR_S_2sh(DisasContext *ctx, arg_VRSHR_S_2sh *a); +typedef arg_2reg_shift arg_VRSHR_U_2sh; 
+static bool trans_VRSHR_U_2sh(DisasContext *ctx, arg_VRSHR_U_2sh *a); +typedef arg_2reg_shift arg_VRSRA_S_2sh; +static bool trans_VRSRA_S_2sh(DisasContext *ctx, arg_VRSRA_S_2sh *a); +typedef arg_2reg_shift arg_VRSRA_U_2sh; +static bool trans_VRSRA_U_2sh(DisasContext *ctx, arg_VRSRA_U_2sh *a); +typedef arg_2reg_shift arg_VSRI_2sh; +static bool trans_VSRI_2sh(DisasContext *ctx, arg_VSRI_2sh *a); +typedef arg_2reg_shift arg_VSHL_2sh; +static bool trans_VSHL_2sh(DisasContext *ctx, arg_VSHL_2sh *a); +typedef arg_2reg_shift arg_VSLI_2sh; +static bool trans_VSLI_2sh(DisasContext *ctx, arg_VSLI_2sh *a); +typedef arg_2reg_shift arg_VQSHLU_64_2sh; +static bool trans_VQSHLU_64_2sh(DisasContext *ctx, arg_VQSHLU_64_2sh *a); +typedef arg_2reg_shift arg_VQSHLU_2sh; +static bool trans_VQSHLU_2sh(DisasContext *ctx, arg_VQSHLU_2sh *a); +typedef arg_2reg_shift arg_VQSHL_S_64_2sh; +static bool trans_VQSHL_S_64_2sh(DisasContext *ctx, arg_VQSHL_S_64_2sh *a); +typedef arg_2reg_shift arg_VQSHL_S_2sh; +static bool trans_VQSHL_S_2sh(DisasContext *ctx, arg_VQSHL_S_2sh *a); +typedef arg_2reg_shift arg_VQSHL_U_64_2sh; +static bool trans_VQSHL_U_64_2sh(DisasContext *ctx, arg_VQSHL_U_64_2sh *a); +typedef arg_2reg_shift arg_VQSHL_U_2sh; +static bool trans_VQSHL_U_2sh(DisasContext *ctx, arg_VQSHL_U_2sh *a); +typedef arg_2reg_shift arg_VSHRN_64_2sh; +static bool trans_VSHRN_64_2sh(DisasContext *ctx, arg_VSHRN_64_2sh *a); +typedef arg_2reg_shift arg_VSHRN_32_2sh; +static bool trans_VSHRN_32_2sh(DisasContext *ctx, arg_VSHRN_32_2sh *a); +typedef arg_2reg_shift arg_VSHRN_16_2sh; +static bool trans_VSHRN_16_2sh(DisasContext *ctx, arg_VSHRN_16_2sh *a); +typedef arg_2reg_shift arg_VRSHRN_64_2sh; +static bool trans_VRSHRN_64_2sh(DisasContext *ctx, arg_VRSHRN_64_2sh *a); +typedef arg_2reg_shift arg_VRSHRN_32_2sh; +static bool trans_VRSHRN_32_2sh(DisasContext *ctx, arg_VRSHRN_32_2sh *a); +typedef arg_2reg_shift arg_VRSHRN_16_2sh; +static bool trans_VRSHRN_16_2sh(DisasContext *ctx, arg_VRSHRN_16_2sh *a); +typedef arg_2reg_shift arg_VQSHRUN_64_2sh; +static bool trans_VQSHRUN_64_2sh(DisasContext *ctx, arg_VQSHRUN_64_2sh *a); +typedef arg_2reg_shift arg_VQSHRUN_32_2sh; +static bool trans_VQSHRUN_32_2sh(DisasContext *ctx, arg_VQSHRUN_32_2sh *a); +typedef arg_2reg_shift arg_VQSHRUN_16_2sh; +static bool trans_VQSHRUN_16_2sh(DisasContext *ctx, arg_VQSHRUN_16_2sh *a); +typedef arg_2reg_shift arg_VQRSHRUN_64_2sh; +static bool trans_VQRSHRUN_64_2sh(DisasContext *ctx, arg_VQRSHRUN_64_2sh *a); +typedef arg_2reg_shift arg_VQRSHRUN_32_2sh; +static bool trans_VQRSHRUN_32_2sh(DisasContext *ctx, arg_VQRSHRUN_32_2sh *a); +typedef arg_2reg_shift arg_VQRSHRUN_16_2sh; +static bool trans_VQRSHRUN_16_2sh(DisasContext *ctx, arg_VQRSHRUN_16_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_S64_2sh; +static bool trans_VQSHRN_S64_2sh(DisasContext *ctx, arg_VQSHRN_S64_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_S32_2sh; +static bool trans_VQSHRN_S32_2sh(DisasContext *ctx, arg_VQSHRN_S32_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_S16_2sh; +static bool trans_VQSHRN_S16_2sh(DisasContext *ctx, arg_VQSHRN_S16_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_S64_2sh; +static bool trans_VQRSHRN_S64_2sh(DisasContext *ctx, arg_VQRSHRN_S64_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_S32_2sh; +static bool trans_VQRSHRN_S32_2sh(DisasContext *ctx, arg_VQRSHRN_S32_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_S16_2sh; +static bool trans_VQRSHRN_S16_2sh(DisasContext *ctx, arg_VQRSHRN_S16_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_U64_2sh; +static bool 
trans_VQSHRN_U64_2sh(DisasContext *ctx, arg_VQSHRN_U64_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_U32_2sh; +static bool trans_VQSHRN_U32_2sh(DisasContext *ctx, arg_VQSHRN_U32_2sh *a); +typedef arg_2reg_shift arg_VQSHRN_U16_2sh; +static bool trans_VQSHRN_U16_2sh(DisasContext *ctx, arg_VQSHRN_U16_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_U64_2sh; +static bool trans_VQRSHRN_U64_2sh(DisasContext *ctx, arg_VQRSHRN_U64_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_U32_2sh; +static bool trans_VQRSHRN_U32_2sh(DisasContext *ctx, arg_VQRSHRN_U32_2sh *a); +typedef arg_2reg_shift arg_VQRSHRN_U16_2sh; +static bool trans_VQRSHRN_U16_2sh(DisasContext *ctx, arg_VQRSHRN_U16_2sh *a); +typedef arg_2reg_shift arg_VSHLL_S_2sh; +static bool trans_VSHLL_S_2sh(DisasContext *ctx, arg_VSHLL_S_2sh *a); +typedef arg_2reg_shift arg_VSHLL_U_2sh; +static bool trans_VSHLL_U_2sh(DisasContext *ctx, arg_VSHLL_U_2sh *a); +typedef arg_2reg_shift arg_VCVT_SF_2sh; +static bool trans_VCVT_SF_2sh(DisasContext *ctx, arg_VCVT_SF_2sh *a); +typedef arg_2reg_shift arg_VCVT_UF_2sh; +static bool trans_VCVT_UF_2sh(DisasContext *ctx, arg_VCVT_UF_2sh *a); +typedef arg_2reg_shift arg_VCVT_FS_2sh; +static bool trans_VCVT_FS_2sh(DisasContext *ctx, arg_VCVT_FS_2sh *a); +typedef arg_2reg_shift arg_VCVT_FU_2sh; +static bool trans_VCVT_FU_2sh(DisasContext *ctx, arg_VCVT_FU_2sh *a); +typedef arg_1reg_imm arg_Vimm_1r; +static bool trans_Vimm_1r(DisasContext *ctx, arg_Vimm_1r *a); +typedef arg_disas_neon_dp3 arg_VEXT; +static bool trans_VEXT(DisasContext *ctx, arg_VEXT *a); +typedef arg_disas_neon_dp4 arg_VTBL; +static bool trans_VTBL(DisasContext *ctx, arg_VTBL *a); +typedef arg_disas_neon_dp5 arg_VDUP_scalar; +static bool trans_VDUP_scalar(DisasContext *ctx, arg_VDUP_scalar *a); +typedef arg_2misc arg_VREV64; +static bool trans_VREV64(DisasContext *ctx, arg_VREV64 *a); +typedef arg_2misc arg_VREV32; +static bool trans_VREV32(DisasContext *ctx, arg_VREV32 *a); +typedef arg_2misc arg_VREV16; +static bool trans_VREV16(DisasContext *ctx, arg_VREV16 *a); +typedef arg_2misc arg_VPADDL_S; +static bool trans_VPADDL_S(DisasContext *ctx, arg_VPADDL_S *a); +typedef arg_2misc arg_VPADDL_U; +static bool trans_VPADDL_U(DisasContext *ctx, arg_VPADDL_U *a); +typedef arg_2misc arg_AESE; +static bool trans_AESE(DisasContext *ctx, arg_AESE *a); +typedef arg_2misc arg_AESD; +static bool trans_AESD(DisasContext *ctx, arg_AESD *a); +typedef arg_2misc arg_AESMC; +static bool trans_AESMC(DisasContext *ctx, arg_AESMC *a); +typedef arg_2misc arg_AESIMC; +static bool trans_AESIMC(DisasContext *ctx, arg_AESIMC *a); +typedef arg_2misc arg_VCLS; +static bool trans_VCLS(DisasContext *ctx, arg_VCLS *a); +typedef arg_2misc arg_VCLZ; +static bool trans_VCLZ(DisasContext *ctx, arg_VCLZ *a); +typedef arg_2misc arg_VCNT; +static bool trans_VCNT(DisasContext *ctx, arg_VCNT *a); +typedef arg_2misc arg_VMVN; +static bool trans_VMVN(DisasContext *ctx, arg_VMVN *a); +typedef arg_2misc arg_VPADAL_S; +static bool trans_VPADAL_S(DisasContext *ctx, arg_VPADAL_S *a); +typedef arg_2misc arg_VPADAL_U; +static bool trans_VPADAL_U(DisasContext *ctx, arg_VPADAL_U *a); +typedef arg_2misc arg_VQABS; +static bool trans_VQABS(DisasContext *ctx, arg_VQABS *a); +typedef arg_2misc arg_VQNEG; +static bool trans_VQNEG(DisasContext *ctx, arg_VQNEG *a); +typedef arg_2misc arg_VCGT0; +static bool trans_VCGT0(DisasContext *ctx, arg_VCGT0 *a); +typedef arg_2misc arg_VCGE0; +static bool trans_VCGE0(DisasContext *ctx, arg_VCGE0 *a); +typedef arg_2misc arg_VCEQ0; +static bool trans_VCEQ0(DisasContext *ctx, 
arg_VCEQ0 *a); +typedef arg_2misc arg_VCLE0; +static bool trans_VCLE0(DisasContext *ctx, arg_VCLE0 *a); +typedef arg_2misc arg_VCLT0; +static bool trans_VCLT0(DisasContext *ctx, arg_VCLT0 *a); +typedef arg_2misc arg_SHA1H; +static bool trans_SHA1H(DisasContext *ctx, arg_SHA1H *a); +typedef arg_2misc arg_VABS; +static bool trans_VABS(DisasContext *ctx, arg_VABS *a); +typedef arg_2misc arg_VNEG; +static bool trans_VNEG(DisasContext *ctx, arg_VNEG *a); +typedef arg_2misc arg_VCGT0_F; +static bool trans_VCGT0_F(DisasContext *ctx, arg_VCGT0_F *a); +typedef arg_2misc arg_VCGE0_F; +static bool trans_VCGE0_F(DisasContext *ctx, arg_VCGE0_F *a); +typedef arg_2misc arg_VCEQ0_F; +static bool trans_VCEQ0_F(DisasContext *ctx, arg_VCEQ0_F *a); +typedef arg_2misc arg_VCLE0_F; +static bool trans_VCLE0_F(DisasContext *ctx, arg_VCLE0_F *a); +typedef arg_2misc arg_VCLT0_F; +static bool trans_VCLT0_F(DisasContext *ctx, arg_VCLT0_F *a); +typedef arg_2misc arg_VABS_F; +static bool trans_VABS_F(DisasContext *ctx, arg_VABS_F *a); +typedef arg_2misc arg_VNEG_F; +static bool trans_VNEG_F(DisasContext *ctx, arg_VNEG_F *a); +typedef arg_2misc arg_VSWP; +static bool trans_VSWP(DisasContext *ctx, arg_VSWP *a); +typedef arg_2misc arg_VTRN; +static bool trans_VTRN(DisasContext *ctx, arg_VTRN *a); +typedef arg_2misc arg_VUZP; +static bool trans_VUZP(DisasContext *ctx, arg_VUZP *a); +typedef arg_2misc arg_VZIP; +static bool trans_VZIP(DisasContext *ctx, arg_VZIP *a); +typedef arg_2misc arg_VMOVN; +static bool trans_VMOVN(DisasContext *ctx, arg_VMOVN *a); +typedef arg_2misc arg_VQMOVUN; +static bool trans_VQMOVUN(DisasContext *ctx, arg_VQMOVUN *a); +typedef arg_2misc arg_VQMOVN_S; +static bool trans_VQMOVN_S(DisasContext *ctx, arg_VQMOVN_S *a); +typedef arg_2misc arg_VQMOVN_U; +static bool trans_VQMOVN_U(DisasContext *ctx, arg_VQMOVN_U *a); +typedef arg_2misc arg_VSHLL; +static bool trans_VSHLL(DisasContext *ctx, arg_VSHLL *a); +typedef arg_2misc arg_SHA1SU1; +static bool trans_SHA1SU1(DisasContext *ctx, arg_SHA1SU1 *a); +typedef arg_2misc arg_SHA256SU0; +static bool trans_SHA256SU0(DisasContext *ctx, arg_SHA256SU0 *a); +typedef arg_2misc arg_VRINTN; +static bool trans_VRINTN(DisasContext *ctx, arg_VRINTN *a); +typedef arg_2misc arg_VRINTX; +static bool trans_VRINTX(DisasContext *ctx, arg_VRINTX *a); +typedef arg_2misc arg_VRINTA; +static bool trans_VRINTA(DisasContext *ctx, arg_VRINTA *a); +typedef arg_2misc arg_VRINTZ; +static bool trans_VRINTZ(DisasContext *ctx, arg_VRINTZ *a); +typedef arg_2misc arg_VCVT_F16_F32; +static bool trans_VCVT_F16_F32(DisasContext *ctx, arg_VCVT_F16_F32 *a); +typedef arg_2misc arg_VRINTM; +static bool trans_VRINTM(DisasContext *ctx, arg_VRINTM *a); +typedef arg_2misc arg_VCVT_F32_F16; +static bool trans_VCVT_F32_F16(DisasContext *ctx, arg_VCVT_F32_F16 *a); +typedef arg_2misc arg_VRINTP; +static bool trans_VRINTP(DisasContext *ctx, arg_VRINTP *a); +typedef arg_2misc arg_VCVTAS; +static bool trans_VCVTAS(DisasContext *ctx, arg_VCVTAS *a); +typedef arg_2misc arg_VCVTAU; +static bool trans_VCVTAU(DisasContext *ctx, arg_VCVTAU *a); +typedef arg_2misc arg_VCVTNS; +static bool trans_VCVTNS(DisasContext *ctx, arg_VCVTNS *a); +typedef arg_2misc arg_VCVTNU; +static bool trans_VCVTNU(DisasContext *ctx, arg_VCVTNU *a); +typedef arg_2misc arg_VCVTPS; +static bool trans_VCVTPS(DisasContext *ctx, arg_VCVTPS *a); +typedef arg_2misc arg_VCVTPU; +static bool trans_VCVTPU(DisasContext *ctx, arg_VCVTPU *a); +typedef arg_2misc arg_VCVTMS; +static bool trans_VCVTMS(DisasContext *ctx, arg_VCVTMS *a); +typedef 
arg_2misc arg_VCVTMU; +static bool trans_VCVTMU(DisasContext *ctx, arg_VCVTMU *a); +typedef arg_2misc arg_VRECPE; +static bool trans_VRECPE(DisasContext *ctx, arg_VRECPE *a); +typedef arg_2misc arg_VRSQRTE; +static bool trans_VRSQRTE(DisasContext *ctx, arg_VRSQRTE *a); +typedef arg_2misc arg_VRECPE_F; +static bool trans_VRECPE_F(DisasContext *ctx, arg_VRECPE_F *a); +typedef arg_2misc arg_VRSQRTE_F; +static bool trans_VRSQRTE_F(DisasContext *ctx, arg_VRSQRTE_F *a); +typedef arg_2misc arg_VCVT_FS; +static bool trans_VCVT_FS(DisasContext *ctx, arg_VCVT_FS *a); +typedef arg_2misc arg_VCVT_FU; +static bool trans_VCVT_FU(DisasContext *ctx, arg_VCVT_FU *a); +typedef arg_2misc arg_VCVT_SF; +static bool trans_VCVT_SF(DisasContext *ctx, arg_VCVT_SF *a); +typedef arg_2misc arg_VCVT_UF; +static bool trans_VCVT_UF(DisasContext *ctx, arg_VCVT_UF *a); +typedef arg_3diff arg_VADDL_S_3d; +static bool trans_VADDL_S_3d(DisasContext *ctx, arg_VADDL_S_3d *a); +typedef arg_3diff arg_VADDL_U_3d; +static bool trans_VADDL_U_3d(DisasContext *ctx, arg_VADDL_U_3d *a); +typedef arg_3diff arg_VADDW_S_3d; +static bool trans_VADDW_S_3d(DisasContext *ctx, arg_VADDW_S_3d *a); +typedef arg_3diff arg_VADDW_U_3d; +static bool trans_VADDW_U_3d(DisasContext *ctx, arg_VADDW_U_3d *a); +typedef arg_3diff arg_VSUBL_S_3d; +static bool trans_VSUBL_S_3d(DisasContext *ctx, arg_VSUBL_S_3d *a); +typedef arg_3diff arg_VSUBL_U_3d; +static bool trans_VSUBL_U_3d(DisasContext *ctx, arg_VSUBL_U_3d *a); +typedef arg_3diff arg_VSUBW_S_3d; +static bool trans_VSUBW_S_3d(DisasContext *ctx, arg_VSUBW_S_3d *a); +typedef arg_3diff arg_VSUBW_U_3d; +static bool trans_VSUBW_U_3d(DisasContext *ctx, arg_VSUBW_U_3d *a); +typedef arg_3diff arg_VADDHN_3d; +static bool trans_VADDHN_3d(DisasContext *ctx, arg_VADDHN_3d *a); +typedef arg_3diff arg_VRADDHN_3d; +static bool trans_VRADDHN_3d(DisasContext *ctx, arg_VRADDHN_3d *a); +typedef arg_3diff arg_VABAL_S_3d; +static bool trans_VABAL_S_3d(DisasContext *ctx, arg_VABAL_S_3d *a); +typedef arg_3diff arg_VABAL_U_3d; +static bool trans_VABAL_U_3d(DisasContext *ctx, arg_VABAL_U_3d *a); +typedef arg_3diff arg_VSUBHN_3d; +static bool trans_VSUBHN_3d(DisasContext *ctx, arg_VSUBHN_3d *a); +typedef arg_3diff arg_VRSUBHN_3d; +static bool trans_VRSUBHN_3d(DisasContext *ctx, arg_VRSUBHN_3d *a); +typedef arg_3diff arg_VABDL_S_3d; +static bool trans_VABDL_S_3d(DisasContext *ctx, arg_VABDL_S_3d *a); +typedef arg_3diff arg_VABDL_U_3d; +static bool trans_VABDL_U_3d(DisasContext *ctx, arg_VABDL_U_3d *a); +typedef arg_3diff arg_VMLAL_S_3d; +static bool trans_VMLAL_S_3d(DisasContext *ctx, arg_VMLAL_S_3d *a); +typedef arg_3diff arg_VMLAL_U_3d; +static bool trans_VMLAL_U_3d(DisasContext *ctx, arg_VMLAL_U_3d *a); +typedef arg_3diff arg_VQDMLAL_3d; +static bool trans_VQDMLAL_3d(DisasContext *ctx, arg_VQDMLAL_3d *a); +typedef arg_3diff arg_VMLSL_S_3d; +static bool trans_VMLSL_S_3d(DisasContext *ctx, arg_VMLSL_S_3d *a); +typedef arg_3diff arg_VMLSL_U_3d; +static bool trans_VMLSL_U_3d(DisasContext *ctx, arg_VMLSL_U_3d *a); +typedef arg_3diff arg_VQDMLSL_3d; +static bool trans_VQDMLSL_3d(DisasContext *ctx, arg_VQDMLSL_3d *a); +typedef arg_3diff arg_VMULL_S_3d; +static bool trans_VMULL_S_3d(DisasContext *ctx, arg_VMULL_S_3d *a); +typedef arg_3diff arg_VMULL_U_3d; +static bool trans_VMULL_U_3d(DisasContext *ctx, arg_VMULL_U_3d *a); +typedef arg_3diff arg_VQDMULL_3d; +static bool trans_VQDMULL_3d(DisasContext *ctx, arg_VQDMULL_3d *a); +typedef arg_3diff arg_VMULL_P_3d; +static bool trans_VMULL_P_3d(DisasContext *ctx, arg_VMULL_P_3d *a); 
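The argument structs shared by these declarations mirror the usual Neon field layout: vd/vn/vm are 5-bit register indices (the top bit folded in from the split D/N/M bits), q selects a 64-bit D-register versus a 128-bit Q-register operation, and size is the log2 of the element width. A small self-contained sketch of how those fields are conventionally interpreted (helper names are hypothetical, not from this patch):

/* Illustrative only: conventional meaning of the q/size fields. */
static int neon_oprsz_bytes(int q)    { return q ? 16 : 8; }  /* Q vs D register */
static int neon_esize_bytes(int size) { return 1 << size; }   /* 1, 2, 4 or 8    */
static int neon_elements(int q, int size)
{
    return neon_oprsz_bytes(q) / neon_esize_bytes(size);
}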
+typedef arg_2scalar arg_VMLA_2sc; +static bool trans_VMLA_2sc(DisasContext *ctx, arg_VMLA_2sc *a); +typedef arg_2scalar arg_VMLA_F_2sc; +static bool trans_VMLA_F_2sc(DisasContext *ctx, arg_VMLA_F_2sc *a); +typedef arg_2scalar arg_VMLAL_S_2sc; +static bool trans_VMLAL_S_2sc(DisasContext *ctx, arg_VMLAL_S_2sc *a); +typedef arg_2scalar arg_VMLAL_U_2sc; +static bool trans_VMLAL_U_2sc(DisasContext *ctx, arg_VMLAL_U_2sc *a); +typedef arg_2scalar arg_VQDMLAL_2sc; +static bool trans_VQDMLAL_2sc(DisasContext *ctx, arg_VQDMLAL_2sc *a); +typedef arg_2scalar arg_VMLS_2sc; +static bool trans_VMLS_2sc(DisasContext *ctx, arg_VMLS_2sc *a); +typedef arg_2scalar arg_VMLS_F_2sc; +static bool trans_VMLS_F_2sc(DisasContext *ctx, arg_VMLS_F_2sc *a); +typedef arg_2scalar arg_VMLSL_S_2sc; +static bool trans_VMLSL_S_2sc(DisasContext *ctx, arg_VMLSL_S_2sc *a); +typedef arg_2scalar arg_VMLSL_U_2sc; +static bool trans_VMLSL_U_2sc(DisasContext *ctx, arg_VMLSL_U_2sc *a); +typedef arg_2scalar arg_VQDMLSL_2sc; +static bool trans_VQDMLSL_2sc(DisasContext *ctx, arg_VQDMLSL_2sc *a); +typedef arg_2scalar arg_VMUL_2sc; +static bool trans_VMUL_2sc(DisasContext *ctx, arg_VMUL_2sc *a); +typedef arg_2scalar arg_VMUL_F_2sc; +static bool trans_VMUL_F_2sc(DisasContext *ctx, arg_VMUL_F_2sc *a); +typedef arg_2scalar arg_VMULL_S_2sc; +static bool trans_VMULL_S_2sc(DisasContext *ctx, arg_VMULL_S_2sc *a); +typedef arg_2scalar arg_VMULL_U_2sc; +static bool trans_VMULL_U_2sc(DisasContext *ctx, arg_VMULL_U_2sc *a); +typedef arg_2scalar arg_VQDMULL_2sc; +static bool trans_VQDMULL_2sc(DisasContext *ctx, arg_VQDMULL_2sc *a); +typedef arg_2scalar arg_VQDMULH_2sc; +static bool trans_VQDMULH_2sc(DisasContext *ctx, arg_VQDMULH_2sc *a); +typedef arg_2scalar arg_VQRDMULH_2sc; +static bool trans_VQRDMULH_2sc(DisasContext *ctx, arg_VQRDMULH_2sc *a); +typedef arg_2scalar arg_VQRDMLAH_2sc; +static bool trans_VQRDMLAH_2sc(DisasContext *ctx, arg_VQRDMLAH_2sc *a); +typedef arg_2scalar arg_VQRDMLSH_2sc; +static bool trans_VQRDMLSH_2sc(DisasContext *ctx, arg_VQRDMLSH_2sc *a); + +static void disas_neon_dp_extract_1reg_imm(DisasContext *ctx, arg_1reg_imm *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->imm = deposit32(deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 16, 3)), 7, 25, extract32(insn, 24, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_2misc(DisasContext *ctx, arg_2misc *a, uint32_t insn) +{ + a->size = extract32(insn, 18, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_2misc_q0(DisasContext *ctx, arg_2misc *a, uint32_t insn) +{ + a->size = extract32(insn, 18, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_2misc_q1(DisasContext *ctx, arg_2misc *a, uint32_t insn) +{ + a->size = extract32(insn, 18, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 1; +} + +static void disas_neon_dp_extract_2reg_shl_b(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 3); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = 
deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_dp_extract_2reg_shl_d(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 6); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; +} + +static void disas_neon_dp_extract_2reg_shl_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 4); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; +} + +static void disas_neon_dp_extract_2reg_shl_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 5); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; +} + +static void disas_neon_dp_extract_2reg_shll_b(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 3); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->q = 0; +} + +static void disas_neon_dp_extract_2reg_shll_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 4); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->q = 0; +} + +static void disas_neon_dp_extract_2reg_shll_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->shift = extract32(insn, 16, 5); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->q = 0; +} + +static void disas_neon_dp_extract_2reg_shr_b(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->shift = rsub_8(ctx, extract32(insn, 16, 3)); +} + +static void disas_neon_dp_extract_2reg_shr_d(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; + a->shift = rsub_64(ctx, extract32(insn, 16, 6)); +} + +static void disas_neon_dp_extract_2reg_shr_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->shift = rsub_16(ctx, extract32(insn, 16, 4)); +} + +static void disas_neon_dp_extract_2reg_shr_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->shift = rsub_32(ctx, extract32(insn, 16, 5)); +} + +static void disas_neon_dp_extract_2reg_shrn_d(DisasContext 
*ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; + a->q = 0; + a->shift = rsub_32(ctx, extract32(insn, 16, 5)); +} + +static void disas_neon_dp_extract_2reg_shrn_h(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->q = 0; + a->shift = rsub_8(ctx, extract32(insn, 16, 3)); +} + +static void disas_neon_dp_extract_2reg_shrn_s(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->q = 0; + a->shift = rsub_16(ctx, extract32(insn, 16, 4)); +} + +static void disas_neon_dp_extract_2reg_vcvt(DisasContext *ctx, arg_2reg_shift *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->shift = rsub_32(ctx, extract32(insn, 16, 5)); +} + +static void disas_neon_dp_extract_2scalar(DisasContext *ctx, arg_2scalar *a, uint32_t insn) +{ + a->q = extract32(insn, 24, 1); + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_2scalar_q0(DisasContext *ctx, arg_2scalar *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_3diff(DisasContext *ctx, arg_3diff *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_3same(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_3same_64_rev(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vn = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 3; +} + +static void disas_neon_dp_extract_3same_crypto(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->q = 1; +} + 
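The extract helpers above rebuild each instruction field with the QEMU bitops primitives: extract32(insn, pos, len) pulls a bitfield and deposit32(base, pos, len, val) splices the split D/N/M bit (bits 22/7/5) on top of the 4-bit register number, while the rsub_* callbacks (defined in the hand-written translator, not shown in this hunk) turn the encoded shift field into "element width minus immediate" for the right-shift forms. A worked example of the Vd decode pattern used throughout, on an arbitrary instruction word:

/* Illustrative example of the Vd decode pattern (the insn value is arbitrary). */
uint32_t insn  = 0xf2400b10;
uint32_t vd_lo = extract32(insn, 12, 4);          /* Vd, bits [15:12]        */
uint32_t d_bit = extract32(insn, 22, 1);          /* the split "D" bit       */
uint32_t vd    = deposit32(vd_lo, 4, 28, d_bit);  /* 5-bit register index    */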
+static void disas_neon_dp_extract_3same_fp(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_3same_fp_q0(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_3same_logic(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_dp_extract_3same_q0(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_dp_extract_3same_rev(DisasContext *ctx, arg_3same *a, uint32_t insn) +{ + a->size = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->vn = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vm = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_24(DisasContext *ctx, arg_disas_neon_dp3 *a, uint32_t insn) +{ + a->imm = extract32(insn, 8, 4); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_25(DisasContext *ctx, arg_disas_neon_dp4 *a, uint32_t insn) +{ + a->len = extract32(insn, 8, 2); + a->op = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_26(DisasContext *ctx, arg_disas_neon_dp5 *a, uint32_t insn) +{ + a->index = extract32(insn, 17, 3); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_27(DisasContext *ctx, arg_disas_neon_dp5 *a, uint32_t insn) +{ + a->index = extract32(insn, 18, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; +} + +static void disas_neon_dp_extract_disas_neon_dp_Fmt_28(DisasContext *ctx, arg_disas_neon_dp5 *a, uint32_t 
insn) +{ + a->index = extract32(insn, 19, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; +} + +static bool disas_neon_dp(DisasContext *ctx, uint32_t insn) +{ + union { + arg_1reg_imm f_1reg_imm; + arg_2misc f_2misc; + arg_2reg_shift f_2reg_shift; + arg_2scalar f_2scalar; + arg_3diff f_3diff; + arg_3same f_3same; + arg_disas_neon_dp3 f_disas_neon_dp3; + arg_disas_neon_dp4 f_disas_neon_dp4; + arg_disas_neon_dp5 f_disas_neon_dp5; + } u; + + switch (insn & 0xfe800010) { + case 0xf2000000: + /* 1111001. 0....... ........ ...0.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 0....... ....0000 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHADD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000100: + /* 11110010 0....... ....0001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VRHADD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000200: + /* 11110010 0....... ....0010 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHSUB_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000300: + /* 11110010 0....... ....0011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGT_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000400: + /* 11110010 0....... ....0100 ...0.... */ + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000500: + /* 11110010 0....... ....0101 ...0.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110010 0.11.... ....0101 ...0.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_S64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000600: + /* 11110010 0....... ....0110 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMAX_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000700: + /* 11110010 0....... ....0111 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000800: + /* 11110010 0....... ....1000 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VADD_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000900: + /* 11110010 0....... ....1001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMLA_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000a00: + /* 11110010 0....... ....1010 ...0.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 0....... ....1010 .0.0.... */ + if (trans_VPMAX_S_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000b00: + /* 11110010 0....... ....1011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQDMULH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000c00: + /* 11110010 0....... ....1100 ...0.... */ + disas_neon_dp_extract_3same_crypto(ctx, &u.f_3same, insn); + switch (insn & 0x00300040) { + case 0x00000040: + /* 11110010 0.00.... ....1100 .1.0.... 
*/ + if (trans_SHA1C_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00100040: + /* 11110010 0.01.... ....1100 .1.0.... */ + if (trans_SHA1P_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00200040: + /* 11110010 0.10.... ....1100 .1.0.... */ + if (trans_SHA1M_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00300040: + /* 11110010 0.11.... ....1100 .1.0.... */ + if (trans_SHA1SU0_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000d00: + /* 11110010 0....... ....1101 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1101 ...0.... */ + if (trans_VADD_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1101 ...0.... */ + if (trans_VSUB_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000e00: + /* 11110010 0....... ....1110 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1110 ...0.... */ + if (trans_VCEQ_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000f00: + /* 11110010 0....... ....1111 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1111 ...0.... */ + if (trans_VMAX_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1111 ...0.... */ + if (trans_VMIN_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 0....... ....0000 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHADD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000100: + /* 11110011 0....... ....0001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VRHADD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000200: + /* 11110011 0....... ....0010 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VHSUB_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000300: + /* 11110011 0....... ....0011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGT_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000400: + /* 11110011 0....... ....0100 ...0.... */ + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000500: + /* 11110011 0....... ....0101 ...0.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110011 0.11.... ....0101 ...0.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_U64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VRSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000600: + /* 11110011 0....... ....0110 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMAX_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000700: + /* 11110011 0....... ....0111 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000800: + /* 11110011 0....... ....1000 ...0.... 
*/ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VSUB_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000900: + /* 11110011 0....... ....1001 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMLS_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000a00: + /* 11110011 0....... ....1010 ...0.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 0....... ....1010 .0.0.... */ + if (trans_VPMAX_U_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000b00: + /* 11110011 0....... ....1011 ...0.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQRDMULH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000c00: + /* 11110011 0....... ....1100 ...0.... */ + disas_neon_dp_extract_3same_crypto(ctx, &u.f_3same, insn); + switch (insn & 0x00300040) { + case 0x00000040: + /* 11110011 0.00.... ....1100 .1.0.... */ + if (trans_SHA256H_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00100040: + /* 11110011 0.01.... ....1100 .1.0.... */ + if (trans_SHA256H2_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00200040: + /* 11110011 0.10.... ....1100 .1.0.... */ + if (trans_SHA256SU1_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000d00: + /* 11110011 0....... ....1101 ...0.... */ + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1101 ...0.... */ + disas_neon_dp_extract_3same_fp_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1101 .0.0.... */ + if (trans_VPADD_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x1: + /* 11110011 0.1..... ....1101 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + if (trans_VABD_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000e00: + /* 11110011 0....... ....1110 ...0.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1110 ...0.... */ + if (trans_VCGE_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.1..... ....1110 ...0.... */ + if (trans_VCGT_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000f00: + /* 11110011 0....... ....1111 ...0.... */ + disas_neon_dp_extract_3same_fp_q0(ctx, &u.f_3same, insn); + switch (insn & 0x00200040) { + case 0x00000000: + /* 11110011 0.0..... ....1111 .0.0.... */ + if (trans_VPMAX_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00200000: + /* 11110011 0.1..... ....1111 .0.0.... */ + if (trans_VPMIN_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + } + return false; + case 0xf2000010: + /* 1111001. 0....... ........ ...1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 0....... ....0000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQADD_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000100: + /* 11110010 0....... ....0001 ...1.... */ + disas_neon_dp_extract_3same_logic(ctx, &u.f_3same, insn); + switch ((insn >> 20) & 0x3) { + case 0x0: + /* 11110010 0.00.... ....0001 ...1.... */ + if (trans_VAND_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.01.... ....0001 ...1.... 
*/ + if (trans_VBIC_3s(ctx, &u.f_3same)) return true; + return false; + case 0x2: + /* 11110010 0.10.... ....0001 ...1.... */ + if (trans_VORR_3s(ctx, &u.f_3same)) return true; + return false; + case 0x3: + /* 11110010 0.11.... ....0001 ...1.... */ + if (trans_VORN_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000200: + /* 11110010 0....... ....0010 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQSUB_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000300: + /* 11110010 0....... ....0011 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGE_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000400: + /* 11110010 0....... ....0100 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110010 0.11.... ....0100 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_S64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000500: + /* 11110010 0....... ....0101 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110010 0.11.... ....0101 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_S64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000600: + /* 11110010 0....... ....0110 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMIN_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000700: + /* 11110010 0....... ....0111 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABA_S_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000800: + /* 11110010 0....... ....1000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VTST_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000900: + /* 11110010 0....... ....1001 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMUL_3s(ctx, &u.f_3same)) return true; + return false; + case 0x00000a00: + /* 11110010 0....... ....1010 ...1.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 0....... ....1010 .0.1.... */ + if (trans_VPMIN_S_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000b00: + /* 11110010 0....... ....1011 ...1.... */ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 0....... ....1011 .0.1.... */ + if (trans_VPADD_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000c00: + /* 11110010 0....... ....1100 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1100 ...1.... */ + if (trans_VFMA_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1100 ...1.... */ + if (trans_VFMS_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000d00: + /* 11110010 0....... ....1101 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1101 ...1.... 
*/ + if (trans_VMLA_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1101 ...1.... */ + if (trans_VMLS_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x00000f00: + /* 11110010 0....... ....1111 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110010 0.0..... ....1111 ...1.... */ + if (trans_VRECPS_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110010 0.1..... ....1111 ...1.... */ + if (trans_VRSQRTS_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 0....... ....0000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQADD_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000100: + /* 11110011 0....... ....0001 ...1.... */ + disas_neon_dp_extract_3same_logic(ctx, &u.f_3same, insn); + switch ((insn >> 20) & 0x3) { + case 0x0: + /* 11110011 0.00.... ....0001 ...1.... */ + if (trans_VEOR_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.01.... ....0001 ...1.... */ + if (trans_VBSL_3s(ctx, &u.f_3same)) return true; + return false; + case 0x2: + /* 11110011 0.10.... ....0001 ...1.... */ + if (trans_VBIT_3s(ctx, &u.f_3same)) return true; + return false; + case 0x3: + /* 11110011 0.11.... ....0001 ...1.... */ + if (trans_VBIF_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000200: + /* 11110011 0....... ....0010 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQSUB_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000300: + /* 11110011 0....... ....0011 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCGE_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000400: + /* 11110011 0....... ....0100 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110011 0.11.... ....0100 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_U64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000500: + /* 11110011 0....... ....0101 ...1.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 11110011 0.11.... ....0101 ...1.... */ + disas_neon_dp_extract_3same_64_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_U64_3s(ctx, &u.f_3same)) return true; + } + disas_neon_dp_extract_3same_rev(ctx, &u.f_3same, insn); + if (trans_VQRSHL_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000600: + /* 11110011 0....... ....0110 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMIN_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000700: + /* 11110011 0....... ....0111 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VABA_U_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000800: + /* 11110011 0....... ....1000 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VCEQ_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000900: + /* 11110011 0....... ....1001 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VMUL_p_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000a00: + /* 11110011 0....... ....1010 ...1.... 
*/ + disas_neon_dp_extract_3same_q0(ctx, &u.f_3same, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 0....... ....1010 .0.1.... */ + if (trans_VPMIN_U_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000b00: + /* 11110011 0....... ....1011 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQRDMLAH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000c00: + /* 11110011 0....... ....1100 ...1.... */ + disas_neon_dp_extract_3same(ctx, &u.f_3same, insn); + if (trans_VQRDMLSH_3s(ctx, &u.f_3same)) return true; + return false; + case 0x01000d00: + /* 11110011 0....... ....1101 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1101 ...1.... */ + if (trans_VMUL_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000e00: + /* 11110011 0....... ....1110 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1110 ...1.... */ + if (trans_VACGE_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.1..... ....1110 ...1.... */ + if (trans_VACGT_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + case 0x01000f00: + /* 11110011 0....... ....1111 ...1.... */ + disas_neon_dp_extract_3same_fp(ctx, &u.f_3same, insn); + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 11110011 0.0..... ....1111 ...1.... */ + if (trans_VMAXNM_fp_3s(ctx, &u.f_3same)) return true; + return false; + case 0x1: + /* 11110011 0.1..... ....1111 ...1.... */ + if (trans_VMINNM_fp_3s(ctx, &u.f_3same)) return true; + return false; + } + return false; + } + return false; + case 0xf2800000: + /* 1111001. 1....... ........ ...0.... */ + if ((insn & 0x00300000) == 0x00300000) { + /* 1111001. 1.11.... ........ ...0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1.11.... ........ ...0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_24(ctx, &u.f_disas_neon_dp3, insn); + if (trans_VEXT(ctx, &u.f_disas_neon_dp3)) return true; + return false; + case 0x1: + /* 11110011 1.11.... ........ ...0.... */ + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 11110011 1.11.... ....00.. ...0.... */ + switch (insn & 0x00030380) { + case 0x00000000: + /* 11110011 1.11..00 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VREV64(ctx, &u.f_2misc)) return true; + return false; + case 0x00000080: + /* 11110011 1.11..00 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VREV32(ctx, &u.f_2misc)) return true; + return false; + case 0x00000100: + /* 11110011 1.11..00 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VREV16(ctx, &u.f_2misc)) return true; + return false; + case 0x00000200: + /* 11110011 1.11..00 ....0010 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADDL_S(ctx, &u.f_2misc)) return true; + return false; + case 0x00000280: + /* 11110011 1.11..00 ....0010 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADDL_U(ctx, &u.f_2misc)) return true; + return false; + case 0x00000300: + /* 11110011 1.11..00 ....0011 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..00 ....0011 00.0.... 
*/ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESE(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..00 ....0011 01.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESD(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00000380: + /* 11110011 1.11..00 ....0011 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..00 ....0011 10.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESMC(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..00 ....0011 11.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_AESIMC(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00010000: + /* 11110011 1.11..01 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGT0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010080: + /* 11110011 1.11..01 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGE0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010100: + /* 11110011 1.11..01 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCEQ0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010180: + /* 11110011 1.11..01 ....0001 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLE0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010200: + /* 11110011 1.11..01 ....0010 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLT0(ctx, &u.f_2misc)) return true; + return false; + case 0x00010280: + /* 11110011 1.11..01 ....0010 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x1: + /* 11110011 1.11..01 ....0010 11.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_SHA1H(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00010300: + /* 11110011 1.11..01 ....0011 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VABS(ctx, &u.f_2misc)) return true; + return false; + case 0x00010380: + /* 11110011 1.11..01 ....0011 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VNEG(ctx, &u.f_2misc)) return true; + return false; + case 0x00020000: + /* 11110011 1.11..10 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VSWP(ctx, &u.f_2misc)) return true; + return false; + case 0x00020080: + /* 11110011 1.11..10 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VTRN(ctx, &u.f_2misc)) return true; + return false; + case 0x00020100: + /* 11110011 1.11..10 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VUZP(ctx, &u.f_2misc)) return true; + return false; + case 0x00020180: + /* 11110011 1.11..10 ....0001 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VZIP(ctx, &u.f_2misc)) return true; + return false; + case 0x00020200: + /* 11110011 1.11..10 ....0010 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0010 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VMOVN(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..10 ....0010 01.0.... 
*/ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VQMOVUN(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020280: + /* 11110011 1.11..10 ....0010 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0010 10.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VQMOVN_S(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..10 ....0010 11.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VQMOVN_U(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020300: + /* 11110011 1.11..10 ....0011 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0011 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VSHLL(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020380: + /* 11110011 1.11..10 ....0011 1..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0011 10.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_SHA1SU1(ctx, &u.f_2misc)) return true; + return false; + case 0x1: + /* 11110011 1.11..10 ....0011 11.0.... */ + disas_neon_dp_extract_2misc_q1(ctx, &u.f_2misc, insn); + if (trans_SHA256SU0(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00030000: + /* 11110011 1.11..11 ....0000 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTAS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030080: + /* 11110011 1.11..11 ....0000 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTAU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030100: + /* 11110011 1.11..11 ....0001 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTNS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030180: + /* 11110011 1.11..11 ....0001 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTNU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030200: + /* 11110011 1.11..11 ....0010 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTPS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030280: + /* 11110011 1.11..11 ....0010 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTPU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030300: + /* 11110011 1.11..11 ....0011 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTMS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030380: + /* 11110011 1.11..11 ....0011 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVTMU(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x1: + /* 11110011 1.11.... ....01.. ...0.... */ + switch (insn & 0x00030380) { + case 0x00000000: + /* 11110011 1.11..00 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLS(ctx, &u.f_2misc)) return true; + return false; + case 0x00000080: + /* 11110011 1.11..00 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLZ(ctx, &u.f_2misc)) return true; + return false; + case 0x00000100: + /* 11110011 1.11..00 ....0101 0..0.... 
*/ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCNT(ctx, &u.f_2misc)) return true; + return false; + case 0x00000180: + /* 11110011 1.11..00 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VMVN(ctx, &u.f_2misc)) return true; + return false; + case 0x00000200: + /* 11110011 1.11..00 ....0110 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADAL_S(ctx, &u.f_2misc)) return true; + return false; + case 0x00000280: + /* 11110011 1.11..00 ....0110 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VPADAL_U(ctx, &u.f_2misc)) return true; + return false; + case 0x00000300: + /* 11110011 1.11..00 ....0111 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VQABS(ctx, &u.f_2misc)) return true; + return false; + case 0x00000380: + /* 11110011 1.11..00 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VQNEG(ctx, &u.f_2misc)) return true; + return false; + case 0x00010000: + /* 11110011 1.11..01 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGT0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010080: + /* 11110011 1.11..01 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCGE0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010100: + /* 11110011 1.11..01 ....0101 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCEQ0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010180: + /* 11110011 1.11..01 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLE0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010200: + /* 11110011 1.11..01 ....0110 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCLT0_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010300: + /* 11110011 1.11..01 ....0111 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VABS_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00010380: + /* 11110011 1.11..01 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VNEG_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00020000: + /* 11110011 1.11..10 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTN(ctx, &u.f_2misc)) return true; + return false; + case 0x00020080: + /* 11110011 1.11..10 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTX(ctx, &u.f_2misc)) return true; + return false; + case 0x00020100: + /* 11110011 1.11..10 ....0101 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTA(ctx, &u.f_2misc)) return true; + return false; + case 0x00020180: + /* 11110011 1.11..10 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTZ(ctx, &u.f_2misc)) return true; + return false; + case 0x00020200: + /* 11110011 1.11..10 ....0110 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0110 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VCVT_F16_F32(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020280: + /* 11110011 1.11..10 ....0110 1..0.... 
*/ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTM(ctx, &u.f_2misc)) return true; + return false; + case 0x00020300: + /* 11110011 1.11..10 ....0111 0..0.... */ + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.11..10 ....0111 00.0.... */ + disas_neon_dp_extract_2misc_q0(ctx, &u.f_2misc, insn); + if (trans_VCVT_F32_F16(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x00020380: + /* 11110011 1.11..10 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRINTP(ctx, &u.f_2misc)) return true; + return false; + case 0x00030000: + /* 11110011 1.11..11 ....0100 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRECPE(ctx, &u.f_2misc)) return true; + return false; + case 0x00030080: + /* 11110011 1.11..11 ....0100 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRSQRTE(ctx, &u.f_2misc)) return true; + return false; + case 0x00030100: + /* 11110011 1.11..11 ....0101 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRECPE_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00030180: + /* 11110011 1.11..11 ....0101 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VRSQRTE_F(ctx, &u.f_2misc)) return true; + return false; + case 0x00030200: + /* 11110011 1.11..11 ....0110 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_FS(ctx, &u.f_2misc)) return true; + return false; + case 0x00030280: + /* 11110011 1.11..11 ....0110 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_FU(ctx, &u.f_2misc)) return true; + return false; + case 0x00030300: + /* 11110011 1.11..11 ....0111 0..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_SF(ctx, &u.f_2misc)) return true; + return false; + case 0x00030380: + /* 11110011 1.11..11 ....0111 1..0.... */ + disas_neon_dp_extract_2misc(ctx, &u.f_2misc, insn); + if (trans_VCVT_UF(ctx, &u.f_2misc)) return true; + return false; + } + return false; + case 0x2: + /* 11110011 1.11.... ....10.. ...0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_25(ctx, &u.f_disas_neon_dp4, insn); + if (trans_VTBL(ctx, &u.f_disas_neon_dp4)) return true; + return false; + case 0x3: + /* 11110011 1.11.... ....11.. ...0.... */ + switch (insn & 0x00010380) { + case 0x00000000: + /* 11110011 1.11...0 ....1100 0..0.... */ + switch ((insn >> 17) & 0x1) { + case 0x0: + /* 11110011 1.11..00 ....1100 0..0.... */ + switch ((insn >> 18) & 0x1) { + case 0x1: + /* 11110011 1.11.100 ....1100 0..0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_28(ctx, &u.f_disas_neon_dp5, insn); + if (trans_VDUP_scalar(ctx, &u.f_disas_neon_dp5)) return true; + return false; + } + return false; + case 0x1: + /* 11110011 1.11..10 ....1100 0..0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_27(ctx, &u.f_disas_neon_dp5, insn); + if (trans_VDUP_scalar(ctx, &u.f_disas_neon_dp5)) return true; + return false; + } + return false; + case 0x00010000: + /* 11110011 1.11...1 ....1100 0..0.... */ + disas_neon_dp_extract_disas_neon_dp_Fmt_26(ctx, &u.f_disas_neon_dp5, insn); + if (trans_VDUP_scalar(ctx, &u.f_disas_neon_dp5)) return true; + return false; + } + return false; + } + return false; + } + } + switch (insn & 0x00000f40) { + case 0x00000000: + /* 1111001. 1....... ....0000 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0000 .0.0.... 
*/ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0000 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000040: + /* 1111001. 1....... ....0000 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLA_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000100: + /* 1111001. 1....... ....0001 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0001 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDW_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0001 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDW_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000140: + /* 1111001. 1....... ....0001 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLA_F_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000200: + /* 1111001. 1....... ....0010 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000240: + /* 1111001. 1....... ....0010 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLAL_S_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLAL_U_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000300: + /* 1111001. 1....... ....0011 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0011 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBW_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0011 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBW_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000340: + /* 1111001. 1....... ....0011 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0011 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VQDMLAL_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000400: + /* 1111001. 1....... ....0100 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VADDHN_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VRADDHN_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000440: + /* 1111001. 1....... ....0100 .1.0.... 
*/ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLS_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000500: + /* 1111001. 1....... ....0101 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0101 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABAL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0101 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABAL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000540: + /* 1111001. 1....... ....0101 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMLS_F_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000600: + /* 1111001. 1....... ....0110 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0110 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VSUBHN_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0110 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VRSUBHN_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000640: + /* 1111001. 1....... ....0110 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0110 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLSL_S_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0110 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMLSL_U_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000700: + /* 1111001. 1....... ....0111 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0111 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABDL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....0111 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VABDL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000740: + /* 1111001. 1....... ....0111 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....0111 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VQDMLSL_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000800: + /* 1111001. 1....... ....1000 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1000 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLAL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1000 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLAL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000840: + /* 1111001. 1....... ....1000 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMUL_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000900: + /* 1111001. 1....... ....1001 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1001 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VQDMLAL_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000940: + /* 1111001. 1....... 
....1001 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VMUL_F_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000a00: + /* 1111001. 1....... ....1010 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLSL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1010 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMLSL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000a40: + /* 1111001. 1....... ....1010 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMULL_S_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1010 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VMULL_U_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000b00: + /* 1111001. 1....... ....1011 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1011 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VQDMLSL_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000b40: + /* 1111001. 1....... ....1011 .1.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1011 .1.0.... */ + disas_neon_dp_extract_2scalar_q0(ctx, &u.f_2scalar, insn); + if (trans_VQDMULL_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0x00000c00: + /* 1111001. 1....... ....1100 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMULL_S_3d(ctx, &u.f_3diff)) return true; + return false; + case 0x1: + /* 11110011 1....... ....1100 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMULL_U_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000c40: + /* 1111001. 1....... ....1100 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQDMULH_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000d00: + /* 1111001. 1....... ....1101 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1101 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VQDMULL_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000d40: + /* 1111001. 1....... ....1101 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQRDMULH_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000e00: + /* 1111001. 1....... ....1110 .0.0.... */ + switch ((insn >> 24) & 0x1) { + case 0x0: + /* 11110010 1....... ....1110 .0.0.... */ + disas_neon_dp_extract_3diff(ctx, &u.f_3diff, insn); + if (trans_VMULL_P_3d(ctx, &u.f_3diff)) return true; + return false; + } + return false; + case 0x00000e40: + /* 1111001. 1....... ....1110 .1.0.... */ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQRDMLAH_2sc(ctx, &u.f_2scalar)) return true; + return false; + case 0x00000f40: + /* 1111001. 1....... ....1111 .1.0.... 
*/ + disas_neon_dp_extract_2scalar(ctx, &u.f_2scalar, insn); + if (trans_VQRDMLSH_2sc(ctx, &u.f_2scalar)) return true; + return false; + } + return false; + case 0xf2800010: + /* 1111001. 1....... ........ ...1.... */ + switch ((insn >> 7) & 0x1) { + case 0x0: + /* 1111001. 1....... ........ 0..1.... */ + switch ((insn >> 21) & 0x1) { + case 0x0: + /* 1111001. 1.0..... ........ 0..1.... */ + switch ((insn >> 20) & 0x1) { + case 0x0: + /* 1111001. 1.00.... ........ 0..1.... */ + switch ((insn >> 19) & 0x1) { + case 0x0: + /* 1111001. 1.000... ........ 0..1.... */ + disas_neon_dp_extract_1reg_imm(ctx, &u.f_1reg_imm, insn); + u.f_1reg_imm.cmode = extract32(insn, 8, 4); + u.f_1reg_imm.op = extract32(insn, 5, 1); + if (trans_Vimm_1r(ctx, &u.f_1reg_imm)) return true; + return false; + case 0x1: + /* 1111001. 1.001... ........ 0..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1.001... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1.001... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1.001... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1.001... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1.001... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1.001... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000800: + /* 11110010 1.001... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.001... ....1000 00.1.... */ + if (trans_VSHRN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.001... ....1000 01.1.... */ + if (trans_VRSHRN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000900: + /* 11110010 1.001... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.001... ....1001 00.1.... */ + if (trans_VQSHRN_S16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.001... ....1001 01.1.... */ + if (trans_VQRSHRN_S16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000a00: + /* 11110010 1.001... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_b(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.001... ....1010 00.1.... */ + if (trans_VSHLL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 1.001... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1.001... 
....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1.001... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1.001... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1.001... ....0100 0..1.... */ + disas_neon_dp_extract_2reg_shr_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1.001... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1.001... ....0110 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1.001... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_b(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000800: + /* 11110011 1.001... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.001... ....1000 00.1.... */ + if (trans_VQSHRUN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.001... ....1000 01.1.... */ + if (trans_VQRSHRUN_16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000900: + /* 11110011 1.001... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.001... ....1001 00.1.... */ + if (trans_VQSHRN_U16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.001... ....1001 01.1.... */ + if (trans_VQRSHRN_U16_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000a00: + /* 11110011 1.001... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_b(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.001... ....1010 00.1.... */ + if (trans_VSHLL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + } + return false; + case 0x1: + /* 1111001. 1.01.... ........ 0..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1.01.... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1.01.... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1.01.... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1.01.... ....0011 0..1.... 
*/ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1.01.... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1.01.... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000800: + /* 11110010 1.01.... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.01.... ....1000 00.1.... */ + if (trans_VSHRN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.01.... ....1000 01.1.... */ + if (trans_VRSHRN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000900: + /* 11110010 1.01.... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.01.... ....1001 00.1.... */ + if (trans_VQSHRN_S32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.01.... ....1001 01.1.... */ + if (trans_VQRSHRN_S32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000a00: + /* 11110010 1.01.... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.01.... ....1010 00.1.... */ + if (trans_VSHLL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000000: + /* 11110011 1.01.... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1.01.... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1.01.... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1.01.... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1.01.... ....0100 0..1.... */ + disas_neon_dp_extract_2reg_shr_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1.01.... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1.01.... ....0110 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1.01.... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_h(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000800: + /* 11110011 1.01.... ....1000 0..1.... 
*/ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.01.... ....1000 00.1.... */ + if (trans_VQSHRUN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.01.... ....1000 01.1.... */ + if (trans_VQRSHRUN_32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000900: + /* 11110011 1.01.... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.01.... ....1001 00.1.... */ + if (trans_VQSHRN_U32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.01.... ....1001 01.1.... */ + if (trans_VQRSHRN_U32_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000a00: + /* 11110011 1.01.... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_h(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.01.... ....1010 00.1.... */ + if (trans_VSHLL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + } + return false; + case 0x1: + /* 1111001. 1.1..... ........ 0..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1.1..... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1.1..... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1.1..... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1.1..... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1.1..... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1.1..... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000800: + /* 11110010 1.1..... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.1..... ....1000 00.1.... */ + if (trans_VSHRN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.1..... ....1000 01.1.... */ + if (trans_VRSHRN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000900: + /* 11110010 1.1..... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.1..... ....1001 00.1.... */ + if (trans_VQSHRN_S64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110010 1.1..... ....1001 01.1.... */ + if (trans_VQRSHRN_S64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000a00: + /* 11110010 1.1..... ....1010 0..1.... 
*/ + disas_neon_dp_extract_2reg_shll_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110010 1.1..... ....1010 00.1.... */ + if (trans_VSHLL_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x00000e00: + /* 11110010 1.1..... ....1110 0..1.... */ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_SF_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000f00: + /* 11110010 1.1..... ....1111 0..1.... */ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_FS_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000000: + /* 11110011 1.1..... ....0000 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1.1..... ....0001 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1.1..... ....0010 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1.1..... ....0011 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1.1..... ....0100 0..1.... */ + disas_neon_dp_extract_2reg_shr_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1.1..... ....0101 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1.1..... ....0110 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1.1..... ....0111 0..1.... */ + disas_neon_dp_extract_2reg_shl_s(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000800: + /* 11110011 1.1..... ....1000 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.1..... ....1000 00.1.... */ + if (trans_VQSHRUN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.1..... ....1000 01.1.... */ + if (trans_VQRSHRUN_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000900: + /* 11110011 1.1..... ....1001 0..1.... */ + disas_neon_dp_extract_2reg_shrn_d(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.1..... ....1001 00.1.... */ + if (trans_VQSHRN_U64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x1: + /* 11110011 1.1..... ....1001 01.1.... */ + if (trans_VQRSHRN_U64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000a00: + /* 11110011 1.1..... ....1010 0..1.... */ + disas_neon_dp_extract_2reg_shll_s(ctx, &u.f_2reg_shift, insn); + switch ((insn >> 6) & 0x1) { + case 0x0: + /* 11110011 1.1..... ....1010 00.1.... */ + if (trans_VSHLL_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + case 0x01000e00: + /* 11110011 1.1..... ....1110 0..1.... 
*/ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_UF_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000f00: + /* 11110011 1.1..... ....1111 0..1.... */ + disas_neon_dp_extract_2reg_vcvt(ctx, &u.f_2reg_shift, insn); + if (trans_VCVT_FU_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + case 0x1: + /* 1111001. 1....... ........ 1..1.... */ + switch (insn & 0x01000f00) { + case 0x00000000: + /* 11110010 1....... ....0000 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000100: + /* 11110010 1....... ....0001 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000200: + /* 11110010 1....... ....0010 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000300: + /* 11110010 1....... ....0011 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_S_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000500: + /* 11110010 1....... ....0101 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSHL_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x00000700: + /* 11110010 1....... ....0111 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_S_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000000: + /* 11110011 1....... ....0000 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000100: + /* 11110011 1....... ....0001 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000200: + /* 11110011 1....... ....0010 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSHR_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000300: + /* 11110011 1....... ....0011 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VRSRA_U_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000400: + /* 11110011 1....... ....0100 1..1.... */ + disas_neon_dp_extract_2reg_shr_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSRI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000500: + /* 11110011 1....... ....0101 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VSLI_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000600: + /* 11110011 1....... ....0110 1..1.... */ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHLU_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + case 0x01000700: + /* 11110011 1....... ....0111 1..1.... 
*/ + disas_neon_dp_extract_2reg_shl_d(ctx, &u.f_2reg_shift, insn); + if (trans_VQSHL_U_64_2sh(ctx, &u.f_2reg_shift)) return true; + return false; + } + return false; + } + return false; + } + return false; +} diff --git a/qemu/target/arm/decode-neon-ls.inc.c b/qemu/target/arm/decode-neon-ls.inc.c new file mode 100644 index 0000000000..aaf998abb2 --- /dev/null +++ b/qemu/target/arm/decode-neon-ls.inc.c @@ -0,0 +1,149 @@ +/* This file is autogenerated by scripts/decodetree.py. */ + +typedef struct { + int align; + int itype; + int l; + int rm; + int rn; + int size; + int vd; +} arg_disas_neon_ls0; + +typedef struct { + int a; + int n; + int rm; + int rn; + int size; + int t; + int vd; +} arg_disas_neon_ls1; + +typedef struct { + int align; + int l; + int n; + int reg_idx; + int rm; + int rn; + int size; + int stride; + int vd; +} arg_disas_neon_ls2; + +typedef arg_disas_neon_ls0 arg_VLDST_multiple; +static bool trans_VLDST_multiple(DisasContext *ctx, arg_VLDST_multiple *a); +typedef arg_disas_neon_ls1 arg_VLD_all_lanes; +static bool trans_VLD_all_lanes(DisasContext *ctx, arg_VLD_all_lanes *a); +typedef arg_disas_neon_ls2 arg_VLDST_single; +static bool trans_VLDST_single(DisasContext *ctx, arg_VLDST_single *a); + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_0(DisasContext *ctx, arg_disas_neon_ls0 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->itype = extract32(insn, 8, 4); + a->size = extract32(insn, 6, 2); + a->align = extract32(insn, 4, 2); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_1(DisasContext *ctx, arg_disas_neon_ls1 *a, uint32_t insn) +{ + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->size = extract32(insn, 6, 2); + a->t = extract32(insn, 5, 1); + a->a = extract32(insn, 4, 1); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_2(DisasContext *ctx, arg_disas_neon_ls2 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->reg_idx = extract32(insn, 5, 3); + a->align = extract32(insn, 4, 1); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; + a->stride = 1; +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_3(DisasContext *ctx, arg_disas_neon_ls2 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->reg_idx = extract32(insn, 6, 2); + a->align = extract32(insn, 4, 2); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->stride = plus1(ctx, extract32(insn, 5, 1)); +} + +static void disas_neon_ls_extract_disas_neon_ls_Fmt_4(DisasContext *ctx, arg_disas_neon_ls2 *a, uint32_t insn) +{ + a->l = extract32(insn, 21, 1); + a->rn = extract32(insn, 16, 4); + a->n = extract32(insn, 8, 2); + a->reg_idx = extract32(insn, 7, 1); + a->align = extract32(insn, 4, 3); + a->rm = extract32(insn, 0, 4); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 2; + a->stride = plus1(ctx, extract32(insn, 6, 1)); +} + +static bool disas_neon_ls(DisasContext *ctx, uint32_t insn) +{ + union { + arg_disas_neon_ls0 f_disas_neon_ls0; + arg_disas_neon_ls1 f_disas_neon_ls1; + 
arg_disas_neon_ls2 f_disas_neon_ls2; + } u; + + switch (insn & 0xff900000) { + case 0xf4000000: + /* 11110100 0..0.... ........ ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_0(ctx, &u.f_disas_neon_ls0, insn); + if (trans_VLDST_multiple(ctx, &u.f_disas_neon_ls0)) return true; + return false; + case 0xf4800000: + /* 11110100 1..0.... ........ ........ */ + switch ((insn >> 10) & 0x3) { + case 0x0: + /* 11110100 1..0.... ....00.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_2(ctx, &u.f_disas_neon_ls2, insn); + if (trans_VLDST_single(ctx, &u.f_disas_neon_ls2)) return true; + return false; + case 0x1: + /* 11110100 1..0.... ....01.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_3(ctx, &u.f_disas_neon_ls2, insn); + if (trans_VLDST_single(ctx, &u.f_disas_neon_ls2)) return true; + return false; + case 0x2: + /* 11110100 1..0.... ....10.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_4(ctx, &u.f_disas_neon_ls2, insn); + if (trans_VLDST_single(ctx, &u.f_disas_neon_ls2)) return true; + return false; + case 0x3: + /* 11110100 1..0.... ....11.. ........ */ + disas_neon_ls_extract_disas_neon_ls_Fmt_1(ctx, &u.f_disas_neon_ls1, insn); + switch ((insn >> 21) & 0x1) { + case 0x1: + /* 11110100 1.10.... ....11.. ........ */ + if (trans_VLD_all_lanes(ctx, &u.f_disas_neon_ls1)) return true; + return false; + } + return false; + } + return false; + } + return false; +} diff --git a/qemu/target/arm/decode-neon-shared.inc.c b/qemu/target/arm/decode-neon-shared.inc.c new file mode 100644 index 0000000000..58913b4365 --- /dev/null +++ b/qemu/target/arm/decode-neon-shared.inc.c @@ -0,0 +1,271 @@ +/* This file is autogenerated by scripts/decodetree.py. */ + +typedef struct { + int q; + int rot; + int size; + int vd; + int vm; + int vn; +} arg_disas_neon_shared0; + +typedef struct { + int q; + int u; + int vd; + int vm; + int vn; +} arg_disas_neon_shared1; + +typedef struct { + int q; + int s; + int vd; + int vm; + int vn; +} arg_disas_neon_shared2; + +typedef struct { + int index; + int q; + int rot; + int size; + int vd; + int vm; + int vn; +} arg_disas_neon_shared3; + +typedef struct { + int index; + int q; + int rm; + int u; + int vd; + int vm; + int vn; +} arg_disas_neon_shared4; + +typedef struct { + int index; + int q; + int rm; + int s; + int vd; + int vn; +} arg_disas_neon_shared5; + +typedef arg_disas_neon_shared0 arg_VCMLA; +static bool trans_VCMLA(DisasContext *ctx, arg_VCMLA *a); +typedef arg_disas_neon_shared0 arg_VCADD; +static bool trans_VCADD(DisasContext *ctx, arg_VCADD *a); +typedef arg_disas_neon_shared1 arg_VDOT; +static bool trans_VDOT(DisasContext *ctx, arg_VDOT *a); +typedef arg_disas_neon_shared2 arg_VFML; +static bool trans_VFML(DisasContext *ctx, arg_VFML *a); +typedef arg_disas_neon_shared3 arg_VCMLA_scalar; +static bool trans_VCMLA_scalar(DisasContext *ctx, arg_VCMLA_scalar *a); +typedef arg_disas_neon_shared4 arg_VDOT_scalar; +static bool trans_VDOT_scalar(DisasContext *ctx, arg_VDOT_scalar *a); +typedef arg_disas_neon_shared5 arg_VFML_scalar; +static bool trans_VFML_scalar(DisasContext *ctx, arg_VFML_scalar *a); + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_0(DisasContext *ctx, arg_disas_neon_shared0 *a, uint32_t insn) +{ + a->rot = extract32(insn, 23, 2); + a->size = extract32(insn, 20, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 
1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_1(DisasContext *ctx, arg_disas_neon_shared0 *a, uint32_t insn) +{ + a->rot = extract32(insn, 24, 1); + a->size = extract32(insn, 20, 1); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_2(DisasContext *ctx, arg_disas_neon_shared1 *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->u = extract32(insn, 4, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_3(DisasContext *ctx, arg_disas_neon_shared2 *a, uint32_t insn) +{ + a->s = extract32(insn, 23, 1); + a->vm = deposit32(extract32(insn, 5, 1), 1, 31, extract32(insn, 0, 4)); + a->vn = deposit32(extract32(insn, 7, 1), 1, 31, extract32(insn, 16, 4)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_4(DisasContext *ctx, arg_disas_neon_shared2 *a, uint32_t insn) +{ + a->s = extract32(insn, 23, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 1; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_5(DisasContext *ctx, arg_disas_neon_shared3 *a, uint32_t insn) +{ + a->rot = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->index = extract32(insn, 5, 1); + a->vm = extract32(insn, 0, 4); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 0; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_6(DisasContext *ctx, arg_disas_neon_shared3 *a, uint32_t insn) +{ + a->rot = extract32(insn, 20, 2); + a->q = extract32(insn, 6, 1); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->size = 1; + a->index = 0; +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_7(DisasContext *ctx, arg_disas_neon_shared4 *a, uint32_t insn) +{ + a->q = extract32(insn, 6, 1); + a->index = extract32(insn, 5, 1); + a->u = extract32(insn, 4, 1); + a->rm = extract32(insn, 0, 4); + a->vm = deposit32(extract32(insn, 0, 4), 4, 28, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); +} + +static void disas_neon_shared_extract_disas_neon_shared_Fmt_8(DisasContext *ctx, arg_disas_neon_shared5 *a, uint32_t insn) +{ + a->s = extract32(insn, 20, 1); + a->index = extract32(insn, 3, 1); + a->rm = deposit32(extract32(insn, 5, 1), 1, 31, extract32(insn, 0, 3)); + a->vn = deposit32(extract32(insn, 7, 1), 1, 31, extract32(insn, 16, 4)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 0; +} + +static void 
disas_neon_shared_extract_disas_neon_shared_Fmt_9(DisasContext *ctx, arg_disas_neon_shared5 *a, uint32_t insn) +{ + a->s = extract32(insn, 20, 1); + a->rm = extract32(insn, 0, 3); + a->index = deposit32(extract32(insn, 3, 1), 1, 31, extract32(insn, 5, 1)); + a->vn = deposit32(extract32(insn, 16, 4), 4, 28, extract32(insn, 7, 1)); + a->vd = deposit32(extract32(insn, 12, 4), 4, 28, extract32(insn, 22, 1)); + a->q = 1; +} + +static bool disas_neon_shared(DisasContext *ctx, uint32_t insn) +{ + union { + arg_disas_neon_shared0 f_disas_neon_shared0; + arg_disas_neon_shared1 f_disas_neon_shared1; + arg_disas_neon_shared2 f_disas_neon_shared2; + arg_disas_neon_shared3 f_disas_neon_shared3; + arg_disas_neon_shared4 f_disas_neon_shared4; + arg_disas_neon_shared5 f_disas_neon_shared5; + } u; + + switch (insn & 0xfe000f00) { + case 0xfc000800: + /* 1111110. ........ ....1000 ........ */ + switch (insn & 0x00200010) { + case 0x00000000: + /* 1111110. ..0..... ....1000 ...0.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_1(ctx, &u.f_disas_neon_shared0, insn); + switch ((insn >> 23) & 0x1) { + case 0x1: + /* 1111110. 1.0..... ....1000 ...0.... */ + if (trans_VCADD(ctx, &u.f_disas_neon_shared0)) return true; + return false; + } + return false; + case 0x00200000: + /* 1111110. ..1..... ....1000 ...0.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_0(ctx, &u.f_disas_neon_shared0, insn); + if (trans_VCMLA(ctx, &u.f_disas_neon_shared0)) return true; + return false; + case 0x00200010: + /* 1111110. ..1..... ....1000 ...1.... */ + switch (insn & 0x01100040) { + case 0x00000000: + /* 11111100 ..10.... ....1000 .0.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_3(ctx, &u.f_disas_neon_shared2, insn); + if (trans_VFML(ctx, &u.f_disas_neon_shared2)) return true; + return false; + case 0x00000040: + /* 11111100 ..10.... ....1000 .1.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_4(ctx, &u.f_disas_neon_shared2, insn); + if (trans_VFML(ctx, &u.f_disas_neon_shared2)) return true; + return false; + } + return false; + } + return false; + case 0xfc000d00: + /* 1111110. ........ ....1101 ........ */ + disas_neon_shared_extract_disas_neon_shared_Fmt_2(ctx, &u.f_disas_neon_shared1, insn); + switch (insn & 0x01b00000) { + case 0x00200000: + /* 11111100 0.10.... ....1101 ........ */ + if (trans_VDOT(ctx, &u.f_disas_neon_shared1)) return true; + return false; + } + return false; + case 0xfe000800: + /* 1111111. ........ ....1000 ........ */ + switch (insn & 0x01800010) { + case 0x00000000: + /* 11111110 0....... ....1000 ...0.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_5(ctx, &u.f_disas_neon_shared3, insn); + if (trans_VCMLA_scalar(ctx, &u.f_disas_neon_shared3)) return true; + return false; + case 0x00000010: + /* 11111110 0....... ....1000 ...1.... */ + switch (insn & 0x00200040) { + case 0x00000000: + /* 11111110 0.0..... ....1000 .0.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_8(ctx, &u.f_disas_neon_shared5, insn); + if (trans_VFML_scalar(ctx, &u.f_disas_neon_shared5)) return true; + return false; + case 0x00000040: + /* 11111110 0.0..... ....1000 .1.1.... */ + disas_neon_shared_extract_disas_neon_shared_Fmt_9(ctx, &u.f_disas_neon_shared5, insn); + if (trans_VFML_scalar(ctx, &u.f_disas_neon_shared5)) return true; + return false; + } + return false; + case 0x00800000: + /* 11111110 1....... ....1000 ...0.... 
*/ + disas_neon_shared_extract_disas_neon_shared_Fmt_6(ctx, &u.f_disas_neon_shared3, insn); + if (trans_VCMLA_scalar(ctx, &u.f_disas_neon_shared3)) return true; + return false; + } + return false; + case 0xfe000d00: + /* 1111111. ........ ....1101 ........ */ + disas_neon_shared_extract_disas_neon_shared_Fmt_7(ctx, &u.f_disas_neon_shared4, insn); + switch (insn & 0x01b00000) { + case 0x00200000: + /* 11111110 0.10.... ....1101 ........ */ + if (trans_VDOT_scalar(ctx, &u.f_disas_neon_shared4)) return true; + return false; + } + return false; + } + return false; +} diff --git a/qemu/target/arm/decode-sve.inc.c b/qemu/target/arm/decode-sve.inc.c index 9740f1aa80..d04c24c0ee 100644 --- a/qemu/target/arm/decode-sve.inc.c +++ b/qemu/target/arm/decode-sve.inc.c @@ -43,9 +43,7 @@ typedef struct { } arg_disas_sve31; typedef struct { -#ifdef _MSC_VER - int dummy; -#endif + int : 0; } arg_disas_sve32; typedef struct { diff --git a/qemu/target/arm/helper-a64.c b/qemu/target/arm/helper-a64.c index 12da114039..df30f11c47 100644 --- a/qemu/target/arm/helper-a64.c +++ b/qemu/target/arm/helper-a64.c @@ -1096,78 +1096,40 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) * alignment faults or any memory attribute handling). */ - struct uc_struct *uc = env->uc; - ARMCPU *cpu = env_archcpu(env); - uint64_t blocklen = 4 << cpu->dcz_blocksize; + UNICORN_UNUSED struct uc_struct *uc = env->uc; + int blocklen = 4 << env_archcpu(env)->dcz_blocksize; uint64_t vaddr = vaddr_in & ~(blocklen - 1); + int mmu_idx = cpu_mmu_index(env, false); + void *mem; + /* - * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than - * the block size so we might have to do more than one TLB lookup. - * We know that in fact for any v8 CPU the page size is at least 4K - * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only - * 1K as an artefact of legacy v5 subpage support being present in the - * same QEMU executable. So in practice the hostaddr[] array has - * two entries, given the current setting of TARGET_PAGE_BITS_MIN. + * Trapless lookup. In addition to actual invalid page, may + * return NULL for I/O, watchpoints, clean pages, etc. */ - int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE); - void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)]; - int try, i; - unsigned mmu_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); - - assert(maxidx <= ARRAY_SIZE(hostaddr)); - - for (try = 0; try < 2; try++) { - - for (i = 0; i < maxidx; i++) { - hostaddr[i] = tlb_vaddr_to_host(env, - vaddr + TARGET_PAGE_SIZE * i, - 1, mmu_idx); - if (!hostaddr[i]) { - break; - } - } - if (i == maxidx) { + mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); + + if (unlikely(!mem)) { + uintptr_t ra = GETPC(); + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + (void) probe_write(env, vaddr_in, 1, mmu_idx, ra); + mem = probe_write(env, vaddr, blocklen, mmu_idx, ra); + + if (unlikely(!mem)) { /* - * If it's all in the TLB it's fair game for just writing to; - * we know we don't need to update dirty status, etc. + * The only remaining reason for mem == NULL is I/O. + * Just do a series of byte writes as the architecture demands. 
*/ - for (i = 0; i < maxidx - 1; i++) { - memset(hostaddr[i], 0, TARGET_PAGE_SIZE); + for (int i = 0; i < blocklen; i++) { + cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra); } - memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE)); return; } - /* - * OK, try a store and see if we can populate the tlb. This - * might cause an exception if the memory isn't writable, - * in which case we will longjmp out of here. We must for - * this purpose use the actual register value passed to us - * so that we get the fault address right. - */ - helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC()); - /* Now we can populate the other TLB entries, if any */ - for (i = 0; i < maxidx; i++) { - uint64_t va = vaddr + TARGET_PAGE_SIZE * i; - if (va != (vaddr_in & TARGET_PAGE_MASK)) { - helper_ret_stb_mmu(env, va, 0, oi, GETPC()); - } - } } - /* - * Slow path (probably attempt to do this to an I/O device or - * similar, or clearing of a block of code we have translations - * cached for). Just do a series of byte writes as the architecture - * demands. It's not worth trying to use a cpu_physical_memory_map(), - * memset(), unmap() sequence here because: - * + we'd need to account for the blocksize being larger than a page - * + the direct-RAM access case is almost always going to be dealt - * with in the fastpath code above, so there's no speed benefit - * + we would have to deal with the map returning NULL because the - * bounce buffer was in use - */ - for (i = 0; i < blocklen; i++) { - helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC()); - } + memset(mem, 0, blocklen); } diff --git a/qemu/target/arm/helper-a64.h b/qemu/target/arm/helper-a64.h index 3df7c185aa..5b0b699a50 100644 --- a/qemu/target/arm/helper-a64.h +++ b/qemu/target/arm/helper-a64.h @@ -103,3 +103,19 @@ DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) + +DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) +DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(stg, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64) diff --git a/qemu/target/arm/helper-sve.h b/qemu/target/arm/helper-sve.h index 2f47279155..199ffee9cc 100644 --- a/qemu/target/arm/helper-sve.h +++ b/qemu/target/arm/helper-sve.h @@ -1099,25 +1099,40 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_d, 
TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) - -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -1181,6 +1196,64 @@ DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, 
TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1212,6 +1285,55 @@ DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
+DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1243,6 +1365,55 @@ DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) 
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r_mte, TCG_CALL_NO_WG, + void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1290,6 +1461,53 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) 
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG, @@ -1399,6 +1617,116 @@ DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbss_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbsu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbss_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zsu_mte, 
TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldbdu_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_lddd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldbds_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldhds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + + DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG, @@ -1508,6 +1836,115 @@ DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, 
ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbss_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbsu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbss_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_ldffbdu_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd_mte, TCG_CALL_NO_WG, + void, 
env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffbds_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG, @@ -1575,4 +2012,71 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbs_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sths_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbd_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zsu_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_6(sve_stbd_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zss_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + 
+DEF_HELPER_FLAGS_6(sve_stbd_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_sthd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/qemu/target/arm/helper.c b/qemu/target/arm/helper.c index 60c9db9e3e..6e28646ad2 100644 --- a/qemu/target/arm/helper.c +++ b/qemu/target/arm/helper.c @@ -31,9 +31,11 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool s1_is_el0, hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, target_ulong *page_size_ptr, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + UNICORN_NONNULL; static void switch_mode(CPUARMState *env, int mode); @@ -78,35 +80,19 @@ uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri) } /* - * Some registers are not accessible if EL3.NS=0 and EL3 is using AArch32 but - * they are accessible when EL3 is using AArch64 regardless of EL3.NS. - * - * access_el3_aa32ns: Used to check AArch32 register views. - * access_el3_aa32ns_aa64any: Used to check both AArch32/64 register views. + * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0. */ static CPAccessResult access_el3_aa32ns(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - bool secure = arm_is_secure_below_el3(env); - - assert(!arm_el_is_aa64(env, 3)); - if (secure) { + if (!is_a64(env) && arm_current_el(env) == 3 && + arm_is_secure_below_el3(env)) { return CP_ACCESS_TRAP_UNCATEGORIZED; } return CP_ACCESS_OK; } -static CPAccessResult access_el3_aa32ns_aa64any(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) -{ - if (!arm_el_is_aa64(env, 3)) { - return access_el3_aa32ns(env, ri, isread); - } - return CP_ACCESS_OK; -} - /* Some secure-only AArch32 registers trap to EL3 if used from * Secure EL1 (but are just ordinary UNDEF in other non-EL3 contexts). * Note that an access from Secure EL1 can only happen if EL3 is AArch64. @@ -394,8 +380,7 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); } static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -406,45 +391,7 @@ static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); -} - -static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by IPA. This has to invalidate any structures that - * contain only stage 2 translation information, but does not need - * to apply to structures that contain combined stage 1 and stage 2 - * translation information. - * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero. 
- */ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 40); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - -static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 40); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); } static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -1622,9 +1569,19 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) uint32_t valid_mask = 0x3fff; ARMCPU *cpu = env_archcpu(env); - if (arm_el_is_aa64(env, 3)) { + if (ri->state == ARM_CP_STATE_AA64) { value |= SCR_FW | SCR_AW; /* these two bits are RES1. */ valid_mask &= ~SCR_NET; + + if (cpu_isar_feature(aa64_lor, cpu)) { + valid_mask |= SCR_TLOR; + } + if (cpu_isar_feature(aa64_pauth, cpu)) { + valid_mask |= SCR_API | SCR_APK; + } + if (cpu_isar_feature(aa64_mte, cpu)) { + valid_mask |= SCR_ATA; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); } @@ -1643,12 +1600,6 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) valid_mask &= ~SCR_SMD; } } - if (cpu_isar_feature(aa64_lor, cpu)) { - valid_mask |= SCR_TLOR; - } - if (cpu_isar_feature(aa64_pauth, cpu)) { - valid_mask |= SCR_API | SCR_APK; - } /* Clear all-context RES0 bits. */ value &= valid_mask; @@ -1875,13 +1826,13 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .resetvalue = 0x0 }, { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, - .type = ARM_CP_ALIAS | ARM_CP_IO, + .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write, }, { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, - .type = ARM_CP_ALIAS | ARM_CP_IO, + .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, @@ -3044,7 +2995,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, * Report exception with ESR indicating a fault due to a * translation table walk for a cache maintenance instruction. 
*/ - syn = syn_data_abort_no_iss(current_el == target_el, + syn = syn_data_abort_no_iss(current_el == target_el, 0, fi.ea, 1, fi.s1ptw, 1, fsc); env->exception.vaddress = value; env->exception.fsr = fsr; @@ -3567,8 +3518,7 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2); + ARMMMUIdxBit_E10_0); raw_write(env, ri, value); } } @@ -4050,11 +4000,6 @@ static int alle1_tlbmask(CPUARMState *env) return ARMMMUIdxBit_SE10_1 | ARMMMUIdxBit_SE10_1_PAN | ARMMMUIdxBit_SE10_0; - } else if (arm_feature(env, ARM_FEATURE_EL2)) { - return ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0 | - ARMMMUIdxBit_Stage2; } else { return ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | @@ -4201,44 +4146,6 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMMMUIdxBit_SE3); } -static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - /* Invalidate by IPA. This has to invalidate any structures that - * contain only stage 2 translation information, but does not need - * to apply to structures that contain combined stage 1 and stage 2 - * translation information. - * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero. - */ - ARMCPU *cpu = env_archcpu(env); - CPUState *cs = CPU(cpu); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 48); - - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); -} - -static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - CPUState *cs = env_cpu(env); - uint64_t pageaddr; - - if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { - return; - } - - pageaddr = sextract64(value << 12, 0, 48); - - tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_Stage2); -} - static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -4306,6 +4213,15 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, { ARMCPU *cpu = env_archcpu(env); + if (ri->state == ARM_CP_STATE_AA64 && !cpu_isar_feature(aa64_mte, cpu)) { + if (ri->opc1 == 6) { /* SCTLR_EL3 */ + value &= ~(SCTLR_ITFSB | SCTLR_TCF | SCTLR_ATA); + } else { + value &= ~(SCTLR_ITFSB | SCTLR_TCF0 | SCTLR_TCF | + SCTLR_ATA0 | SCTLR_ATA); + } + } + if (raw_read(env, ri) == value) { /* Skip the TLB flush if nothing actually changed; Linux likes * to do a lot of pointless SCTLR writes. @@ -4320,6 +4236,7 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); /* ??? Lots of these bits are not implemented. */ + /* This may enable/disable the MMU, so do a TLB flush. 
*/ tlb_flush(CPU(cpu)); @@ -4475,12 +4392,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbi_aa64_vae1_write }, { .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1is_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -4491,12 +4406,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbi_aa64_alle1is_write }, { .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, - .writefn = tlbi_aa64_ipas2e1_write }, + .access = PL2_W, .type = ARM_CP_NOP }, { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -4575,20 +4488,16 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbimva_hyp_is_write }, { .name = "TLBIIPAS2", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2IS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_is_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2L", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_write }, + .type = ARM_CP_NOP, .access = PL2_W }, { .name = "TLBIIPAS2LIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL2_W, - .writefn = tlbiipas2_is_write }, + .type = ARM_CP_NOP, .access = PL2_W }, /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, @@ -4702,7 +4611,6 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH, - .type = ARM_CP_NO_RAW, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, @@ -4744,7 +4652,7 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "VTCR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "VTTBR", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 6, .crm = 2, @@ -4792,7 +4700,7 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "HPFAR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4, - 
.access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3, @@ -4849,15 +4757,19 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) if (cpu_isar_feature(aa64_pauth, cpu)) { valid_mask |= HCR_API | HCR_APK; } + if (cpu_isar_feature(aa64_mte, cpu)) { + valid_mask |= HCR_ATA | HCR_DCT | HCR_TID5; + } } /* Clear RES0 bits. */ value &= valid_mask; - /* These bits change the MMU setup: + /* + * These bits change the MMU setup: * HCR_VM enables stage 2 translation * HCR_PTW forbids certain page-table setups - * HCR_DC Disables stage1 and enables stage2 translation + * HCR_DC disables stage1 and enables stage2 translation */ if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) { tlb_flush(CPU(cpu)); @@ -5430,6 +5342,9 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0), "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve }, + { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0), + "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte }, + /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */ /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */ }; @@ -6382,7 +6297,7 @@ static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, /* RCU lock is already being held */ mr = memory_region_from_host(uc, haddr, &offset); if (mr) { - // memory_region_do_writeback(mr, offset, dline_size); FIXME + // memory_region_writeback(mr, offset, dline_size); FIXME } } } @@ -6405,6 +6320,159 @@ static const ARMCPRegInfo dcpodp_reg[] = { #endif +static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if ((arm_current_el(env) < 2) && (arm_hcr_el2_eff(env) & HCR_TID5)) { + return CP_ACCESS_TRAP_EL2; + } + + return CP_ACCESS_OK; +} + +static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + int el = arm_current_el(env); + + if (el < 2 && + arm_feature(env, ARM_FEATURE_EL2) && + !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 3 && + arm_feature(env, ARM_FEATURE_EL3) && + !(env->cp15.scr_el3 & SCR_ATA)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pstate & PSTATE_TCO; +} + +static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val) +{ + env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO); +} + +static const ARMCPRegInfo mte_reginfo[] = { + { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 1, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) }, + { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) }, + { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL2_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) }, + { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) }, + { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64, + 
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) }, + { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) }, + { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4, + .access = PL1_R, .accessfn = access_aa64_tid5, + .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS }, + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + .type = ARM_CP_NO_RAW, + .access = PL0_RW, .readfn = tco_read, .writefn = tco_write }, + { .name = "DC_IGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_IGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo mte_tco_ro_reginfo[] = { + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + .type = ARM_CP_CONST, .access = PL0_RW, }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = { + { .name = "DC_CGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5, + .type = ARM_CP_NOP, 
.access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_GVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3, + .access = PL0_W, .type = ARM_CP_DC_GVA, + .accessfn = aa64_zva_access, + }, + { .name = "DC_GZVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 4, + .access = PL0_W, .type = ARM_CP_DC_GZVA, + .accessfn = aa64_zva_access, + }, + REGINFO_SENTINEL +}; + static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -7048,12 +7116,12 @@ void register_cp_regs_for_features(ARMCPU *cpu) ARMCPRegInfo vpidr_regs[] = { { .name = "VPIDR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_CONST, .resetvalue = cpu->midr, .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) }, { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5, - .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any, + .access = PL2_RW, .accessfn = access_el3_aa32ns, .type = ARM_CP_NO_RAW, .writefn = arm_cp_write_ignore, .readfn = mpidr_read }, REGINFO_SENTINEL @@ -7466,6 +7534,19 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, dcpodp_reg); } } + + /* + * If full MTE is enabled, add all of the system registers. + * If only "instructions available at EL0" are enabled, + * then define only a RAZ/WI version of PSTATE.TCO. + */ + if (cpu_isar_feature(aa64_mte, cpu)) { + define_arm_cp_regs(cpu, mte_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); + } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) { + define_arm_cp_regs(cpu, mte_tco_ro_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); + } #endif if (cpu_isar_feature(any_predinv, cpu)) { @@ -8725,6 +8806,77 @@ static void arm_cpu_do_interrupt_aarch32_qemu(CPUState *cs) take_aarch32_exception(env, new_mode, mask, offset, addr); } +static int aarch64_regnum(CPUARMState *env, int aarch32_reg) +{ + /* + * Return the register number of the AArch64 view of the AArch32 + * register @aarch32_reg. The CPUARMState CPSR is assumed to still + * be that of the AArch32 mode the exception came from. + */ + int mode = env->uncached_cpsr & CPSR_M; + + switch (aarch32_reg) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + return aarch32_reg; + case 8: + case 9: + case 10: + case 11: + case 12: + return mode == ARM_CPU_MODE_FIQ ? 
aarch32_reg + 16 : aarch32_reg; + case 13: + switch (mode) { + case ARM_CPU_MODE_USR: + case ARM_CPU_MODE_SYS: + return 13; + case ARM_CPU_MODE_HYP: + return 15; + case ARM_CPU_MODE_IRQ: + return 17; + case ARM_CPU_MODE_SVC: + return 19; + case ARM_CPU_MODE_ABT: + return 21; + case ARM_CPU_MODE_UND: + return 23; + case ARM_CPU_MODE_FIQ: + return 29; + default: + g_assert_not_reached(); + } + case 14: + switch (mode) { + case ARM_CPU_MODE_USR: + case ARM_CPU_MODE_SYS: + case ARM_CPU_MODE_HYP: + return 14; + case ARM_CPU_MODE_IRQ: + return 16; + case ARM_CPU_MODE_SVC: + return 18; + case ARM_CPU_MODE_ABT: + return 20; + case ARM_CPU_MODE_UND: + return 22; + case ARM_CPU_MODE_FIQ: + return 30; + default: + g_assert_not_reached(); + } + case 15: + return 31; + default: + g_assert_not_reached(); + } +} + /* Handle exception entry to a target EL which is using AArch64 */ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) { @@ -8735,6 +8887,7 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) unsigned int new_mode = aarch64_pstate_mode(new_el, true); unsigned int old_mode; unsigned int cur_el = arm_current_el(env); + int rt; /* * Note that new_el can never be 0. If cur_el is 0, then @@ -8790,7 +8943,8 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) case EXCP_HVC: case EXCP_HYP_TRAP: case EXCP_SMC: - if (syn_get_ec(env->exception.syndrome) == EC_ADVSIMDFPACCESSTRAP) { + switch (syn_get_ec(env->exception.syndrome)) { + case EC_ADVSIMDFPACCESSTRAP: /* * QEMU internal FP/SIMD syndromes from AArch32 include the * TA and coproc fields which are only exposed if the exception @@ -8798,6 +8952,34 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) * AArch64 format syndrome. */ env->exception.syndrome &= ~MAKE_64BIT_MASK(0, 20); + break; + case EC_CP14RTTRAP: + case EC_CP15RTTRAP: + case EC_CP14DTTRAP: + /* + * For a trap on AArch32 MRC/MCR/LDC/STC the Rt field is currently + * the raw register field from the insn; when taking this to + * AArch64 we must convert it to the AArch64 view of the register + * number. Notice that we read a 4-bit AArch32 register number and + * write back a 5-bit AArch64 one. 
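For a concrete view of the 4-bit to 5-bit Rt widening described above, here is a self-contained sketch; extract32()/deposit32() are re-implemented locally so it compiles on its own, and the FIQ example follows the banked-register mapping in aarch64_regnum() (r8..r12 map to 24..28 in FIQ mode):

    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-ins for QEMU's extract32()/deposit32() bit-field helpers. */
    static uint32_t ext32(uint32_t v, int start, int len)
    {
        return (v >> start) & ((1u << len) - 1);
    }

    static uint32_t dep32(uint32_t v, int start, int len, uint32_t field)
    {
        uint32_t mask = ((1u << len) - 1) << start;
        return (v & ~mask) | ((field << start) & mask);
    }

    int main(void)
    {
        /* Assume a trapped MCR executed in FIQ mode with Rt = 8 (r8_fiq). */
        uint32_t syndrome = 8u << 5;              /* 4-bit Rt field at bit 5 */
        int rt = ext32(syndrome, 5, 4);           /* 8 */
        int a64_rt = rt + 16;                     /* r8_fiq lives at AArch64 regno 24 */
        syndrome = dep32(syndrome, 5, 5, a64_rt); /* write back the 5-bit field */
        printf("AArch32 Rt=%d -> AArch64 reg %d\n", rt, a64_rt);
        return 0;
    }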
+ */ + rt = extract32(env->exception.syndrome, 5, 4); + rt = aarch64_regnum(env, rt); + env->exception.syndrome = deposit32(env->exception.syndrome, + 5, 5, rt); + break; + case EC_CP15RRTTRAP: + case EC_CP14RRTTRAP: + /* Similarly for MRRC/MCRR traps for Rt and Rt2 fields */ + rt = extract32(env->exception.syndrome, 5, 4); + rt = aarch64_regnum(env, rt); + env->exception.syndrome = deposit32(env->exception.syndrome, + 5, 5, rt); + rt = extract32(env->exception.syndrome, 10, 4); + rt = aarch64_regnum(env, rt); + env->exception.syndrome = deposit32(env->exception.syndrome, + 10, 5, rt); + break; } env->cp15.esr_el[new_el] = env->exception.syndrome; break; @@ -8850,6 +9032,9 @@ static void arm_cpu_do_interrupt_aarch64_qemu(CPUState *cs) break; } } + if (cpu_isar_feature(aa64_mte, cpu)) { + new_mode |= PSTATE_TCO; + } pstate_write(env, PSTATE_DAIF | new_mode); env->aarch64 = 1; @@ -8908,44 +9093,6 @@ void arm_cpu_do_interrupt(CPUState *cs) cs->interrupt_request |= CPU_INTERRUPT_EXITTB; } -/* Return the exception level which controls this address translation regime */ -static uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_Stage2: - case ARMMMUIdx_E2: - return 2; - case ARMMMUIdx_SE3: - return 3; - case ARMMMUIdx_SE10_0: - return arm_el_is_aa64(env, 3) ? 1 : 3; - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_MPrivNegPri: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MPriv: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSPrivNegPri: - case ARMMMUIdx_MSUserNegPri: - case ARMMMUIdx_MSPriv: - case ARMMMUIdx_MSUser: - return 1; - default: - g_assert_not_reached(); - // never reach here - return 1; - } -} - uint64_t arm_sctlr(CPUARMState *env, int el) { /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. 
*/ @@ -9024,15 +9171,6 @@ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, } } -/* Return the TCR controlling this translation regime */ -static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - if (mmu_idx == ARMMMUIdx_Stage2) { - return &env->cp15.vtcr_el2; - } - return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; -} - /* Convert a possible stage1+2 MMU index into the appropriate * stage 1 MMU index */ @@ -9189,9 +9327,10 @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) * * @env: CPUARMState * @s2ap: The 2-bit stage2 access permissions (S2AP) - * @xn: XN (execute-never) bit + * @xn: XN (execute-never) bits + * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 */ -static int get_S2prot(CPUARMState *env, int s2ap, int xn) +static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) { int prot = 0; @@ -9201,8 +9340,32 @@ static int get_S2prot(CPUARMState *env, int s2ap, int xn) if (s2ap & 2) { prot |= PAGE_WRITE; } - if (!xn) { - if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { + + if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { + switch (xn) { + case 0: + prot |= PAGE_EXEC; + break; + case 1: + if (s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + case 2: + break; + case 3: + if (!s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + default: + g_assert_not_reached(); + } + } else { + if (!extract32(xn, 1, 1)) { + if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { + prot |= PAGE_EXEC; + } prot |= PAGE_EXEC; } } @@ -9323,19 +9486,11 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, int s2prot; int ret; ARMCacheAttrs cacheattrs = { 0 }; - ARMCacheAttrs *pcacheattrs = NULL; - - if (env->cp15.hcr_el2 & HCR_PTW) { - /* - * PTW means we must fault if this S1 walk touches S2 Device - * memory; otherwise we don't care about the attributes and can - * save the S2 translation the effort of computing them. - */ - pcacheattrs = &cacheattrs; - } - ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_Stage2, &s2pa, - &txattrs, &s2prot, &s2size, fi, pcacheattrs); + ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, ARMMMUIdx_Stage2, + false, + &s2pa, &txattrs, &s2prot, &s2size, fi, + &cacheattrs); if (ret) { assert(fi->type != ARMFault_None); fi->s2addr = addr; @@ -9343,8 +9498,11 @@ static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, fi->s1ptw = true; return ~0; } - if (pcacheattrs && (pcacheattrs->attrs & 0xf0) == 0) { - /* Access was to Device memory: generate Permission fault */ + if ((env->cp15.hcr_el2 & HCR_PTW) && (cacheattrs.attrs & 0xf0) == 0) { + /* + * PTW set and S1 walk touched S2 Device memory: + * generate Permission fault. + */ fi->type = ARMFault_Permission; fi->s2addr = addr; fi->stage2 = true; @@ -9829,6 +9987,16 @@ static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) } } +static int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx) +{ + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 57, 2); + } else { + /* Replicate the single TCMA bit so we always have 2 bits. */ + return extract32(tcr, 30, 1) * 3; + } +} + ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ARMMMUIdx mmu_idx, bool data) { @@ -9952,8 +10120,32 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, }; } +/** + * get_phys_addr_lpae: perform one stage of page table walk, LPAE format + * + * Returns false if the translation was successful. 
Otherwise, phys_ptr, attrs, + * prot and page_size may not be filled in, and the populated fsr value provides + * information on why the translation aborted, in the format of a long-format + * DFSR/IFSR fault register, with the following caveats: + * * the WnR bit is never set (the caller must do this). + * + * @env: CPUARMState + * @address: virtual address to get physical address for + * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH + * @mmu_idx: MMU index indicating required translation regime + * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table + * walk), must be true if this is stage 2 of a stage 1+2 walk for an + * EL0 access). If @mmu_idx is anything else, @s1_is_el0 is ignored. + * @phys_ptr: set to the physical address corresponding to the virtual address + * @attrs: set to the memory transaction attributes to use + * @prot: set to the permissions for the page containing phys_ptr + * @page_size_ptr: set to the size of the page containing phys_ptr + * @fi: set to fault info if the translation fails + * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes + */ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool s1_is_el0, hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, target_ulong *page_size_ptr, ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) @@ -10176,13 +10368,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, } ap = extract32(attrs, 4, 2); - xn = extract32(attrs, 12, 1); if (mmu_idx == ARMMMUIdx_Stage2) { ns = true; - *prot = get_S2prot(env, ap, xn); + xn = extract32(attrs, 11, 2); + *prot = get_S2prot(env, ap, xn, s1_is_el0); } else { ns = extract32(attrs, 3, 1); + xn = extract32(attrs, 12, 1); pxn = extract32(attrs, 11, 1); *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); } @@ -10201,22 +10394,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, } /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. 
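The two XN bits extracted for the stage 2 case above are decoded by get_S2prot() according to ARMv8.2-TTS2UXN. Restated as a standalone helper (PAGE_EXEC is a stand-in flag and the function name is illustrative), the decode mirrors the switch in the get_S2prot() hunk:

    #include <stdbool.h>

    #define PAGE_EXEC 0x4   /* stand-in for QEMU's page protection flag */

    /* Stage-2 XN[1:0] with TTS2UXN: 0 -> executable at EL1 and EL0,
     * 1 -> EL0 only, 2 -> never executable, 3 -> EL1 only. */
    static int s2_exec_prot(int xn, bool s1_is_el0)
    {
        switch (xn & 3) {
        case 0:
            return PAGE_EXEC;
        case 1:
            return s1_is_el0 ? PAGE_EXEC : 0;
        case 2:
            return 0;
        case 3:
            return s1_is_el0 ? 0 : PAGE_EXEC;
        }
        return 0;
    }

Without TTS2UXN the legacy branch in the same hunk only honours the upper bit, which is the original single stage-2 XN bit.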
*/ if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) { - txattrs->target_tlb_bit0 = true; + arm_tlb_bti_gp(txattrs) = true; } - if (cacheattrs != NULL) { - if (mmu_idx == ARMMMUIdx_Stage2) { - cacheattrs->attrs = convert_stage2_attrs(env, - extract32(attrs, 0, 4)); - } else { - /* Index into MAIR registers for cache attributes */ - uint8_t attrindx = extract32(attrs, 0, 3); - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; - assert(attrindx <= 7); - cacheattrs->attrs = extract64(mair, attrindx * 8, 8); - } - cacheattrs->shareability = extract32(attrs, 6, 2); + if (mmu_idx == ARMMMUIdx_Stage2) { + cacheattrs->attrs = convert_stage2_attrs(env, extract32(attrs, 0, 4)); + } else { + /* Index into MAIR registers for cache attributes */ + uint8_t attrindx = extract32(attrs, 0, 3); + uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + assert(attrindx <= 7); + cacheattrs->attrs = extract64(mair, attrindx * 8, 8); } + cacheattrs->shareability = extract32(attrs, 6, 2); *phys_ptr = descaddr; *page_size_ptr = page_size; @@ -10923,9 +11113,19 @@ static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) */ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) { - uint8_t s1lo = extract32(s1.attrs, 0, 4), s2lo = extract32(s2.attrs, 0, 4); - uint8_t s1hi = extract32(s1.attrs, 4, 4), s2hi = extract32(s2.attrs, 4, 4); + uint8_t s1lo, s2lo, s1hi, s2hi; ARMCacheAttrs ret; + bool tagged = false; + + if (s1.attrs == 0xf0) { + tagged = true; + s1.attrs = 0xff; + } + + s1lo = extract32(s1.attrs, 0, 4); + s2lo = extract32(s2.attrs, 0, 4); + s1hi = extract32(s1.attrs, 4, 4); + s2hi = extract32(s2.attrs, 4, 4); /* Combine shareability attributes (table D4-43) */ if (s1.shareability == 2 || s2.shareability == 2) { @@ -10973,6 +11173,11 @@ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) } } + /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */ + if (tagged && ret.attrs == 0xff) { + ret.attrs = 0xf0; + } + return ret; } @@ -11034,29 +11239,35 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, /* S1 is done. Now do S2 translation. */ ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2, + mmu_idx == ARMMMUIdx_E10_0, phys_ptr, attrs, &s2_prot, - page_size, fi, - cacheattrs != NULL ? &cacheattrs2 : NULL); + page_size, fi, &cacheattrs2); fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ *prot &= s2_prot; - /* Combine the S1 and S2 cache attributes, if needed */ - if (!ret && cacheattrs != NULL) { - if (env->cp15.hcr_el2 & HCR_DC) { - /* - * HCR.DC forces the first stage attributes to - * Normal Non-Shareable, - * Inner Write-Back Read-Allocate Write-Allocate, - * Outer Write-Back Read-Allocate Write-Allocate. - */ + /* If S2 fails, return early. */ + if (ret) { + return ret; + } + + /* Combine the S1 and S2 cache attributes. */ + if (env->cp15.hcr_el2 & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + * Do not overwrite Tagged within attrs. + */ + if (cacheattrs->attrs != 0xf0) { cacheattrs->attrs = 0xff; - cacheattrs->shareability = 0; } - *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + cacheattrs->shareability = 0; } - return ret; + *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + return 0; } else { /* * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. 
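The cache-attribute handling above hinges on the MAIR encoding of Tagged memory: AttrIndx from the descriptor selects one byte of MAIR_ELx, where 0xff is plain Normal Write-Back and 0xf0 is the encoding this code treats as Tagged Normal (combine_cacheattrs() temporarily widens it to 0xff and restores it afterwards). A small sketch of the lookup, with illustrative names:

    #include <stdint.h>

    /* Select one attribute byte from MAIR_ELx, as extract64(mair, attrindx * 8, 8)
     * does in the hunk above. */
    static uint8_t mair_attr(uint64_t mair, unsigned attrindx)
    {
        return (uint8_t)(mair >> (attrindx * 8));
    }

    /* Example: a guest programs Attr0 = 0xff (Normal WB) and Attr1 = 0xf0 (Tagged
     * Normal WB); a page whose descriptor carries AttrIndx == 1 is then seen as
     * Tagged by the MTE code. */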
@@ -11117,6 +11328,9 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, /* Definitely a real MMU, not an MPU */ if (regime_translation_disabled(env, mmu_idx)) { + uint64_t hcr; + uint8_t memattr; + /* * MMU disabled. S1 addresses within aa64 translation regimes are * still checked for bounds -- see AArch64.TranslateAddressS1Off. @@ -11154,11 +11368,32 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, *phys_ptr = address; *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; *page_size = TARGET_PAGE_SIZE; + + /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ + hcr = arm_hcr_el2_eff(env); + cacheattrs->shareability = 0; + if (hcr & HCR_DC) { + if (hcr & HCR_DCT) { + memattr = 0xf0; /* Tagged, Normal, WB, RWA */ + } else { + memattr = 0xff; /* Normal, WB, RWA */ + } + } else if (access_type == MMU_INST_FETCH) { + if (regime_sctlr(env, mmu_idx) & SCTLR_I) { + memattr = 0xee; /* Normal, WT, RA, NT */ + } else { + memattr = 0x44; /* Normal, NC, No */ + } + cacheattrs->shareability = 2; /* outer sharable */ + } else { + memattr = 0x00; /* Device, nGnRnE */ + } + cacheattrs->attrs = memattr; return 0; } if (regime_using_lpae_format(env, mmu_idx)) { - return get_phys_addr_lpae(env, address, access_type, mmu_idx, + return get_phys_addr_lpae(env, address, access_type, mmu_idx, false, phys_ptr, attrs, prot, page_size, fi, cacheattrs); } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { @@ -11181,11 +11416,12 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, bool ret; ARMMMUFaultInfo fi = { 0 }; ARMMMUIdx mmu_idx = arm_mmu_idx(env); + ARMCacheAttrs cacheattrs = {}; *attrs = (MemTxAttrs) { 0 }; ret = get_phys_addr(env, addr, 0, mmu_idx, &phys_addr, - attrs, &prot, &page_size, &fi, NULL); + attrs, &prot, &page_size, &fi, &cacheattrs); if (ret) { return -1; @@ -11719,6 +11955,35 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } } + if (cpu_isar_feature(aa64_mte, env_archcpu(env))) { + /* + * Set MTE_ACTIVE if any access may be Checked, and leave clear + * if all accesses must be Unchecked: + * 1) If no TBI, then there are no tags in the address to check, + * 2) If Tag Check Override, then all accesses are Unchecked, + * 3) If Tag Check Fail == 0, then Checked access have no effect, + * 4) If no Allocation Tag Access, then all accesses are Unchecked. + */ + if (allocation_tag_access_enabled(env, el, sctlr)) { + FIELD_DP32(flags, TBFLAG_A64, ATA, 1, flags); + if (tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) { + FIELD_DP32(flags, TBFLAG_A64, MTE_ACTIVE, 1, flags); + } + } + /* And again for unprivileged accesses, if required. */ + if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV) + && tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & SCTLR_TCF0) + && allocation_tag_access_enabled(env, 0, sctlr)) { + FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1, flags); + } + /* Cache TCMA as well as TBI. 
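The four conditions enumerated above reduce to a single predicate. Stated standalone with plain boolean inputs (illustrative names only, not the real hflags plumbing):

    #include <stdbool.h>

    /* An access may be Checked only if the address carries a tag (TBI enabled),
     * PSTATE.TCO is clear, the selected SCTLR_ELx.TCF(0) fail mode is non-zero,
     * and allocation tag access is enabled for the regime. */
    static bool mte_active(bool tbi, bool tco, unsigned tcf, bool ata_enabled)
    {
        return ata_enabled && tbi && !tco && tcf != 0;
    }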
*/ + FIELD_DP32(flags, TBFLAG_A64, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx), flags); + } + return rebuild_hflags_common(env, fp_el, mmu_idx, flags); } diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index 616d032c84..b48d6eb94e 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -100,6 +100,8 @@ DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int) DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int) +DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32) + DEF_HELPER_1(vfp_get_fpscr, i32, env) DEF_HELPER_2(vfp_set_fpscr, void, env, i32) @@ -207,16 +209,16 @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32) DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr) DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) -DEF_HELPER_3(recps_f32, f32, f32, f32, env) -DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env) +DEF_HELPER_3(recps_f32, f32, env, f32, f32) +DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32) DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) -DEF_HELPER_2(recpe_u32, i32, i32, ptr) -DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr) +DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32) +DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32) DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32) DEF_HELPER_3(shl_cc, i32, env, i32, i32) @@ -279,19 +281,6 @@ DEF_HELPER_2(neon_hsub_u16, i32, i32, i32) DEF_HELPER_2(neon_hsub_s32, s32, s32, s32) DEF_HELPER_2(neon_hsub_u32, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u8, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s8, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u16, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s16, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u32, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s32, i32, i32, i32) -DEF_HELPER_2(neon_cge_u8, i32, i32, i32) -DEF_HELPER_2(neon_cge_s8, i32, i32, i32) -DEF_HELPER_2(neon_cge_u16, i32, i32, i32) -DEF_HELPER_2(neon_cge_s16, i32, i32, i32) -DEF_HELPER_2(neon_cge_u32, i32, i32, i32) -DEF_HELPER_2(neon_cge_s32, i32, i32, i32) - DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) @@ -301,13 +290,6 @@ DEF_HELPER_2(neon_pmax_s8, i32, i32, i32) DEF_HELPER_2(neon_pmax_u16, i32, i32, i32) DEF_HELPER_2(neon_pmax_s16, i32, i32, i32) -DEF_HELPER_2(neon_abd_u8, i32, i32, i32) -DEF_HELPER_2(neon_abd_s8, i32, i32, i32) -DEF_HELPER_2(neon_abd_u16, i32, i32, i32) -DEF_HELPER_2(neon_abd_s16, i32, i32, i32) -DEF_HELPER_2(neon_abd_u32, i32, i32, i32) -DEF_HELPER_2(neon_abd_s32, i32, i32, i32) - DEF_HELPER_2(neon_shl_u16, i32, i32, i32) DEF_HELPER_2(neon_shl_s16, i32, i32, i32) DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) @@ -351,9 +333,6 @@ DEF_HELPER_2(neon_mul_u16, i32, i32, i32) DEF_HELPER_2(neon_tst_u8, i32, i32, i32) DEF_HELPER_2(neon_tst_u16, i32, i32, i32) DEF_HELPER_2(neon_tst_u32, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u8, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u16, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u32, i32, i32, i32) DEF_HELPER_1(neon_clz_u8, i32, i32) DEF_HELPER_1(neon_clz_u16, i32, i32) @@ -423,7 +402,6 @@ 
DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32) DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32) DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64) -DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr) @@ -538,29 +516,40 @@ DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) -DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_2(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr) - -DEF_HELPER_FLAGS_3(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) -DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) - -DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw2, 
TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) @@ -622,6 +611,8 @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG, @@ -690,6 +681,17 @@ DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -700,6 +702,66 @@ DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, 
void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/qemu/target/arm/internals.h b/qemu/target/arm/internals.h index 5bb1ad0e61..2bd763072b 100644 --- a/qemu/target/arm/internals.h +++ b/qemu/target/arm/internals.h @@ -454,13 +454,14 @@ static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc) | ARM_EL_IL | (ea << 9) | (s1ptw << 7) | fsc; } -static inline uint32_t syn_data_abort_no_iss(int same_el, +static inline uint32_t syn_data_abort_no_iss(int same_el, int fnv, int ea, int cm, int s1ptw, int wnr, int fsc) { return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) | ARM_EL_IL - | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc; + | (fnv << 10) | (ea << 9) | (cm << 8) | (s1ptw << 7) + | (wnr << 6) | fsc; } static inline uint32_t syn_data_abort_with_iss(int same_el, @@ -908,6 +909,51 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) } } +/* Return the exception level which controls this address translation regime */ +static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_Stage2: + case ARMMMUIdx_E2: + return 2; + case ARMMMUIdx_SE3: + return 3; + case ARMMMUIdx_SE10_0: + return arm_el_is_aa64(env, 3) ? 
1 : 3; + case ARMMMUIdx_SE10_1: + case ARMMMUIdx_SE10_1_PAN: + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + case ARMMMUIdx_MPrivNegPri: + case ARMMMUIdx_MUserNegPri: + case ARMMMUIdx_MPriv: + case ARMMMUIdx_MUser: + case ARMMMUIdx_MSPrivNegPri: + case ARMMMUIdx_MSUserNegPri: + case ARMMMUIdx_MSPriv: + case ARMMMUIdx_MSUser: + return 1; + default: + g_assert_not_reached(); + } +} + +/* Return the TCR controlling this translation regime */ +static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + if (mmu_idx == ARMMMUIdx_Stage2) { + return &env->cp15.vtcr_el2; + } + return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; +} + /* Return the FSR value for a debug exception (watchpoint, hardware * breakpoint or BKPT insn) targeting the specified exception level. */ @@ -975,11 +1021,6 @@ static inline int arm_num_ctx_cmps(ARMCPU *cpu) } } -/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3. - * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits. - */ -#define MEMOPIDX_SHIFT 8 - /** * v7m_using_psp: Return true if using process stack pointer * Return true if the CPU is currently using the process stack @@ -1154,6 +1195,9 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id) if (isar_feature_aa64_uao(id)) { valid |= PSTATE_UAO; } + if (isar_feature_aa64_mte(id)) { + valid |= PSTATE_TCO; + } return valid; } @@ -1190,6 +1234,24 @@ static inline int exception_target_el(CPUARMState *env) return target_el; } +/* Determine if allocation tags are available. */ +static inline bool allocation_tag_access_enabled(CPUARMState *env, int el, + uint64_t sctlr) +{ + if (el < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_ATA)) { + return false; + } + if (el < 2 + && arm_feature(env, ARM_FEATURE_EL2) + && !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return false; + } + sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA); + return sctlr != 0; +} + /* Security attributes for an address, as returned by v8m_security_lookup. */ typedef struct V8M_SAttributes { bool subpage; /* true if these attrs don't cover the whole TARGET_PAGE */ @@ -1221,8 +1283,89 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, target_ulong *page_size, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + UNICORN_NONNULL; void arm_log_exception(int idx); +/* + * The log2 of the words in the tag block, for GMID_EL1.BS. + * The is the maximum, 256 bytes, which manipulates 64-bits of tags. + */ +#define GMID_EL1_BS 6 + +/* We associate one allocation tag per 16 bytes, the minimum. */ +#define LOG2_TAG_GRANULE 4 +#define TAG_GRANULE (1 << LOG2_TAG_GRANULE) + +/* + * The SVE simd_data field, for memory ops, contains either + * rd (5 bits) or a shift count (2 bits). + */ +#define SVE_MTEDESC_SHIFT 5 + +/* Bits within a descriptor passed to the helper_mte_check* functions. 
*/ +FIELD(MTEDESC, MIDX, 0, 4) +FIELD(MTEDESC, TBI, 4, 2) +FIELD(MTEDESC, TCMA, 6, 2) +FIELD(MTEDESC, WRITE, 8, 1) +FIELD(MTEDESC, ESIZE, 9, 5) +FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */ + +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); + +static inline int allocation_tag_from_addr(uint64_t ptr) +{ + return extract64(ptr, 56, 4); +} + +static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag) +{ + return deposit64(ptr, 56, 4, rtag); +} + +/* Return true if tbi bits mean that the access is checked. */ +static inline bool tbi_check(uint32_t desc, int bit55) +{ + return (desc >> (R_MTEDESC_TBI_SHIFT + bit55)) & 1; +} + +/* Return true if tcma bits mean that the access is unchecked. */ +static inline bool tcma_check(uint32_t desc, int bit55, int ptr_tag) +{ + /* + * We had extracted bit55 and ptr_tag for other reasons, so fold + * (ptr<59:55> == 00000 || ptr<59:55> == 11111) into a single test. + */ + bool match = ((ptr_tag + bit55) & 0xf) == 0; + bool tcma = (desc >> (R_MTEDESC_TCMA_SHIFT + bit55)) & 1; + return tcma && match; +} + +/* + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode, we do not have a TLB with which to implement this, so we must + * remove the top byte. + */ +static inline uint64_t useronly_clean_ptr(uint64_t ptr) +{ + /* TBI is known to be enabled. */ + ptr = sextract64(ptr, 0, 56); + return ptr; +} + +static inline uint64_t useronly_maybe_clean_ptr(uint32_t desc, uint64_t ptr) +{ + int64_t clean_ptr = sextract64(ptr, 0, 56); + if (tbi_check(desc, clean_ptr < 0)) { + ptr = clean_ptr; + } + return ptr; +} + #endif diff --git a/qemu/target/arm/m_helper.c b/qemu/target/arm/m_helper.c index 7fd9d21965..22f4b1b949 100644 --- a/qemu/target/arm/m_helper.c +++ b/qemu/target/arm/m_helper.c @@ -87,12 +87,13 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; // int exc; // bool exc_secure; if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { if (mode == STACK_LAZYFP) { @@ -187,13 +188,14 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; uint32_t value; if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { qemu_log_mask(CPU_LOG_INT, @@ -1859,6 +1861,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, V8M_SAttributes sattrs = { 0 }; MemTxAttrs attrs = { 0 }; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; MemTxResult txres; target_ulong page_size; hwaddr physaddr; @@ -1877,7 +1880,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, return false; } if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, - &physaddr, &attrs, &prot, &page_size, &fi, NULL)) { + &physaddr, 
&attrs, &prot, &page_size, &fi, &cacheattrs)) { /* the MPU lookup failed */ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure); diff --git a/qemu/target/arm/mte_helper.c b/qemu/target/arm/mte_helper.c new file mode 100644 index 0000000000..630e18a8ac --- /dev/null +++ b/qemu/target/arm/mte_helper.c @@ -0,0 +1,913 @@ +/* + * ARM v8.5-MemTag Operations + * + * Copyright (c) 2020 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/exec-all.h" +#include "exec/ram_addr.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "qemu/guest-random.h" + + +static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) +{ + if (exclude == 0xffff) { + return 0; + } + if (offset == 0) { + while (exclude & (1 << tag)) { + tag = (tag + 1) & 15; + } + } else { + do { + do { + tag = (tag + 1) & 15; + } while (exclude & (1 << tag)); + } while (--offset > 0); + } + return tag; +} + +/** + * allocation_tag_mem: + * @env: the cpu environment + * @ptr_mmu_idx: the addressing regime to use for the virtual address + * @ptr: the virtual address for which to look up tag memory + * @ptr_access: the access to use for the virtual address + * @ptr_size: the number of bytes in the normal memory access + * @tag_access: the access to use for the tag memory + * @tag_size: the number of bytes in the tag memory access + * @ra: the return address for exception handling + * + * Our tag memory is formatted as a sequence of little-endian nibbles. + * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two + * tags, with the tag at [3:0] for the lower addr and the tag at [7:4] + * for the higher addr. + * + * Here, resolve the physical address from the virtual address, and return + * a pointer to the corresponding tag byte. Exit with exception if the + * virtual address is not accessible for @ptr_access. + * + * The @ptr_size and @tag_size values may not have an obvious relation + * due to the alignment of @ptr, and the number of tag checks required. + * + * If there is no tag storage corresponding to @ptr, return NULL. + */ +static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, + uint64_t ptr, MMUAccessType ptr_access, + int ptr_size, MMUAccessType tag_access, + int tag_size, uintptr_t ra) +{ + struct uc_struct *uc = env->uc; + uintptr_t index; + CPUIOTLBEntry *iotlbentry; + int in_page, flags; + ram_addr_t ptr_ra; + hwaddr ptr_paddr, tag_paddr, xlat; + MemoryRegion *mr; + ARMASIdx tag_asi; + AddressSpace *tag_as; + void *host; + + /* + * Probe the first byte of the virtual address. This raises an + * exception for inaccessible pages, and resolves the virtual address + * into the softmmu tlb. + * + * When RA == 0, this is for mte_probe1. The page is expected to be + * valid. 
Indicate to probe_access_flags no-fault, then assert that + * we received a valid page. + */ + flags = probe_access_flags(env, ptr, ptr_access, ptr_mmu_idx, + ra == 0, &host, ra); + assert(!(flags & TLB_INVALID_MASK)); + + /* + * Find the iotlbentry for ptr. This *must* be present in the TLB + * because we just found the mapping. + * TODO: Perhaps there should be a cputlb helper that returns a + * matching tlb entry + iotlb entry. + */ + index = tlb_index(env, ptr_mmu_idx, ptr); + iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index]; + + /* If the virtual page MemAttr != Tagged, access unchecked. */ + if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) { + return NULL; + } + + /* + * If not backed by host ram, there is no tag storage: access unchecked. + * This is probably a guest os bug though, so log it. + */ + if (unlikely(flags & TLB_MMIO)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Page @ 0x%" PRIx64 " indicates Tagged Normal memory " + "but is not backed by host ram\n", ptr); + return NULL; + } + + /* + * The Normal memory access can extend to the next page. E.g. a single + * 8-byte access to the last byte of a page will check only the last + * tag on the first page. + * Any page access exception has priority over tag check exception. + */ + in_page = -(ptr | TARGET_PAGE_MASK); + if (unlikely(ptr_size > in_page)) { + void *ignore; + flags |= probe_access_flags(env, ptr + in_page, ptr_access, + ptr_mmu_idx, ra == 0, &ignore, ra); + assert(!(flags & TLB_INVALID_MASK)); + } + + /* Any debug exception has priority over a tag check exception. */ + if (unlikely(flags & TLB_WATCHPOINT)) { + int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE; + assert(ra != 0); + cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, + iotlbentry->attrs, wp, ra); + } + + /* + * Find the physical address within the normal mem space. + * The memory region lookup must succeed because TLB_MMIO was + * not set in the cputlb lookup above. + */ + mr = memory_region_from_host(uc, host, &ptr_ra); + tcg_debug_assert(mr != NULL); + tcg_debug_assert(memory_region_is_ram(mr)); + ptr_paddr = ptr_ra; + do { + ptr_paddr += mr->addr; + mr = mr->container; + } while (mr); + + /* Convert to the physical address in tag space. */ + tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1); + + /* Look up the address in tag space. */ + tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS; + tag_as = cpu_get_address_space(env_cpu(env), tag_asi); + mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL, + tag_access == MMU_DATA_STORE, + iotlbentry->attrs); + + /* + * Note that @mr will never be NULL. If there is nothing in the address + * space at @tag_paddr, the translation will return the unallocated memory + * region. For our purposes, the result must be ram. + */ + if (unlikely(!memory_region_is_ram(mr))) { + /* ??? Failure is a board configuration error. */ + qemu_log_mask(LOG_UNIMP, + "Tag Memory @ 0x%" HWADDR_PRIx " not found for " + "Normal Memory @ 0x%" HWADDR_PRIx "\n", + tag_paddr, ptr_paddr); + return NULL; + } + + /* + * Ensure the tag memory is dirty on write, for migration. + * Tag memory can never contain code or display memory (vga). 
+ */ + if (tag_access == MMU_DATA_STORE) { + ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat; + cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION); + } + + return (uint8_t*)memory_region_get_ram_ptr(mr) + xlat; +} + +uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) +{ + uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16); + int rrnd = extract32(env->cp15.gcr_el1, 16, 1); + int start = extract32(env->cp15.rgsr_el1, 0, 4); + int seed = extract32(env->cp15.rgsr_el1, 8, 16); + int offset, i, rtag; + + /* + * Our IMPDEF choice for GCR_EL1.RRND==1 is to continue to use the + * deterministic algorithm. Except that with RRND==1 the kernel is + * not required to have set RGSR_EL1.SEED != 0, which is required for + * the deterministic algorithm to function. So we force a non-zero + * SEED for that case. + */ + if (unlikely(seed == 0) && rrnd) { + do { + uint16_t two; + + if (qemu_guest_getrandom(&two, sizeof(two)) < 0) { + /* + * Failed, for unknown reasons in the crypto subsystem. + * Best we can do is use a constant seed. + */ + two = 1; + } + seed = two; + } while (seed == 0); + } + + /* RandomTag */ + for (i = offset = 0; i < 4; ++i) { + /* NextRandomTagBit */ + int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^ + extract32(seed, 2, 1) ^ extract32(seed, 0, 1)); + seed = (top << 15) | (seed >> 1); + offset |= top << i; + } + rtag = choose_nonexcluded_tag(start, offset, exclude); + env->cp15.rgsr_el1 = rtag | (seed << 8); + + return address_with_allocation_tag(rn, rtag); +} + +uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr, + int32_t offset, uint32_t tag_offset) +{ + int start_tag = allocation_tag_from_addr(ptr); + uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16); + int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude); + + return address_with_allocation_tag(ptr + offset, rtag); +} + +static int load_tag1(uint64_t ptr, uint8_t *mem) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + return extract32(*mem, ofs, 4); +} + +uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + int rtag = 0; + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1, + MMU_DATA_LOAD, 1, GETPC()); + + /* Load if page supports tags. */ + if (mem) { + rtag = load_tag1(ptr, mem); + } + + return address_with_allocation_tag(xt, rtag); +} + +static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra) +{ + if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) { + arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, + cpu_mmu_index(env, false), ra); + g_assert_not_reached(); + } +} + +/* For use in a non-parallel context, store to the given nibble. */ +static void store_tag1(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + *mem = deposit32(*mem, ofs, 4, tag); +} + +/* For use in a parallel context, atomically store to the given nibble. 
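load_tag1() and store_tag1() above rely on the nibble layout documented in the allocation_tag_mem() comment: one 4-bit tag per 16-byte granule, packed two granules per tag byte with the lower granule in bits [3:0]. A self-contained sketch of that addressing, operating on a flat tag array rather than a resolved host pointer:

    #include <stddef.h>
    #include <stdint.h>

    #define LOG2_TAG_GRANULE 4                  /* one 4-bit tag per 16 bytes */

    static size_t tag_byte_index(uint64_t addr)
    {
        return (size_t)(addr >> (LOG2_TAG_GRANULE + 1));       /* which tag byte */
    }

    static unsigned tag_nibble_shift(uint64_t addr)
    {
        return (unsigned)((addr >> LOG2_TAG_GRANULE) & 1) * 4; /* 0 or 4 */
    }

    static int load_tag(const uint8_t *tag_mem, uint64_t addr)
    {
        return (tag_mem[tag_byte_index(addr)] >> tag_nibble_shift(addr)) & 0xf;
    }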
*/ +static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + uint8_t old = atomic_read(mem); + + while (1) { + uint8_t new = deposit32(old, ofs, 4, tag); + uint8_t cmp = atomic_cmpxchg(mem, old, new); + if (likely(cmp == old)) { + return; + } + old = cmp; + } +} + +typedef void stg_store1(uint64_t, uint8_t *, int); + +static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + + check_tag_aligned(env, ptr, ra); + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page supports tags. */ + if (mem) { + store1(ptr, mem, allocation_tag_from_addr(xt)); + } +} + +void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + + check_tag_aligned(env, ptr, ra); + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); +} + +static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + int tag = allocation_tag_from_addr(xt); + uint8_t *mem1, *mem2; + + check_tag_aligned(env, ptr, ra); + + /* + * Trap if accessing an invalid page(s). + * This takes priority over !allocation_tag_access_enabled. + */ + if (ptr & TAG_GRANULE) { + /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + TAG_GRANULE, MMU_DATA_STORE, 1, ra); + mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE, + MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page(s) support tags. */ + if (mem1) { + store1(TAG_GRANULE, mem1, tag); + } + if (mem2) { + store1(0, mem2, tag); + } + } else { + /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + 2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra); + if (mem1) { + tag |= tag << 4; + atomic_set(mem1, tag); + } + } +} + +void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) +{ + uc_engine *uc = env->uc; + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + int in_page = -(ptr | TARGET_PAGE_MASK); + + check_tag_aligned(env, ptr, ra); + + if (likely(in_page >= 2 * TAG_GRANULE)) { + probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra); + } else { + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); + probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra); + } +} + +#define LDGM_STGM_SIZE (4 << GMID_EL1_BS) + +uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. 
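The LDGM/STGM block size used here follows directly from the constants defined in internals.h above; spelling out the arithmetic (the two derived macro names are illustrative):

    #define GMID_EL1_BS          6
    #define LOG2_TAG_GRANULE     4
    #define TAG_GRANULE          (1 << LOG2_TAG_GRANULE)                /* 16 bytes */
    #define LDGM_STGM_SIZE       (4 << GMID_EL1_BS)                     /* 256 bytes of data */
    #define LDGM_STGM_TAGS       (LDGM_STGM_SIZE >> LOG2_TAG_GRANULE)   /* 16 tags */
    #define LDGM_STGM_TAG_BYTES  (LDGM_STGM_SIZE / (2 * TAG_GRANULE))   /* 8 tag bytes */

    _Static_assert(LDGM_STGM_TAG_BYTES == 8, "LDGM/STGM moves 64 bits of tags");

which is why helper_ldgm()/helper_stgm() can use a single 64-bit little-endian load or store of the tag memory.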
*/ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* The tag is squashed to zero if the page does not support tags. */ + if (!tag_mem) { + return 0; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are loading 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + return ldq_le_p(tag_mem); +} + +void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* + * Tag store only happens if the page support tags, + * and if the OS has enabled access to the tags. + */ + if (!tag_mem) { + return; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are storing 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + stq_le_p(tag_mem, val); +} + +void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + uintptr_t ra = GETPC(); + int mmu_idx = cpu_mmu_index(env, false); + int log2_dcz_bytes, log2_tag_bytes; + intptr_t dcz_bytes, tag_bytes; + uint8_t *mem; + + /* + * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, + * to make sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + ptr &= -dcz_bytes; + + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes, + MMU_DATA_STORE, tag_bytes, ra); + if (mem) { + int tag_pair = (val & 0xf) * 0x11; + memset(mem, tag_pair, tag_bytes); + } +} + +/* Record a tag check failure. */ +static void mte_check_fail(CPUARMState *env, int mmu_idx, + uint64_t dirty_ptr, uintptr_t ra) +{ + ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); + int el, reg_el, tcf, select; + uint64_t sctlr; + + reg_el = regime_el(env, arm_mmu_idx); + sctlr = env->cp15.sctlr_el[reg_el]; + + switch (arm_mmu_idx) { + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E20_0: + el = 0; + tcf = extract64(sctlr, 38, 2); + break; + default: + el = reg_el; + tcf = extract64(sctlr, 40, 2); + } + + switch (tcf) { + case 1: + /* + * Tag check fail causes a synchronous exception. + * + * In restore_state_to_opc, we set the exception syndrome + * for the load or store operation. Unwind first so we + * may overwrite that with the syndrome for the tag check. + */ + cpu_restore_state(env_cpu(env), ra, true); + env->exception.vaddress = dirty_ptr; + raise_exception(env, EXCP_DATA_ABORT, + syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11), + exception_target_el(env)); + /* noreturn, but fall through to the assert anyway */ + + case 0: + /* + * Tag check fail does not affect the PE. + * We eliminate this case by not setting MTE_ACTIVE + * in tb_flags, so that we never make this runtime call. + */ + g_assert_not_reached(); + + case 2: + /* Tag check fail causes asynchronous flag set. 
*/ + mmu_idx = arm_mmu_idx_el(env, el); + if (regime_has_2_ranges(mmu_idx)) { + select = extract64(dirty_ptr, 55, 1); + } else { + select = 0; + } + env->cp15.tfsr_el[el] |= 1 << select; + break; + + default: + /* Case 3: Reserved. */ + qemu_log_mask(LOG_GUEST_ERROR, + "Tag check failure with SCTLR_EL%d.TCF%s " + "set to reserved value %d\n", + reg_el, el ? "" : "0", tcf); + break; + } +} + +/* + * Perform an MTE checked access for a single logical or atomic access. + */ +static bool mte_probe1_int(CPUARMState *env, uint32_t desc, uint64_t ptr, + uintptr_t ra, int bit55) +{ + int mem_tag, mmu_idx, ptr_tag, size; + MMUAccessType type; + uint8_t *mem; + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + return true; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + size = FIELD_EX32(desc, MTEDESC, ESIZE); + + mem = allocation_tag_mem(env, mmu_idx, ptr, type, size, + MMU_DATA_LOAD, 1, ra); + if (!mem) { + return true; + } + + mem_tag = load_tag1(ptr, mem); + return ptr_tag == mem_tag; +} + +/* + * No-fault version of mte_check1, to be used by SVE for MemSingleNF. + * Returns false if the access is Checked and the check failed. This + * is only intended to probe the tag -- the validity of the page must + * be checked beforehand. + */ +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked. */ + if (unlikely(!tbi_check(desc, bit55))) { + return true; + } + + return mte_probe1_int(env, desc, ptr, 0, bit55); +} + +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) { + int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + mte_check_fail(env, mmu_idx, ptr, ra); + } + + return useronly_clean_ptr(ptr); +} + +uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return mte_check1(env, desc, ptr, GETPC()); +} + +/* + * Perform an MTE checked access for multiple logical accesses. + */ + +/** + * checkN: + * @tag: tag memory to test + * @odd: true to begin testing at tags at odd nibble + * @cmp: the tag to compare against + * @count: number of tags to test + * + * Return the number of successful tests. + * Thus a return value < @count indicates a failure. + * + * A note about sizes: count is expected to be small. + * + * The most common use will be LDP/STP of two integer registers, + * which means 16 bytes of memory touching at most 2 tags, but + * often the access is aligned and thus just 1 tag. + * + * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory, + * touching at most 5 tags. SVE LDR/STR (vector) with the default + * vector length is also 64 bytes; the maximum architectural length + * is 256 bytes touching at most 9 tags. + * + * The loop below uses 7 logical operations and 1 memory operation + * per tag pair. An implementation that loads an aligned word and + * uses masking to ignore adjacent tags requires 18 logical operations + * and thus does not begin to pay off until 6 tags. + * Which, according to the survey above, is unlikely to be common. + */ +static int checkN(uint8_t *mem, int odd, int cmp, int count) +{ + int n = 0, diff; + + /* Replicate the test tag and compare. 
*/ + cmp *= 0x11; + diff = *mem++ ^ cmp; + + if (odd) { + goto start_odd; + } + + while (1) { + /* Test even tag. */ + if (unlikely((diff) & 0x0f)) { + break; + } + if (++n == count) { + break; + } + + start_odd: + /* Test odd tag. */ + if (unlikely((diff) & 0xf0)) { + break; + } + if (++n == count) { + break; + } + + diff = *mem++ ^ cmp; + } + return n; +} + +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + uc_engine *uc = env->uc; + int mmu_idx, ptr_tag, bit55; + uint64_t ptr_last, ptr_end, prev_page, next_page; + uint64_t tag_first, tag_end; + uint64_t tag_byte_first, tag_byte_end; + uint32_t esize, total, tag_count, tag_size, n, c; + uint8_t *mem1, *mem2; + MMUAccessType type; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + esize = FIELD_EX32(desc, MTEDESC, ESIZE); + total = FIELD_EX32(desc, MTEDESC, TSIZE); + + /* Find the addr of the end of the access, and of the last element. */ + ptr_end = ptr + total; + ptr_last = ptr_end - esize; + + /* Round the bounds to the tag granule, and compute the number of tags. */ + tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); + tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE); + tag_count = (tag_end - tag_first) / TAG_GRANULE; + + /* Round the bounds to twice the tag granule, and compute the bytes. */ + tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); + tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE); + + /* Locate the page boundaries. */ + prev_page = ptr & TARGET_PAGE_MASK; + next_page = prev_page + TARGET_PAGE_SIZE; + + if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) { + /* Memory access stays on one page. */ + tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, + MMU_DATA_LOAD, tag_size, ra); + if (!mem1) { + goto done; + } + /* Perform all of the comparisons. */ + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count); + } else { + /* Memory access crosses to next page. */ + tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, + MMU_DATA_LOAD, tag_size, ra); + + tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE); + mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, + ptr_end - next_page, + MMU_DATA_LOAD, tag_size, ra); + + /* + * Perform all of the comparisons. + * Note the possible but unlikely case of the operation spanning + * two pages that do not both have tagging enabled. + */ + n = c = (next_page - tag_first) / TAG_GRANULE; + if (mem1) { + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c); + } + if (n == c) { + if (!mem2) { + goto done; + } + n += checkN(mem2, 0, ptr_tag, tag_count - c); + } + } + + /* + * If we failed, we know which granule. Compute the element that + * is first in that granule, and signal failure on that element. 
+ */ + if (unlikely(n < tag_count)) { + uint64_t fail_ofs; + + fail_ofs = tag_first + n * TAG_GRANULE - ptr; + fail_ofs = ROUND_UP(fail_ofs, esize); + mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra); + } + + done: + return useronly_clean_ptr(ptr); +} + +uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return mte_checkN(env, desc, ptr, GETPC()); +} + +/* + * Perform an MTE checked access for DC_ZVA. + */ +uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + uintptr_t ra = GETPC(); + int log2_dcz_bytes, log2_tag_bytes; + int mmu_idx, bit55; + intptr_t dcz_bytes, tag_bytes, i; + void *mem; + uint64_t ptr_tag, mem_tag, align_ptr; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + /* + * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make + * sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + align_ptr = ptr & -dcz_bytes; + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + (void) probe_write(env, ptr, 1, mmu_idx, ra); + mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE, + dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra); + if (!mem) { + goto done; + } + + /* + * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus + * it is quite easy to perform all of the comparisons at once without + * any extra masking. + * + * The most common zva block size is 64; some of the thunderx cpus use + * a block size of 128. For user-only, aarch64_max_initfn will set the + * block size to 512. Fill out the other cases for future-proofing. + * + * In order to be able to find the first miscompare later, we want the + * tag bytes to be in little-endian order. + */ + switch (log2_tag_bytes) { + case 0: /* zva_blocksize 32 */ + mem_tag = *(uint8_t *)mem; + ptr_tag *= 0x11u; + break; + case 1: /* zva_blocksize 64 */ + mem_tag = cpu_to_le16(*(uint16_t *)mem); + ptr_tag *= 0x1111u; + break; + case 2: /* zva_blocksize 128 */ + mem_tag = cpu_to_le32(*(uint32_t *)mem); + ptr_tag *= 0x11111111u; + break; + case 3: /* zva_blocksize 256 */ + mem_tag = cpu_to_le64(*(uint64_t *)mem); + ptr_tag *= 0x1111111111111111ull; + break; + + default: /* zva_blocksize 512, 1024, 2048 */ + ptr_tag *= 0x1111111111111111ull; + i = 0; + do { + mem_tag = cpu_to_le64(*(uint64_t *)((char*)mem + i)); + if (unlikely(mem_tag != ptr_tag)) { + goto fail; + } + i += 8; + align_ptr += 16 * TAG_GRANULE; + } while (i < tag_bytes); + goto done; + } + + if (likely(mem_tag == ptr_tag)) { + goto done; + } + + fail: + /* Locate the first nibble that differs. 
*/ + i = ctz64(mem_tag ^ ptr_tag) >> 4; + mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra); + + done: + return useronly_clean_ptr(ptr); +} diff --git a/qemu/target/arm/neon_helper.c b/qemu/target/arm/neon_helper.c index 0c2828e6f3..7a9568a4e6 100644 --- a/qemu/target/arm/neon_helper.c +++ b/qemu/target/arm/neon_helper.c @@ -562,24 +562,6 @@ uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) return dest; } -#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 -NEON_VOP(cgt_s8, neon_s8, 4) -NEON_VOP(cgt_u8, neon_u8, 4) -NEON_VOP(cgt_s16, neon_s16, 2) -NEON_VOP(cgt_u16, neon_u16, 2) -NEON_VOP(cgt_s32, neon_s32, 1) -NEON_VOP(cgt_u32, neon_u32, 1) -#undef NEON_FN - -#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 -NEON_VOP(cge_s8, neon_s8, 4) -NEON_VOP(cge_u8, neon_u8, 4) -NEON_VOP(cge_s16, neon_s16, 2) -NEON_VOP(cge_u16, neon_u16, 2) -NEON_VOP(cge_s32, neon_s32, 1) -NEON_VOP(cge_u32, neon_u32, 1) -#undef NEON_FN - #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 NEON_POP(pmin_s8, neon_s8, 4) NEON_POP(pmin_u8, neon_u8, 4) @@ -594,16 +576,6 @@ NEON_POP(pmax_s16, neon_s16, 2) NEON_POP(pmax_u16, neon_u16, 2) #undef NEON_FN -#define NEON_FN(dest, src1, src2) \ - dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) -NEON_VOP(abd_s8, neon_s8, 4) -NEON_VOP(abd_u8, neon_u8, 4) -NEON_VOP(abd_s16, neon_s16, 2) -NEON_VOP(abd_u16, neon_u16, 2) -NEON_VOP(abd_s32, neon_s32, 1) -NEON_VOP(abd_u32, neon_u32, 1) -#undef NEON_FN - #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -1135,12 +1107,6 @@ NEON_VOP(tst_u16, neon_u16, 2) NEON_VOP(tst_u32, neon_u32, 1) #undef NEON_FN -#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 -NEON_VOP(ceq_u8, neon_u8, 4) -NEON_VOP(ceq_u16, neon_u16, 2) -NEON_VOP(ceq_u32, neon_u32, 1) -#undef NEON_FN - /* Count Leading Sign/Zero Bits. */ static inline int do_clz8(uint8_t x) { @@ -1889,13 +1855,6 @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x) } /* NEON Float helpers. */ -uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - float32 f0 = make_float32(a); - float32 f1 = make_float32(b); - return float32_val(float32_abs(float32_sub(f0, f1, fpst))); -} /* Floating point comparisons produce an integer result. * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do. 
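The mte_helper.c hunks above store one 4-bit allocation tag per 16-byte granule, two granules per tag byte (even granule in the low nibble, odd granule in the high nibble), and mirror the logical tag into pointer bits [59:56]. A minimal standalone sketch of that layout follows; the names are hypothetical and not taken from the patch, which reads and writes the same nibble via extract32(ptr, LOG2_TAG_GRANULE, 1) * 4 in load_tag1()/store_tag1().

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TAG_GRANULE      16   /* one allocation tag covers 16 bytes */
#define LOG2_TAG_GRANULE 4

/* Tag for the granule containing 'addr': even granules live in the low
 * nibble of the tag byte, odd granules in the high nibble. */
static int sketch_load_tag(const uint8_t *tag_mem, uint64_t addr)
{
    int ofs = (int)((addr >> LOG2_TAG_GRANULE) & 1) * 4;
    return (tag_mem[addr / (2 * TAG_GRANULE)] >> ofs) & 0xf;
}

/* Mirror of address_with_allocation_tag(): the logical tag sits in
 * pointer bits [59:56]. */
static uint64_t sketch_tag_pointer(uint64_t ptr, int tag)
{
    return (ptr & ~(0xfull << 56)) | ((uint64_t)(tag & 0xf) << 56);
}

int main(void)
{
    uint8_t tags[2] = { 0x3a, 0x05 };   /* tags for granules 0..3 */

    assert(sketch_load_tag(tags, 0x00) == 0xa);  /* granule 0, low nibble  */
    assert(sketch_load_tag(tags, 0x1f) == 0x3);  /* granule 1, high nibble */
    assert(sketch_load_tag(tags, 0x20) == 0x5);  /* granule 2, next byte   */

    printf("tagged ptr: %#llx\n",
           (unsigned long long)sketch_tag_pointer(0x0000aabbccdd0000ull, 0xa));
    return 0;
}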
diff --git a/qemu/target/arm/op_helper.c b/qemu/target/arm/op_helper.c index a9cbc79287..8844d13eae 100644 --- a/qemu/target/arm/op_helper.c +++ b/qemu/target/arm/op_helper.c @@ -933,6 +933,23 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) } } +void HELPER(probe_access)(CPUARMState *env, target_ulong ptr, + uint32_t access_type, uint32_t mmu_idx, + uint32_t size) +{ + uc_engine *uc = env->uc; + uint32_t in_page = -((uint32_t)ptr | TARGET_PAGE_SIZE); + uintptr_t ra = GETPC(); + + if (likely(size <= in_page)) { + probe_access(env, ptr, size, access_type, mmu_idx, ra); + } else { + probe_access(env, ptr, in_page, access_type, mmu_idx, ra); + probe_access(env, ptr + in_page, size - in_page, + access_type, mmu_idx, ra); + } +} + uint32_t HELPER(uc_hooksys64)(CPUARMState *env, uint32_t insn, void *hk) { uc_arm64_reg uc_rt; diff --git a/qemu/target/arm/pauth_helper.c b/qemu/target/arm/pauth_helper.c index b909630317..6dbab03768 100644 --- a/qemu/target/arm/pauth_helper.c +++ b/qemu/target/arm/pauth_helper.c @@ -300,7 +300,11 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, */ test = sextract64(ptr, bot_bit, top_bit - bot_bit); if (test != 0 && test != -1) { - pac ^= MAKE_64BIT_MASK(top_bit - 1, 1); + /* + * Note that our top_bit is one greater than the pseudocode's + * version, hence "- 2" here. + */ + pac ^= MAKE_64BIT_MASK(top_bit - 2, 1); } /* diff --git a/qemu/target/arm/sve_helper.c b/qemu/target/arm/sve_helper.c index 2abbeba57b..c575b8f7db 100644 --- a/qemu/target/arm/sve_helper.c +++ b/qemu/target/arm/sve_helper.c @@ -27,21 +27,20 @@ #include "fpu/softfloat.h" #include "tcg/tcg.h" - /* Note that vector data is stored in host-endian 64-bit chunks, so addressing units smaller than that needs a host-endian fixup. */ #ifdef HOST_WORDS_BIGENDIAN -#define H1(x) ((x) ^ 7) +#define H1(x) ((x) ^ 7) #define H1_2(x) ((x) ^ 6) #define H1_4(x) ((x) ^ 4) -#define H2(x) ((x) ^ 3) -#define H4(x) ((x) ^ 1) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) #else -#define H1(x) (x) +#define H1(x) (x) #define H1_2(x) (x) #define H1_4(x) (x) -#define H2(x) (x) -#define H4(x) (x) +#define H2(x) (x) +#define H4(x) (x) #endif /* Return a value for NZCV as per the ARM PredTest pseudofunction. @@ -52,7 +51,7 @@ */ /* For no G bits set, NZCV = C. */ -#define PREDTEST_INIT 1 +#define PREDTEST_INIT 1 /* This is an iterative function, called for each Pd and Pg word * moving forward. 
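st2g_stub above counts the bytes remaining in the current page with -(ptr | TARGET_PAGE_MASK), and the new HELPER(probe_access) splits a probe in two whenever the requested size does not fit in the current page. A self-contained sketch of that split, assuming a fixed 4 KiB page and hypothetical names (the real helpers take the page geometry from the target configuration):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_BITS 12                      /* hypothetical 4 KiB target page */
#define PAGE_SIZE ((uint64_t)1 << PAGE_BITS)
#define PAGE_MASK (~(PAGE_SIZE - 1))      /* analogue of TARGET_PAGE_MASK */

/* Bytes from 'ptr' to the end of its page; PAGE_SIZE when page-aligned.
 * This is the -(ptr | TARGET_PAGE_MASK) idiom used by st2g_stub. */
static uint64_t bytes_left_in_page(uint64_t ptr)
{
    return -(ptr | PAGE_MASK);
}

/* Probe an access, splitting it in two when it crosses a page boundary;
 * 'probe' stands in for the real probe_access() call. */
static void probe_split(uint64_t ptr, uint64_t size,
                        void (*probe)(uint64_t, uint64_t))
{
    uint64_t in_page = bytes_left_in_page(ptr);

    if (size <= in_page) {
        probe(ptr, size);
    } else {
        probe(ptr, in_page);
        probe(ptr + in_page, size - in_page);
    }
}

static void print_probe(uint64_t ptr, uint64_t size)
{
    printf("probe %#llx + %llu\n",
           (unsigned long long)ptr, (unsigned long long)size);
}

int main(void)
{
    assert(bytes_left_in_page(0x1000) == PAGE_SIZE);
    assert(bytes_left_in_page(0x1ff8) == 8);

    probe_split(0x1ff8, 16, print_probe);   /* splits into 8 bytes + 8 bytes */
    return 0;
}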
@@ -290,25 +289,25 @@ static inline uint64_t wswap64(uint64_t h) return rol64(h, 32); } -#define LOGICAL_PPPP(NAME, FUNC) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - uintptr_t opr_sz = simd_oprsz(desc); \ - uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \ - uintptr_t i; \ - for (i = 0; i < opr_sz / 8; ++i) { \ - d[i] = FUNC(n[i], m[i], g[i]); \ - } \ -} +#define LOGICAL_PPPP(NAME, FUNC) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + uintptr_t opr_sz = simd_oprsz(desc); \ + uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \ + uintptr_t i; \ + for (i = 0; i < opr_sz / 8; ++i) { \ + d[i] = FUNC(n[i], m[i], g[i]); \ + } \ + } -#define DO_AND(N, M, G) (((N) & (M)) & (G)) -#define DO_BIC(N, M, G) (((N) & ~(M)) & (G)) -#define DO_EOR(N, M, G) (((N) ^ (M)) & (G)) -#define DO_ORR(N, M, G) (((N) | (M)) & (G)) -#define DO_ORN(N, M, G) (((N) | ~(M)) & (G)) -#define DO_NOR(N, M, G) (~((N) | (M)) & (G)) +#define DO_AND(N, M, G) (((N) & (M)) & (G)) +#define DO_BIC(N, M, G) (((N) & ~(M)) & (G)) +#define DO_EOR(N, M, G) (((N) ^ (M)) & (G)) +#define DO_ORR(N, M, G) (((N) | (M)) & (G)) +#define DO_ORN(N, M, G) (((N) | ~(M)) & (G)) +#define DO_NOR(N, M, G) (~((N) | (M)) & (G)) #define DO_NAND(N, M, G) (~((N) & (M)) & (G)) -#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G))) +#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G))) LOGICAL_PPPP(sve_and_pppp, DO_AND) LOGICAL_PPPP(sve_bic_pppp, DO_BIC) @@ -337,49 +336,48 @@ LOGICAL_PPPP(sve_nand_pppp, DO_NAND) * extra care wrt byte/word ordering we could use gcc generic vectors * and do 16 bytes at a time. */ -#define DO_ZPZZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. */ -#define DO_ZPZZ_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *n = vn, *m = vm; \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE nn = n[i], mm = m[i]; \ - d[i] = OP(nn, mm); \ - } \ - } \ -} - -#define DO_AND(N, M) (N & M) -#define DO_EOR(N, M) (N ^ M) -#define DO_ORR(N, M) (N | M) -#define DO_BIC(N, M) (N & ~M) -#define DO_ADD(N, M) (N + M) -#define DO_SUB(N, M) (N - M) -#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) -#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) -#define DO_ABD(N, M) ((N) >= (M) ? 
(N) - (M) : (M) - (N)) -#define DO_MUL(N, M) (N * M) +#define DO_ZPZZ_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i], mm = m[i]; \ + d[i] = OP(nn, mm); \ + } \ + } \ + } +#define DO_AND(N, M) (N & M) +#define DO_EOR(N, M) (N ^ M) +#define DO_ORR(N, M) (N | M) +#define DO_BIC(N, M) (N & ~M) +#define DO_ADD(N, M) (N + M) +#define DO_SUB(N, M) (N - M) +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) +#define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N)) +#define DO_MUL(N, M) (N * M) /* * We must avoid the C undefined behaviour cases: division by @@ -431,20 +429,20 @@ DO_ZPZZ(sve_umax_zpzz_h, uint16_t, H1_2, DO_MAX) DO_ZPZZ(sve_umax_zpzz_s, uint32_t, H1_4, DO_MAX) DO_ZPZZ_D(sve_umax_zpzz_d, uint64_t, DO_MAX) -DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN) -DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN) -DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN) -DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN) +DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN) +DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN) DO_ZPZZ(sve_umin_zpzz_b, uint8_t, H1, DO_MIN) DO_ZPZZ(sve_umin_zpzz_h, uint16_t, H1_2, DO_MIN) DO_ZPZZ(sve_umin_zpzz_s, uint32_t, H1_4, DO_MIN) DO_ZPZZ_D(sve_umin_zpzz_d, uint64_t, DO_MIN) -DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD) -DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD) -DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD) -DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD) +DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD) +DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD) DO_ZPZZ(sve_uabd_zpzz_b, uint8_t, H1, DO_ABD) DO_ZPZZ(sve_uabd_zpzz_h, uint16_t, H1_2, DO_ABD) @@ -505,9 +503,9 @@ DO_ZPZZ_D(sve_udiv_zpzz_d, uint64_t, DO_UDIV) /* Note that all bits of the shift are significant and not modulo the element size. */ -#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1)) -#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0) -#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0) +#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1)) +#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0) +#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0) DO_ZPZZ(sve_asr_zpzz_b, int8_t, H1, DO_ASR) DO_ZPZZ(sve_lsr_zpzz_b, uint8_t, H1_2, DO_LSR) @@ -532,22 +530,22 @@ DO_ZPZZ_D(sve_lsl_zpzz_d, uint64_t, DO_LSL) * third operand is "wide". That is, for D = N op M, the same 64-bit * value of M is used with all of the narrower values of N. 
*/ -#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint8_t pg = *(uint8_t *)((char *)vg + H1(i >> 3)); \ - TYPEW mm = *(TYPEW *)((char *)vm + i); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 7); \ - } \ -} +#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint8_t pg = *(uint8_t *)((char *)vg + H1(i >> 3)); \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 7); \ + } \ + } DO_ZPZW(sve_asr_zpzw_b, int8_t, uint64_t, H1, DO_ASR) DO_ZPZW(sve_lsr_zpzw_b, uint8_t, uint64_t, H1, DO_LSR) @@ -565,47 +563,47 @@ DO_ZPZW(sve_lsl_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSL) /* Fully general two-operand expander, controlled by a predicate. */ -#define DO_ZPZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. 
*/ -#define DO_ZPZ_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *n = vn; \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE nn = n[i]; \ - d[i] = OP(nn); \ - } \ - } \ -} - -#define DO_CLS_B(N) (clrsb32(N) - 24) -#define DO_CLS_H(N) (clrsb32(N) - 16) +#define DO_ZPZ_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn); \ + } \ + } \ + } + +#define DO_CLS_B(N) (clrsb32(N) - 24) +#define DO_CLS_H(N) (clrsb32(N) - 16) DO_ZPZ(sve_cls_b, int8_t, H1, DO_CLS_B) DO_ZPZ(sve_cls_h, int16_t, H1_2, DO_CLS_H) DO_ZPZ(sve_cls_s, int32_t, H1_4, clrsb32) DO_ZPZ_D(sve_cls_d, int64_t, clrsb64) -#define DO_CLZ_B(N) (clz32(N) - 24) -#define DO_CLZ_H(N) (clz32(N) - 16) +#define DO_CLZ_B(N) (clz32(N) - 24) +#define DO_CLZ_H(N) (clz32(N) - 16) DO_ZPZ(sve_clz_b, uint8_t, H1, DO_CLZ_B) DO_ZPZ(sve_clz_h, uint16_t, H1_2, DO_CLZ_H) @@ -617,7 +615,7 @@ DO_ZPZ(sve_cnt_zpz_h, uint16_t, H1_2, ctpop16) DO_ZPZ(sve_cnt_zpz_s, uint32_t, H1_4, ctpop32) DO_ZPZ_D(sve_cnt_zpz_d, uint64_t, ctpop64) -#define DO_CNOT(N) (N == 0) +#define DO_CNOT(N) (N == 0) DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT) DO_ZPZ(sve_cnot_h, uint16_t, H1_2, DO_CNOT) @@ -625,15 +623,15 @@ DO_ZPZ(sve_cnot_s, uint32_t, H1_4, DO_CNOT) DO_ZPZ_D(sve_cnot_d, uint64_t, DO_CNOT) #ifdef _MSC_VER -#define DO_FABS16(N) (N & ((uint16_t)-1 >> 1)) -#define DO_FABS32(N) (N & ((uint32_t)-1 >> 1)) -#define DO_FABS64(N) (N & ((uint64_t)-1 >> 1)) +#define DO_FABS16(N) (N & ((uint16_t)-1 >> 1)) +#define DO_FABS32(N) (N & ((uint32_t)-1 >> 1)) +#define DO_FABS64(N) (N & ((uint64_t)-1 >> 1)) DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS16) DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS32) DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS64) #else -#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1)) +#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1)) DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS) DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS) @@ -641,34 +639,34 @@ DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS) #endif #ifdef _MSC_VER -#define DO_FNEG16(N) (N ^ ~((uint16_t)-1 >> 1)) -#define DO_FNEG32(N) (N ^ ~((uint32_t)-1 >> 1)) -#define DO_FNEG64(N) (N ^ ~((uint64_t)-1 >> 1)) +#define DO_FNEG16(N) (N ^ ~((uint16_t)-1 >> 1)) +#define DO_FNEG32(N) (N ^ ~((uint32_t)-1 >> 1)) +#define DO_FNEG64(N) (N ^ ~((uint64_t)-1 >> 1)) DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG16) DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG32) DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG64) #else -#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) +#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1)) DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG) DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG) DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG) #endif -#define DO_NOT(N) (~N) +#define DO_NOT(N) (~N) DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT) DO_ZPZ(sve_not_zpz_h, uint16_t, H1_2, DO_NOT) DO_ZPZ(sve_not_zpz_s, uint32_t, H1_4, DO_NOT) DO_ZPZ_D(sve_not_zpz_d, uint64_t, DO_NOT) -#define DO_SXTB(N) ((int8_t)N) -#define DO_SXTH(N) ((int16_t)N) -#define DO_SXTS(N) ((int32_t)N) -#define DO_UXTB(N) ((uint8_t)N) -#define DO_UXTH(N) ((uint16_t)N) -#define DO_UXTS(N) ((uint32_t)N) +#define DO_SXTB(N) ((int8_t)N) +#define DO_SXTH(N) ((int16_t)N) +#define DO_SXTS(N) ((int32_t)N) +#define DO_UXTB(N) ((uint8_t)N) 
+#define DO_UXTH(N) ((uint16_t)N) +#define DO_UXTS(N) ((uint32_t)N) DO_ZPZ(sve_sxtb_h, uint16_t, H1_2, DO_SXTB) DO_ZPZ(sve_sxtb_s, uint32_t, H1_4, DO_SXTB) @@ -685,9 +683,9 @@ DO_ZPZ_D(sve_uxth_d, uint64_t, DO_UXTH) DO_ZPZ_D(sve_uxtw_d, uint64_t, DO_UXTS) #ifdef _MSC_VER -#define DO_ABS(N) (N < 0 ? (0 - N) : N) +#define DO_ABS(N) (N < 0 ? (0 - N) : N) #else -#define DO_ABS(N) (N < 0 ? -N : N) +#define DO_ABS(N) (N < 0 ? -N : N) #endif DO_ZPZ(sve_abs_b, int8_t, H1, DO_ABS) @@ -696,9 +694,9 @@ DO_ZPZ(sve_abs_s, int32_t, H1_4, DO_ABS) DO_ZPZ_D(sve_abs_d, int64_t, DO_ABS) #ifdef _MSC_VER -#define DO_NEG(N) (0 - N) +#define DO_NEG(N) (0 - N) #else -#define DO_NEG(N) (-N) +#define DO_NEG(N) (-N) #endif DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG) @@ -722,19 +720,19 @@ DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64) /* Three-operand expander, unpredicated, in which the third operand is "wide". */ -#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - TYPEW mm = *(TYPEW *)((char *)vm + i); \ - do { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ - i += sizeof(TYPE); \ - } while (i & 7); \ - } \ -} +#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + TYPEW mm = *(TYPEW *)((char *)vm + i); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm); \ + i += sizeof(TYPE); \ + } while (i & 7); \ + } \ + } DO_ZZW(sve_asr_zzw_b, int8_t, uint64_t, H1, DO_ASR) DO_ZZW(sve_lsr_zzw_b, uint8_t, uint64_t, H1, DO_LSR) @@ -771,39 +769,39 @@ DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL) /* ??? If we were to vectorize this by hand the reduction ordering * would change. For integer operands, this is perfectly fine. 
*/ -#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \ -uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - TYPERED ret = INIT; \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPEELT nn = *(TYPEELT *)((char *)vn + H(i)); \ - ret = OP(ret, nn); \ - } \ - i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \ - } while (i & 15); \ - } \ - return (TYPERET)ret; \ -} - -#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \ -uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPEE *n = vn; \ - uint8_t *pg = vg; \ - TYPER ret = INIT; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPEE nn = n[i]; \ - ret = OP(ret, nn); \ - } \ - } \ - return ret; \ -} +#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \ + uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPERED ret = INIT; \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPEELT nn = *(TYPEELT *)((char *)vn + H(i)); \ + ret = OP(ret, nn); \ + } \ + i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \ + } while (i & 15); \ + } \ + return (TYPERET)ret; \ + } + +#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \ + uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPEE *n = vn; \ + uint8_t *pg = vg; \ + TYPER ret = INIT; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPEE nn = n[i]; \ + ret = OP(ret, nn); \ + } \ + } \ + return ret; \ + } DO_VPZ(sve_orv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_ORR) DO_VPZ(sve_orv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_ORR) @@ -853,17 +851,17 @@ DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN) #undef DO_VPZ_D /* Two vector operand, one scalar operand, unpredicated. */ -#define DO_ZZI(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \ - TYPE s = s64, *d = vd, *n = vn; \ - for (i = 0; i < opr_sz; ++i) { \ - d[i] = OP(n[i], s); \ - } \ -} +#define DO_ZZI(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \ + TYPE s = s64, *d = vd, *n = vn; \ + for (i = 0; i < opr_sz; ++i) { \ + d[i] = OP(n[i], s); \ + } \ + } -#define DO_SUBR(X, Y) (Y - X) +#define DO_SUBR(X, Y) (Y - X) DO_ZZI(sve_subri_b, uint8_t, DO_SUBR) DO_ZZI(sve_subri_h, uint16_t, DO_SUBR) @@ -1094,49 +1092,49 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc) /* Three-operand expander, immediate operand, controlled by a predicate. 
*/ -#define DO_ZPZI(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - TYPE imm = simd_data(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, imm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZI(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE imm = simd_data(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, imm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. */ -#define DO_ZPZI_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *n = vn; \ - TYPE imm = simd_data(desc); \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE nn = n[i]; \ - d[i] = OP(nn, imm); \ - } \ - } \ -} - -#define DO_SHR(N, M) (N >> M) -#define DO_SHL(N, M) (N << M) +#define DO_ZPZI_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *n = vn; \ + TYPE imm = simd_data(desc); \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE nn = n[i]; \ + d[i] = OP(nn, imm); \ + } \ + } \ + } + +#define DO_SHR(N, M) (N >> M) +#define DO_SHL(N, M) (N << M) /* Arithmetic shift right for division. This rounds negative numbers toward zero as per signed division. Therefore before shifting, when N is negative, add 2**M-1. */ #ifdef _MSC_VER - #define DO_ASRD(N, M) ((N + (N < 0 ? (1 << M) - 1 : 0)) >> M) +#define DO_ASRD(N, M) ((N + (N < 0 ? (1 << M) - 1 : 0)) >> M) #else - #define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) +#define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M) #endif DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR) @@ -1167,43 +1165,43 @@ DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD) /* Fully general four-operand expander, controlled by a predicate. 
*/ -#define DO_ZPZZZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ - void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - for (i = 0; i < opr_sz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - if (pg & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - TYPE aa = *(TYPE *)((char *)va + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(aa, nn, mm); \ - } \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ -} +#define DO_ZPZZZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + TYPE aa = *(TYPE *)((char *)va + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(aa, nn, mm); \ + } \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + } /* Similarly, specialized for 64-bit operands. */ -#define DO_ZPZZZ_D(NAME, TYPE, OP) \ -void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \ - void *vg, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ - TYPE *d = vd, *a = va, *n = vn, *m = vm; \ - uint8_t *pg = vg; \ - for (i = 0; i < opr_sz; i += 1) { \ - if (pg[H1(i)] & 1) { \ - TYPE aa = a[i], nn = n[i], mm = m[i]; \ - d[i] = OP(aa, nn, mm); \ - } \ - } \ -} - -#define DO_MLA(A, N, M) (A + N * M) -#define DO_MLS(A, N, M) (A - N * M) +#define DO_ZPZZZ_D(NAME, TYPE, OP) \ + void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; \ + TYPE *d = vd, *a = va, *n = vn, *m = vm; \ + uint8_t *pg = vg; \ + for (i = 0; i < opr_sz; i += 1) { \ + if (pg[H1(i)] & 1) { \ + TYPE aa = a[i], nn = n[i], mm = m[i]; \ + d[i] = OP(aa, nn, mm); \ + } \ + } \ + } + +#define DO_MLA(A, N, M) (A + N * M) +#define DO_MLS(A, N, M) (A - N * M) DO_ZPZZZ(sve_mla_b, uint8_t, H1, DO_MLA) DO_ZPZZZ(sve_mls_b, uint8_t, H1, DO_MLS) @@ -1222,8 +1220,7 @@ DO_ZPZZZ_D(sve_mls_d, uint64_t, DO_MLS) #undef DO_ZPZZZ #undef DO_ZPZZZ_D -void HELPER(sve_index_b)(void *vd, uint32_t start, - uint32_t incr, uint32_t desc) +void HELPER(sve_index_b)(void *vd, uint32_t start, uint32_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc); uint8_t *d = vd; @@ -1232,8 +1229,7 @@ void HELPER(sve_index_b)(void *vd, uint32_t start, } } -void HELPER(sve_index_h)(void *vd, uint32_t start, - uint32_t incr, uint32_t desc) +void HELPER(sve_index_h)(void *vd, uint32_t start, uint32_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 2; uint16_t *d = vd; @@ -1242,8 +1238,7 @@ void HELPER(sve_index_h)(void *vd, uint32_t start, } } -void HELPER(sve_index_s)(void *vd, uint32_t start, - uint32_t incr, uint32_t desc) +void HELPER(sve_index_s)(void *vd, uint32_t start, uint32_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 4; uint32_t *d = vd; @@ -1252,8 +1247,7 @@ void HELPER(sve_index_s)(void *vd, uint32_t start, } } -void HELPER(sve_index_d)(void *vd, uint64_t start, - uint64_t incr, uint32_t desc) +void HELPER(sve_index_d)(void *vd, uint64_t start, uint64_t incr, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd; @@ -1326,22 +1320,16 @@ void HELPER(sve_fexpa_s)(void *vd, void *vn, uint32_t desc) { /* These constants are 
cut-and-paste directly from the ARM pseudocode. */ static const uint32_t coeff[] = { - 0x000000, 0x0164d2, 0x02cd87, 0x043a29, - 0x05aac3, 0x071f62, 0x08980f, 0x0a14d5, - 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, - 0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d, - 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, - 0x1ef532, 0x20b051, 0x227043, 0x243516, - 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, - 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, - 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b, - 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, - 0x45672a, 0x478d75, 0x49b9be, 0x4bec15, - 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, - 0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5, - 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, - 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, - 0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c, + 0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f, + 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b, + 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532, + 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, + 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf, + 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75, + 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd, + 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, + 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3, + 0x7d3e0c, }; intptr_t i, opr_sz = simd_oprsz(desc) / 4; uint32_t *d = vd, *n = vn; @@ -1573,8 +1561,8 @@ void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc) /* Two operand predicated copy immediate with merge. All valid immediates * can fit within 17 signed bits in the simd_data field. */ -void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1588,8 +1576,8 @@ void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg, } } -void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1603,8 +1591,8 @@ void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg, } } -void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1618,8 +1606,8 @@ void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg, } } -void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg, - uint64_t mm, uint32_t desc) +void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg, uint64_t mm, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn; @@ -1678,7 +1666,7 @@ void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc) } } -/* Big-endian hosts need to frob the byte indicies. If the copy +/* Big-endian hosts need to frob the byte indices. If the copy * happens to be 8-byte aligned, then no frobbing necessary. 
*/ static void swap_memmove(void *vd, void *vs, size_t n) @@ -1702,7 +1690,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); } } else { - for (i = n; i > 0; ) { + for (i = n; i > 0;) { i -= 4; *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i); } @@ -1716,7 +1704,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); } } else { - for (i = n; i > 0; ) { + for (i = n; i > 0;) { i -= 2; *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i); } @@ -1729,7 +1717,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); } } else { - for (i = n; i > 0; ) { + for (i = n; i > 0;) { i -= 1; *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i); } @@ -1800,13 +1788,13 @@ void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc) } } -#define DO_INSR(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - swap_memmove((char *)vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \ - *(TYPE *)((char *)vd + H(0)) = val; \ -} +#define DO_INSR(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + swap_memmove((char *)vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \ + *(TYPE *)((char *)vd + H(0)) = val; \ + } DO_INSR(sve_insr_b, uint8_t, H1) DO_INSR(sve_insr_h, uint16_t, H1_2) @@ -1859,21 +1847,21 @@ void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc) } } -#define DO_TBL(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - uintptr_t elem = opr_sz / sizeof(TYPE); \ - TYPE *d = vd, *n = vn, *m = vm; \ - ARMVectorReg tmp; \ - if (unlikely(vd == vn)) { \ - n = memcpy(&tmp, vn, opr_sz); \ - } \ - for (i = 0; i < elem; i++) { \ - TYPE j = m[H(i)]; \ - d[H(i)] = j < elem ? n[H(j)] : 0; \ - } \ -} +#define DO_TBL(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + uintptr_t elem = opr_sz / sizeof(TYPE); \ + TYPE *d = vd, *n = vn, *m = vm; \ + ARMVectorReg tmp; \ + if (unlikely(vd == vn)) { \ + n = memcpy(&tmp, vn, opr_sz); \ + } \ + for (i = 0; i < elem; i++) { \ + TYPE j = m[H(i)]; \ + d[H(i)] = j < elem ? n[H(j)] : 0; \ + } \ + } DO_TBL(sve_tbl_b, uint8_t, H1) DO_TBL(sve_tbl_h, uint16_t, H2) @@ -1882,20 +1870,20 @@ DO_TBL(sve_tbl_d, uint64_t, ) #undef TBL -#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \ -void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ -{ \ - intptr_t i, opr_sz = simd_oprsz(desc); \ - TYPED *d = vd; \ - TYPES *n = vn; \ - ARMVectorReg tmp; \ - if (unlikely((char *)vn - (char *)vd < opr_sz)) { \ - n = memcpy(&tmp, n, opr_sz / 2); \ - } \ - for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \ - d[HD(i)] = n[HS(i)]; \ - } \ -} +#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \ + void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ + { \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPED *d = vd; \ + TYPES *n = vn; \ + ARMVectorReg tmp; \ + if (unlikely((char *)vn - (char *)vd < opr_sz)) { \ + n = memcpy(&tmp, n, opr_sz / 2); \ + } \ + for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \ + d[HD(i)] = n[HS(i)]; \ + } \ + } DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1) DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2) @@ -1912,11 +1900,8 @@ DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4) * same pattern out to 16-bit units. 
*/ static const uint64_t even_bit_esz_masks[5] = { - 0x5555555555555555ull, - 0x3333333333333333ull, - 0x0f0f0f0f0f0f0f0full, - 0x00ff00ff00ff00ffull, - 0x0000ffff0000ffffull, + 0x5555555555555555ull, 0x3333333333333333ull, 0x0f0f0f0f0f0f0f0full, + 0x00ff00ff00ff00ffull, 0x0000ffff0000ffffull, }; /* Zero-extend units of 2**N bits to units of 2**(N+1) bits. @@ -2112,7 +2097,7 @@ static uint64_t reverse_bits_64(uint64_t x, int n) static uint8_t reverse_bits_8(uint8_t x, int n) { - static const uint8_t mask[3] = { 0x55, 0x33, 0x0f }; + static const uint8_t mask[3] = {0x55, 0x33, 0x0f}; int i, sh; for (i = 2, sh = 4; i >= n; i--, sh >>= 1) { @@ -2197,68 +2182,72 @@ void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc) } } -#define DO_ZIP(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - intptr_t i, oprsz_2 = oprsz / 2; \ - ARMVectorReg tmp_n, tmp_m; \ - /* We produce output faster than we consume input. \ - Therefore we must be mindful of possible overlap. */ \ - if (unlikely(((char *)vn - (char *)vd) < (uintptr_t)oprsz)) { \ - vn = memcpy(&tmp_n, vn, oprsz_2); \ - } \ - if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ - vm = memcpy(&tmp_m, vm, oprsz_2); \ - } \ - for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)vd + H(2 * i + 0)) = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)((char *)vm + H(i)); \ - } \ -} +#define DO_ZIP(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t i, oprsz_2 = oprsz / 2; \ + ARMVectorReg tmp_n, tmp_m; \ + /* We produce output faster than we consume input. \ + Therefore we must be mindful of possible overlap. 
*/ \ + if (unlikely(((char *)vn - (char *)vd) < (uintptr_t)oprsz)) { \ + vn = memcpy(&tmp_n, vn, oprsz_2); \ + } \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz_2); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(2 * i + 0)) = \ + *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(2 * i + sizeof(TYPE))) = \ + *(TYPE *)((char *)vm + H(i)); \ + } \ + } DO_ZIP(sve_zip_b, uint8_t, H1) DO_ZIP(sve_zip_h, uint16_t, H1_2) DO_ZIP(sve_zip_s, uint32_t, H1_4) DO_ZIP(sve_zip_d, uint64_t, ) -#define DO_UZP(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - intptr_t oprsz_2 = oprsz / 2; \ - intptr_t odd_ofs = simd_data(desc); \ - intptr_t i; \ - ARMVectorReg tmp_m; \ - if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ - vm = memcpy(&tmp_m, vm, oprsz); \ - } \ - for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)vd + H(i)) = *(TYPE *)((char *)vn + H(2 * i + odd_ofs)); \ - } \ - for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)vd + H(oprsz_2 + i)) = *(TYPE *)((char *)vm + H(2 * i + odd_ofs)); \ - } \ -} +#define DO_UZP(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t oprsz_2 = oprsz / 2; \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + ARMVectorReg tmp_m; \ + if (unlikely(((char *)vm - (char *)vd) < (uintptr_t)oprsz)) { \ + vm = memcpy(&tmp_m, vm, oprsz); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(i)) = \ + *(TYPE *)((char *)vn + H(2 * i + odd_ofs)); \ + } \ + for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)vd + H(oprsz_2 + i)) = \ + *(TYPE *)((char *)vm + H(2 * i + odd_ofs)); \ + } \ + } DO_UZP(sve_uzp_b, uint8_t, H1) DO_UZP(sve_uzp_h, uint16_t, H1_2) DO_UZP(sve_uzp_s, uint32_t, H1_4) DO_UZP(sve_uzp_d, uint64_t, ) -#define DO_TRN(NAME, TYPE, H) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ -{ \ - intptr_t oprsz = simd_oprsz(desc); \ - intptr_t odd_ofs = simd_data(desc); \ - intptr_t i; \ - for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \ - TYPE ae = *(TYPE *)((char *)vn + H(i + odd_ofs)); \ - TYPE be = *(TYPE *)((char *)vm + H(i + odd_ofs)); \ - *(TYPE *)((char *)vd + H(i + 0)) = ae; \ - *(TYPE *)((char *)vd + H(i + sizeof(TYPE))) = be; \ - } \ -} +#define DO_TRN(NAME, TYPE, H) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ + { \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t odd_ofs = simd_data(desc); \ + intptr_t i; \ + for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \ + TYPE ae = *(TYPE *)((char *)vn + H(i + odd_ofs)); \ + TYPE be = *(TYPE *)((char *)vm + H(i + odd_ofs)); \ + *(TYPE *)((char *)vd + H(i + 0)) = ae; \ + *(TYPE *)((char *)vd + H(i + sizeof(TYPE))) = be; \ + } \ + } DO_TRN(sve_trn_b, uint8_t, H1) DO_TRN(sve_trn_h, uint16_t, H1_2) @@ -2352,8 +2341,8 @@ void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) swap_memmove((char *)vd + len, vm, opr_sz * 8 - len); } -void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2366,8 +2355,8 @@ void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm, } } -void HELPER(sve_sel_zpzz_h)(void *vd, void 
*vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2380,8 +2369,8 @@ void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm, } } -void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2394,8 +2383,8 @@ void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm, } } -void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, - void *vg, uint32_t desc) +void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, void *vg, + uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc) / 8; uint64_t *d = vd, *n = vn, *m = vm; @@ -2428,63 +2417,64 @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, * a scalar output, and also handles the byte-ordering of sub-uint64_t * scalar outputs, is tricky. */ -#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \ -uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - uint32_t flags = PREDTEST_INIT; \ - intptr_t i = opr_sz; \ - do { \ - uint64_t out = 0, pg; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - out |= nn OP mm; \ - } while (i & 63); \ - pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ - out &= pg; \ - *(uint64_t *)((char *)vd + (i >> 3)) = out; \ - flags = iter_predtest_bwd(out, pg, flags); \ - } while (i > 0); \ - return flags; \ -} - -#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \ - DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull) -#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \ +#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \ + uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ + } + +#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \ DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) -#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ +#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \ DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) -#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ - DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) +#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \ + DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull) -DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) +DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==) DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==) DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==) DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==) -DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=) +DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=) DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=) DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=) DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=) 
-DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >) +DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >) DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >) DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >) DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >) -DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=) +DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=) DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=) DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=) DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=) -DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >) +DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >) DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >) DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >) DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >) -DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=) +DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=) DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=) DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=) DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=) @@ -2496,74 +2486,75 @@ DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=) #undef DO_CMP_PPZZ /* Similar, but the second source is "wide". */ -#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \ -uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - uint32_t flags = PREDTEST_INIT; \ - intptr_t i = opr_sz; \ - do { \ - uint64_t out = 0, pg; \ - do { \ - TYPEW mm = *(TYPEW *)((char *)vm + i - 8); \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - out |= nn OP mm; \ - } while (i & 7); \ - } while (i & 63); \ - pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ - out &= pg; \ - *(uint64_t *)((char *)vd + (i >> 3)) = out; \ - flags = iter_predtest_bwd(out, pg, flags); \ - } while (i > 0); \ - return flags; \ -} - -#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \ - DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull) -#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \ +#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \ + uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + TYPEW mm = *(TYPEW *)((char *)vm + i - 8); \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 7); \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ + } + +#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \ + DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull) -#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \ +#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull) -DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==) +DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==) DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, int16_t, uint64_t, ==) DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, int32_t, uint64_t, ==) -DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=) +DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=) DO_CMP_PPZW_H(sve_cmpne_ppzw_h, int16_t, uint64_t, !=) DO_CMP_PPZW_S(sve_cmpne_ppzw_s, int32_t, uint64_t, !=) -DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >) -DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >) 
-DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >) +DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >) +DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >) +DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >) -DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=) -DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=) -DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=) +DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=) +DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=) +DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=) -DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >) +DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >) DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >) DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >) -DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=) +DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=) DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=) DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=) -DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <) -DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <) -DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <) +DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <) +DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <) +DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <) -DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=) -DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=) -DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=) +DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=) +DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=) +DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=) -DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <) +DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <) DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <) DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <) -DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=) +DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=) DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=) DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=) @@ -2573,83 +2564,83 @@ DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=) #undef DO_CMP_PPZW /* Similar, but the second source is immediate. 
*/ -#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \ -uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ -{ \ - intptr_t opr_sz = simd_oprsz(desc); \ - uint32_t flags = PREDTEST_INIT; \ - TYPE mm = simd_data(desc); \ - intptr_t i = opr_sz; \ - do { \ - uint64_t out = 0, pg; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - out |= nn OP mm; \ - } while (i & 63); \ - pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ - out &= pg; \ - *(uint64_t *)((char *)vd + (i >> 3)) = out; \ - flags = iter_predtest_bwd(out, pg, flags); \ - } while (i > 0); \ - return flags; \ -} - -#define DO_CMP_PPZI_B(NAME, TYPE, OP) \ - DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull) -#define DO_CMP_PPZI_H(NAME, TYPE, OP) \ +#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \ + uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \ + { \ + intptr_t opr_sz = simd_oprsz(desc); \ + uint32_t flags = PREDTEST_INIT; \ + TYPE mm = simd_data(desc); \ + intptr_t i = opr_sz; \ + do { \ + uint64_t out = 0, pg; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= nn OP mm; \ + } while (i & 63); \ + pg = *(uint64_t *)((char *)vg + (i >> 3)) & MASK; \ + out &= pg; \ + *(uint64_t *)((char *)vd + (i >> 3)) = out; \ + flags = iter_predtest_bwd(out, pg, flags); \ + } while (i > 0); \ + return flags; \ + } + +#define DO_CMP_PPZI_B(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull) +#define DO_CMP_PPZI_H(NAME, TYPE, OP) \ DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull) -#define DO_CMP_PPZI_S(NAME, TYPE, OP) \ +#define DO_CMP_PPZI_S(NAME, TYPE, OP) \ DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull) -#define DO_CMP_PPZI_D(NAME, TYPE, OP) \ - DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) +#define DO_CMP_PPZI_D(NAME, TYPE, OP) \ + DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull) -DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) +DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==) DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==) DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==) DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==) -DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=) +DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=) DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=) DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=) DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=) -DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >) +DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >) DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >) DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >) DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >) -DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=) +DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=) DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=) DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=) DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=) -DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >) +DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >) DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >) DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >) DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >) -DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=) +DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=) DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=) DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=) DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=) -DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <) +DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <) DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <) DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <) DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <) 
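For orientation when tracing the reformatted macro above, this is one instantiation expanded by hand from the DO_CMP_PPZI and DO_CMP_PPZI_B text in this hunk. It is not standalone code; it still relies on the QEMU-internal HELPER, simd_oprsz, simd_data, H1, PREDTEST_INIT and iter_predtest_bwd, and differs from the PPZZ form only in taking the immediate operand from the descriptor instead of a second vector.

uint32_t HELPER(sve_cmpeq_ppzi_b)(void *vd, void *vn, void *vg, uint32_t desc)
{
    intptr_t opr_sz = simd_oprsz(desc);
    uint32_t flags = PREDTEST_INIT;
    uint8_t mm = simd_data(desc);      /* immediate operand, carried in desc */
    intptr_t i = opr_sz;
    do {
        uint64_t out = 0, pg;
        do {
            i -= sizeof(uint8_t), out <<= sizeof(uint8_t);
            uint8_t nn = *(uint8_t *)((char *)vn + H1(i));
            out |= nn == mm;
        } while (i & 63);
        pg = *(uint64_t *)((char *)vg + (i >> 3)) & 0xffffffffffffffffull;
        out &= pg;
        *(uint64_t *)((char *)vd + (i >> 3)) = out;
        flags = iter_predtest_bwd(out, pg, flags);
    } while (i > 0);
    return flags;
}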
-DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=) +DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=) DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=) DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=) DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=) -DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <) +DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <) DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <) DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <) DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <) -DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=) +DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=) DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=) DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=) DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=) @@ -2678,8 +2669,8 @@ static bool last_active_pred(void *vd, void *vg, intptr_t oprsz) * (if after) or excluding (if !after) the first G & N. * Return true if BRK found. */ -static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, - bool brk, bool after) +static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, bool brk, + bool after) { uint64_t b; @@ -2690,16 +2681,16 @@ static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, b = g; } else { /* Break somewhere in N. Locate it. */ - b = g & n; /* guard true, pred true */ + b = g & n; /* guard true, pred true */ #ifdef _MSC_VER - b = b & (0 - b); /* first such */ + b = b & (0 - b); /* first such */ #else - b = b & -b; /* first such */ + b = b & -b; /* first such */ #endif if (after) { - b = b | (b - 1); /* break after same */ + b = b | (b - 1); /* break after same */ } else { - b = b - 1; /* break before same */ + b = b - 1; /* break before same */ } brk = true; } @@ -2709,8 +2700,8 @@ static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g, } /* Compute a zeroing BRK. */ -static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g, - intptr_t oprsz, bool after) +static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g, intptr_t oprsz, + bool after) { bool brk = false; intptr_t i; @@ -2742,8 +2733,8 @@ static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g, } /* Compute a merging BRK. */ -static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g, - intptr_t oprsz, bool after) +static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g, intptr_t oprsz, + bool after) { bool brk = false; intptr_t i; @@ -2960,61 +2951,61 @@ uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc) * The recursion is bounded to depth 7 (128 fp16 elements), so there's * little to gain with a more complex non-recursive form. */ -#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ -static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ -{ \ - if (n == 1) { \ - return *data; \ - } else { \ - uintptr_t half = n / 2; \ - TYPE lo = NAME##_reduce(data, status, half); \ - TYPE hi = NAME##_reduce(data + half, status, half); \ - return TYPE##_##FUNC(lo, hi, status); \ - } \ -} \ -uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ -{ \ - uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ - TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ - do { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)data + i) = (pg & 1 ? 
nn : IDENT); \ - i += sizeof(TYPE), pg >>= sizeof(TYPE); \ - } while (i & 15); \ - } \ - for (; i < maxsz; i += sizeof(TYPE)) { \ - *(TYPE *)((char *)data + i) = IDENT; \ - } \ - return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ -} +#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \ + static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \ + { \ + if (n == 1) { \ + return *data; \ + } else { \ + uintptr_t half = n / 2; \ + TYPE lo = NAME##_reduce(data, status, half); \ + TYPE hi = NAME##_reduce(data + half, status, half); \ + return TYPE##_##FUNC(lo, hi, status); \ + } \ + } \ + uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \ + { \ + uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \ + TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + for (i = 0; i < oprsz;) { \ + uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); \ + do { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)data + i) = (pg & 1 ? nn : IDENT); \ + i += sizeof(TYPE), pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ + for (; i < maxsz; i += sizeof(TYPE)) { \ + *(TYPE *)((char *)data + i) = IDENT; \ + } \ + return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \ + } DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero) DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero) -DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) +DO_REDUCE(sve_faddv_d, float64, , add, float64_zero) /* Identity is floatN_default_nan, without the function call. */ DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00) DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000) -DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) +DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL) DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00) DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000) -DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) +DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL) DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity) DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity) -DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) +DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity) DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity)) DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity)) -DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) +DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity)) #undef DO_REDUCE -uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) +uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, void *status, + uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc); float16 result = nn; @@ -3033,8 +3024,8 @@ uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg, return result; } -uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) +uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, void *status, + uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc); float32 result = nn; @@ -3053,8 +3044,8 @@ uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg, return result; } -uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, - void *status, uint32_t desc) +uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, void *status, + uint32_t desc) { intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8; uint64_t *m = vm; @@ -3072,56 +3063,56 @@ 
uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg, /* Fully general three-operand expander, controlled by a predicate, * With the extra float_status parameter. */ -#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc); \ - uint64_t *g = vg; \ - do { \ - uint64_t pg = g[(i - 1) >> 6]; \ - do { \ - i -= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ - } \ - } while (i & 63); \ - } while (i != 0); \ -} +#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ + } DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) -DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) +DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add) DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) -DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) +DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub) DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) -DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) +DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul) DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) -DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) +DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div) DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) -DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) +DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min) DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) -DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) +DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max) DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) -DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) +DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum) DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) -DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) +DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum) static inline float16 abd_h(float16 a, float16 b, float_status *s) { @@ -3140,7 +3131,7 @@ static inline float64 abd_d(float64 a, float64 b, float_status *s) DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h) DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s) -DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) +DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d) static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) { @@ -3150,47 +3141,47 @@ static inline float64 scalbn_d(float64 a, int64_t b, float_status *s) DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn) DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn) -DO_ZPZZ_FP(sve_fscalbn_d, int64_t, , scalbn_d) +DO_ZPZZ_FP(sve_fscalbn_d, 
int64_t, , scalbn_d) DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh) DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs) -DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) +DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd) #undef DO_ZPZZ_FP /* Three-operand expander, with one scalar operand, controlled by * a predicate, with the extra float_status parameter. */ -#define DO_ZPZS_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc); \ - uint64_t *g = vg; \ - TYPE mm = scalar; \ - do { \ - uint64_t pg = g[(i - 1) >> 6]; \ - do { \ - i -= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ - } \ - } while (i & 63); \ - } while (i != 0); \ -} +#define DO_ZPZS_FP(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \ + void *status, uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + TYPE mm = scalar; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, mm, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ + } DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add) DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add) -DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) +DO_ZPZS_FP(sve_fadds_d, float64, , float64_add) DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub) DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub) -DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) +DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub) DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul) DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul) -DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) +DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul) static inline float16 subr_h(float16 a, float16 b, float_status *s) { @@ -3209,43 +3200,44 @@ static inline float64 subr_d(float64 a, float64 b, float_status *s) DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h) DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s) -DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) +DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d) DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum) DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum) -DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) +DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum) DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum) DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum) -DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) +DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum) DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max) DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max) -DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) +DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max) DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min) DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min) -DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) +DO_ZPZS_FP(sve_fmins_d, float64, , float64_min) /* Fully general two-operand expander, controlled by a predicate, * With the extra float_status parameter. 
*/ -#define DO_ZPZ_FP(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc); \ - uint64_t *g = vg; \ - do { \ - uint64_t pg = g[(i - 1) >> 6]; \ - do { \ - i -= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - *(TYPE *)((char *)vd + H(i)) = OP(nn, status); \ - } \ - } while (i & 63); \ - } while (i != 0); \ -} +#define DO_ZPZ_FP(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc); \ + uint64_t *g = vg; \ + do { \ + uint64_t pg = g[(i - 1) >> 6]; \ + do { \ + i -= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + *(TYPE *)((char *)vd + H(i)) = OP(nn, status); \ + } \ + } while (i & 63); \ + } while (i != 0); \ + } /* SVE fp16 conversions always use IEEE mode. Like AdvSIMD, they ignore * FZ16. When converting from fp16, this affects flushing input denormals; @@ -3253,7 +3245,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \ */ static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) { - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); float32 ret; set_flush_inputs_to_zero(false, fpst); @@ -3264,7 +3256,7 @@ static inline float32 sve_f16_to_f32(float16 f, float_status *fpst) static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) { - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); float64 ret; set_flush_inputs_to_zero(false, fpst); @@ -3275,7 +3267,7 @@ static inline float64 sve_f16_to_f64(float16 f, float_status *fpst) static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) { - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); float16 ret; set_flush_to_zero(false, fpst); @@ -3286,7 +3278,7 @@ static inline float16 sve_f32_to_f16(float32 f, float_status *fpst) static inline float16 sve_f64_to_f16(float64 f, float_status *fpst) { - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); float16 ret; set_flush_to_zero(false, fpst); @@ -3369,78 +3361,66 @@ static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s) DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16) DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32) -DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) -DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) -DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) -DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) +DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16) +DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64) +DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32) +DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64) DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz) DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh) DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs) -DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) -DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) -DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) -DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, , vfp_float64_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz) +DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd) +DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, , 
vfp_float64_to_int64_rtz) DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz) DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh) DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs) -DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) -DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) -DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) -DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz) +DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd) +DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz) DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth) DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints) -DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) +DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd) DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int) DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int) -DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) +DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int) DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16) DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32) -DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) +DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64) DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt) DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt) -DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) +DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt) DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16) DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16) DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32) -DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) -DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) -DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) -DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) +DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64) +DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16) +DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32) +DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64) DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16) DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16) DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32) -DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) -DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) -DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) -DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) +DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64) +DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16) +DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32) +DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) #undef DO_ZPZ_FP -/* 4-operand predicated multiply-add. This requires 7 operands to pass - * "properly", so we need to encode some of the registers into DESC. 
- */ -QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 20 > 32); - -static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc, - uint16_t neg1, uint16_t neg3) +static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint16_t neg1, + uint16_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3453,45 +3433,42 @@ static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint16_t *)((char *)vn + H1_2(i)) ^ neg1; e2 = *(uint16_t *)((char *)vm + H1_2(i)); e3 = *(uint16_t *)((char *)va + H1_2(i)) ^ neg3; - r = float16_muladd(e1, e2, e3, 0, &env->vfp.fp_status_f16); + r = float16_muladd(e1, e2, e3, 0, status); *(uint16_t *)((char *)vd + H1_2(i)) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0); } -void HELPER(sve_fnmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000); } -void HELPER(sve_fnmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_h(env, vg, desc, 0, 0x8000); + do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000); } -static void do_fmla_zpzzz_s(CPUARMState *env, void *vg, uint32_t desc, - uint32_t neg1, uint32_t neg3) +static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint32_t neg1, + uint32_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3504,45 +3481,42 @@ static void do_fmla_zpzzz_s(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint32_t *)((char *)vn + H1_4(i)) ^ neg1; e2 = *(uint32_t *)((char *)vm + H1_4(i)); e3 = *(uint32_t *)((char *)va + H1_4(i)) ^ neg3; - r = float32_muladd(e1, e2, e3, 0, &env->vfp.fp_status); + r = float32_muladd(e1, e2, e3, 0, status); *(uint32_t *)((char *)vd + H1_4(i)) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_s)(void *vd, 
void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0); } -void HELPER(sve_fnmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000); } -void HELPER(sve_fnmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_s(env, vg, desc, 0, 0x80000000); + do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000); } -static void do_fmla_zpzzz_d(CPUARMState *env, void *vg, uint32_t desc, - uint64_t neg1, uint64_t neg3) +static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, uint64_t neg1, + uint64_t neg3) { intptr_t i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; do { @@ -3555,31 +3529,35 @@ static void do_fmla_zpzzz_d(CPUARMState *env, void *vg, uint32_t desc, e1 = *(uint64_t *)((char *)vn + i) ^ neg1; e2 = *(uint64_t *)((char *)vm + i); e3 = *(uint64_t *)((char *)va + i) ^ neg3; - r = float64_muladd(e1, e2, e3, 0, &env->vfp.fp_status); + r = float64_muladd(e1, e2, e3, 0, status); *(uint64_t *)((char *)vd + i) = r; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, 0, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0); } -void HELPER(sve_fmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, 0); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0); } -void HELPER(sve_fnmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN); } -void HELPER(sve_fnmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { - do_fmla_zpzzz_d(env, vg, desc, 0, INT64_MIN); + do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN); } /* Two operand floating-point comparison controlled by a predicate. 
@@ -3587,49 +3565,46 @@ void HELPER(sve_fnmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) * compare operands, since the comparison may have side effects wrt * the FPSR. */ -#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ - uint64_t *d = vd, *g = vg; \ - do { \ - uint64_t out = 0, pg = g[j]; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - if (likely((pg >> (i & 63)) & 1)) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - TYPE mm = *(TYPE *)((char *)vm + H(i)); \ - out |= OP(TYPE, nn, mm, status); \ - } \ - } while (i & 63); \ - d[j--] = out; \ - } while (i > 0); \ -} - -#define DO_FPCMP_PPZZ_H(NAME, OP) \ - DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) -#define DO_FPCMP_PPZZ_S(NAME, OP) \ - DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) -#define DO_FPCMP_PPZZ_D(NAME, OP) \ - DO_FPCMP_PPZZ(NAME##_d, float64, , OP) - -#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ - DO_FPCMP_PPZZ_H(NAME, OP) \ - DO_FPCMP_PPZZ_S(NAME, OP) \ +#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + TYPE mm = *(TYPE *)((char *)vm + H(i)); \ + out |= OP(TYPE, nn, mm, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ + } + +#define DO_FPCMP_PPZZ_H(NAME, OP) DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZZ_S(NAME, OP) DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZZ_D(NAME, OP) DO_FPCMP_PPZZ(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ + DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ_S(NAME, OP) \ DO_FPCMP_PPZZ_D(NAME, OP) -#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 -#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 -#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0 -#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0 -#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 -#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 -#define DO_FCMUO(TYPE, X, Y, ST) \ +#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 +#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 +#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0 +#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0 +#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 +#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 +#define DO_FCMUO(TYPE, X, Y, ST) \ TYPE##_compare_quiet(X, Y, ST) == float_relation_unordered -#define DO_FACGE(TYPE, X, Y, ST) \ +#define DO_FACGE(TYPE, X, Y, ST) \ TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) <= 0 -#define DO_FACGT(TYPE, X, Y, ST) \ +#define DO_FACGT(TYPE, X, Y, ST) \ TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) < 0 DO_FPCMP_PPZZ_ALL(sve_fcmge, DO_FCMGE) @@ -3649,35 +3624,32 @@ DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT) /* One operand floating-point comparison against zero, controlled * by a predicate. 
*/ -#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ -void HELPER(NAME)(void *vd, void *vn, void *vg, \ - void *status, uint32_t desc) \ -{ \ - intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ - uint64_t *d = vd, *g = vg; \ - do { \ - uint64_t out = 0, pg = g[j]; \ - do { \ - i -= sizeof(TYPE), out <<= sizeof(TYPE); \ - if ((pg >> (i & 63)) & 1) { \ - TYPE nn = *(TYPE *)((char *)vn + H(i)); \ - out |= OP(TYPE, nn, 0, status); \ - } \ - } while (i & 63); \ - d[j--] = out; \ - } while (i > 0); \ -} - -#define DO_FPCMP_PPZ0_H(NAME, OP) \ - DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP) -#define DO_FPCMP_PPZ0_S(NAME, OP) \ - DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) -#define DO_FPCMP_PPZ0_D(NAME, OP) \ - DO_FPCMP_PPZ0(NAME##_d, float64, , OP) - -#define DO_FPCMP_PPZ0_ALL(NAME, OP) \ - DO_FPCMP_PPZ0_H(NAME, OP) \ - DO_FPCMP_PPZ0_S(NAME, OP) \ +#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \ + void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, \ + uint32_t desc) \ + { \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if ((pg >> (i & 63)) & 1) { \ + TYPE nn = *(TYPE *)((char *)vn + H(i)); \ + out |= OP(TYPE, nn, 0, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ + } + +#define DO_FPCMP_PPZ0_H(NAME, OP) DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZ0_S(NAME, OP) DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZ0_D(NAME, OP) DO_FPCMP_PPZ0(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZ0_ALL(NAME, OP) \ + DO_FPCMP_PPZ0_H(NAME, OP) \ + DO_FPCMP_PPZ0_S(NAME, OP) \ DO_FPCMP_PPZ0_D(NAME, OP) DO_FPCMP_PPZ0_ALL(sve_fcmge0, DO_FCMGE) @@ -3712,9 +3684,8 @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) { static const float32 coeff[16] = { - 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, - 0x36369d6d, 0x00000000, 0x00000000, 0x00000000, - 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705, + 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9, 0x36369d6d, 0x00000000, + 0x00000000, 0x00000000, 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705, 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000, }; intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32); @@ -3734,14 +3705,12 @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) { static const float64 coeff[16] = { - 0x3ff0000000000000ull, 0xbfc5555555555543ull, - 0x3f8111111110f30cull, 0xbf2a01a019b92fc6ull, - 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull, - 0x3de5d8408868552full, 0x0000000000000000ull, - 0x3ff0000000000000ull, 0xbfe0000000000000ull, - 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull, - 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull, - 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull, + 0x3ff0000000000000ull, 0xbfc5555555555543ull, 0x3f8111111110f30cull, + 0xbf2a01a019b92fc6ull, 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull, + 0x3de5d8408868552full, 0x0000000000000000ull, 0x3ff0000000000000ull, + 0xbfe0000000000000ull, 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull, + 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull, 0x3e21ee96d2641b13ull, + 0xbda8f76380fbb401ull, }; intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64); intptr_t x = simd_data(desc); @@ -3761,8 +3730,8 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc) * FP Complex 
Add */ -void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) +void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, void *vs, + uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; @@ -3793,8 +3762,8 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg, } while (i != 0); } -void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) +void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, void *vs, + uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; @@ -3825,8 +3794,8 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg, } while (i != 0); } -void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, - void *vs, uint32_t desc) +void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, void *vs, + uint32_t desc) { intptr_t j, i = simd_oprsz(desc); uint64_t *g = vg; @@ -3861,22 +3830,13 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg, * FP Complex Multiply */ -QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 22 > 32); - -void HELPER(sve_fcmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float16 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float16_set_sign(0, (rot & 2) != 0); @@ -3903,32 +3863,25 @@ void HELPER(sve_fcmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float16 *)((char *)va + H1_2(i)); - d = float16_muladd(e2, e1, d, 0, &env->vfp.fp_status_f16); + d = float16_muladd(e2, e1, d, 0, status); *(float16 *)((char *)vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float16 *)((char *)va + H1_2(j)); - d = float16_muladd(e4, e3, d, 0, &env->vfp.fp_status_f16); + d = float16_muladd(e4, e3, d, 0, status); *(float16 *)((char *)vd + H1_2(j)) = d; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fcmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float32 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float32_set_sign(0, (rot & 2) != 0); @@ -3955,32 +3908,25 @@ void HELPER(sve_fcmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float32 *)((char *)va + H1_2(i)); - d = float32_muladd(e2, e1, d, 0, &env->vfp.fp_status); + d = float32_muladd(e2, e1, d, 0, status); 
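The multiply-add helpers in this hunk fold operand negation into the data itself: do_fmla_zpzzz_* XORs neg1/neg3 (0x8000, 0x80000000, INT64_MIN) into the raw operand bits before float*_muladd, so a single loop serves FMLA, FMLS, FNMLA and FNMLS, and the FCMLA helpers use neg_real/neg_imag the same way for the four rotations. A minimal standalone sketch of the idea follows; it is plain C with hypothetical values and is not part of the patch.

/* Illustrative sketch only, not part of the patch: flipping the IEEE sign
 * bit of a raw float32 negates it, so fma(-n, m, a) can be computed from
 * the unmodified fused multiply-add by XOR-ing 0x80000000 into n's bits. */
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t f2u(float f) { uint32_t u; memcpy(&u, &f, 4); return u; }
static float u2f(uint32_t u) { float f; memcpy(&f, &u, 4); return f; }

int main(void)
{
    float n = 1.5f, m = 2.0f, a = 10.0f;
    /* FMLS-style result a - n*m, computed via a sign-bit flip on n. */
    float fmls = fmaf(u2f(f2u(n) ^ 0x80000000u), m, a);
    printf("%f %f\n", fmls, a - n * m);   /* both print 7.000000 */
    return 0;
}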
*(float32 *)((char *)vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float32 *)((char *)va + H1_2(j)); - d = float32_muladd(e4, e3, d, 0, &env->vfp.fp_status); + d = float32_muladd(e4, e3, d, 0, status); *(float32 *)((char *)vd + H1_2(j)) = d; } } while (i & 63); } while (i != 0); } -void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) +void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, void *vg, + void *status, uint32_t desc) { intptr_t j, i = simd_oprsz(desc); - unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); - unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); - unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); - unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); - unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2); + unsigned rot = simd_data(desc); bool flip = rot & 1; float64 neg_imag, neg_real; - void *vd = &env->vfp.zregs[rd]; - void *vn = &env->vfp.zregs[rn]; - void *vm = &env->vfp.zregs[rm]; - void *va = &env->vfp.zregs[ra]; uint64_t *g = vg; neg_imag = float64_set_sign(0, (rot & 2) != 0); @@ -4007,12 +3953,12 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) if (likely((pg >> (i & 63)) & 1)) { d = *(float64 *)((char *)va + H1_2(i)); - d = float64_muladd(e2, e1, d, 0, &env->vfp.fp_status); + d = float64_muladd(e2, e1, d, 0, status); *(float64 *)((char *)vd + H1_2(i)) = d; } if (likely((pg >> (j & 63)) & 1)) { d = *(float64 *)((char *)va + H1_2(j)); - d = float64_muladd(e4, e3, d, 0, &env->vfp.fp_status); + d = float64_muladd(e4, e3, d, 0, status); *(float64 *)((char *)vd + H1_2(j)) = d; } } while (i & 63); @@ -4024,103 +3970,111 @@ void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) */ /* - * Load elements into @vd, controlled by @vg, from @host + @mem_ofs. - * Memory is valid through @host + @mem_max. The register element - * indicies are inferred from @mem_ofs, as modified by the types for - * which the helper is built. Return the @mem_ofs of the first element - * not loaded (which is @mem_max if they are all loaded). - * - * For softmmu, we have fully validated the guest page. For user-only, - * we cannot fully validate without taking the mmap lock, but since we - * know the access is within one host page, if any access is valid they - * all must be valid. However, when @vg is all false, it may be that - * no access is valid. + * Load one element into @vd + @reg_off from @host. + * The controlling predicate is known to be true. */ -typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host, - intptr_t mem_ofs, intptr_t mem_max); +typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); /* * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). * The controlling predicate is known to be true. */ -typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, - target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra); -typedef sve_ld1_tlb_fn sve_st1_tlb_fn; +typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, uintptr_t retaddr); /* * Generate the above primitives. 
*/ -#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ -static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \ - intptr_t mem_off, const intptr_t mem_max) \ -{ \ - intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM)); \ - uint64_t *pg = vg; \ - while (mem_off + sizeof(TYPEM) <= mem_max) { \ - TYPEM val = 0; \ - if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) { \ - val = HOST((char *)host + mem_off); \ - } \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ - mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE); \ - } \ - return mem_off; \ -} - -#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ -{ \ - TYPEM val = TLB(env, addr, oi, ra); \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ -} - -#define DO_LD_PRIM_1(NAME, H, TE, TM) \ - DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ - DO_LD_TLB(NAME, H, TE, TM, ldub_p, 0, helper_ret_ldub_mmu) - -DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) -DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) -DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) -DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) -DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) -DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) -DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) - -#define DO_LD_PRIM_2(NAME, end, MOEND, H, TE, TM, PH, PT) \ - DO_LD_HOST(NAME##_##end, H, TE, TM, PH##_##end##_p) \ - DO_LD_TLB(NAME##_##end, H, TE, TM, PH##_##end##_p, \ - MOEND, helper_##end##_##PT##_mmu) - -DO_LD_PRIM_2(ld1hh, le, MO_LE, H1_2, uint16_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hsu, le, MO_LE, H1_4, uint32_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hss, le, MO_LE, H1_4, uint32_t, int16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hdu, le, MO_LE, , uint64_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hds, le, MO_LE, , uint64_t, int16_t, lduw, lduw) +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ + static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ + { \ + TYPEM val = HOST(host); \ + *(TYPEE *)((char*)vd + H(reg_off)) = val; \ + } -DO_LD_PRIM_2(ld1ss, le, MO_LE, H1_4, uint32_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sdu, le, MO_LE, , uint64_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sds, le, MO_LE, , uint64_t, int32_t, ldl, ldul) +#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ + static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ + { \ + HOST(host, (TYPEM) * (TYPEE *)((char*)vd + H(reg_off))); \ + } -DO_LD_PRIM_2(ld1dd, le, MO_LE, , uint64_t, uint64_t, ldq, ldq) +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ + static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ + { \ + *(TYPEE *)((char*)vd + H(reg_off)) = \ + (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ + } -DO_LD_PRIM_2(ld1hh, be, MO_BE, H1_2, uint16_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hsu, be, MO_BE, H1_4, uint32_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hss, be, MO_BE, H1_4, uint32_t, int16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hdu, be, MO_BE, , uint64_t, uint16_t, lduw, lduw) -DO_LD_PRIM_2(ld1hds, be, MO_BE, , uint64_t, int16_t, lduw, lduw) +#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ + static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ + target_ulong addr, uintptr_t ra) \ + { \ + TLB(env, useronly_clean_ptr(addr), \ + (TYPEM) * (TYPEE *)((char*)vd + H(reg_off)), ra); \ + } -DO_LD_PRIM_2(ld1ss, be, MO_BE, H1_4, uint32_t, uint32_t, ldl, ldul) -DO_LD_PRIM_2(ld1sdu, be, MO_BE, , uint64_t, uint32_t, ldl, 
ldul) -DO_LD_PRIM_2(ld1sds, be, MO_BE, , uint64_t, int32_t, ldl, ldul) +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ + DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) -DO_LD_PRIM_2(ld1dd, be, MO_BE, , uint64_t, uint64_t, ldq, ldq) +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) +DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t) +DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t) + +#define DO_ST_PRIM_1(NAME, H, TE, TM) \ + DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ + DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) + +DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) +DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) +DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) +DO_ST_PRIM_1(bd, , uint64_t, uint8_t) + +#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ + DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ + DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ + DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ + DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) + +#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ + DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ + DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ + DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ + DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) + +DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) +DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) +DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) +DO_LD_PRIM_2(hdu, , uint64_t, uint16_t, lduw) +DO_LD_PRIM_2(hds, , uint64_t, int16_t, lduw) + +DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) +DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) +DO_ST_PRIM_2(hd, , uint64_t, uint16_t, stw) + +DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) +DO_LD_PRIM_2(sdu, , uint64_t, uint32_t, ldl) +DO_LD_PRIM_2(sds, , uint64_t, int32_t, ldl) + +DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) +DO_ST_PRIM_2(sd, , uint64_t, uint32_t, stl) + +DO_LD_PRIM_2(dd, , uint64_t, uint64_t, ldq) +DO_ST_PRIM_2(dd, , uint64_t, uint64_t, stq) #undef DO_LD_TLB +#undef DO_ST_TLB #undef DO_LD_HOST #undef DO_LD_PRIM_1 +#undef DO_ST_PRIM_1 #undef DO_LD_PRIM_2 +#undef DO_ST_PRIM_2 /* * Skip through a sequence of inactive elements in the guarding predicate @vg, @@ -4157,297 +4111,687 @@ static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, } /* - * Return the maximum offset <= @mem_max which is still within the page - * referenced by @base + @mem_off. + * Resolve the guest virtual address to info->host and info->flags. + * If @nofault, return false if the page is invalid, otherwise + * exit via page fault exception. */ -static intptr_t max_for_page(struct uc_struct *uc, target_ulong base, intptr_t mem_off, - intptr_t mem_max) -{ - target_ulong addr = base + mem_off; - intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK); - return MIN(split, mem_max - mem_off) + mem_off; -} -/* These are normally defined only for CONFIG_USER_ONLY in */ -static inline void set_helper_retaddr(uintptr_t ra) { } -static inline void clear_helper_retaddr(void) { } +typedef struct { + void *host; + int flags; + MemTxAttrs attrs; +} SVEHostPage; -/* - * The result of tlb_vaddr_to_host for user-only is just g2h(x), - * which is always non-null. Elide the useless test. 
- */ -static inline bool test_host_page(void *host) +static bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env, + target_ulong addr, int mem_off, + MMUAccessType access_type, int mmu_idx, + uintptr_t retaddr) { - return likely(host != NULL); + int flags; + + addr += mem_off; + + /* + * User-only currently always issues with TBI. See the comment + * above useronly_clean_ptr. Usually we clean this top byte away + * during translation, but we can't do that for e.g. vector + imm + * addressing modes. + * + * We currently always enable TBI for user-only, and do not provide + * a way to turn it off. So clean the pointer unconditionally here, + * rather than look it up here, or pass it down from above. + */ + addr = useronly_clean_ptr(addr); + + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, + &info->host, retaddr); + info->flags = flags; + + if (flags & TLB_INVALID_MASK) { + g_assert(nofault); + return false; + } + + /* Ensure that info->host[] is relative to addr, not addr + mem_off. */ + info->host = (void*)((char*)(info->host) - mem_off); + + /* + * Find the iotlbentry for addr and return the transaction attributes. + * This *must* be present in the TLB because we just found the mapping. + */ + { + uintptr_t index = tlb_index(env, mmu_idx, addr); + +#ifdef CONFIG_DEBUG_TCG + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + target_ulong comparator = + (access_type == MMU_DATA_LOAD ? entry->addr_read + : tlb_addr_write(entry)); + g_assert(tlb_hit(comparator, addr)); +#endif + + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; + info->attrs = iotlbentry->attrs; + } + + return true; } /* - * Common helper for all contiguous one-register predicated loads. + * Analyse contiguous data, protected by a governing predicate. */ -static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, - sve_ld1_host_fn *host_fn, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; - const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - ARMVectorReg scratch; - void *host; - intptr_t split, reg_off, mem_off; - /* Find the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { - /* The entire predicate was false; no load occurs. */ - memset(vd, 0, reg_max); - return; - } - mem_off = reg_off >> diffsz; - set_helper_retaddr(retaddr); +typedef enum { + FAULT_NO, + FAULT_FIRST, + FAULT_ALL, +} SVEContFault; +typedef struct { /* - * If the (remaining) load is entirely within a single page, then: - * For softmmu, and the tlb hits, then no faults will occur; - * For user-only, either the first load will fault or none will. - * We can thus perform the load directly to the destination and - * Vd will be unmodified on any exception path. + * First and last element wholly contained within the two pages. + * mem_off_first[0] and reg_off_first[0] are always set >= 0. + * reg_off_last[0] may be < 0 if the first element crosses pages. + * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] + * are set >= 0 only if there are complete elements on a second page. + * + * The reg_off_* offsets are relative to the internal vector register. 
+ * The mem_off_first offset is relative to the memory address; the + * two offsets are different when a load operation extends, a store + * operation truncates, or for multi-register operations. */ - split = max_for_page(env->uc, addr, mem_off, mem_max); - if (likely(split == mem_max)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (test_host_page(host)) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); - tcg_debug_assert(mem_off == mem_max); - clear_helper_retaddr(); - /* After having taken any fault, zero leading inactive elements. */ - swap_memzero(vd, reg_off); - return; + int16_t mem_off_first[2]; + int16_t reg_off_first[2]; + int16_t reg_off_last[2]; + + /* + * One element that is misaligned and spans both pages, + * or -1 if there is no such active element. + */ + int16_t mem_off_split; + int16_t reg_off_split; + + /* + * The byte offset at which the entire operation crosses a page boundary. + * Set >= 0 if and only if the entire operation spans two pages. + */ + int16_t page_split; + + /* TLB data for the two pages. */ + SVEHostPage page[2]; +} SVEContLdSt; + +/* + * Find first active element on each page, and a loose bound for the + * final element on each page. Identify any single element that spans + * the page boundary. Return true if there are any active elements. + */ +static bool sve_cont_ldst_elements(CPUARMState *env, SVEContLdSt *info, + target_ulong addr, uint64_t *vg, + intptr_t reg_max, int esz, int msize) +{ + uc_engine *uc = env->uc; + const int esize = 1 << esz; + const uint64_t pg_mask = pred_esz_masks[esz]; + intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split; + intptr_t mem_off_last, mem_off_split; + intptr_t page_split, elt_split; + intptr_t i; + + /* Set all of the element indices to -1, and the TLB data to 0. */ + memset(info, -1, offsetof(SVEContLdSt, page)); + memset(info->page, 0, sizeof(info->page)); + + /* Gross scan over the entire predicate to find bounds. */ + i = 0; + do { + uint64_t pg = vg[i] & pg_mask; + if (pg) { + reg_off_last = i * 64 + 63 - clz64(pg); + if (reg_off_first < 0) { + reg_off_first = i * 64 + ctz64(pg); + } } + } while (++i * 64 < reg_max); + + if (unlikely(reg_off_first < 0)) { + /* No active elements, no pages touched. */ + return false; } + tcg_debug_assert(reg_off_last >= 0 && reg_off_last < reg_max); + + info->reg_off_first[0] = reg_off_first; + info->mem_off_first[0] = (reg_off_first >> esz) * msize; + mem_off_last = (reg_off_last >> esz) * msize; + + page_split = -(addr | TARGET_PAGE_MASK); + if (likely(mem_off_last + msize <= page_split)) { + /* The entire operation fits within a single page. */ + info->reg_off_last[0] = reg_off_last; + return true; + } + + info->page_split = page_split; + elt_split = page_split / msize; + reg_off_split = elt_split << esz; + mem_off_split = elt_split * msize; /* - * Perform the predicated read into a temporary, thus ensuring - * if the load of the last element faults, Vd is not modified. + * This is the last full element on the first page, but it is not + * necessarily active. If there is no full element, i.e. the first + * active element is the one that's split, this value remains -1. + * It is useful as iteration bounds. 
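/*
 * Illustrative sketch, not part of this patch: a standalone model of one
 * hypothetical case of the element analysis described above.  Assume
 * 4-byte elements (esz == 2, msize == 4), a 4 KiB page, and a base
 * address whose low 12 bits are 0xffe.  Only two bytes remain on the
 * first page, so no whole element fits there (reg_off_last[0] stays -1)
 * and element 0 is the page-crossing element.
 */
#include <assert.h>
#include <stdint.h>

static void example_page_split(void)
{
    const int esz = 2;                   /* log2(element size): MO_32 */
    const int esize = 1 << esz;          /* 4-byte register elements */
    const int msize = 4;                 /* 4-byte memory elements */
    const uint64_t page_size = 0x1000;   /* hypothetical 4 KiB page */
    const uint64_t addr = 0x40000ffeull; /* hypothetical base address */

    /* Bytes left on the first page; mirrors -(addr | TARGET_PAGE_MASK). */
    intptr_t page_split = (intptr_t)(page_size - (addr & (page_size - 1)));

    intptr_t elt_split = page_split / msize;  /* whole elements on page 0 */
    intptr_t reg_off_split = elt_split << esz;
    intptr_t reg_off_last0 = elt_split ? reg_off_split - esize : -1;
    int has_split_elt = (page_split % msize) != 0;

    assert(page_split == 2);
    assert(reg_off_last0 == -1);  /* no whole element on the first page */
    assert(has_split_elt);        /* element 0 straddles the boundary */
    (void)reg_off_split;
}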
*/ - memset(&scratch, 0, reg_max); - goto start; - while (1) { - reg_off = find_next_active(vg, reg_off, reg_max, esz); - if (reg_off >= reg_max) { - break; - } - mem_off = reg_off >> diffsz; - split = max_for_page(env->uc, addr, mem_off, mem_max); - - start: - if (split - mem_off >= (1ULL << msz)) { - /* At least one whole element on this page. */ - host = tlb_vaddr_to_host(env, addr + mem_off, - MMU_DATA_LOAD, mmu_idx); - if (host) { - mem_off = host_fn(&scratch, vg, (char *)host - mem_off, - mem_off, split); - reg_off = mem_off << diffsz; - continue; + if (elt_split != 0) { + info->reg_off_last[0] = reg_off_split - esize; + } + + /* Determine if an unaligned element spans the pages. */ + if (page_split % msize != 0) { + /* It is helpful to know if the split element is active. */ + if ((vg[reg_off_split >> 6] >> (reg_off_split & 63)) & 1) { + info->reg_off_split = reg_off_split; + info->mem_off_split = mem_off_split; + + if (reg_off_split == reg_off_last) { + /* The page crossing element is last. */ + return true; } } + reg_off_split += esize; + mem_off_split += msize; + } + + /* + * We do want the first active element on the second page, because + * this may affect the address reported in an exception. + */ + reg_off_split = find_next_active(vg, reg_off_split, reg_max, esz); + tcg_debug_assert(reg_off_split <= reg_off_last); + info->reg_off_first[1] = reg_off_split; + info->mem_off_first[1] = (reg_off_split >> esz) * msize; + info->reg_off_last[1] = reg_off_last; + return true; +} + +/* + * Resolve the guest virtual addresses to info->page[]. + * Control the generation of page faults with @fault. Return false if + * there is no work to do, which can only happen with @fault == FAULT_NO. + */ +static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, + CPUARMState *env, target_ulong addr, + MMUAccessType access_type, uintptr_t retaddr) +{ + int mmu_idx = cpu_mmu_index(env, false); + int mem_off = info->mem_off_first[0]; + bool nofault = fault == FAULT_NO; + bool have_work = true; + + if (!sve_probe_page(&info->page[0], nofault, env, addr, mem_off, + access_type, mmu_idx, retaddr)) { + /* No work to be done. */ + return false; + } + + if (likely(info->page_split < 0)) { + /* The entire operation was on the one page. */ + return true; + } + /* + * If the second page is invalid, then we want the fault address to be + * the first byte on that page which is accessed. + */ + if (info->mem_off_split >= 0) { + /* + * There is an element split across the pages. The fault address + * should be the first byte of the second page. + */ + mem_off = info->page_split; /* - * Perform one normal read. This may fault, longjmping out to the - * main loop in order to raise an exception. It may succeed, and - * as a side-effect load the TLB entry for the next round. Finally, - * in the extremely unlikely case we're performing this operation - * on I/O memory, it may succeed but not bring in the TLB entry. - * But even then we have still made forward progress. + * If the split element is also the first active element + * of the vector, then: For first-fault we should continue + * to generate faults for the second page. For no-fault, + * we have work only if the second page is valid. */ - tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr); - reg_off += 1ULL << esz; + if (info->mem_off_first[0] < info->mem_off_split) { + nofault = FAULT_FIRST; + have_work = false; + } + } else { + /* + * There is no element split across the pages. 
The fault address + * should be the first active element on the second page. + */ + mem_off = info->mem_off_first[1]; + /* + * There must have been one active element on the first page, + * so we're out of first-fault territory. + */ + nofault = fault != FAULT_ALL; } - clear_helper_retaddr(); - memcpy(vd, &scratch, reg_max); + have_work |= sve_probe_page(&info->page[1], nofault, env, addr, mem_off, + access_type, mmu_idx, retaddr); + return have_work; } -#define DO_LD1_1(NAME, ESZ) \ -void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ - sve_##NAME##_host, sve_##NAME##_tlb); \ -} - -#define DO_LD1_2(NAME, ESZ, MSZ) \ -void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ -} \ -void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ -} - -DO_LD1_1(ld1bb, 0) -DO_LD1_1(ld1bhu, 1) -DO_LD1_1(ld1bhs, 1) -DO_LD1_1(ld1bsu, 2) -DO_LD1_1(ld1bss, 2) -DO_LD1_1(ld1bdu, 3) -DO_LD1_1(ld1bds, 3) - -DO_LD1_2(ld1hh, 1, 1) -DO_LD1_2(ld1hsu, 2, 1) -DO_LD1_2(ld1hss, 2, 1) -DO_LD1_2(ld1hdu, 3, 1) -DO_LD1_2(ld1hds, 3, 1) - -DO_LD1_2(ld1ss, 2, 2) -DO_LD1_2(ld1sdu, 3, 2) -DO_LD1_2(ld1sds, 3, 2) - -DO_LD1_2(ld1dd, 3, 3) +static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, int wp_access, + uintptr_t retaddr) +{ + intptr_t mem_off, reg_off, reg_last; + int flags0 = info->page[0].flags; + int flags1 = info->page[1].flags; -#undef DO_LD1_1 -#undef DO_LD1_2 + if (likely(!((flags0 | flags1) & TLB_WATCHPOINT))) { + return; + } -/* - * Common helpers for all contiguous 2,3,4-register predicated loads. - */ -static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[2] = { 0 }; + /* Indicate that watchpoints are handled. */ + info->page[0].flags = flags0 & ~TLB_WATCHPOINT; + info->page[1].flags = flags1 & ~TLB_WATCHPOINT; + + if (flags0 & TLB_WATCHPOINT) { + mem_off = info->mem_off_first[0]; + reg_off = info->reg_off_first[0]; + reg_last = info->reg_off_last[0]; + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[0].attrs, wp_access, + retaddr); + } + reg_off += esize; + mem_off += msize; + } while (reg_off <= reg_last && (reg_off & 63)); + } + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - } - i += size, pg >>= size; - addr += 2 * size; - } while (i & 15); + mem_off = info->mem_off_split; + if (mem_off >= 0) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[0].attrs, wp_access, retaddr); } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); + mem_off = info->mem_off_first[1]; + if ((flags1 & TLB_WATCHPOINT) && mem_off >= 0) { + reg_off = info->reg_off_first[1]; + reg_last = info->reg_off_last[1]; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize, + info->page[1].attrs, wp_access, + retaddr); + } + reg_off += esize; + mem_off += msize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } -static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) +typedef uint64_t mte_check_fn(CPUARMState *, uint32_t, uint64_t, uintptr_t); + +static inline QEMU_ALWAYS_INLINE void +sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env, uint64_t *vg, + target_ulong addr, int esize, int msize, + uint32_t mtedesc, uintptr_t ra, mte_check_fn *check) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[3] = { 0 }; + intptr_t mem_off, reg_off, reg_last; + + /* Process the page only if MemAttr == Tagged. */ + if (arm_tlb_mte_tagged(&info->page[0].attrs)) { + mem_off = info->mem_off_first[0]; + reg_off = info->reg_off_first[0]; + reg_last = info->reg_off_split; + if (reg_last < 0) { + reg_last = info->reg_off_last[0]; + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); - } - i += size, pg >>= size; - addr += 3 * size; - } while (i & 15); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off <= reg_last && (reg_off & 63)); + } while (reg_off <= reg_last); } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); - memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); + mem_off = info->mem_off_first[1]; + if (mem_off >= 0 && arm_tlb_mte_tagged(&info->page[1].attrs)) { + reg_off = info->reg_off_first[1]; + reg_last = info->reg_off_last[1]; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } -static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, int size, uintptr_t ra, - sve_ld1_tlb_fn *tlb_fn) +typedef void sve_cont_ldst_mte_check_fn(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, uint32_t mtedesc, + uintptr_t ra); + +static void sve_cont_ldst_mte_check1(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch[4] = { 0 }; + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, mtedesc, ra, + mte_check1); +} + +static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra) +{ + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, mtedesc, ra, + mte_checkN); +} + +/* + * Common helper for all contiguous 1,2,3,4-register predicated stores. + */ +static inline QEMU_ALWAYS_INLINE void +sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, const int esz, const int msz, + const int N, uint32_t mtedesc, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn, sve_cont_ldst_mte_check_fn *mte_check_fn) +{ + const unsigned rd = simd_data(desc); + const intptr_t reg_max = simd_oprsz(desc); + intptr_t reg_off, reg_last, mem_off; + SVEContLdSt info; + void *host; + int flags, i; + + /* Find the active elements. */ + if (!sve_cont_ldst_elements(env, &info, addr, vg, reg_max, esz, N << msz)) { + /* The entire predicate was false; no load occurs. */ + for (i = 0; i < N; ++i) { + memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max); + } + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, retaddr); + + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, + BP_MEM_READ, retaddr); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, mtedesc, + retaddr); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. Perform the load + * into scratch memory to preserve register state until the end. 
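/*
 * Illustrative sketch, not part of this patch: the predicate-walking
 * pattern used by the loop that follows (and by its siblings throughout
 * these helpers).  The governing predicate is consumed one uint64_t at a
 * time (vg[reg_off >> 6]); the inner loop tests one bit per element and
 * breaks at every 64-bit boundary so the next predicate word can be
 * fetched.  Here the per-element "access" is just a counter.
 */
#include <stdint.h>

static int example_count_active(const uint64_t *vg, intptr_t reg_off,
                                intptr_t reg_last, int esz)
{
    int count = 0;

    do {
        uint64_t pg = vg[reg_off >> 6];
        do {
            if ((pg >> (reg_off & 63)) & 1) {
                count++;    /* a real helper would access memory here */
            }
            reg_off += 1 << esz;
        } while (reg_off <= reg_last && (reg_off & 63));
    } while (reg_off <= reg_last);

    return count;
}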
+ */ + ARMVectorReg scratch[4] = {}; + + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; + } + } - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, &scratch[0], i, addr, oi, ra); - tlb_fn(env, &scratch[1], i, addr + size, oi, ra); - tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra); - tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + tlb_fn(env, &scratch[i], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + + for (i = 0; i < N; ++i) { + memcpy(&env->vfp.zregs[(rd + i) & 31], &scratch[i], reg_max); + } + return; +#endif + } + + /* The entire operation is in RAM, on valid pages. */ + + for (i = 0; i < N; ++i) { + memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max); + } + + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } } - i += size, pg >>= size; - addr += 4 * size; - } while (i & 15); + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off <= reg_last && (reg_off & 63)); } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. */ - memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz); - memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz); - memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz); - memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz); -} + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. 
+ */ + mem_off = info.mem_off_split; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_split; + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } -#define DO_LDN_1(N) \ -void QEMU_FLATTEN HELPER(sve_ld##N##bb_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \ -} + mem_off = info.mem_off_first[1]; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_first[1]; + reg_last = info.reg_off_last[1]; + host = info.page[1].host; -#define DO_LDN_2(N, SUFF, SIZE) \ -void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_le_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ - sve_ld1##SUFF##_le_tlb); \ -} \ -void QEMU_FLATTEN HELPER(sve_ld##N##SUFF##_be_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \ - sve_ld1##SUFF##_be_tlb); \ + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } } +static inline QEMU_ALWAYS_INLINE void +sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t ra, const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? 
sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); +} + +#define DO_LD1_1(NAME, ESZ) \ + void HELPER(sve_##NAME##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb, NULL); \ + } \ + void HELPER(sve_##NAME##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ + } + +#define DO_LD1_2(NAME, ESZ, MSZ) \ + void HELPER(sve_##NAME##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb, NULL); \ + } \ + void HELPER(sve_##NAME##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb, NULL); \ + } \ + void HELPER(sve_##NAME##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ + } \ + void HELPER(sve_##NAME##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ + } + +DO_LD1_1(ld1bb, MO_8) +DO_LD1_1(ld1bhu, MO_16) +DO_LD1_1(ld1bhs, MO_16) +DO_LD1_1(ld1bsu, MO_32) +DO_LD1_1(ld1bss, MO_32) +DO_LD1_1(ld1bdu, MO_64) +DO_LD1_1(ld1bds, MO_64) + +DO_LD1_2(ld1hh, MO_16, MO_16) +DO_LD1_2(ld1hsu, MO_32, MO_16) +DO_LD1_2(ld1hss, MO_32, MO_16) +DO_LD1_2(ld1hdu, MO_64, MO_16) +DO_LD1_2(ld1hds, MO_64, MO_16) + +DO_LD1_2(ld1ss, MO_32, MO_32) +DO_LD1_2(ld1sdu, MO_64, MO_32) +DO_LD1_2(ld1sds, MO_64, MO_32) + +DO_LD1_2(ld1dd, MO_64, MO_64) + +#undef DO_LD1_1 +#undef DO_LD1_2 + +#define DO_LDN_1(N) \ + void HELPER(sve_ld##N##bb_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \ + sve_ld1bb_host, sve_ld1bb_tlb, NULL); \ + } \ + void HELPER(sve_ld##N##bb_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ + sve_ld1bb_host, sve_ld1bb_tlb); \ + } + +#define DO_LDN_2(N, SUFF, ESZ) \ + void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb, NULL); \ + } \ + void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb, NULL); \ + } \ + void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ + } \ + void HELPER(sve_ld##N##SUFF##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ + } + DO_LDN_1(2) DO_LDN_1(3) DO_LDN_1(4) -DO_LDN_2(2, hh, 2) -DO_LDN_2(3, hh, 2) -DO_LDN_2(4, hh, 2) +DO_LDN_2(2, hh, MO_16) +DO_LDN_2(3, 
hh, MO_16) +DO_LDN_2(4, hh, MO_16) -DO_LDN_2(2, ss, 4) -DO_LDN_2(3, ss, 4) -DO_LDN_2(4, ss, 4) +DO_LDN_2(2, ss, MO_32) +DO_LDN_2(3, ss, MO_32) +DO_LDN_2(4, ss, MO_32) -DO_LDN_2(2, dd, 8) -DO_LDN_2(3, dd, 8) -DO_LDN_2(4, dd, 8) +DO_LDN_2(2, dd, MO_64) +DO_LDN_2(3, dd, MO_64) +DO_LDN_2(4, dd, MO_64) #undef DO_LDN_1 #undef DO_LDN_2 @@ -4484,385 +4828,524 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) } /* - * Common helper for all contiguous first-fault loads. + * Common helper for all contiguous no-fault and first-fault loads. */ -static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, - sve_ld1_host_fn *host_fn, - sve_ld1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); +static inline QEMU_ALWAYS_INLINE void +sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, + uint32_t desc, const uintptr_t retaddr, uint32_t mtedesc, + const int esz, const int msz, const SVEContFault fault, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) +{ + const unsigned rd = simd_data(desc); void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - intptr_t split, reg_off, mem_off; + intptr_t reg_off, mem_off, reg_last; + SVEContLdSt info; + int flags; void *host; - /* Skip to the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { + /* Find the active elements. */ + if (!sve_cont_ldst_elements(env, &info, addr, vg, reg_max, esz, 1 << msz)) { /* The entire predicate was false; no load occurs. */ memset(vd, 0, reg_max); return; } - mem_off = reg_off >> diffsz; - set_helper_retaddr(retaddr); + reg_off = info.reg_off_first[0]; + + /* Probe the page(s). */ + if (!sve_cont_ldst_pages(&info, fault, env, addr, MMU_DATA_LOAD, retaddr)) { + /* Fault on first element. */ + tcg_debug_assert(fault == FAULT_NO); + memset(vd, 0, reg_max); + goto do_fault; + } + + mem_off = info.mem_off_first[0]; + flags = info.page[0].flags; /* - * If the (remaining) load is entirely within a single page, then: - * For softmmu, and the tlb hits, then no faults will occur; - * For user-only, either the first load will fault or none will. - * We can thus perform the load directly to the destination and - * Vd will be unmodified on any exception path. + * Disable MTE checking if the Tagged bit is not set. Since TBI must + * be set within MTEDESC for MTE, !mtedesc => !mte_active. */ - split = max_for_page(env->uc, addr, mem_off, mem_max); - if (likely(split == mem_max)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (test_host_page(host)) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, mem_max); - tcg_debug_assert(mem_off == mem_max); - clear_helper_retaddr(); - /* After any fault, zero any leading inactive elements. */ + if (arm_tlb_mte_tagged(&info.page[0].attrs)) { + mtedesc = 0; + } + + if (fault == FAULT_FIRST) { + /* Trapping mte check for the first-fault element. */ + if (mtedesc) { + mte_check1(env, mtedesc, addr + mem_off, retaddr); + } + + /* + * Special handling of the first active element, + * if it crosses a page boundary or is MMIO. 
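/*
 * Illustrative sketch, not part of this patch: the first-fault contract
 * that the special-casing here implements.  Only the first active element
 * may take a real exception; if any later active element cannot be loaded,
 * the operation stops and the failure is reported by truncating the FFR
 * (see record_fault()).  The predicate and FFR are simplified to one bool
 * per element, and load_elem() is a hypothetical per-element access.
 */
#include <stdbool.h>
#include <stddef.h>

static bool example_ldff1(const bool *active, size_t nelem,
                          bool (*load_elem)(size_t idx),
                          bool *ffr /* all true on entry */)
{
    bool first = true;

    for (size_t i = 0; i < nelem; i++) {
        if (!active[i]) {
            continue;
        }
        if (!load_elem(i)) {
            if (first) {
                return false;   /* first active element: take the fault */
            }
            for (size_t j = i; j < nelem; j++) {
                ffr[j] = false; /* later element: suppress, note in FFR */
            }
            return true;
        }
        first = false;
    }
    return true;
}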
+ */ + bool is_split = mem_off == info.mem_off_split; + if (unlikely(flags != 0) || unlikely(is_split)) { + /* + * Use the slow path for cross-page handling. + * Might trap for MMIO or watchpoints. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, retaddr); + + /* After any fault, zero the other elements. */ swap_memzero(vd, reg_off); - return; + reg_off += 1 << esz; + mem_off += 1 << msz; + swap_memzero((char*)vd + reg_off, reg_max - reg_off); + + if (is_split) { + goto second_page; + } + } else { + memset(vd, 0, reg_max); + } + } else { + memset(vd, 0, reg_max); + if (unlikely(mem_off == info.mem_off_split)) { + /* The first active element crosses a page boundary. */ + flags |= info.page[1].flags; + if (unlikely(flags & TLB_MMIO)) { + /* Some page is MMIO, see below. */ + goto do_fault; + } + if (unlikely(flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches(env_cpu(env), addr + mem_off, + 1 << msz) & + BP_MEM_READ)) { + /* Watchpoint hit, see below. */ + goto do_fault; + } + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } + /* + * Use the slow path for cross-page handling. + * This is RAM, without a watchpoint, and will not trap. + */ + tlb_fn(env, vd, reg_off, addr + mem_off, retaddr); + goto second_page; } } /* - * Perform one normal read, which will fault or not. - * But it is likely to bring the page into the tlb. + * From this point on, all memory operations are MemSingleNF. + * + * Per the MemSingleNF pseudocode, a no-fault load from Device memory + * must not actually hit the bus -- it returns (UNKNOWN, FAULT) instead. + * + * Unfortuately we do not have access to the memory attributes from the + * PTE to tell Device memory from Normal memory. So we make a mostly + * correct check, and indicate (UNKNOWN, FAULT) for any MMIO. + * This gives the right answer for the common cases of "Normal memory, + * backed by host RAM" and "Device memory, backed by MMIO". + * The architecture allows us to suppress an NF load and return + * (UNKNOWN, FAULT) for any reason, so our behaviour for the corner + * case of "Normal memory, backed by MMIO" is permitted. The case we + * get wrong is "Device memory, backed by host RAM", for which we + * should return (UNKNOWN, FAULT) for but do not. + * + * Similarly, CPU_BP breakpoints would raise exceptions, and so + * return (UNKNOWN, FAULT). For simplicity, we consider gdb and + * architectural breakpoints the same. */ - tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr); - - /* After any fault, zero any leading predicated false elts. */ - swap_memzero(vd, reg_off); - mem_off += 1ULL << msz; - reg_off += 1ULL << esz; - - /* Try again to read the balance of the page. */ - split = max_for_page(env->uc, addr, mem_off - 1, mem_max); - if (split >= (1ULL << msz)) { - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - if (host) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); - reg_off = mem_off << diffsz; - } + if (unlikely(flags & TLB_MMIO)) { + goto do_fault; } - clear_helper_retaddr(); - record_fault(env, reg_off, reg_max); -} + reg_last = info.reg_off_last[0]; + host = info.page[0].host; -/* - * Common helper for all contiguous no-fault loads. 
- */ -static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const int esz, const int msz, - sve_ld1_host_fn *host_fn) -{ - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - void *vd = &env->vfp.zregs[rd]; - const int diffsz = esz - msz; - const intptr_t reg_max = simd_oprsz(desc); - const intptr_t mem_max = reg_max >> diffsz; - const int mmu_idx = cpu_mmu_index(env, false); - intptr_t split, reg_off, mem_off; - void *host; + do { + uint64_t pg = *(uint64_t *)((char*)vg + (reg_off >> 3)); + do { + if ((pg >> (reg_off & 63)) & 1) { + if (unlikely(flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches(env_cpu(env), + addr + mem_off, 1 << msz) & + BP_MEM_READ)) { + goto do_fault; + } + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } + host_fn(vd, reg_off, (char*)host + mem_off); + } + reg_off += 1 << esz; + mem_off += 1 << msz; + } while (reg_off <= reg_last && (reg_off & 63)); + } while (reg_off <= reg_last); - /* There will be no fault, so we may modify in advance. */ - memset(vd, 0, reg_max); + /* + * MemSingleNF is allowed to fail for any reason. We have special + * code above to handle the first element crossing a page boundary. + * As an implementation choice, decline to handle a cross-page element + * in any other position. + */ + reg_off = info.reg_off_split; + if (reg_off >= 0) { + goto do_fault; + } - /* Skip to the first active element. */ - reg_off = find_next_active(vg, 0, reg_max, esz); - if (unlikely(reg_off == reg_max)) { - /* The entire predicate was false; no load occurs. */ +second_page: + reg_off = info.reg_off_first[1]; + if (likely(reg_off < 0)) { + /* No active elements on the second page. All done. */ return; } - mem_off = reg_off >> diffsz; /* - * If the address is not in the TLB, we have no way to bring the - * entry into the TLB without also risking a fault. Note that - * the corollary is that we never load from an address not in RAM. - * - * This last is out of spec, in a weird corner case. - * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory - * must not actually hit the bus -- it returns UNKNOWN data instead. - * But if you map non-RAM with Normal memory attributes and do a NF - * load then it should access the bus. (Nobody ought actually do this - * in the real world, obviously.) - * - * Then there are the annoying special cases with watchpoints... - * TODO: Add a form of non-faulting loads using cc->tlb_fill(probe=true). + * MemSingleNF is allowed to fail for any reason. As an implementation + * choice, decline to handle elements on the second page. This should + * be low frequency as the guest walks through memory -- the next + * iteration of the guest's loop should be aligned on the page boundary, + * and then all following iterations will stay aligned. 
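/*
 * Illustrative sketch, not part of this patch: the no-fault (LDNF) policy
 * described above, reduced to a per-element yes/no decision.  Nothing in
 * the no-fault path may raise an exception, so any condition that would
 * need one -- an invalid page, MMIO, a read watchpoint, a failed MTE
 * probe, or (by the implementation choice above) a page-crossing element
 * other than the first active one -- simply ends the load and is reported
 * through the FFR.  The flag bits stand in for the TLB_* flags used above.
 */
#include <stdbool.h>

enum { EX_INVALID = 1, EX_MMIO = 2, EX_WATCHPOINT = 4 };

static bool example_ldnf_elem_ok(int flags, bool watchpoint_hits,
                                 bool mte_probe_ok, bool crosses_page,
                                 bool is_first_active)
{
    if (flags & (EX_INVALID | EX_MMIO)) {
        return false;                 /* would fault or touch the bus */
    }
    if ((flags & EX_WATCHPOINT) && watchpoint_hits) {
        return false;                 /* would raise a debug exception */
    }
    if (!mte_probe_ok) {
        return false;                 /* tag mismatch would fault */
    }
    if (crosses_page && !is_first_active) {
        return false;                 /* declined, as explained above */
    }
    return true;
}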
*/ - host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx); - split = max_for_page(env->uc, addr, mem_off, mem_max); - if (host && split >= (1ULL << msz)) { - mem_off = host_fn(vd, vg, (char *)host - mem_off, mem_off, split); - reg_off = mem_off << diffsz; - } +do_fault: record_fault(env, reg_off, reg_max); } -#define DO_LDFF1_LDNF1_1(PART, ESZ) \ -void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \ - sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ -} \ -void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \ -} - -#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ -void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ -} \ -void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \ -} \ -void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \ - sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ -} \ -void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \ -} - -DO_LDFF1_LDNF1_1(bb, 0) -DO_LDFF1_LDNF1_1(bhu, 1) -DO_LDFF1_LDNF1_1(bhs, 1) -DO_LDFF1_LDNF1_1(bsu, 2) -DO_LDFF1_LDNF1_1(bss, 2) -DO_LDFF1_LDNF1_1(bdu, 3) -DO_LDFF1_LDNF1_1(bds, 3) - -DO_LDFF1_LDNF1_2(hh, 1, 1) -DO_LDFF1_LDNF1_2(hsu, 2, 1) -DO_LDFF1_LDNF1_2(hss, 2, 1) -DO_LDFF1_LDNF1_2(hdu, 3, 1) -DO_LDFF1_LDNF1_2(hds, 3, 1) - -DO_LDFF1_LDNF1_2(ss, 2, 2) -DO_LDFF1_LDNF1_2(sdu, 3, 2) -DO_LDFF1_LDNF1_2(sds, 3, 2) - -DO_LDFF1_LDNF1_2(dd, 3, 3) - -#undef DO_LDFF1_LDNF1_1 -#undef DO_LDFF1_LDNF1_2 - -/* - * Store contiguous data, protected by a governing predicate. - */ +static inline QEMU_ALWAYS_INLINE void +sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, uint32_t desc, + const uintptr_t retaddr, const int esz, const int msz, + const SVEContFault fault, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); -#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \ -{ \ - TLB(env, addr, *(TYPEM *)((char *)vd + H(reg_off)), oi, ra); \ -} + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); -DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu) -DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu) + /* Perform gross MTE suppression early. 
*/ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } -DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu) -DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu) + sve_ldnfff1_r(env, vg, addr, desc, retaddr, mtedesc, esz, msz, fault, + host_fn, tlb_fn); +} + +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ + void HELPER(sve_ldff1##PART##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_FIRST, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_NO, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, \ + FAULT_FIRST, sve_ld1##PART##_host, \ + sve_ld1##PART##_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ + } -DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu) -DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu) +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ + void HELPER(sve_ldff1##PART##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ + } \ + void HELPER(sve_ldff1##PART##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } \ + void HELPER(sve_ldnf1##PART##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + 
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ + } -DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu) +DO_LDFF1_LDNF1_1(bb, MO_8) +DO_LDFF1_LDNF1_1(bhu, MO_16) +DO_LDFF1_LDNF1_1(bhs, MO_16) +DO_LDFF1_LDNF1_1(bsu, MO_32) +DO_LDFF1_LDNF1_1(bss, MO_32) +DO_LDFF1_LDNF1_1(bdu, MO_64) +DO_LDFF1_LDNF1_1(bds, MO_64) -DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu) -DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu) +DO_LDFF1_LDNF1_2(hh, MO_16, MO_16) +DO_LDFF1_LDNF1_2(hsu, MO_32, MO_16) +DO_LDFF1_LDNF1_2(hss, MO_32, MO_16) +DO_LDFF1_LDNF1_2(hdu, MO_64, MO_16) +DO_LDFF1_LDNF1_2(hds, MO_64, MO_16) -DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu) -DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu) +DO_LDFF1_LDNF1_2(ss, MO_32, MO_32) +DO_LDFF1_LDNF1_2(sdu, MO_64, MO_32) +DO_LDFF1_LDNF1_2(sds, MO_64, MO_32) -DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu) +DO_LDFF1_LDNF1_2(dd, MO_64, MO_64) -#undef DO_ST_TLB +#undef DO_LDFF1_LDNF1_1 +#undef DO_LDFF1_LDNF1_2 /* - * Common helpers for all contiguous 1,2,3,4-register predicated stores. + * Common helper for all contiguous 1,2,3,4-register predicated stores. */ -static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) + +static inline QEMU_ALWAYS_INLINE void +sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t retaddr, const int esz, const int msz, const int N, + uint32_t mtedesc, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn, sve_cont_ldst_mte_check_fn *mte_check_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *vd = &env->vfp.zregs[rd]; + const unsigned rd = simd_data(desc); + const intptr_t reg_max = simd_oprsz(desc); + intptr_t reg_off, reg_last, mem_off; + SVEContLdSt info; + void *host; + int i, flags; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, vd, i, addr, oi, ra); + /* Find the active elements. */ + if (!sve_cont_ldst_elements(env, &info, addr, vg, reg_max, esz, N << msz)) { + /* The entire predicate was false; no store occurs. */ + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, retaddr); + + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, + BP_MEM_WRITE, retaddr); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, mtedesc, + retaddr); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. We cannot avoid + * this fault and will leave with the store incomplete. 
+ */ + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; } - i += esize, pg >>= esize; - addr += msize; - } while (i & 15); + } + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + return; +#endif } - clear_helper_retaddr(); -} -static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; + mem_off = info.mem_off_first[0]; + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } } - i += esize, pg >>= esize; - addr += 2 * msize; - } while (i & 15); + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off <= reg_last && (reg_off & 63)); } - clear_helper_retaddr(); -} -static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) -{ - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. 
+ */ + mem_off = info.mem_off_split; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_split; + for (i = 0; i < N; ++i) { + tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off, + addr + mem_off + (i << msz), retaddr); + } + } + + mem_off = info.mem_off_first[1]; + if (unlikely(mem_off >= 0)) { + reg_off = info.reg_off_first[1]; + reg_last = info.reg_off_last[1]; + host = info.page[1].host; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); - tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); - } - i += esize, pg >>= esize; - addr += 3 * msize; - } while (i & 15); + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + for (i = 0; i < N; ++i) { + host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off, + (char*)host + mem_off + (i << msz)); + } + } + reg_off += 1 << esz; + mem_off += N << msz; + } while (reg_off & 63); + } while (reg_off <= reg_last); } - clear_helper_retaddr(); } -static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr, - uint32_t desc, const uintptr_t ra, - const int esize, const int msize, - sve_st1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, + const uintptr_t ra, const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5); - intptr_t i, oprsz = simd_oprsz(desc); - void *d1 = &env->vfp.zregs[rd]; - void *d2 = &env->vfp.zregs[(rd + 1) & 31]; - void *d3 = &env->vfp.zregs[(rd + 2) & 31]; - void *d4 = &env->vfp.zregs[(rd + 3) & 31]; + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); - do { - if (pg & 1) { - tlb_fn(env, d1, i, addr, oi, ra); - tlb_fn(env, d2, i, addr + msize, oi, ra); - tlb_fn(env, d3, i, addr + 2 * msize, oi, ra); - tlb_fn(env, d4, i, addr + 3 * msize, oi, ra); - } - i += esize, pg >>= esize; - addr += 4 * msize; - } while (i & 15); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? 
sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); +} + +#define DO_STN_1(N, NAME, ESZ) \ + void HELPER(sve_st##N##NAME##_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb, NULL); \ + } \ + void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ + } + +#define DO_STN_2(N, NAME, ESZ, MSZ) \ + void HELPER(sve_st##N##NAME##_le_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb, NULL); \ + } \ + void HELPER(sve_st##N##NAME##_be_r)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb, NULL); \ + } \ + void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ + } \ + void HELPER(sve_st##N##NAME##_be_r_mte)(CPUARMState * env, void *vg, \ + target_ulong addr, uint32_t desc) \ + { \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ } - clear_helper_retaddr(); -} - -#define DO_STN_1(N, NAME, ESIZE) \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \ - sve_st1##NAME##_tlb); \ -} - -#define DO_STN_2(N, NAME, ESIZE, MSIZE) \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_le_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ - sve_st1##NAME##_le_tlb); \ -} \ -void QEMU_FLATTEN HELPER(sve_st##N##NAME##_be_r) \ - (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \ -{ \ - sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \ - sve_st1##NAME##_be_tlb); \ -} - -DO_STN_1(1, bb, 1) -DO_STN_1(1, bh, 2) -DO_STN_1(1, bs, 4) -DO_STN_1(1, bd, 8) -DO_STN_1(2, bb, 1) -DO_STN_1(3, bb, 1) -DO_STN_1(4, bb, 1) - -DO_STN_2(1, hh, 2, 2) -DO_STN_2(1, hs, 4, 2) -DO_STN_2(1, hd, 8, 2) -DO_STN_2(2, hh, 2, 2) -DO_STN_2(3, hh, 2, 2) -DO_STN_2(4, hh, 2, 2) - -DO_STN_2(1, ss, 4, 4) -DO_STN_2(1, sd, 8, 4) -DO_STN_2(2, ss, 4, 4) -DO_STN_2(3, ss, 4, 4) -DO_STN_2(4, ss, 4, 4) - -DO_STN_2(1, dd, 8, 8) -DO_STN_2(2, dd, 8, 8) -DO_STN_2(3, dd, 8, 8) -DO_STN_2(4, dd, 8, 8) + +DO_STN_1(1, bb, MO_8) +DO_STN_1(1, bh, MO_16) +DO_STN_1(1, bs, MO_32) +DO_STN_1(1, bd, MO_64) +DO_STN_1(2, bb, MO_8) +DO_STN_1(3, bb, MO_8) +DO_STN_1(4, bb, MO_8) + +DO_STN_2(1, hh, MO_16, MO_16) +DO_STN_2(1, hs, MO_32, MO_16) +DO_STN_2(1, hd, MO_64, MO_16) +DO_STN_2(2, hh, MO_16, MO_16) +DO_STN_2(3, hh, MO_16, MO_16) +DO_STN_2(4, hh, MO_16, MO_16) + +DO_STN_2(1, ss, MO_32, MO_32) +DO_STN_2(1, sd, MO_64, MO_32) +DO_STN_2(2, ss, MO_32, MO_32) +DO_STN_2(3, ss, MO_32, MO_32) +DO_STN_2(4, ss, MO_32, MO_32) + +DO_STN_2(1, dd, MO_64, MO_64) +DO_STN_2(2, dd, MO_64, MO_64) +DO_STN_2(3, dd, MO_64, MO_64) +DO_STN_2(4, dd, MO_64, MO_64) #undef DO_STN_1 #undef DO_STN_2 @@ -4878,497 +5361,578 @@ typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs); 
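/*
 * Illustrative sketch, not part of this patch: the address fields consulted
 * by the "gross MTE suppression" step in the *_r_mte entry points above.
 * Bit 55 selects which TBI/TCMA control applies and the allocation tag
 * sits in bits [59:56]; the tbi_enabled/tcma_enabled inputs stand in for
 * the tbi_check()/tcma_check() results derived from MTEDESC.  When this
 * returns true the callers above clear mtedesc and skip all MTE checks.
 */
#include <stdbool.h>
#include <stdint.h>

static bool example_mte_suppressed(uint64_t addr, bool tbi_enabled,
                                   bool tcma_enabled)
{
    int bit55 = (addr >> 55) & 1;
    int ptr_tag = (addr >> 56) & 0xf;   /* allocation_tag_from_addr() */

    if (!tbi_enabled) {
        /* Without TBI the top byte is address bits; there is no tag. */
        return true;
    }
    if (tcma_enabled && ptr_tag == (bit55 ? 0xf : 0x0)) {
        /* TCMA: tag 0b0000 (bit55 == 0) or 0b1111 (bit55 == 1) is unchecked. */
        return true;
    }
    return false;   /* keep mtedesc and perform per-element MTE checks */
}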
static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs) { - return *(uint32_t *)((char *)reg + H1_4(reg_ofs)); + return *(uint32_t *)((char*)reg + H1_4(reg_ofs)); } static target_ulong off_zss_s(void *reg, intptr_t reg_ofs) { - return *(int32_t *)((char *)reg + H1_4(reg_ofs)); + return *(int32_t *)((char*)reg + H1_4(reg_ofs)); } static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs) { - return (uint32_t)*(uint64_t *)((char *)reg + reg_ofs); + return (uint32_t)*(uint64_t *)((char*)reg + reg_ofs); } static target_ulong off_zss_d(void *reg, intptr_t reg_ofs) { - return (int32_t)*(uint64_t *)((char *)reg + reg_ofs); + return (int32_t)*(uint64_t *)((char*)reg + reg_ofs); } static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) { - return *(uint64_t *)((char *)reg + reg_ofs); + return *(uint64_t *)((char*)reg + reg_ofs); } -static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, + uint32_t desc, uintptr_t retaddr, uint32_t mtedesc, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc); - ARMVectorReg scratch = { 0 }; + uc_engine *uc = env->uc; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + ARMVectorReg scratch; + intptr_t reg_off; + SVEHostPage info, info2; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + memset(&scratch, 0, reg_max); + reg_off = 0; + do { + uint64_t pg = vg[reg_off >> 6]; do { if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i); - tlb_fn(env, &scratch, i, base + (off << scale), oi, ra); + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + target_ulong in_page = -(addr | TARGET_PAGE_MASK); + + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_LOAD, + mmu_idx, retaddr); + + if (likely(in_page >= msize)) { + if (unlikely(info.flags & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, + info.attrs, BP_MEM_READ, retaddr); + } + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } + host_fn(&scratch, reg_off, info.host); + } else { + /* Element crosses the page boundary. */ + sve_probe_page(&info2, false, env, addr + in_page, 0, + MMU_DATA_LOAD, mmu_idx, retaddr); + if (unlikely((info.flags | info2.flags) & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, + info.attrs, BP_MEM_READ, retaddr); + } + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } + tlb_fn(env, &scratch, reg_off, addr, retaddr); + } } - i += 4, pg >>= 4; - } while (i & 15); - } - clear_helper_retaddr(); + reg_off += esize; + pg >>= esize; + } while (reg_off & 63); + } while (reg_off < reg_max); /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(vd, &scratch, oprsz); + memcpy(vd, &scratch, reg_max); } -static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc) / 8; - ARMVectorReg scratch = { 0 }; + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - set_helper_retaddr(ra); - for (i = 0; i < oprsz; i++) { - uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i * 8); - tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra); - } + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ld1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, esize, msize, + off_fn, host_fn, tlb_fn); +} + +#define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \ + void HELPER(sve_ld##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << (MSZ), \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << (MSZ), \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } - clear_helper_retaddr(); - /* Wait until all exceptions have been raised to write back. 
*/ - memcpy(vd, &scratch, oprsz * 8); -} - -#define DO_LD1_ZPZ_S(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_ld1##MEM##_tlb); \ -} - -#define DO_LD1_ZPZ_D(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_ld##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_ld1##MEM##_tlb); \ -} - -DO_LD1_ZPZ_S(bsu, zsu) -DO_LD1_ZPZ_S(bsu, zss) -DO_LD1_ZPZ_D(bdu, zsu) -DO_LD1_ZPZ_D(bdu, zss) -DO_LD1_ZPZ_D(bdu, zd) - -DO_LD1_ZPZ_S(bss, zsu) -DO_LD1_ZPZ_S(bss, zss) -DO_LD1_ZPZ_D(bds, zsu) -DO_LD1_ZPZ_D(bds, zss) -DO_LD1_ZPZ_D(bds, zd) - -DO_LD1_ZPZ_S(hsu_le, zsu) -DO_LD1_ZPZ_S(hsu_le, zss) -DO_LD1_ZPZ_D(hdu_le, zsu) -DO_LD1_ZPZ_D(hdu_le, zss) -DO_LD1_ZPZ_D(hdu_le, zd) - -DO_LD1_ZPZ_S(hsu_be, zsu) -DO_LD1_ZPZ_S(hsu_be, zss) -DO_LD1_ZPZ_D(hdu_be, zsu) -DO_LD1_ZPZ_D(hdu_be, zss) -DO_LD1_ZPZ_D(hdu_be, zd) - -DO_LD1_ZPZ_S(hss_le, zsu) -DO_LD1_ZPZ_S(hss_le, zss) -DO_LD1_ZPZ_D(hds_le, zsu) -DO_LD1_ZPZ_D(hds_le, zss) -DO_LD1_ZPZ_D(hds_le, zd) - -DO_LD1_ZPZ_S(hss_be, zsu) -DO_LD1_ZPZ_S(hss_be, zss) -DO_LD1_ZPZ_D(hds_be, zsu) -DO_LD1_ZPZ_D(hds_be, zss) -DO_LD1_ZPZ_D(hds_be, zd) - -DO_LD1_ZPZ_S(ss_le, zsu) -DO_LD1_ZPZ_S(ss_le, zss) -DO_LD1_ZPZ_D(sdu_le, zsu) -DO_LD1_ZPZ_D(sdu_le, zss) -DO_LD1_ZPZ_D(sdu_le, zd) - -DO_LD1_ZPZ_S(ss_be, zsu) -DO_LD1_ZPZ_S(ss_be, zss) -DO_LD1_ZPZ_D(sdu_be, zsu) -DO_LD1_ZPZ_D(sdu_be, zss) -DO_LD1_ZPZ_D(sdu_be, zd) - -DO_LD1_ZPZ_D(sds_le, zsu) -DO_LD1_ZPZ_D(sds_le, zss) -DO_LD1_ZPZ_D(sds_le, zd) - -DO_LD1_ZPZ_D(sds_be, zsu) -DO_LD1_ZPZ_D(sds_be, zss) -DO_LD1_ZPZ_D(sds_be, zd) - -DO_LD1_ZPZ_D(dd_le, zsu) -DO_LD1_ZPZ_D(dd_le, zss) -DO_LD1_ZPZ_D(dd_le, zd) - -DO_LD1_ZPZ_D(dd_be, zsu) -DO_LD1_ZPZ_D(dd_be, zss) -DO_LD1_ZPZ_D(dd_be, zd) +#define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \ + void HELPER(sve_ld##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << (MSZ), \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << (MSZ), \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } + +DO_LD1_ZPZ_S(bsu, zsu, MO_8) +DO_LD1_ZPZ_S(bsu, zss, MO_8) +DO_LD1_ZPZ_D(bdu, zsu, MO_8) +DO_LD1_ZPZ_D(bdu, zss, MO_8) +DO_LD1_ZPZ_D(bdu, zd, MO_8) + +DO_LD1_ZPZ_S(bss, zsu, MO_8) +DO_LD1_ZPZ_S(bss, zss, MO_8) +DO_LD1_ZPZ_D(bds, zsu, MO_8) +DO_LD1_ZPZ_D(bds, zss, MO_8) +DO_LD1_ZPZ_D(bds, zd, MO_8) + +DO_LD1_ZPZ_S(hsu_le, zsu, MO_16) +DO_LD1_ZPZ_S(hsu_le, zss, MO_16) +DO_LD1_ZPZ_D(hdu_le, zsu, MO_16) +DO_LD1_ZPZ_D(hdu_le, zss, MO_16) +DO_LD1_ZPZ_D(hdu_le, zd, MO_16) + +DO_LD1_ZPZ_S(hsu_be, zsu, MO_16) +DO_LD1_ZPZ_S(hsu_be, zss, MO_16) +DO_LD1_ZPZ_D(hdu_be, zsu, MO_16) +DO_LD1_ZPZ_D(hdu_be, zss, MO_16) +DO_LD1_ZPZ_D(hdu_be, zd, MO_16) + +DO_LD1_ZPZ_S(hss_le, zsu, MO_16) +DO_LD1_ZPZ_S(hss_le, zss, MO_16) +DO_LD1_ZPZ_D(hds_le, zsu, MO_16) +DO_LD1_ZPZ_D(hds_le, zss, MO_16) +DO_LD1_ZPZ_D(hds_le, zd, MO_16) + +DO_LD1_ZPZ_S(hss_be, zsu, MO_16) +DO_LD1_ZPZ_S(hss_be, zss, MO_16) +DO_LD1_ZPZ_D(hds_be, zsu, MO_16) +DO_LD1_ZPZ_D(hds_be, zss, MO_16) +DO_LD1_ZPZ_D(hds_be, zd, MO_16) + 
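
To make the macro plumbing above concrete, here is roughly what one of the S-form instantiations expands to. This is an illustrative hand-expansion, not code taken from the patch: for MEM=bsu, OFS=zsu, MSZ=MO_8 the generated names are sve_ldbsu_zsu and sve_ldbsu_zsu_mte (HELPER() adds the usual helper_ prefix), the vector element size is 4 bytes and the memory element size is 1 << MO_8 == 1 byte.

void HELPER(sve_ldbsu_zsu)(CPUARMState *env, void *vd, void *vg, void *vm,
                           target_ulong base, uint32_t desc)
{
    /* Non-MTE form: mtedesc == 0, esize 4, msize 1. */
    sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << (MO_8),
              off_zsu_s, sve_ld1bsu_host, sve_ld1bsu_tlb);
}

void HELPER(sve_ldbsu_zsu_mte)(CPUARMState *env, void *vd, void *vg, void *vm,
                               target_ulong base, uint32_t desc)
{
    /* MTE form: sve_ld1_z_mte() splits the MTE descriptor out of desc
     * before delegating to sve_ld1_z(). */
    sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << (MO_8),
                  off_zsu_s, sve_ld1bsu_host, sve_ld1bsu_tlb);
}

The D-form macro differs only in using esize 8 and the off_*_d offset extractors.
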
+DO_LD1_ZPZ_S(ss_le, zsu, MO_32) +DO_LD1_ZPZ_S(ss_le, zss, MO_32) +DO_LD1_ZPZ_D(sdu_le, zsu, MO_32) +DO_LD1_ZPZ_D(sdu_le, zss, MO_32) +DO_LD1_ZPZ_D(sdu_le, zd, MO_32) + +DO_LD1_ZPZ_S(ss_be, zsu, MO_32) +DO_LD1_ZPZ_S(ss_be, zss, MO_32) +DO_LD1_ZPZ_D(sdu_be, zsu, MO_32) +DO_LD1_ZPZ_D(sdu_be, zss, MO_32) +DO_LD1_ZPZ_D(sdu_be, zd, MO_32) + +DO_LD1_ZPZ_D(sds_le, zsu, MO_32) +DO_LD1_ZPZ_D(sds_le, zss, MO_32) +DO_LD1_ZPZ_D(sds_le, zd, MO_32) + +DO_LD1_ZPZ_D(sds_be, zsu, MO_32) +DO_LD1_ZPZ_D(sds_be, zss, MO_32) +DO_LD1_ZPZ_D(sds_be, zd, MO_32) + +DO_LD1_ZPZ_D(dd_le, zsu, MO_64) +DO_LD1_ZPZ_D(dd_le, zss, MO_64) +DO_LD1_ZPZ_D(dd_le, zd, MO_64) + +DO_LD1_ZPZ_D(dd_be, zsu, MO_64) +DO_LD1_ZPZ_D(dd_be, zss, MO_64) +DO_LD1_ZPZ_D(dd_be, zd, MO_64) #undef DO_LD1_ZPZ_S #undef DO_LD1_ZPZ_D /* First fault loads with a vector index. */ -/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting. - * The controlling predicate is known to be true. Return true if the - * load was successful. - */ -typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off, - target_ulong vaddr, int mmu_idx); - -#ifdef _MSC_VER -#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ -static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, int mmu_idx) \ -{ \ - struct uc_struct *uc = env->uc; \ - target_ulong next_page = 0ULL - (addr | TARGET_PAGE_MASK); \ - if (likely(next_page - addr >= sizeof(TYPEM))) { \ - void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ - if (likely(host)) { \ - TYPEM val = HOST(host); \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ - return true; \ - } \ - } \ - return false; \ -} -#else -#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \ -static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, int mmu_idx) \ -{ \ - struct uc_struct *uc = env->uc; \ - target_ulong next_page = -(addr | TARGET_PAGE_MASK); \ - if (likely(next_page - addr >= sizeof(TYPEM))) { \ - void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \ - if (likely(host)) { \ - TYPEM val = HOST(host); \ - *(TYPEE *)((char *)vd + H(reg_off)) = val; \ - return true; \ - } \ - } \ - return false; \ -} -#endif - -DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p) -DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p) -DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p) -DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p) - -DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p) -DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p) -DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p) -DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p) -DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p) -DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p) -DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p) -DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p) - -DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p) -DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p) -DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p) -DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p) -DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p) -DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p) - -DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p) -DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p) - /* - * Common helper for all gather first-faulting loads. + * Common helpers for all gather first-faulting loads. 
*/ -static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, - sve_ld1_nf_fn *nonfault_fn) + +static inline QEMU_ALWAYS_INLINE void +sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + uint32_t mtedesc, const int esz, const int msz, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t reg_off, reg_max = simd_oprsz(desc); - target_ulong addr; + uc_engine *uc = env->uc; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + const int esize = 1 << esz; + const int msize = 1 << msz; + intptr_t reg_off; + SVEHostPage info; + target_ulong addr, in_page; /* Skip to the first true predicate. */ - reg_off = find_next_active(vg, 0, reg_max, MO_32); - if (likely(reg_off < reg_max)) { - /* Perform one normal read, which will fault or not. */ - set_helper_retaddr(ra); - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - tlb_fn(env, vd, reg_off, addr, oi, ra); + reg_off = find_next_active(vg, 0, reg_max, esz); + if (unlikely(reg_off >= reg_max)) { + /* The entire predicate was false; no load occurs. */ + memset(vd, 0, reg_max); + return; + } - /* The rest of the reads will be non-faulting. */ - clear_helper_retaddr(); + /* + * Probe the first element, allowing faults. + */ + addr = base + (off_fn(vm, reg_off) << scale); + if (mtedesc) { + mte_check1(env, mtedesc, addr, retaddr); } + tlb_fn(env, vd, reg_off, addr, retaddr); - /* After any fault, zero the leading predicated false elements. */ + /* After any fault, zero the other elements. */ swap_memzero(vd, reg_off); + reg_off += esize; + swap_memzero((char*)vd + reg_off, reg_max - reg_off); - while (likely((reg_off += 4) < reg_max)) { - uint64_t pg = *(uint64_t *)((char *)vg + (reg_off >> 6) * 8); - if (likely((pg >> (reg_off & 63)) & 1)) { - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { - record_fault(env, reg_off, reg_max); - break; + /* + * Probe the remaining elements, not allowing faults. + */ + while (reg_off < reg_max) { + uint64_t pg = vg[reg_off >> 6]; + do { + if (likely((pg >> (reg_off & 63)) & 1)) { + addr = base + (off_fn(vm, reg_off) << scale); + in_page = -(addr | TARGET_PAGE_MASK); + + if (unlikely(in_page < msize)) { + /* Stop if the element crosses a page boundary. 
*/ + goto fault; + } + + sve_probe_page(&info, true, env, addr, 0, MMU_DATA_LOAD, + mmu_idx, retaddr); + if (unlikely(info.flags & (TLB_INVALID_MASK | TLB_MMIO))) { + goto fault; + } + if (unlikely(info.flags & TLB_WATCHPOINT) && + (cpu_watchpoint_address_matches(env_cpu(env), addr, msize) & + BP_MEM_READ)) { + goto fault; + } + if (mtedesc && arm_tlb_mte_tagged(&info.attrs) && + !mte_probe1(env, mtedesc, addr)) { + goto fault; + } + + host_fn(vd, reg_off, info.host); } - } else { - *(uint32_t *)((char *)vd + H1_4(reg_off)) = 0; - } + reg_off += esize; + } while (reg_off & 63); } + return; + +fault: + record_fault(env, reg_off, reg_max); } -static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn, - sve_ld1_nf_fn *nonfault_fn) +static inline QEMU_ALWAYS_INLINE void +sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + const int esz, const int msz, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int mmu_idx = get_mmuidx(oi); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t reg_off, reg_max = simd_oprsz(desc); - target_ulong addr; - - /* Skip to the first true predicate. */ - reg_off = find_next_active(vg, 0, reg_max, MO_64); - if (likely(reg_off < reg_max)) { - /* Perform one normal read, which will fault or not. */ - set_helper_retaddr(ra); - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - tlb_fn(env, vd, reg_off, addr, oi, ra); + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - /* The rest of the reads will be non-faulting. */ - clear_helper_retaddr(); + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ldff1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, esz, msz, off_fn, + host_fn, tlb_fn); +} + +#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ + void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ldff##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, \ + sve_ld1##MEM##_tlb); \ } - /* After any fault, zero the leading predicated false elements. 
*/ - swap_memzero(vd, reg_off); - - while (likely((reg_off += 8) < reg_max)) { - uint8_t pg = *(uint8_t *)((char *)vg + H1(reg_off >> 3)); - if (likely(pg & 1)) { - addr = off_fn(vm, reg_off); - addr = base + (addr << scale); - if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) { - record_fault(env, reg_off, reg_max); - break; - } - } else { - *(uint64_t *)((char *)vd + reg_off) = 0; - } +#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ + void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ + } \ + void HELPER(sve_ldff##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, \ + sve_ld1##MEM##_tlb); \ } -} -#define DO_LDFF1_ZPZ_S(MEM, OFS) \ -void HELPER(sve_ldff##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ -} - -#define DO_LDFF1_ZPZ_D(MEM, OFS) \ -void HELPER(sve_ldff##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \ -} - -DO_LDFF1_ZPZ_S(bsu, zsu) -DO_LDFF1_ZPZ_S(bsu, zss) -DO_LDFF1_ZPZ_D(bdu, zsu) -DO_LDFF1_ZPZ_D(bdu, zss) -DO_LDFF1_ZPZ_D(bdu, zd) - -DO_LDFF1_ZPZ_S(bss, zsu) -DO_LDFF1_ZPZ_S(bss, zss) -DO_LDFF1_ZPZ_D(bds, zsu) -DO_LDFF1_ZPZ_D(bds, zss) -DO_LDFF1_ZPZ_D(bds, zd) - -DO_LDFF1_ZPZ_S(hsu_le, zsu) -DO_LDFF1_ZPZ_S(hsu_le, zss) -DO_LDFF1_ZPZ_D(hdu_le, zsu) -DO_LDFF1_ZPZ_D(hdu_le, zss) -DO_LDFF1_ZPZ_D(hdu_le, zd) - -DO_LDFF1_ZPZ_S(hsu_be, zsu) -DO_LDFF1_ZPZ_S(hsu_be, zss) -DO_LDFF1_ZPZ_D(hdu_be, zsu) -DO_LDFF1_ZPZ_D(hdu_be, zss) -DO_LDFF1_ZPZ_D(hdu_be, zd) - -DO_LDFF1_ZPZ_S(hss_le, zsu) -DO_LDFF1_ZPZ_S(hss_le, zss) -DO_LDFF1_ZPZ_D(hds_le, zsu) -DO_LDFF1_ZPZ_D(hds_le, zss) -DO_LDFF1_ZPZ_D(hds_le, zd) - -DO_LDFF1_ZPZ_S(hss_be, zsu) -DO_LDFF1_ZPZ_S(hss_be, zss) -DO_LDFF1_ZPZ_D(hds_be, zsu) -DO_LDFF1_ZPZ_D(hds_be, zss) -DO_LDFF1_ZPZ_D(hds_be, zd) - -DO_LDFF1_ZPZ_S(ss_le, zsu) -DO_LDFF1_ZPZ_S(ss_le, zss) -DO_LDFF1_ZPZ_D(sdu_le, zsu) -DO_LDFF1_ZPZ_D(sdu_le, zss) -DO_LDFF1_ZPZ_D(sdu_le, zd) - -DO_LDFF1_ZPZ_S(ss_be, zsu) -DO_LDFF1_ZPZ_S(ss_be, zss) -DO_LDFF1_ZPZ_D(sdu_be, zsu) -DO_LDFF1_ZPZ_D(sdu_be, zss) -DO_LDFF1_ZPZ_D(sdu_be, zd) - -DO_LDFF1_ZPZ_D(sds_le, zsu) -DO_LDFF1_ZPZ_D(sds_le, zss) -DO_LDFF1_ZPZ_D(sds_le, zd) - -DO_LDFF1_ZPZ_D(sds_be, zsu) -DO_LDFF1_ZPZ_D(sds_be, zss) -DO_LDFF1_ZPZ_D(sds_be, zd) - -DO_LDFF1_ZPZ_D(dd_le, zsu) -DO_LDFF1_ZPZ_D(dd_le, zss) -DO_LDFF1_ZPZ_D(dd_le, zd) - -DO_LDFF1_ZPZ_D(dd_be, zsu) -DO_LDFF1_ZPZ_D(dd_be, zss) -DO_LDFF1_ZPZ_D(dd_be, zd) +DO_LDFF1_ZPZ_S(bsu, zsu, MO_8) +DO_LDFF1_ZPZ_S(bsu, zss, MO_8) +DO_LDFF1_ZPZ_D(bdu, zsu, MO_8) +DO_LDFF1_ZPZ_D(bdu, zss, MO_8) +DO_LDFF1_ZPZ_D(bdu, zd, MO_8) + +DO_LDFF1_ZPZ_S(bss, zsu, MO_8) +DO_LDFF1_ZPZ_S(bss, zss, MO_8) +DO_LDFF1_ZPZ_D(bds, zsu, MO_8) +DO_LDFF1_ZPZ_D(bds, zss, MO_8) +DO_LDFF1_ZPZ_D(bds, zd, MO_8) + +DO_LDFF1_ZPZ_S(hsu_le, zsu, MO_16) +DO_LDFF1_ZPZ_S(hsu_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zsu, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_le, zd, MO_16) + 
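
The per-element loop shared by these gather helpers is easier to see in isolation. Below is a minimal, self-contained sketch of that predicate walk, under the same convention as the helpers above: one predicate bit governs each byte of the vector, and vg[] is read one 64-bit word per 64 bytes of vector. walk_active_elements and per_element are illustrative names only, not part of the patch.

#include <stdint.h>

/* Sketch of the predicate walk: reg_max is the vector length in bytes,
 * esize the element size in bytes, and element reg_off is active when
 * bit (reg_off % 64) of vg[reg_off / 64] is set. */
static void walk_active_elements(const uint64_t *vg, intptr_t reg_max,
                                 int esize,
                                 void (*per_element)(intptr_t reg_off))
{
    intptr_t reg_off = 0;
    do {
        uint64_t pg = vg[reg_off >> 6];      /* predicate bits for 64 bytes */
        do {
            if ((pg >> (reg_off & 63)) & 1) {
                per_element(reg_off);        /* load/store one element */
            }
            reg_off += esize;
        } while (reg_off & 63);              /* stay within this word */
    } while (reg_off < reg_max);
}

In the real helpers the per-element body computes base + (off_fn(vm, reg_off) << scale), probes the page (allowing or disallowing faults as appropriate), performs any watchpoint and MTE checks, and only then touches host memory.
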
+DO_LDFF1_ZPZ_S(hsu_be, zsu, MO_16) +DO_LDFF1_ZPZ_S(hsu_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zsu, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hdu_be, zd, MO_16) + +DO_LDFF1_ZPZ_S(hss_le, zsu, MO_16) +DO_LDFF1_ZPZ_S(hss_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zsu, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_le, zd, MO_16) + +DO_LDFF1_ZPZ_S(hss_be, zsu, MO_16) +DO_LDFF1_ZPZ_S(hss_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zsu, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zss, MO_16) +DO_LDFF1_ZPZ_D(hds_be, zd, MO_16) + +DO_LDFF1_ZPZ_S(ss_le, zsu, MO_32) +DO_LDFF1_ZPZ_S(ss_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zsu, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_le, zd, MO_32) + +DO_LDFF1_ZPZ_S(ss_be, zsu, MO_32) +DO_LDFF1_ZPZ_S(ss_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zsu, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sdu_be, zd, MO_32) + +DO_LDFF1_ZPZ_D(sds_le, zsu, MO_32) +DO_LDFF1_ZPZ_D(sds_le, zss, MO_32) +DO_LDFF1_ZPZ_D(sds_le, zd, MO_32) + +DO_LDFF1_ZPZ_D(sds_be, zsu, MO_32) +DO_LDFF1_ZPZ_D(sds_be, zss, MO_32) +DO_LDFF1_ZPZ_D(sds_be, zd, MO_32) + +DO_LDFF1_ZPZ_D(dd_le, zsu, MO_64) +DO_LDFF1_ZPZ_D(dd_le, zss, MO_64) +DO_LDFF1_ZPZ_D(dd_le, zd, MO_64) + +DO_LDFF1_ZPZ_D(dd_be, zsu, MO_64) +DO_LDFF1_ZPZ_D(dd_be, zss, MO_64) +DO_LDFF1_ZPZ_D(dd_be, zd, MO_64) /* Stores with a vector index. */ -static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, + uint32_t desc, uintptr_t retaddr, uint32_t mtedesc, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc); + uc_engine *uc = env->uc; + const int mmu_idx = cpu_mmu_index(env, false); + const intptr_t reg_max = simd_oprsz(desc); + const int scale = simd_data(desc); + void *host[ARM_MAX_VQ * 4]; + intptr_t reg_off, i; + SVEHostPage info, info2; - set_helper_retaddr(ra); - for (i = 0; i < oprsz; ) { - uint16_t pg = *(uint16_t *)((char *)vg + H1_2(i >> 3)); + /* + * Probe all of the elements for host addresses and flags. + */ + i = reg_off = 0; + do { + uint64_t pg = vg[reg_off >> 6]; do { - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i); - tlb_fn(env, vd, i, base + (off << scale), oi, ra); + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + target_ulong in_page = -(addr | TARGET_PAGE_MASK); + + host[i] = NULL; + if (likely((pg >> (reg_off & 63)) & 1)) { + if (likely(in_page >= msize)) { + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE, + mmu_idx, retaddr); + host[i] = info.host; + } else { + /* + * Element crosses the page boundary. + * Probe both pages, but do not record the host address, + * so that we use the slow path. 
+ */ + sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE, + mmu_idx, retaddr); + sve_probe_page(&info2, false, env, addr + in_page, 0, + MMU_DATA_STORE, mmu_idx, retaddr); + info.flags |= info2.flags; + } + + if (unlikely(info.flags & TLB_WATCHPOINT)) { + cpu_check_watchpoint(env_cpu(env), addr, msize, info.attrs, + BP_MEM_WRITE, retaddr); + } + + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } } - i += 4, pg >>= 4; - } while (i & 15); - } - clear_helper_retaddr(); + i += 1; + reg_off += esize; + } while (reg_off & 63); + } while (reg_off < reg_max); + + /* + * Now that we have recognized all exceptions except SyncExternal + * (from TLB_MMIO), which we cannot avoid, perform all of the stores. + * + * Note for the common case of an element in RAM, not crossing a page + * boundary, we have stored the host address in host[]. This doubles + * as a first-level check against the predicate, since only enabled + * elements have non-null host addresses. + */ + i = reg_off = 0; + do { + void *h = host[i]; + if (likely(h != NULL)) { + host_fn(vd, reg_off, h); + } else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) { + target_ulong addr = base + (off_fn(vm, reg_off) << scale); + tlb_fn(env, vd, reg_off, addr, retaddr); + } + i += 1; + reg_off += esize; + } while (reg_off < reg_max); } -static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, - target_ulong base, uint32_t desc, uintptr_t ra, - zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +static inline QEMU_ALWAYS_INLINE void +sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, int esize, + int msize, zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) { - const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT); - const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); - intptr_t i, oprsz = simd_oprsz(desc) / 8; + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); - set_helper_retaddr(ra); - for (i = 0; i < oprsz; i++) { - uint8_t pg = *(uint8_t *)((char *)vg + H1(i)); - if (likely(pg & 1)) { - target_ulong off = off_fn(vm, i * 8); - tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra); - } + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). 
+ */ + sve_st1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, esize, msize, + off_fn, host_fn, tlb_fn); +} + +#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ + void HELPER(sve_st##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ + } \ + void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } - clear_helper_retaddr(); -} - -#define DO_ST1_ZPZ_S(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_s, sve_st1##MEM##_tlb); \ -} - -#define DO_ST1_ZPZ_D(MEM, OFS) \ -void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ - (CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ - off_##OFS##_d, sve_st1##MEM##_tlb); \ -} - -DO_ST1_ZPZ_S(bs, zsu) -DO_ST1_ZPZ_S(hs_le, zsu) -DO_ST1_ZPZ_S(hs_be, zsu) -DO_ST1_ZPZ_S(ss_le, zsu) -DO_ST1_ZPZ_S(ss_be, zsu) - -DO_ST1_ZPZ_S(bs, zss) -DO_ST1_ZPZ_S(hs_le, zss) -DO_ST1_ZPZ_S(hs_be, zss) -DO_ST1_ZPZ_S(ss_le, zss) -DO_ST1_ZPZ_S(ss_be, zss) - -DO_ST1_ZPZ_D(bd, zsu) -DO_ST1_ZPZ_D(hd_le, zsu) -DO_ST1_ZPZ_D(hd_be, zsu) -DO_ST1_ZPZ_D(sd_le, zsu) -DO_ST1_ZPZ_D(sd_be, zsu) -DO_ST1_ZPZ_D(dd_le, zsu) -DO_ST1_ZPZ_D(dd_be, zsu) - -DO_ST1_ZPZ_D(bd, zss) -DO_ST1_ZPZ_D(hd_le, zss) -DO_ST1_ZPZ_D(hd_be, zss) -DO_ST1_ZPZ_D(sd_le, zss) -DO_ST1_ZPZ_D(sd_be, zss) -DO_ST1_ZPZ_D(dd_le, zss) -DO_ST1_ZPZ_D(dd_be, zss) - -DO_ST1_ZPZ_D(bd, zd) -DO_ST1_ZPZ_D(hd_le, zd) -DO_ST1_ZPZ_D(hd_be, zd) -DO_ST1_ZPZ_D(sd_le, zd) -DO_ST1_ZPZ_D(sd_be, zd) -DO_ST1_ZPZ_D(dd_le, zd) -DO_ST1_ZPZ_D(dd_be, zd) + +#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ + void HELPER(sve_st##MEM##_##OFS)(CPUARMState * env, void *vd, void *vg, \ + void *vm, target_ulong base, \ + uint32_t desc) \ + { \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ + } \ + void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState * env, void *vd, \ + void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ + { \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ + } + +DO_ST1_ZPZ_S(bs, zsu, MO_8) +DO_ST1_ZPZ_S(hs_le, zsu, MO_16) +DO_ST1_ZPZ_S(hs_be, zsu, MO_16) +DO_ST1_ZPZ_S(ss_le, zsu, MO_32) +DO_ST1_ZPZ_S(ss_be, zsu, MO_32) + +DO_ST1_ZPZ_S(bs, zss, MO_8) +DO_ST1_ZPZ_S(hs_le, zss, MO_16) +DO_ST1_ZPZ_S(hs_be, zss, MO_16) +DO_ST1_ZPZ_S(ss_le, zss, MO_32) +DO_ST1_ZPZ_S(ss_be, zss, MO_32) + +DO_ST1_ZPZ_D(bd, zsu, MO_8) +DO_ST1_ZPZ_D(hd_le, zsu, MO_16) +DO_ST1_ZPZ_D(hd_be, zsu, MO_16) +DO_ST1_ZPZ_D(sd_le, zsu, MO_32) +DO_ST1_ZPZ_D(sd_be, zsu, MO_32) +DO_ST1_ZPZ_D(dd_le, zsu, MO_64) +DO_ST1_ZPZ_D(dd_be, zsu, MO_64) + +DO_ST1_ZPZ_D(bd, zss, MO_8) +DO_ST1_ZPZ_D(hd_le, zss, MO_16) +DO_ST1_ZPZ_D(hd_be, zss, MO_16) +DO_ST1_ZPZ_D(sd_le, zss, MO_32) +DO_ST1_ZPZ_D(sd_be, zss, MO_32) +DO_ST1_ZPZ_D(dd_le, zss, MO_64) +DO_ST1_ZPZ_D(dd_be, zss, MO_64) + +DO_ST1_ZPZ_D(bd, zd, MO_8) +DO_ST1_ZPZ_D(hd_le, zd, MO_16) +DO_ST1_ZPZ_D(hd_be, zd, MO_16) +DO_ST1_ZPZ_D(sd_le, zd, 
MO_32) +DO_ST1_ZPZ_D(sd_be, zd, MO_32) +DO_ST1_ZPZ_D(dd_le, zd, MO_64) +DO_ST1_ZPZ_D(dd_be, zd, MO_64) #undef DO_ST1_ZPZ_S #undef DO_ST1_ZPZ_D diff --git a/qemu/target/arm/tlb_helper.c b/qemu/target/arm/tlb_helper.c index e19d6c17a3..c3335f75ac 100644 --- a/qemu/target/arm/tlb_helper.c +++ b/qemu/target/arm/tlb_helper.c @@ -31,7 +31,7 @@ static inline uint32_t merge_syn_data_abort(uint32_t template_syn, * ISV field. */ if (!(template_syn & ARM_EL_ISV) || target_el != 2 || s1ptw) { - syn = syn_data_abort_no_iss(same_el, + syn = syn_data_abort_no_iss(same_el, 0, ea, 0, s1ptw, is_write, fsc); } else { /* @@ -154,6 +154,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, int prot, ret; MemTxAttrs attrs = { 0 }; ARMMMUFaultInfo fi = { 0 }; + ARMCacheAttrs cacheattrs = {}; /* * Walk the page table and (if the mapping exists) add the page @@ -163,7 +164,8 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, */ ret = get_phys_addr(&cpu->env, address, access_type, core_to_arm_mmu_idx(&cpu->env, mmu_idx), - &phys_addr, &attrs, &prot, &page_size, &fi, NULL); + &phys_addr, &attrs, &prot, &page_size, + &fi, &cacheattrs); if (likely(!ret)) { /* * Map a single [sub]page. Regions smaller than our declared diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 922976536e..9d8cc18836 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -38,11 +38,9 @@ #include "kvm-consts.h" static const char *regnames[] = { - "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", - "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", - "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" -}; + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", + "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", + "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"}; enum a64_shift_type { A64_SHIFT_TYPE_LSL = 0, @@ -62,40 +60,23 @@ typedef struct AArch64DecodeTable { AArch64DecodeFn *disas_fn; } AArch64DecodeTable; -/* Function prototype for gen_ functions for calling Neon helpers */ -typedef void NeonGenOneOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32); -typedef void NeonGenTwoOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32); -typedef void NeonGenTwoOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); -typedef void NeonGenTwo64OpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64); -typedef void NeonGenTwo64OpEnvFn(TCGContext *, TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); -typedef void NeonGenNarrowFn(TCGContext *, TCGv_i32, TCGv_i64); -typedef void NeonGenNarrowEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i64); -typedef void NeonGenWidenFn(TCGContext *, TCGv_i64, TCGv_i32); -typedef void NeonGenTwoSingleOPFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); -typedef void NeonGenTwoDoubleOPFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); -typedef void NeonGenOneOpFn(TCGContext *, TCGv_i64, TCGv_i64); -typedef void CryptoTwoOpFn(TCGContext *, TCGv_ptr, TCGv_ptr); -typedef void CryptoThreeOpIntFn(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_i32); -typedef void CryptoThreeOpFn(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv_ptr); -typedef void AtomicThreeOpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); - /* initialize TCG globals. 
*/ void a64_translate_init(struct uc_struct *uc) { int i; TCGContext *tcg_ctx = uc->tcg_ctx; - tcg_ctx->cpu_pc_arm64 = tcg_global_mem_new_i64(tcg_ctx, tcg_ctx->cpu_env, - offsetof(CPUARMState, pc), - "pc"); + tcg_ctx->cpu_pc_arm64 = tcg_global_mem_new_i64( + tcg_ctx, tcg_ctx->cpu_env, offsetof(CPUARMState, pc), "pc"); for (i = 0; i < 32; i++) { - tcg_ctx->cpu_X[i] = tcg_global_mem_new_i64(tcg_ctx, tcg_ctx->cpu_env, - offsetof(CPUARMState, xregs[i]), - regnames[i]); + tcg_ctx->cpu_X[i] = tcg_global_mem_new_i64( + tcg_ctx, tcg_ctx->cpu_env, offsetof(CPUARMState, xregs[i]), + regnames[i]); } - tcg_ctx->cpu_exclusive_high = tcg_global_mem_new_i64(tcg_ctx, tcg_ctx->cpu_env, - offsetof(CPUARMState, exclusive_high), "exclusive_high"); + tcg_ctx->cpu_exclusive_high = tcg_global_mem_new_i64( + tcg_ctx, tcg_ctx->cpu_env, offsetof(CPUARMState, exclusive_high), + "exclusive_high"); } /* @@ -140,7 +121,8 @@ static void reset_btype(DisasContext *s) TCGContext *tcg_ctx = s->uc->tcg_ctx; if (s->btype != 0) { TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); - tcg_gen_st_i32(tcg_ctx, zero, tcg_ctx->cpu_env, offsetof(CPUARMState, btype)); + tcg_gen_st_i32(tcg_ctx, zero, tcg_ctx->cpu_env, + offsetof(CPUARMState, btype)); tcg_temp_free_i32(tcg_ctx, zero); s->btype = 0; } @@ -155,7 +137,8 @@ static void set_btype(DisasContext *s, int val) tcg_debug_assert(val >= 1 && val <= 3); tcg_val = tcg_const_i32(tcg_ctx, val); - tcg_gen_st_i32(tcg_ctx, tcg_val, tcg_ctx->cpu_env, offsetof(CPUARMState, btype)); + tcg_gen_st_i32(tcg_ctx, tcg_val, tcg_ctx->cpu_env, + offsetof(CPUARMState, btype)); tcg_temp_free_i32(tcg_ctx, tcg_val); s->btype = -1; } @@ -178,8 +161,8 @@ void gen_a64_set_pc_im(TCGContext *tcg_ctx, uint64_t val) * * Here We have concatenated TBI{1,0} into tbi. */ -static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, - TCGv_i64 src, int tbi) +static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, TCGv_i64 src, + int tbi) { TCGContext *tcg_ctx = s->uc->tcg_ctx; if (tbi == 0) { @@ -217,25 +200,118 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) } /* - * Return a "clean" address for ADDR according to TBID. - * This is always a fresh temporary, as we need to be able to - * increment this independently of a dirty write-back address. + * Handle MTE and/or TBI. + * + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode we do not have a TLB with which to implement this, so we must + * remove the top byte now. + * + * Always return a fresh temporary that we can increment independently + * of the write-back address. */ -static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 clean = new_tmp_a64(s); - /* - * In order to get the correct value in the FAR_ELx register, - * we must present the memory subsystem with the "dirty" address - * including the TBI. In system mode we can make this work via - * the TLB, dropping the TBI during translation. But for user-only - * mode we don't have that option, and must remove the top byte now. - */ tcg_gen_mov_i64(tcg_ctx, clean, addr); return clean; } +/* Insert a zero tag into src, with the result at dst. 
*/ +static void gen_address_with_allocation_tag0(TCGContext *tcg_ctx, TCGv_i64 dst, + TCGv_i64 src) +{ + tcg_gen_andi_i64(tcg_ctx, dst, src, ~MAKE_64BIT_MASK(56, 4)); +} + +static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, MMUAccessType acc, + int log2_size) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i32 t_acc = tcg_const_i32(tcg_ctx, acc); + TCGv_i32 t_idx = tcg_const_i32(tcg_ctx, get_mem_index(s)); + TCGv_i32 t_size = tcg_const_i32(tcg_ctx, 1 << log2_size); + + glue(gen_helper_probe_access, UNICORN_ARCH_POSTFIX)(tcg_ctx, tcg_ctx->cpu_env, ptr, t_acc, t_idx, + t_size); + tcg_temp_free_i32(tcg_ctx, t_acc); + tcg_temp_free_i32(tcg_ctx, t_idx); + tcg_temp_free_i32(tcg_ctx, t_size); +} + +/* + * For MTE, check a single logical or atomic access. This probes a single + * address, the exact one specified. The size and alignment of the access + * is not relevant to MTE, per se, but watchpoints do require the size, + * and we want to recognize those before making any other changes to state. + */ +static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, + bool is_write, bool tag_checked, + int log2_size, bool is_unpriv, + int core_idx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (tag_checked && s->mte_active[is_unpriv]) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + FIELD_DP32(desc, MTEDESC, MIDX, core_idx, desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size, desc); + tcg_desc = tcg_const_i32(tcg_ctx, desc); + + ret = new_tmp_a64(s); + gen_helper_mte_check1(tcg_ctx, ret, tcg_ctx->cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_ctx, tcg_desc); + + return ret; + } + return clean_data_tbi(s, addr); +} + +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size) +{ + return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size, + false, get_mem_index(s)); +} + +/* + * For MTE, check multiple logical sequential accesses. + */ +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_esize, int total_size) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize, desc); + FIELD_DP32(desc, MTEDESC, TSIZE, total_size, desc); + tcg_desc = tcg_const_i32(tcg_ctx, desc); + + ret = new_tmp_a64(s); + gen_helper_mte_checkN(tcg_ctx, ret, tcg_ctx->cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_ctx, tcg_desc); + + return ret; + } + return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -248,7 +324,7 @@ static void a64_test_cc(TCGContext *tcg_ctx, DisasCompare64 *c64, int cc) arm_test_cc(tcg_ctx, &c32, cc); /* Sign-extend the 32-bit value so that the GE/LT comparisons work - * properly. The NE/EQ comparisons are also fine with this choice. */ + * properly. The NE/EQ comparisons are also fine with this choice. 
*/ c64->cond = c32.cond; c64->value = tcg_temp_new_i64(tcg_ctx); tcg_gen_ext_i32_i64(tcg_ctx, c64->value, c32.value); @@ -390,6 +466,13 @@ TCGv_i64 new_tmp_a64(DisasContext *s) return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64(tcg_ctx); } +TCGv_i64 new_tmp_a64_local(DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + assert(s->tmp_a64_count < TMP_A64_MAX); + return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64(tcg_ctx); +} + TCGv_i64 new_tmp_a64_zero(DisasContext *s) { TCGContext *tcg_ctx = s->uc->tcg_ctx; @@ -505,7 +588,8 @@ static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 v = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ld16u_i32(tcg_ctx, v, tcg_ctx->cpu_env, fp_reg_offset(s, reg, MO_16)); + tcg_gen_ld16u_i32(tcg_ctx, v, tcg_ctx->cpu_env, + fp_reg_offset(s, reg, MO_16)); return v; } @@ -518,14 +602,10 @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd) unsigned ofs = fp_reg_offset(s, rd, MO_64); unsigned vsz = vec_full_reg_size(s); - if (!is_q) { - TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); - tcg_gen_st_i64(tcg_ctx, tcg_zero, tcg_ctx->cpu_env, ofs + 8); - tcg_temp_free_i64(tcg_ctx, tcg_zero); - } - if (vsz > 16) { - tcg_gen_gvec_dup8i(tcg_ctx, ofs + 16, vsz - 16, vsz - 16, 0); - } + TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); + tcg_temp_free_i64(tcg_ctx, tcg_zero); + /* Nop move, with side effect of clearing the tail. */ + tcg_gen_gvec_mov(tcg_ctx, MO_64, ofs, ofs, is_q ? 16 : 8, vsz); } void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) @@ -571,8 +651,8 @@ static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, GVecGen2Fn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - is_q ? 16 : 8, vec_full_reg_size(s)); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), is_q ? 16 : 8, vec_full_reg_size(s)); } /* Expand a 2-operand + immediate AdvSIMD vector operation using @@ -582,8 +662,9 @@ static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, int64_t imm, GVecGen2iFn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - imm, is_q ? 16 : 8, vec_full_reg_size(s)); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), imm, is_q ? 16 : 8, + vec_full_reg_size(s)); } /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ @@ -591,8 +672,9 @@ static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, GVecGen3Fn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s)); } /* Expand a 4-operand AdvSIMD vector operation using an expander function. */ @@ -600,56 +682,31 @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, int rx, GVecGen4Fn *gvec_fn, int vece) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), - is_q ? 16 : 8, vec_full_reg_size(s)); -} - -/* Expand a 2-operand + immediate AdvSIMD vector operation using - * an op descriptor. 
- */ -static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd, - int rn, int64_t imm, const GVecGen2i *gvec_op) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_gvec_2i(tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op); + gvec_fn(tcg_ctx, vece, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rx), is_q ? 16 : 8, vec_full_reg_size(s)); } -/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */ -static void gen_gvec_op3(DisasContext *s, bool is_q, int rd, - int rn, int rm, const GVecGen3 *gvec_op) +/* Expand a 2-operand operation using an out-of-line helper. */ +static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, int rn, + int data, gen_helper_gvec_2 *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_gvec_3(tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), is_q ? 16 : 8, - vec_full_reg_size(s), gvec_op); + tcg_gen_gvec_2_ool(tcg_ctx, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), is_q ? 16 : 8, + vec_full_reg_size(s), data, fn); } /* Expand a 3-operand operation using an out-of-line helper. */ -static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, - int rn, int rm, int data, gen_helper_gvec_3 *fn) +static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, int rn, int rm, + int data, gen_helper_gvec_3 *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s), data, fn); } -/* Expand a 3-operand + env pointer operation using - * an out-of-line helper. - */ -static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd, - int rn, int rm, gen_helper_gvec_3_ptr *fn) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, - is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); -} - /* Expand a 3-operand + fpstatus pointer + simd data value operation using * an out-of-line helper. */ @@ -660,9 +717,8 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, is_fp16); tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), fpst, - is_q ? 16 : 8, vec_full_reg_size(s), data, fn); + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), + fpst, is_q ? 
16 : 8, vec_full_reg_size(s), data, fn); tcg_temp_free_ptr(tcg_ctx, fpst); } @@ -689,7 +745,8 @@ static inline void gen_logic_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 result) } /* dest = T0 + T1; compute C, N, V and Z flags */ -static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { if (sf) { TCGv_i64 result, flag, tmp; @@ -722,7 +779,8 @@ static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, tcg_gen_movi_i32(tcg_ctx, tmp, 0); tcg_gen_extrl_i64_i32(tcg_ctx, t0_32, t0); tcg_gen_extrl_i64_i32(tcg_ctx, t1_32, t1); - tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, t1_32, tmp); + tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, + t1_32, tmp); tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF); tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32); tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32); @@ -736,7 +794,8 @@ static void gen_add_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, } /* dest = T0 - T1; compute C, N, V and Z flags */ -static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { if (sf) { /* 64 bit arithmetic */ @@ -770,7 +829,8 @@ static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, tcg_gen_extrl_i64_i32(tcg_ctx, t1_32, t1); tcg_gen_sub_i32(tcg_ctx, tcg_ctx->cpu_NF, t0_32, t1_32); tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF); - tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GEU, tcg_ctx->cpu_CF, t0_32, t1_32); + tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GEU, tcg_ctx->cpu_CF, t0_32, + t1_32); tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32); tmp = tcg_temp_new_i32(tcg_ctx); tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32); @@ -783,7 +843,8 @@ static void gen_sub_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, } /* dest = T0 + T1 + CF; do not compute flags. */ -static void gen_adc(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_adc(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { TCGv_i64 flag = tcg_temp_new_i64(tcg_ctx); tcg_gen_extu_i32_i64(tcg_ctx, flag, tcg_ctx->cpu_CF); @@ -797,7 +858,8 @@ static void gen_adc(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCG } /* dest = T0 + T1 + CF; compute C, N, V and Z flags. 
*/ -static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, + TCGv_i64 t1) { if (sf) { TCGv_i64 result, cf_64, vf_64, tmp; @@ -831,8 +893,10 @@ static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, tcg_gen_extrl_i64_i32(tcg_ctx, t0_32, t0); tcg_gen_extrl_i64_i32(tcg_ctx, t1_32, t1); - tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, tcg_ctx->cpu_CF, tmp); - tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t1_32, tmp); + tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, + tcg_ctx->cpu_CF, tmp); + tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, + tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t1_32, tmp); tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF); tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32); @@ -855,9 +919,8 @@ static void gen_adc_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 dest, TCGv_i64 t0, */ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, TCGv_i64 tcg_addr, int size, int memidx, - bool iss_valid, - unsigned int iss_srt, - bool iss_sf, bool iss_ar) + bool iss_valid, unsigned int iss_srt, bool iss_sf, + bool iss_ar) { TCGContext *tcg_ctx = s->uc->tcg_ctx; g_assert(size <= 3); @@ -866,36 +929,27 @@ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, if (iss_valid) { uint32_t syn; - syn = syn_data_abort_with_iss(0, - size, - false, - iss_srt, - iss_sf, - iss_ar, + syn = syn_data_abort_with_iss(0, size, false, iss_srt, iss_sf, iss_ar, 0, 0, 0, 0, 0, false); disas_set_insn_syndrome(s, syn); } } -static void do_gpr_st(DisasContext *s, TCGv_i64 source, - TCGv_i64 tcg_addr, int size, - bool iss_valid, - unsigned int iss_srt, +static void do_gpr_st(DisasContext *s, TCGv_i64 source, TCGv_i64 tcg_addr, + int size, bool iss_valid, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s), - iss_valid, iss_srt, iss_sf, iss_ar); + do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s), iss_valid, + iss_srt, iss_sf, iss_ar); } /* * Load from memory to GPR register */ -static void do_gpr_ld_memidx(DisasContext *s, - TCGv_i64 dest, TCGv_i64 tcg_addr, - int size, bool is_signed, - bool extend, int memidx, - bool iss_valid, unsigned int iss_srt, - bool iss_sf, bool iss_ar) +static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend, int memidx, + bool iss_valid, unsigned int iss_srt, bool iss_sf, + bool iss_ar) { TCGContext *tcg_ctx = s->uc->tcg_ctx; MemOp memop = s->be_data + size; @@ -916,26 +970,18 @@ static void do_gpr_ld_memidx(DisasContext *s, if (iss_valid) { uint32_t syn; - syn = syn_data_abort_with_iss(0, - size, - is_signed, - iss_srt, - iss_sf, - iss_ar, - 0, 0, 0, 0, 0, false); + syn = syn_data_abort_with_iss(0, size, is_signed, iss_srt, iss_sf, + iss_ar, 0, 0, 0, 0, 0, false); disas_set_insn_syndrome(s, syn); } } -static void do_gpr_ld(DisasContext *s, - TCGv_i64 dest, TCGv_i64 tcg_addr, - int size, bool is_signed, bool extend, - bool iss_valid, unsigned int iss_srt, - bool iss_sf, bool iss_ar) +static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend, bool iss_valid, + unsigned int iss_srt, bool iss_sf, bool iss_ar) { do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend, - get_mem_index(s), - iss_valid, iss_srt, iss_sf, 
iss_ar); + get_mem_index(s), iss_valid, iss_srt, iss_sf, iss_ar); } /* @@ -946,7 +992,8 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) TCGContext *tcg_ctx = s->uc->tcg_ctx; /* This writes the bottom N bits of a 128 bit wide vector to memory */ TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_offset(s, srcidx, MO_64)); + tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, + fp_reg_offset(s, srcidx, MO_64)); if (size < 4) { tcg_gen_qemu_st_i64(tcg_ctx, tmp, tcg_addr, get_mem_index(s), s->be_data + size); @@ -955,11 +1002,12 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s), - s->be_data | MO_Q); - tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_hi_offset(s, srcidx)); - tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s), - s->be_data | MO_Q); + tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_hiaddr : tcg_addr, + get_mem_index(s), s->be_data | MO_Q); + tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, srcidx)); + tcg_gen_qemu_st_i64(tcg_ctx, tmp, be ? tcg_addr : tcg_hiaddr, + get_mem_index(s), s->be_data | MO_Q); tcg_temp_free_i64(tcg_ctx, tcg_hiaddr); } @@ -974,11 +1022,10 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) TCGContext *tcg_ctx = s->uc->tcg_ctx; /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(tcg_ctx); - TCGv_i64 tmphi; + TCGv_i64 tmphi = NULL; if (size < 4) { MemOp memop = s->be_data + size; - tmphi = tcg_const_i64(tcg_ctx, 0); tcg_gen_qemu_ld_i64(tcg_ctx, tmplo, tcg_addr, get_mem_index(s), memop); } else { bool be = s->be_data == MO_BE; @@ -988,20 +1035,24 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) tcg_hiaddr = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_ld_i64(tcg_ctx, tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s), - s->be_data | MO_Q); - tcg_gen_qemu_ld_i64(tcg_ctx, tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s), - s->be_data | MO_Q); + tcg_gen_qemu_ld_i64(tcg_ctx, tmplo, be ? tcg_hiaddr : tcg_addr, + get_mem_index(s), s->be_data | MO_Q); + tcg_gen_qemu_ld_i64(tcg_ctx, tmphi, be ? 
tcg_addr : tcg_hiaddr, + get_mem_index(s), s->be_data | MO_Q); tcg_temp_free_i64(tcg_ctx, tcg_hiaddr); } - tcg_gen_st_i64(tcg_ctx, tmplo, tcg_ctx->cpu_env, fp_reg_offset(s, destidx, MO_64)); - tcg_gen_st_i64(tcg_ctx, tmphi, tcg_ctx->cpu_env, fp_reg_hi_offset(s, destidx)); + tcg_gen_st_i64(tcg_ctx, tmplo, tcg_ctx->cpu_env, + fp_reg_offset(s, destidx, MO_64)); tcg_temp_free_i64(tcg_ctx, tmplo); - tcg_temp_free_i64(tcg_ctx, tmphi); - clear_vec_high(s, true, destidx); + if (tmphi) { + tcg_gen_st_i64(tcg_ctx, tmphi, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, destidx)); + tcg_temp_free_i64(tcg_ctx, tmphi); + } + clear_vec_high(s, tmphi != NULL, destidx); } /* @@ -1032,17 +1083,17 @@ static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, case MO_32: tcg_gen_ld32u_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_8|MO_SIGN: + case MO_8 | MO_SIGN: tcg_gen_ld8s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_16|MO_SIGN: + case MO_16 | MO_SIGN: tcg_gen_ld16s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_32|MO_SIGN: + case MO_32 | MO_SIGN: tcg_gen_ld32s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; case MO_64: - case MO_64|MO_SIGN: + case MO_64 | MO_SIGN: tcg_gen_ld_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; default: @@ -1062,14 +1113,14 @@ static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, case MO_16: tcg_gen_ld16u_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_8|MO_SIGN: + case MO_8 | MO_SIGN: tcg_gen_ld8s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; - case MO_16|MO_SIGN: + case MO_16 | MO_SIGN: tcg_gen_ld16s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; case MO_32: - case MO_32|MO_SIGN: + case MO_32 | MO_SIGN: tcg_gen_ld_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off); break; default: @@ -1129,7 +1180,8 @@ static void do_vec_st(DisasContext *s, int srcidx, int element, TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); read_vec_element(s, tcg_tmp, srcidx, element, size); - tcg_gen_qemu_st_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), endian | size); + tcg_gen_qemu_st_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), + endian | size); tcg_temp_free_i64(tcg_ctx, tcg_tmp); } @@ -1141,7 +1193,8 @@ static void do_vec_ld(DisasContext *s, int destidx, int element, TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), endian | size); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, tcg_addr, get_mem_index(s), + endian | size); write_vec_element(s, tcg_tmp, destidx, element, size); tcg_temp_free_i64(tcg_ctx, tcg_tmp); @@ -1186,8 +1239,8 @@ bool sve_access_check(DisasContext *s) * optional shift. You will likely want to pass a temporary for the * destination register. See DecodeRegExtend() in the ARM ARM. */ -static void ext_and_shift_reg(TCGContext *tcg_ctx, TCGv_i64 tcg_out, TCGv_i64 tcg_in, - int option, unsigned int shift) +static void ext_and_shift_reg(TCGContext *tcg_ctx, TCGv_i64 tcg_out, + TCGv_i64 tcg_in, int option, unsigned int shift) { int extsize = extract32(option, 0, 2); bool is_signed = extract32(option, 2, 1); @@ -1319,8 +1372,8 @@ static void disas_comp_b_imm(DisasContext *s, uint32_t insn) label_match = gen_new_label(tcg_ctx); reset_btype(s); - tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ, - tcg_cmp, 0, label_match); + tcg_gen_brcondi_i64(tcg_ctx, op ? 
TCG_COND_NE : TCG_COND_EQ, tcg_cmp, 0, + label_match); gen_goto_tb(s, 0, s->base.pc_next); gen_set_label(tcg_ctx, label_match); @@ -1351,8 +1404,8 @@ static void disas_test_b_imm(DisasContext *s, uint32_t insn) label_match = gen_new_label(tcg_ctx); reset_btype(s); - tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ, - tcg_cmp, 0, label_match); + tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ, tcg_cmp, 0, + label_match); tcg_temp_free_i64(tcg_ctx, tcg_cmp); gen_goto_tb(s, 0, s->base.pc_next); gen_set_label(tcg_ctx, label_match); @@ -1393,8 +1446,8 @@ static void disas_cond_b_imm(DisasContext *s, uint32_t insn) } /* HINT instruction group, including various allocated HINTs */ -static void handle_hint(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void handle_hint(DisasContext *s, uint32_t insn, unsigned int op1, + unsigned int op2, unsigned int crm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned int selector = crm << 3 | op2; @@ -1431,71 +1484,80 @@ static void handle_hint(DisasContext *s, uint32_t insn, break; case 7: // 0b00111: /* XPACLRI */ if (s->pauth_active) { - gen_helper_xpaci(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30]); + gen_helper_xpaci(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30]); } break; case 8: // 0b01000: /* PACIA1716 */ if (s->pauth_active) { - gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0xa: // 0b01010: /* PACIB1716 */ if (s->pauth_active) { - gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0xc: // 0b01100: /* AUTIA1716 */ if (s->pauth_active) { - gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0xe: // 0b01110: /* AUTIB1716 */ if (s->pauth_active) { - gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); + gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[17], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[17], tcg_ctx->cpu_X[16]); } break; case 0x18: // 0b11000: /* PACIAZ */ if (s->pauth_active) { - gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x19: // 0b11001: /* PACIASP */ if (s->pauth_active) { - gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_pacia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; case 0x1a: // 0b11010: /* PACIBZ */ if (s->pauth_active) { - gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x1b: // 0b11011: /* PACIBSP */ if (s->pauth_active) { - gen_helper_pacib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_pacib(tcg_ctx, 
tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; case 0x1c: // 0b11100: /* AUTIAZ */ if (s->pauth_active) { - gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x1d: // 0b11101: /* AUTIASP */ if (s->pauth_active) { - gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_autia(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; case 0x1e: // 0b11110: /* AUTIBZ */ if (s->pauth_active) { - gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], - new_tmp_a64_zero(s)); + gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], new_tmp_a64_zero(s)); } break; case 0x1f: // 0b11111: /* AUTIBSP */ if (s->pauth_active) { - gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); + gen_helper_autib(tcg_ctx, tcg_ctx->cpu_X[30], tcg_ctx->cpu_env, + tcg_ctx->cpu_X[30], tcg_ctx->cpu_X[31]); } break; default: @@ -1511,8 +1573,8 @@ static void gen_clrex(DisasContext *s, uint32_t insn) } /* CLREX, DSB, DMB, ISB */ -static void handle_sync(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void handle_sync(DisasContext *s, uint32_t insn, unsigned int op1, + unsigned int op2, unsigned int crm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGBar bar; @@ -1601,19 +1663,22 @@ static void gen_xaflag(TCGContext *tcg_ctx) static void gen_axflag(TCGContext *tcg_ctx) { - tcg_gen_sari_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, 31); /* V ? -1 : 0 */ - tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, tcg_ctx->cpu_VF); /* C & !V */ + tcg_gen_sari_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, + 31); /* V ? -1 : 0 */ + tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, + tcg_ctx->cpu_VF); /* C & !V */ /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ - tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, tcg_ctx->cpu_VF); + tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, + tcg_ctx->cpu_VF); tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_NF, 0); tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_VF, 0); } /* MSR (immediate) - move immediate to processor state field */ -static void handle_msr_i(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void handle_msr_i(DisasContext *s, uint32_t insn, unsigned int op1, + unsigned int op2, unsigned int crm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 t1; @@ -1695,7 +1760,28 @@ static void handle_msr_i(DisasContext *s, uint32_t insn, gen_helper_msr_i_daifclear(tcg_ctx, tcg_ctx->cpu_env, t1); tcg_temp_free_i32(tcg_ctx, t1); /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; + break; + + case 0x1c: /* TCO */ + if (dc_isar_feature(aa64_mte, s)) { + /* Full MTE is enabled -- set the TCO bit as directed. */ + if (crm & 1) { + set_pstate_bits(tcg_ctx, PSTATE_TCO); + } else { + clear_pstate_bits(tcg_ctx, PSTATE_TCO); + } + t1 = tcg_const_i32(tcg_ctx, s->current_el); + gen_helper_rebuild_hflags_a64(tcg_ctx, tcg_ctx->cpu_env, t1); + tcg_temp_free_i32(tcg_ctx, t1); + /* Many factors, including TCO, go into MTE_ACTIVE. 
*/ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; + } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { + /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ + s->base.is_jmp = DISAS_NEXT; + } else { + goto do_unallocated; + } break; default: @@ -1738,7 +1824,8 @@ static void gen_set_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt) tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_NF, nzcv, (1U << 31)); /* bit 30, Z */ tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_ZF, nzcv, (1 << 30)); - tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, 0); + tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, + 0); /* bit 29, C */ tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_CF, nzcv, (1 << 29)); tcg_gen_shri_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, 29); @@ -1748,7 +1835,6 @@ static void gen_set_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt) tcg_temp_free_i32(tcg_ctx, nzcv); } - static TCGLabel *gen_hook_sys(DisasContext *s, uint32_t insn, struct hook *hk) { uc_engine *uc = s->uc; @@ -1759,17 +1845,18 @@ static TCGLabel *gen_hook_sys(DisasContext *s, uint32_t insn, struct hook *hk) tcg_skip = tcg_temp_new_i32(tcg_ctx); tcg_insn = tcg_const_i32(tcg_ctx, insn); - tcg_hk = tcg_const_ptr(tcg_ctx, (void*)hk); + tcg_hk = tcg_const_ptr(tcg_ctx, (void *)hk); // Sync pc in advance. gen_a64_set_pc_im(tcg_ctx, s->pc_curr); // Only one hook per instruction for SYS/SYSL/MRS/MSR is allowed. // This is intended and may be extended if it's really necessary. - gen_helper_uc_hooksys64(tcg_ctx, tcg_skip, tcg_ctx->cpu_env, tcg_insn, tcg_hk); + gen_helper_uc_hooksys64(tcg_ctx, tcg_skip, tcg_ctx->cpu_env, tcg_insn, + tcg_hk); tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_NE, tcg_skip, 0, label); - + tcg_temp_free_i32(tcg_ctx, tcg_skip); tcg_temp_free_i32(tcg_ctx, tcg_insn); tcg_temp_free_ptr(tcg_ctx, tcg_hk); @@ -1777,7 +1864,8 @@ static TCGLabel *gen_hook_sys(DisasContext *s, uint32_t insn, struct hook *hk) return label; } -static void may_gen_set_label(DisasContext *s, TCGLabel *label) { +static void may_gen_set_label(DisasContext *s, TCGLabel *label) +{ if (label) { gen_set_label(s->uc->tcg_ctx, label); } @@ -1802,7 +1890,8 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, struct hook *hook; HOOK_FOREACH_VAR_DECLARE; - HOOK_FOREACH(uc, hook, UC_HOOK_INSN) { + HOOK_FOREACH(uc, hook, UC_HOOK_INSN) + { if (hook->to_delete) continue; @@ -1811,32 +1900,32 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, } switch (hook->insn) { - case UC_ARM64_INS_MRS: { - if (isread && (op0 == 2 || op0 == 3)) { - label = gen_hook_sys(s, insn, hook); - } - break; + case UC_ARM64_INS_MRS: { + if (isread && (op0 == 2 || op0 == 3)) { + label = gen_hook_sys(s, insn, hook); } - case UC_ARM64_INS_MSR: { - if (!isread && (op0 == 2 || op0 == 3)) { - label = gen_hook_sys(s, insn, hook); - } - break; + break; + } + case UC_ARM64_INS_MSR: { + if (!isread && (op0 == 2 || op0 == 3)) { + label = gen_hook_sys(s, insn, hook); } - case UC_ARM64_INS_SYSL: { - if (isread && op0 == 1) { - label = gen_hook_sys(s, insn, hook); - } - break; + break; + } + case UC_ARM64_INS_SYSL: { + if (isread && op0 == 1) { + label = gen_hook_sys(s, insn, hook); } - case UC_ARM64_INS_SYS: { - if (!isread && op0 == 1) { - label = gen_hook_sys(s, insn, hook); - } - break; + break; + } + case UC_ARM64_INS_SYS: { + if (!isread && op0 == 1) { + label = gen_hook_sys(s, insn, hook); } - default: - break; + break; + } + default: + break; } if (label) { @@ -1844,15 +1933,16 @@ static void handle_sys(DisasContext *s, uint32_t 
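For reference, the hook dispatch above reduces to a small classification of (op0, isread): MRS/MSR hooks apply to op0 of 2 or 3, SYS/SYSL hooks to op0 of 1. An illustrative host-side sketch of that mapping (the helper name is invented for illustration; the enum names are the ones already used by the patch):

#include <stdbool.h>

/* Which UC_ARM64_INS_* hook class handle_sys() consults for an access. */
const char *sysreg_hook_kind(int op0, bool isread)
{
    if (op0 == 2 || op0 == 3) {
        return isread ? "UC_ARM64_INS_MRS" : "UC_ARM64_INS_MSR";
    }
    if (op0 == 1) {
        return isread ? "UC_ARM64_INS_SYSL" : "UC_ARM64_INS_SYS";
    }
    return "no hook";
}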
insn, bool isread, } } - ri = get_arm_cp_reginfo(s->cp_regs, - ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, - crn, crm, op0, op1, op2)); + ri = get_arm_cp_reginfo( + s->cp_regs, + ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)); if (!ri) { /* Unknown register; this might be a guest error or a QEMU * unimplemented feature. */ - qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " + qemu_log_mask(LOG_UNIMP, + "%s access to unsupported AArch64 " "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", isread ? "read" : "write", op0, op1, crn, crm, op2); unallocated_encoding(s); @@ -1880,7 +1970,8 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); tcg_syn = tcg_const_i32(tcg_ctx, syndrome); tcg_isread = tcg_const_i32(tcg_ctx, isread); - gen_helper_access_check_cp_reg(tcg_ctx, tcg_ctx->cpu_env, tmpptr, tcg_syn, tcg_isread); + gen_helper_access_check_cp_reg(tcg_ctx, tcg_ctx->cpu_env, tmpptr, + tcg_syn, tcg_isread); tcg_temp_free_ptr(tcg_ctx, tmpptr); tcg_temp_free_i32(tcg_ctx, tcg_syn); tcg_temp_free_i32(tcg_ctx, tcg_isread); @@ -1916,10 +2007,62 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, return; case ARM_CP_DC_ZVA: /* Writes clear the aligned block of memory which rt points into. */ - tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + if (s->mte_active[0]) { + TCGv_i32 t_desc; + int desc = 0; + + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + t_desc = tcg_const_i32(tcg_ctx, desc); + + tcg_rt = new_tmp_a64(s); + gen_helper_mte_check_zva(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, t_desc, + cpu_reg(s, rt)); + tcg_temp_free_i32(tcg_ctx, t_desc); + } else { + tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + } gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, tcg_rt); may_gen_set_label(s, label); return; + case ARM_CP_DC_GVA: { + TCGv_i64 clean_addr, tag; + + /* + * DC_GVA, like DC_ZVA, requires that we supply the original + * pointer for an invalid page. Probe that address first. + */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. */ + tag = tcg_temp_new_i64(tcg_ctx); + tcg_gen_shri_i64(tcg_ctx, tag, tcg_rt, 56); + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, clean_addr, tag); + tcg_temp_free_i64(tcg_ctx, tag); + } + } + return; + case ARM_CP_DC_GZVA: { + TCGv_i64 clean_addr, tag; + + /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, clean_addr); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. 
*/ + tag = tcg_temp_new_i64(tcg_ctx); + tcg_gen_shri_i64(tcg_ctx, tag, tcg_rt, 56); + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, clean_addr, tag); + tcg_temp_free_i64(tcg_ctx, tag); + } + } + return; default: break; } @@ -1961,7 +2104,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { /* I/O operations must end the TB here (whether read or write) */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { /* @@ -1976,7 +2119,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, * but allow this to be suppressed by the register definition * (usually only necessary to work around guest bugs). */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } may_gen_set_label(s, label); @@ -2046,12 +2189,12 @@ static void disas_exc(DisasContext *s, uint32_t insn) * instruction works properly. */ switch (op2_ll) { - case 1: /* SVC */ + case 1: /* SVC */ gen_ss_advance(s); gen_exception_insn(s, s->base.pc_next, EXCP_SWI, syn_aa64_svc(imm16), default_exception_el(s)); break; - case 2: /* HVC */ + case 2: /* HVC */ if (s->current_el == 0) { unallocated_encoding(s); break; @@ -2065,7 +2208,7 @@ static void disas_exc(DisasContext *s, uint32_t insn) gen_exception_insn(s, s->base.pc_next, EXCP_HVC, syn_aa64_hvc(imm16), 2); break; - case 3: /* SMC */ + case 3: /* SMC */ if (s->current_el == 0) { unallocated_encoding(s); break; @@ -2144,7 +2287,7 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned int opc, op2, op3, rn, op4; - unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ + unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ TCGv_i64 dst; TCGv_i64 modifier; @@ -2194,9 +2337,11 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) if (s->pauth_active) { dst = new_tmp_a64(s); if (op3 == 2) { - gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, + cpu_reg(s, rn), modifier); } else { - gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, + cpu_reg(s, rn), modifier); } } else { dst = cpu_reg(s, rn); @@ -2226,9 +2371,11 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) dst = new_tmp_a64(s); modifier = cpu_reg_sp(s, op4); if (op3 == 2) { - gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), + modifier); } else { - gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), modifier); + gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, cpu_reg(s, rn), + modifier); } } else { dst = cpu_reg(s, rn); @@ -2268,9 +2415,11 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) if (s->pauth_active) { modifier = tcg_ctx->cpu_X[31]; if (op3 == 2) { - gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, dst, modifier); + gen_helper_autia(tcg_ctx, dst, tcg_ctx->cpu_env, dst, + modifier); } else { - gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, dst, modifier); + gen_helper_autib(tcg_ctx, dst, tcg_ctx->cpu_env, dst, + modifier); } } break; @@ -2326,14 +2475,18 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) static void disas_b_exc_sys(DisasContext *s, uint32_t insn) { switch (extract32(insn, 25, 7)) { - case 0x0a: case 0x0b: - case 0x4a: case 0x4b: /* 
Unconditional branch (immediate) */ + case 0x0a: + case 0x0b: + case 0x4a: + case 0x4b: /* Unconditional branch (immediate) */ disas_uncond_b_imm(s, insn); break; - case 0x1a: case 0x5a: /* Compare & branch (immediate) */ + case 0x1a: + case 0x5a: /* Compare & branch (immediate) */ disas_comp_b_imm(s, insn); break; - case 0x1b: case 0x5b: /* Test & branch (immediate) */ + case 0x1b: + case 0x5b: /* Test & branch (immediate) */ disas_test_b_imm(s, insn); break; case 0x2a: /* Conditional branch (immediate) */ @@ -2370,8 +2523,8 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn) * races in multi-threaded linux-user and when MTTCG softmmu is * enabled. */ -static void gen_load_exclusive(DisasContext *s, int rt, int rt2, - TCGv_i64 addr, int size, bool is_pair) +static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr, + int size, bool is_pair) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int idx = get_mem_index(s); @@ -2383,13 +2536,18 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, if (size == 2) { /* The pair must be single-copy atomic for the doubleword. */ memop |= MO_64 | MO_ALIGN; - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, + memop); if (s->be_data == MO_LE) { - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val, 0, 32); - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), tcg_ctx->cpu_exclusive_val, 32, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), + tcg_ctx->cpu_exclusive_val, 0, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), + tcg_ctx->cpu_exclusive_val, 32, 32); } else { - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val, 32, 32); - tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), tcg_ctx->cpu_exclusive_val, 0, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt), + tcg_ctx->cpu_exclusive_val, 32, 32); + tcg_gen_extract_i64(tcg_ctx, cpu_reg(s, rt2), + tcg_ctx->cpu_exclusive_val, 0, 32); } } else { /* The pair must be single-copy atomic for *each* doubleword, not @@ -2400,15 +2558,19 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr2 = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, addr2, addr, 8); - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, addr2, idx, memop); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, addr2, + idx, memop); tcg_temp_free_i64(tcg_ctx, addr2); - tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val); - tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), tcg_ctx->cpu_exclusive_high); + tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), + tcg_ctx->cpu_exclusive_val); + tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), + tcg_ctx->cpu_exclusive_high); } } else { memop |= size | MO_ALIGN; - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, addr, idx, + memop); tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), tcg_ctx->cpu_exclusive_val); } tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, addr); @@ -2434,48 +2596,54 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGLabel *done_label = gen_new_label(tcg_ctx); TCGv_i64 tmp; - tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, addr, tcg_ctx->cpu_exclusive_addr, fail_label); + tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, addr, tcg_ctx->cpu_exclusive_addr, + fail_label); tmp = tcg_temp_new_i64(tcg_ctx); if (is_pair) { if (size == 2) { if (s->be_data == MO_LE) { - 
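For reference, the 32-bit LDXP path above issues a single aligned 64-bit load and then splits the value according to endianness. A host-side model in which the 64-bit value stands in for the guest load (names are illustrative; this is a sketch, not part of the patch):

#include <stdbool.h>
#include <stdint.h>

void ldxp32_model(uint64_t loaded, bool little_endian,
                  uint32_t *rt, uint32_t *rt2)
{
    if (little_endian) {
        *rt  = (uint32_t)(loaded & 0xffffffffu); /* bits [31:0]  */
        *rt2 = (uint32_t)(loaded >> 32);         /* bits [63:32] */
    } else {
        *rt  = (uint32_t)(loaded >> 32);
        *rt2 = (uint32_t)(loaded & 0xffffffffu);
    }
}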
tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); + tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt), + cpu_reg(s, rt2)); } else { - tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); - } - tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, - tcg_ctx->cpu_exclusive_val, tmp, - get_mem_index(s), - MO_64 | MO_ALIGN | s->be_data); - tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, tcg_ctx->cpu_exclusive_val); + tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt2), + cpu_reg(s, rt)); + } + tcg_gen_atomic_cmpxchg_i64( + tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, + tcg_ctx->cpu_exclusive_val, tmp, get_mem_index(s), + MO_64 | MO_ALIGN | s->be_data); + tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, + tcg_ctx->cpu_exclusive_val); } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { if (!HAVE_CMPXCHG128) { gen_helper_exit_atomic(tcg_ctx, tcg_ctx->cpu_env); s->base.is_jmp = DISAS_NORETURN; } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env, - tcg_ctx->cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); + gen_helper_paired_cmpxchg64_le_parallel( + tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_be_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env, - tcg_ctx->cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); + gen_helper_paired_cmpxchg64_be_parallel( + tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); } } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, + tcg_ctx->cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, + tcg_ctx->cpu_exclusive_addr, cpu_reg(s, rt), cpu_reg(s, rt2)); } } else { - tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, tcg_ctx->cpu_exclusive_val, - cpu_reg(s, rt), get_mem_index(s), + tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, + tcg_ctx->cpu_exclusive_val, cpu_reg(s, rt), + get_mem_index(s), size | MO_ALIGN | s->be_data); - tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, tcg_ctx->cpu_exclusive_val); + tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, + tcg_ctx->cpu_exclusive_val); } tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rd), tmp); tcg_temp_free_i64(tcg_ctx, tmp); @@ -2487,8 +2655,8 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, -1); } -static void gen_compare_and_swap(DisasContext *s, int rs, int rt, - int rn, int size) +static void gen_compare_and_swap(DisasContext *s, int rs, int rt, int rn, + int size) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rs = cpu_reg(s, rs); @@ -2499,13 +2667,13 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); - tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, - size | MO_ALIGN | s->be_data); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); + tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tcg_rs, clean_addr, tcg_rs, tcg_rt, + memidx, size | MO_ALIGN | s->be_data); } -static void 
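For reference, the store-exclusive strategy above compares the address against the recorded monitor and then compare-and-swaps against the recorded value, producing 0 on success and 1 on failure. A rough host-side model, assuming GCC/Clang atomic builtins; the globals stand in for cpu_exclusive_addr/val and the whole thing is only a sketch of the idea, not the patch's code:

#include <stdbool.h>
#include <stdint.h>

uint64_t exclusive_addr = (uint64_t)-1; /* set by an LDXR model */
uint64_t exclusive_val;

int stxr_model(uint64_t *addr, uint64_t new_val)
{
    if ((uint64_t)(uintptr_t)addr != exclusive_addr) {
        return 1;                               /* lost the monitor */
    }
    uint64_t expected = exclusive_val;
    bool ok = __atomic_compare_exchange_n(addr, &expected, new_val,
                                          false, __ATOMIC_SEQ_CST,
                                          __ATOMIC_SEQ_CST);
    exclusive_addr = (uint64_t)-1;              /* monitor cleared */
    return ok ? 0 : 1;
}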
gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, - int rn, int size) +static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, int rn, + int size) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 s1 = cpu_reg(s, rs); @@ -2518,7 +2686,9 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + + /* This is a single atomic access, despite the "pair". */ + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); if (size == 2) { TCGv_i64 cmp = tcg_temp_new_i64(tcg_ctx); @@ -2579,7 +2749,8 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, /* If compare equal, write back new data, else write back old data. */ tcg_gen_movcond_i64(tcg_ctx, TCG_COND_NE, c1, c2, zero, t1, d1); tcg_gen_movcond_i64(tcg_ctx, TCG_COND_NE, c2, c2, zero, t2, d2); - tcg_gen_qemu_st_i64(tcg_ctx, c1, clean_addr, memidx, MO_64 | s->be_data); + tcg_gen_qemu_st_i64(tcg_ctx, c1, clean_addr, memidx, + MO_64 | s->be_data); tcg_gen_qemu_st_i64(tcg_ctx, c2, a2, memidx, MO_64 | s->be_data); tcg_temp_free_i64(tcg_ctx, a2); tcg_temp_free_i64(tcg_ctx, c1); @@ -2644,7 +2815,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); return; @@ -2653,7 +2824,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, false); if (is_lasr) { @@ -2673,7 +2845,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_STRL); - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); return; @@ -2689,13 +2861,15 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); return; - case 0x2: case 0x3: /* CASP / STXP */ + case 0x2: + case 0x3: /* CASP / STXP */ if (size & 2) { /* STXP / STLXP */ if (rn == 31) { gen_check_sp_alignment(s); @@ -2703,25 +2877,27 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); return; } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { + if (rt2 == 31 && ((rt | rs) & 1) == 0 && + dc_isar_feature(aa64_atomics, s)) { /* CASP / CASPL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; } break; - case 0x6: case 0x7: /* CASPA / LDXP */ + 
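For reference, the gen_mte_check1() calls introduced throughout this function amount, when MTE is active, to comparing the pointer's 4-bit logical tag (bits [59:56]) with the allocation tag of the 16-byte granule being accessed. A conceptual model in which allocation_tag_of is a hypothetical stand-in for the tag-memory lookup; a sketch only, not the helper's real implementation:

#include <stdbool.h>
#include <stdint.h>

#define TAG_GRANULE 16u

unsigned logical_tag(uint64_t ptr)
{
    return (unsigned)((ptr >> 56) & 0xf);
}

bool mte_check1_model(uint64_t ptr,
                      unsigned (*allocation_tag_of)(uint64_t granule))
{
    uint64_t granule = ptr & ~(uint64_t)(TAG_GRANULE - 1);
    return logical_tag(ptr) == allocation_tag_of(granule);
}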
case 0x6: + case 0x7: /* CASPA / LDXP */ if (size & 2) { /* LDXP / LDAXP */ if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = + gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, true); if (is_lasr) { @@ -2729,9 +2905,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } return; } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { + if (rt2 == 31 && ((rt | rs) & 1) == 0 && + dc_isar_feature(aa64_atomics, s)) { /* CASPA / CASPAL */ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); return; @@ -2802,8 +2977,8 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) /* Only unsigned 32bit loads target 32bit registers. */ bool iss_sf = opc != 0; - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false, true, rt, + iss_sf, false); } tcg_temp_free_i64(tcg_ctx, clean_addr); } @@ -2825,7 +3000,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) * +-----+-------+---+---+-------+---+-------+-------+------+------+ * * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit - * LDPSW 01 + * LDPSW/STGP 01 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit * V: 0 -> GPR, 1 -> Vector * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, @@ -2851,6 +3026,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) bool is_signed = false; bool postindex = false; bool wback = false; + bool set_tag = false; TCGv_i64 clean_addr, dirty_addr; @@ -2863,6 +3039,14 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (is_vector) { size = 2 + opc; + } else if (opc == 1 && !is_load) { + /* STGP */ + if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { + unallocated_encoding(s); + return; + } + size = 3; + set_tag = true; } else { size = 2 + extract32(opc, 1, 1); is_signed = extract32(opc, 0, 1); @@ -2903,7 +3087,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) return; } - offset <<= size; + offset <<= (set_tag ? LOG2_TAG_GRANULE : size); if (rn == 31) { gen_check_sp_alignment(s); @@ -2913,7 +3097,25 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (!postindex) { tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, offset); } - clean_addr = clean_data_tbi(s, dirty_addr); + + if (set_tag) { + if (!s->ata) { + /* + * TODO: We could rely on the stores below, at least for + * system mode, if we arrange to add MO_ALIGN_16. + */ + gen_helper_stg_stub(tcg_ctx, tcg_ctx->cpu_env, dirty_addr); + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_stg_parallel(tcg_ctx, tcg_ctx->cpu_env, dirty_addr, + dirty_addr); + } else { + gen_helper_stg(tcg_ctx, tcg_ctx->cpu_env, dirty_addr, dirty_addr); + } + } + + clean_addr = + gen_mte_checkN(s, dirty_addr, !is_load, (wback || rn != 31) && !set_tag, + size, 2 << size); if (is_vector) { if (is_load) { @@ -2937,20 +3139,18 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) /* Do not modify tcg_rt before recognizing any exception * from the second load. 
*/ - do_gpr_ld(s, tmp, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tmp, clean_addr, size, is_signed, false, false, 0, + false, false); tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 1ULL << size); - do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false, - false, 0, false, false); + do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false, false, 0, + false, false); tcg_gen_mov_i64(tcg_ctx, tcg_rt, tmp); tcg_temp_free_i64(tcg_ctx, tmp); } else { - do_gpr_st(s, tcg_rt, clean_addr, size, - false, 0, false, false); + do_gpr_st(s, tcg_rt, clean_addr, size, false, 0, false, false); tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 1ULL << size); - do_gpr_st(s, tcg_rt2, clean_addr, size, - false, 0, false, false); + do_gpr_st(s, tcg_rt2, clean_addr, size, false, 0, false, false); } } @@ -2978,11 +3178,8 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 */ -static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, int opc, + int size, int rt, bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -2995,6 +3192,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, bool iss_valid = !is_vector; bool post_index; bool writeback; + int memidx; TCGv_i64 clean_addr, dirty_addr; @@ -3052,7 +3250,11 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, if (!post_index) { tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, imm9); } - clean_addr = clean_data_tbi(s, dirty_addr); + + memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + clean_addr = + gen_mte_check1_mmuidx(s, dirty_addr, is_store, writeback || rn != 31, + size, is_unpriv, memidx); if (is_vector) { if (is_store) { @@ -3062,16 +3264,14 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); - int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, - iss_valid, rt, iss_sf, false); + do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, iss_valid, rt, + iss_sf, false); } else { - do_gpr_ld_memidx(s, tcg_rt, clean_addr, size, - is_signed, is_extended, memidx, - iss_valid, rt, iss_sf, false); + do_gpr_ld_memidx(s, tcg_rt, clean_addr, size, is_signed, + is_extended, memidx, iss_valid, rt, iss_sf, false); } } @@ -3105,11 +3305,8 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, * Rn: address register or SP for base * Rm: offset register or ZR for offset */ -static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, int opc, + int size, int rt, bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -3160,7 +3357,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, ext_and_shift_reg(tcg_ctx, tcg_rm, tcg_rm, opt, shift ? 
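For reference, ext_and_shift_reg() follows DecodeRegExtend(): the low two option bits pick the extend width, bit 2 picks signedness, and the shift is applied after the extension. A host-side sketch of that decode (the function name is illustrative, not from the patch):

#include <stdint.h>

uint64_t decode_reg_extend(uint64_t value, unsigned option, unsigned shift)
{
    unsigned extsize = option & 3;        /* 0: 8, 1: 16, 2: 32, 3: 64 bits */
    int is_signed = (option >> 2) & 1;
    unsigned bits = 8u << extsize;

    if (bits < 64) {
        uint64_t mask = (1ull << bits) - 1;
        value &= mask;
        if (is_signed && (value & (1ull << (bits - 1)))) {
            value |= ~mask;               /* sign-extend */
        }
    }
    return value << shift;
}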
size : 0); tcg_gen_add_i64(tcg_ctx, dirty_addr, dirty_addr, tcg_rm); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); if (is_vector) { if (is_store) { @@ -3172,12 +3369,10 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, TCGv_i64 tcg_rt = cpu_reg(s, rt); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, - true, rt, iss_sf, false); + do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, - is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, true, + rt, iss_sf, false); } } } @@ -3199,11 +3394,8 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, * Rn: base address register (inc SP) * Rt: target register */ -static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, int opc, + int size, int rt, bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -3246,7 +3438,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, dirty_addr = read_cpu_reg_sp(s, rn, 1); offset = imm12 << size; tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, offset); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); if (is_vector) { if (is_store) { @@ -3258,11 +3450,10 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, TCGv_i64 tcg_rt = cpu_reg(s, rt); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, - true, rt, iss_sf, false); + do_gpr_st(s, tcg_rt, clean_addr, size, true, rt, iss_sf, false); } else { - do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, - true, rt, iss_sf, false); + do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended, true, + rt, iss_sf, false); } } } @@ -3281,8 +3472,8 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, * A: acquire flag * R: release flag */ -static void disas_ldst_atomic(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +static void disas_ldst_atomic(DisasContext *s, uint32_t insn, int size, int rt, + bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rs = extract32(insn, 16, 5); @@ -3291,7 +3482,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, bool r = extract32(insn, 22, 1); bool a = extract32(insn, 23, 1); TCGv_i64 tcg_rs, clean_addr; - AtomicThreeOpFn *fn; + AtomicThreeOpFn *fn = NULL; if (is_vector || !dc_isar_feature(aa64_atomics, s)) { unallocated_encoding(s); @@ -3326,8 +3517,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, fn = tcg_gen_atomic_xchg_i64; break; case 014: /* LDAPR, LDAPRH, LDAPRB */ - if (!dc_isar_feature(aa64_rcpc_8_3, s) || - rs != 31 || a != 1 || r != 0) { + if (!dc_isar_feature(aa64_rcpc_8_3, s) || rs != 31 || a != 1 || + r != 0) { unallocated_encoding(s); return; } @@ -3340,7 +3531,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); if (o3_opc == 014) { /* @@ -3350,8 +3541,8 @@ static void disas_ldst_atomic(DisasContext *s, 
uint32_t insn, * full load-acquire (we only need "load-acquire processor consistent"), * but we choose to implement them as full LDAQ. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, - true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, + disas_ldst_compute_iss_sf(size, false, 0), true); tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); return; } @@ -3384,8 +3575,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, * W: pre-indexing flag * S: sign for imm9. */ -static void disas_ldst_pac(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +static void disas_ldst_pac(DisasContext *s, uint32_t insn, int size, int rt, + bool is_vector) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int rn = extract32(insn, 5, 5); @@ -3406,9 +3597,11 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, if (s->pauth_active) { if (use_key_a) { - gen_helper_autda(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, tcg_ctx->cpu_X[31]); + gen_helper_autda(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, + new_tmp_a64_zero(s)); } else { - gen_helper_autdb(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, tcg_ctx->cpu_X[31]); + gen_helper_autdb(tcg_ctx, dirty_addr, tcg_ctx->cpu_env, dirty_addr, + new_tmp_a64_zero(s)); } } @@ -3418,7 +3611,8 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, tcg_gen_addi_i64(tcg_ctx, dirty_addr, dirty_addr, offset); /* Note that "clean" and "dirty" here refer to TBI not PAC. */ - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = + gen_mte_check1(s, dirty_addr, false, is_wback || rn != 31, size); tcg_rt = cpu_reg(s, rt); do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false, @@ -3507,8 +3701,8 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) * Load-AcquirePC semantics; we implement as the slightly more * restrictive Load-Acquire. */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend, - true, rt, iss_sf, true); + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend, true, + rt, iss_sf, true); tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); } } @@ -3582,10 +3776,10 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; MemOp endian = s->be_data; - int ebytes; /* bytes per element */ + int total; /* bytes per element */ int elements; /* elements per vector */ - int rpt; /* num iterations */ - int selem; /* structure elements */ + int rpt; /* num iterations */ + int selem; /* structure elements */ int r; if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { @@ -3652,19 +3846,26 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) endian = MO_LE; } - /* Consecutive little-endian elements from a single register + total = rpt * selem * (is_q ? 16 : 8); + tcg_rn = cpu_reg_sp(s, rn); + + /* + * Issue the MTE check vs the logical repeat count, before we + * promote consecutive little-endian elements below. + */ + clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, + size, total); + + /* + * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ if (selem == 1 && endian == MO_LE) { size = 3; } - ebytes = 1 << size; - elements = (is_q ? 16 : 8) / ebytes; - - tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes); + elements = (is_q ? 
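For reference, a worked example of the new bookkeeping for a little-endian LD1 of one 128-bit register of bytes (is_q=1, rpt=1, selem=1, size=0): the MTE check still covers all 16 bytes, while the element loop is promoted to two 8-byte accesses. A stand-alone check of the arithmetic, for illustration only:

#include <assert.h>

int main(void)
{
    int is_q = 1, rpt = 1, selem = 1, size = 0;

    int total = rpt * selem * (is_q ? 16 : 8);  /* bytes covered: 16 */
    if (selem == 1 /* && little-endian */) {
        size = 3;                               /* promote to 64-bit ops */
    }
    int elements = (is_q ? 16 : 8) >> size;     /* 2 accesses of 8 bytes */

    assert(total == 16);
    assert(elements == 2 && (1 << size) == 8);
    return 0;
}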
16 : 8) >> size; + tcg_ebytes = tcg_const_i64(tcg_ctx, 1 << size); for (r = 0; r < rpt; r++) { int e; for (e = 0; e < elements; e++) { @@ -3698,7 +3899,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, rpt * elements * selem * ebytes); + tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm)); } @@ -3745,7 +3946,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) int selem = (extract32(opc, 0, 1) << 1 | R) + 1; bool replicate = false; int index = is_q << 3 | S << 2 | size; - int ebytes, xs; + int xs, total; TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; if (extract32(insn, 31, 1)) { @@ -3799,26 +4000,26 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) return; } - ebytes = 1 << scale; - if (rn == 31) { gen_check_sp_alignment(s); } + total = selem << scale; tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes); + clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, + scale, total); + + tcg_ebytes = tcg_const_i64(tcg_ctx, 1 << scale); for (xs = 0; xs < selem; xs++) { if (replicate) { /* Load and replicate to all elements */ TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, clean_addr, - get_mem_index(s), s->be_data + scale); + tcg_gen_qemu_ld_i64(tcg_ctx, tcg_tmp, clean_addr, get_mem_index(s), + s->be_data + scale); tcg_gen_gvec_dup_i64(tcg_ctx, scale, vec_full_reg_offset(s, rt), - (is_q + 1) * 8, vec_full_reg_size(s), - tcg_tmp); + (is_q + 1) * 8, vec_full_reg_size(s), tcg_tmp); tcg_temp_free_i64(tcg_ctx, tcg_tmp); } else { /* Load/store one element per register */ @@ -3835,19 +4036,235 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, selem * ebytes); + tcg_gen_addi_i64(tcg_ctx, tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm)); } } } +/* + * Load/Store memory tags + * + * 31 30 29 24 22 21 12 10 5 0 + * +-----+-------------+-----+---+------+-----+------+------+ + * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | + * +-----+-------------+-----+---+------+-----+------+------+ + */ +static void disas_ldst_tag(DisasContext *s, uint32_t insn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_READ, s->pc_curr) || + HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_WRITE, s->pc_curr)) { + // sync PC if there are memory hooks. + // TODO: Better granularity by checking ldst type and corresponding hook + // type + gen_a64_set_pc_im(s->uc->tcg_ctx, s->pc_curr); + } + + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; + int op2 = extract32(insn, 10, 2); + int op1 = extract32(insn, 22, 2); + bool is_load = false, is_pair = false, is_zero = false, is_mult = false; + int index = 0; + TCGv_i64 addr, clean_addr, tcg_rt; + + /* We checked insn bits [29:24,21] in the caller. 
*/ + if (extract32(insn, 30, 2) != 3) { + goto do_unallocated; + } + + /* + * @index is a tri-state variable which has 3 states: + * < 0 : post-index, writeback + * = 0 : signed offset + * > 0 : pre-index, writeback + */ + switch (op1) { + case 0: + if (op2 != 0) { + /* STG */ + index = op2 - 2; + } else { + /* STZGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_zero = true; + } + break; + case 1: + if (op2 != 0) { + /* STZG */ + is_zero = true; + index = op2 - 2; + } else { + /* LDG */ + is_load = true; + } + break; + case 2: + if (op2 != 0) { + /* ST2G */ + is_pair = true; + index = op2 - 2; + } else { + /* STGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = true; + } + break; + case 3: + if (op2 != 0) { + /* STZ2G */ + is_pair = is_zero = true; + index = op2 - 2; + } else { + /* LDGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_load = true; + } + break; + + default: + do_unallocated: + unallocated_encoding(s); + return; + } + + if (is_mult ? !dc_isar_feature(aa64_mte, s) + : !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + addr = read_cpu_reg_sp(s, rn, true); + if (index >= 0) { + /* pre-index or signed offset */ + tcg_gen_addi_i64(tcg_ctx, addr, addr, offset); + } + + if (is_mult) { + tcg_rt = cpu_reg(s, rt); + + if (is_zero) { + int size = 4 << s->dcz_blocksize; + + if (s->ata) { + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + /* + * The non-tags portion of STZGM is mostly like DC_ZVA, + * except the alignment happens before the access. + */ + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(tcg_ctx, clean_addr, clean_addr, -size); + gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, clean_addr); + } else if (s->ata) { + if (is_load) { + gen_helper_ldgm(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, addr); + } else { + gen_helper_stgm(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + } else { + MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; + int size = 4 << GMID_EL1_BS; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(tcg_ctx, clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + + if (is_load) { + /* The result tags are zeros. */ + tcg_gen_movi_i64(tcg_ctx, tcg_rt, 0); + } + } + return; + } + + if (is_load) { + tcg_gen_andi_i64(tcg_ctx, addr, addr, -TAG_GRANULE); + tcg_rt = cpu_reg(s, rt); + if (s->ata) { + gen_helper_ldg(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, addr, tcg_rt); + } else { + clean_addr = clean_data_tbi(s, addr); + gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); + gen_address_with_allocation_tag0(tcg_ctx, tcg_rt, addr); + } + } else { + tcg_rt = cpu_reg_sp(s, rt); + if (!s->ata) { + /* + * For STG and ST2G, we need to check alignment and probe memory. + * TODO: For STZG and STZ2G, we could rely on the stores below, + * at least for system mode; user-only won't enforce alignment. 
+ */ + if (is_pair) { + gen_helper_st2g_stub(tcg_ctx, tcg_ctx->cpu_env, addr); + } else { + gen_helper_stg_stub(tcg_ctx, tcg_ctx->cpu_env, addr); + } + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (is_pair) { + gen_helper_st2g_parallel(tcg_ctx, tcg_ctx->cpu_env, addr, + tcg_rt); + } else { + gen_helper_stg_parallel(tcg_ctx, tcg_ctx->cpu_env, addr, + tcg_rt); + } + } else { + if (is_pair) { + gen_helper_st2g(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } else { + gen_helper_stg(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + } + } + + if (is_zero) { + TCGv_i64 clean_addr = clean_data_tbi(s, addr); + TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); + int mem_index = get_mem_index(s); + int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; + + tcg_gen_qemu_st_i64(tcg_ctx, tcg_zero, clean_addr, mem_index, + MO_Q | MO_ALIGN_16); + for (i = 8; i < n; i += 8) { + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tcg_gen_qemu_st_i64(tcg_ctx, tcg_zero, clean_addr, mem_index, MO_Q); + } + tcg_temp_free_i64(tcg_ctx, tcg_zero); + } + + if (index != 0) { + /* pre-index or post-index */ + if (index < 0) { + /* post-index */ + tcg_gen_addi_i64(tcg_ctx, addr, addr, offset); + } + tcg_gen_mov_i64(tcg_ctx, cpu_reg_sp(s, rn), addr); + } +} + /* Loads and stores */ static void disas_ldst(DisasContext *s, uint32_t insn) { - if (HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_READ, s->pc_curr) || HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_WRITE, s->pc_curr)) { + if (HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_READ, s->pc_curr) || + HOOK_EXISTS_BOUNDED(s->uc, UC_HOOK_MEM_WRITE, s->pc_curr)) { // sync PC if there are memory hooks. - // TODO: Better granularity by checking ldst type and corresponding hook type + // TODO: Better granularity by checking ldst type and corresponding hook + // type gen_a64_set_pc_im(s->uc->tcg_ctx, s->pc_curr); } @@ -3855,15 +4272,20 @@ static void disas_ldst(DisasContext *s, uint32_t insn) case 0x08: /* Load/store exclusive */ disas_ldst_excl(s, insn); break; - case 0x18: case 0x1c: /* Load register (literal) */ + case 0x18: + case 0x1c: /* Load register (literal) */ disas_ld_lit(s, insn); break; - case 0x28: case 0x29: - case 0x2c: case 0x2d: /* Load/store pair (all forms) */ + case 0x28: + case 0x29: + case 0x2c: + case 0x2d: /* Load/store pair (all forms) */ disas_ldst_pair(s, insn); break; - case 0x38: case 0x39: - case 0x3c: case 0x3d: /* Load/store register (all forms) */ + case 0x38: + case 0x39: + case 0x3c: + case 0x3d: /* Load/store register (all forms) */ disas_ldst_reg(s, insn); break; case 0x0c: /* AdvSIMD load/store multiple structures */ @@ -3872,13 +4294,14 @@ static void disas_ldst(DisasContext *s, uint32_t insn) case 0x0d: /* AdvSIMD load/store single structure */ disas_ldst_single_struct(s, insn); break; - case 0x19: /* LDAPR/STLR (unscaled immediate) */ - if (extract32(insn, 10, 2) != 0 || - extract32(insn, 21, 1) != 0) { + case 0x19: + if (extract32(insn, 21, 1) != 0) { + disas_ldst_tag(s, insn); + } else if (extract32(insn, 10, 2) == 0) { + disas_ldst_ldapr_stlr(s, insn); + } else { unallocated_encoding(s); - break; } - disas_ldst_ldapr_stlr(s, insn); break; default: unallocated_encoding(s); @@ -3919,14 +4342,14 @@ static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) * Add/subtract (immediate) * * 31 30 29 28 24 23 22 21 10 9 5 4 0 - * +--+--+--+-----------+-----+-------------+-----+-----+ - * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd | - * +--+--+--+-----------+-----+-------------+-----+-----+ + * +--+--+--+-------------+--+-------------+-----+-----+ + * |sf|op| 
S| 1 0 0 0 1 0 |sh| imm12 | Rn | Rd | + * +--+--+--+-------------+--+-------------+-----+-----+ * * sf: 0 -> 32bit, 1 -> 64bit * op: 0 -> add , 1 -> sub * S: 1 -> set flags - * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12 + * sh: 1 -> LSL imm by 12 */ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) { @@ -3934,7 +4357,7 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) int rd = extract32(insn, 0, 5); int rn = extract32(insn, 5, 5); uint64_t imm = extract32(insn, 10, 12); - int shift = extract32(insn, 22, 2); + bool shift = extract32(insn, 22, 1); bool setflags = extract32(insn, 29, 1); bool sub_op = extract32(insn, 30, 1); bool is_64bit = extract32(insn, 31, 1); @@ -3943,13 +4366,8 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd); TCGv_i64 tcg_result; - switch (shift) { - case 0x0: - break; - case 0x1: + if (shift) { imm <<= 12; - break; - default: unallocated_encoding(s); return; } @@ -3980,6 +4398,57 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) tcg_temp_free_i64(tcg_ctx, tcg_result); } +/* + * Add/subtract (immediate, with tags) + * + * 31 30 29 28 23 22 21 16 14 10 9 5 4 0 + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * |sf|op| S| 1 0 0 0 1 1 |o2| uimm6 |o3| uimm4 | Rn | Rd | + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * + * op: 0 -> add, 1 -> sub + */ +static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int uimm4 = extract32(insn, 10, 4); + int uimm6 = extract32(insn, 16, 6); + bool sub_op = extract32(insn, 30, 1); + TCGv_i64 tcg_rn, tcg_rd; + int imm; + + /* Test all of sf=1, S=0, o2=0, o3=0. */ + if ((insn & 0xa040c000u) != 0x80000000u || + !dc_isar_feature(aa64_mte_insn_reg, s)) { + unallocated_encoding(s); + return; + } + + imm = uimm6 << LOG2_TAG_GRANULE; + if (sub_op) { + imm = -imm; + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_rd = cpu_reg_sp(s, rd); + + if (s->ata) { + TCGv_i32 offset = tcg_const_i32(tcg_ctx, imm); + TCGv_i32 tag_offset = tcg_const_i32(tcg_ctx, uimm4); + + gen_helper_addsubg(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, offset, + tag_offset); + tcg_temp_free_i32(tcg_ctx, tag_offset); + tcg_temp_free_i32(tcg_ctx, offset); + } else { + tcg_gen_addi_i64(tcg_ctx, tcg_rd, tcg_rn, imm); + gen_address_with_allocation_tag0(tcg_ctx, tcg_rd, tcg_rd); + } +} + /* The input should be a value in the bottom e bits (with higher * bits zero); returns that value replicated into every element * of size e in a 64 bit integer. @@ -4267,7 +4736,7 @@ static void disas_bitfield(DisasContext *s, uint32_t insn) return; } - done: +done: if (!sf) { /* zero extend final result */ tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd); } @@ -4340,12 +4809,16 @@ static void disas_extract(DisasContext *s, uint32_t insn) static void disas_data_proc_imm(DisasContext *s, uint32_t insn) { switch (extract32(insn, 23, 6)) { - case 0x20: case 0x21: /* PC-rel. addressing */ + case 0x20: + case 0x21: /* PC-rel. 
addressing */ disas_pc_rel_adr(s, insn); break; - case 0x22: case 0x23: /* Add/subtract (immediate) */ + case 0x22: /* Add/subtract (immediate) */ disas_add_sub_imm(s, insn); break; + case 0x23: /* Add/subtract (immediate, with tags) */ + disas_add_sub_imm_with_tags(s, insn); + break; case 0x24: /* Logical (immediate) */ disas_logic_imm(s, insn); break; @@ -4414,8 +4887,9 @@ static void shift_reg(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf, * The shift amount must be in range (this should always be true as the * relevant instructions will UNDEF on bad shift immediates). */ -static void shift_reg_imm(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf, - enum a64_shift_type shift_type, unsigned int shift_i) +static void shift_reg_imm(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, + int sf, enum a64_shift_type shift_type, + unsigned int shift_i) { assert(shift_i < (sf ? 64 : 32)); @@ -4675,9 +5149,8 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int ra = extract32(insn, 10, 5); int rm = extract32(insn, 16, 5); - int op_id = (extract32(insn, 29, 3) << 4) | - (extract32(insn, 21, 3) << 1) | - extract32(insn, 15, 1); + int op_id = (extract32(insn, 29, 3) << 4) | (extract32(insn, 21, 3) << 1) | + extract32(insn, 15, 1); bool sf = extract32(insn, 31, 1); bool is_sub = extract32(op_id, 0, 1); bool is_high = extract32(op_id, 2, 1); @@ -4693,8 +5166,8 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn) case 0x44: /* SMULH */ is_signed = true; break; - case 0x0: /* MADD (32bit) */ - case 0x1: /* MSUB (32bit) */ + case 0x0: /* MADD (32bit) */ + case 0x1: /* MSUB (32bit) */ case 0x40: /* MADD (64bit) */ case 0x41: /* MSUB (64bit) */ case 0x4a: /* UMADDL */ @@ -4866,7 +5339,7 @@ static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - shift = sz ? 16 : 24; /* SETF16 or SETF8 */ + shift = sz ? 16 : 24; /* SETF16 or SETF8 */ tmp = tcg_temp_new_i32(tcg_ctx); tcg_gen_extrl_i64_i32(tcg_ctx, tmp, cpu_reg(s, rn)); @@ -5016,7 +5489,8 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { /* CSET & CSETM. 
*/ - tcg_gen_setcond_i64(tcg_ctx, tcg_invert_cond(c.cond), tcg_rd, c.value, zero); + tcg_gen_setcond_i64(tcg_ctx, tcg_invert_cond(c.cond), tcg_rd, c.value, + zero); if (else_inv) { tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rd); } @@ -5030,7 +5504,8 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) } else if (else_inc) { tcg_gen_addi_i64(tcg_ctx, t_false, t_false, 1); } - tcg_gen_movcond_i64(tcg_ctx, c.cond, tcg_rd, c.value, zero, t_true, t_false); + tcg_gen_movcond_i64(tcg_ctx, c.cond, tcg_rd, c.value, zero, t_true, + t_false); } tcg_temp_free_i64(tcg_ctx, zero); @@ -5041,8 +5516,8 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) } } -static void handle_clz(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_clz(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd, tcg_rn; @@ -5060,8 +5535,8 @@ static void handle_clz(DisasContext *s, unsigned int sf, } } -static void handle_cls(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_cls(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd, tcg_rn; @@ -5079,8 +5554,8 @@ static void handle_cls(DisasContext *s, unsigned int sf, } } -static void handle_rbit(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rbit(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd, tcg_rn; @@ -5099,8 +5574,8 @@ static void handle_rbit(DisasContext *s, unsigned int sf, } /* REV with sf==1, opcode==3 ("REV64") */ -static void handle_rev64(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rev64(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; if (!sf) { @@ -5113,8 +5588,8 @@ static void handle_rev64(DisasContext *s, unsigned int sf, /* REV with sf==0, opcode==2 * REV32 (sf==1, opcode==2) */ -static void handle_rev32(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rev32(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd = cpu_reg(s, rd); @@ -5138,14 +5613,15 @@ static void handle_rev32(DisasContext *s, unsigned int sf, } /* REV16 (opcode==1) */ -static void handle_rev16(DisasContext *s, unsigned int sf, - unsigned int rn, unsigned int rd) +static void handle_rev16(DisasContext *s, unsigned int sf, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_rd = cpu_reg(s, rd); TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); - TCGv_i64 mask = tcg_const_i64(tcg_ctx, sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); + TCGv_i64 mask = + tcg_const_i64(tcg_ctx, sf ? 
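For reference, the REV16 helper here swaps the bytes inside every halfword; with mask = 0x00ff00ff00ff00ff the full sequence computes rd = ((rn & mask) << 8) | ((rn >> 8) & mask). A worked example of that identity, for illustration only:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t mask = 0x00ff00ff00ff00ffull;
    uint64_t rn = 0x0102030405060708ull;
    uint64_t rd = ((rn & mask) << 8) | ((rn >> 8) & mask);

    assert(rd == 0x0201040306050807ull);  /* bytes swapped per halfword */
    return 0;
}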
0x00ff00ff00ff00ffull : 0x00ff00ff); tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 8); tcg_gen_and_i64(tcg_ctx, tcg_rd, tcg_rn, mask); @@ -5209,7 +5685,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x00): /* PACIA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5217,7 +5694,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x01): /* PACIB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5225,7 +5703,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x02): /* PACDA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5233,7 +5712,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x03): /* PACDB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5241,7 +5721,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x04): /* AUTIA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5249,7 +5730,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x05): /* AUTIB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5257,7 +5739,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x06): /* AUTDA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5265,7 +5748,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x07): /* AUTDB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5275,7 +5759,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - 
gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x09): /* PACIZB */ @@ -5283,7 +5768,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0a): /* PACDZA */ @@ -5291,7 +5777,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0b): /* PACDZB */ @@ -5299,7 +5786,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0c): /* AUTIZA */ @@ -5307,7 +5795,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autia(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0d): /* AUTIZB */ @@ -5315,7 +5804,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autib(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0e): /* AUTDZA */ @@ -5323,7 +5813,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autda(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x0f): /* AUTDZB */ @@ -5331,7 +5822,8 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autdb(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rd, + new_tmp_a64_zero(s)); } break; case MAP(1, 0x01, 0x10): /* XPACI */ @@ -5388,9 +5880,9 @@ static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, } /* LSLV, LSRV, ASRV, RORV */ -static void handle_shift_reg(DisasContext *s, - enum a64_shift_type shift_type, unsigned int sf, - unsigned int rm, unsigned int rn, unsigned int rd) +static void handle_shift_reg(DisasContext *s, enum a64_shift_type shift_type, + unsigned int sf, unsigned int rm, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_shift = tcg_temp_new_i64(tcg_ctx); @@ -5403,17 +5895,16 @@ static void handle_shift_reg(DisasContext *s, } /* CRC32[BHWX], CRC32C[BHWX] */ -static void handle_crc32(DisasContext *s, - unsigned int 
sf, unsigned int sz, bool crc32c, - unsigned int rm, unsigned int rn, unsigned int rd) +static void handle_crc32(DisasContext *s, unsigned int sf, unsigned int sz, + bool crc32c, unsigned int rm, unsigned int rn, + unsigned int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_acc, tcg_val; TCGv_i32 tcg_bytes; - if (!dc_isar_feature(aa64_crc32, s) - || (sf == 1 && sz != 3) - || (sf == 0 && sz == 3)) { + if (!dc_isar_feature(aa64_crc32, s) || (sf == 1 && sz != 3) || + (sf == 0 && sz == 3)) { unallocated_encoding(s); return; } @@ -5443,9 +5934,11 @@ static void handle_crc32(DisasContext *s, tcg_bytes = tcg_const_i32(tcg_ctx, 1 << sz); if (crc32c) { - gen_helper_crc32c_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + gen_helper_crc32c_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, + tcg_bytes); } else { - gen_helper_crc32_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + gen_helper_crc32_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, + tcg_bytes); } tcg_temp_free_i32(tcg_ctx, tcg_bytes); @@ -5460,25 +5953,72 @@ static void handle_crc32(DisasContext *s, static void disas_data_proc_2src(DisasContext *s, uint32_t insn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - unsigned int sf, rm, opcode, rn, rd; + unsigned int sf, rm, opcode, rn, rd, setflag; sf = extract32(insn, 31, 1); + setflag = extract32(insn, 29, 1); rm = extract32(insn, 16, 5); opcode = extract32(insn, 10, 6); rn = extract32(insn, 5, 5); rd = extract32(insn, 0, 5); - if (extract32(insn, 29, 1)) { + if (setflag && opcode != 0) { unallocated_encoding(s); return; } switch (opcode) { + case 0: /* SUBP(S) */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 tcg_n, tcg_m, tcg_d; + + tcg_n = read_cpu_reg_sp(s, rn, true); + tcg_m = read_cpu_reg_sp(s, rm, true); + tcg_gen_sextract_i64(tcg_ctx, tcg_n, tcg_n, 0, 56); + tcg_gen_sextract_i64(tcg_ctx, tcg_m, tcg_m, 0, 56); + tcg_d = cpu_reg(s, rd); + + if (setflag) { + gen_sub_CC(tcg_ctx, true, tcg_d, tcg_n, tcg_m); + } else { + tcg_gen_sub_i64(tcg_ctx, tcg_d, tcg_n, tcg_m); + } + } + break; case 2: /* UDIV */ handle_div(s, false, sf, rm, rn, rd); break; case 3: /* SDIV */ handle_div(s, true, sf, rm, rn, rd); break; + case 4: /* IRG */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + if (s->ata) { + gen_helper_irg(tcg_ctx, cpu_reg_sp(s, rd), tcg_ctx->cpu_env, + cpu_reg_sp(s, rn), cpu_reg(s, rm)); + } else { + gen_address_with_allocation_tag0(tcg_ctx, cpu_reg_sp(s, rd), + cpu_reg_sp(s, rn)); + } + break; + case 5: /* GMI */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 t1 = tcg_const_i64(tcg_ctx, 1); + TCGv_i64 t2 = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_extract_i64(tcg_ctx, t2, cpu_reg_sp(s, rn), 56, 4); + tcg_gen_shl_i64(tcg_ctx, t1, t1, t2); + tcg_gen_or_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, rm), t1); + + tcg_temp_free_i64(tcg_ctx, t1); + tcg_temp_free_i64(tcg_ctx, t2); + } + break; case 8: /* LSLV */ handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); break; @@ -5573,7 +6113,7 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) } break; - case 0x2: /* Conditional compare */ + case 0x2: /* Conditional compare */ disas_cc(s, insn); /* both imm and reg forms */ break; @@ -5581,10 +6121,10 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) disas_cond_select(s, insn); break; - case 0x6: /* Data-processing */ - if (op0) { /* (1 source) */ + case 0x6: /* Data-processing */ + if (op0) { /* (1 
source) */ disas_data_proc_1src(s, insn); - } else { /* (2 source) */ + } else { /* (2 source) */ disas_data_proc_2src(s, insn); } break; @@ -5606,9 +6146,9 @@ static void disas_data_proc_reg(DisasContext *s, uint32_t insn) } } -static void handle_fp_compare(DisasContext *s, int size, - unsigned int rn, unsigned int rm, - bool cmp_with_zero, bool signal_all_nans) +static void handle_fp_compare(DisasContext *s, int size, unsigned int rn, + unsigned int rm, bool cmp_with_zero, + bool signal_all_nans) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_flags = tcg_temp_new_i64(tcg_ctx); @@ -5644,16 +6184,20 @@ static void handle_fp_compare(DisasContext *s, int size, switch (size) { case MO_32: if (signal_all_nans) { - gen_helper_vfp_cmpes_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmpes_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } else { - gen_helper_vfp_cmps_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmps_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } break; case MO_16: if (signal_all_nans) { - gen_helper_vfp_cmpeh_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmpeh_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } else { - gen_helper_vfp_cmph_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst); + gen_helper_vfp_cmph_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, + fpst); } break; default: @@ -5844,7 +6388,8 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) a64_test_cc(tcg_ctx, &c, cond); t_zero = tcg_const_i64(tcg_ctx, 0); - tcg_gen_movcond_i64(tcg_ctx, c.cond, t_true, c.value, t_zero, t_true, t_false); + tcg_gen_movcond_i64(tcg_ctx, c.cond, t_true, c.value, t_zero, t_true, + t_false); tcg_temp_free_i64(tcg_ctx, t_zero); tcg_temp_free_i64(tcg_ctx, t_false); a64_free_cc(tcg_ctx, &c); @@ -5883,7 +6428,8 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) case 0xb: /* FRINTZ */ case 0xc: /* FRINTA */ { - TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7)); + TCGv_i32 tcg_rmode = + tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7)); fpst = get_fpstatus_ptr(tcg_ctx, true); gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst); @@ -5983,7 +6529,7 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) } tcg_temp_free_ptr(tcg_ctx, fpst); - done: +done: write_fp_sreg(s, rd, tcg_res); tcg_temp_free_i32(tcg_ctx, tcg_op); tcg_temp_free_i32(tcg_ctx, tcg_res); @@ -6061,19 +6607,18 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) } tcg_temp_free_ptr(tcg_ctx, fpst); - done: +done: write_fp_dreg(s, rd, tcg_res); tcg_temp_free_i64(tcg_ctx, tcg_op); tcg_temp_free_i64(tcg_ctx, tcg_res); } -static void handle_fp_fcvt(DisasContext *s, int opcode, - int rd, int rn, int dtype, int ntype) +static void handle_fp_fcvt(DisasContext *s, int opcode, int rd, int rn, + int dtype, int ntype) { TCGContext *tcg_ctx = s->uc->tcg_ctx; switch (ntype) { - case 0x0: - { + case 0x0: { TCGv_i32 tcg_rn = read_fp_sreg(s, rn); if (dtype == 1) { /* Single to double */ @@ -6097,8 +6642,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, tcg_temp_free_i32(tcg_ctx, tcg_rn); break; } - case 0x1: - { + case 0x1: { TCGv_i64 tcg_rn = read_fp_dreg(s, rn); TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx); if (dtype == 0) { @@ -6117,9 +6661,8 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, tcg_temp_free_i32(tcg_ctx, tcg_rd); tcg_temp_free_i64(tcg_ctx, tcg_rn); break; - } - case 0x3: - { + } + case 0x3: { TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 
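/* Half-precision source: the fp status pointer and the FPCR.AHP flag
 * fetched next are passed to the fcvt_f16_to_f32/f64 helpers below, so
 * the conversion follows either IEEE or the alternative half-precision
 * format depending on AHP. */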
TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx, false); TCGv_i32 tcg_ahp = get_ahp_flag(tcg_ctx); @@ -6127,13 +6670,15 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, if (dtype == 0) { /* Half to single */ TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, + tcg_ahp); write_fp_sreg(s, rd, tcg_rd); tcg_temp_free_i32(tcg_ctx, tcg_rd); } else { /* Half to double */ TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx); - gen_helper_vfp_fcvt_f16_to_f64(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); + gen_helper_vfp_fcvt_f16_to_f64(tcg_ctx, tcg_rd, tcg_rn, tcg_fpst, + tcg_ahp); write_fp_dreg(s, rd, tcg_rd); tcg_temp_free_i64(tcg_ctx, tcg_rd); } @@ -6167,8 +6712,9 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } switch (opcode) { - case 0x4: case 0x5: case 0x7: - { + case 0x4: + case 0x5: + case 0x7: { /* FCVT between half, single and double precision */ int dtype = extract32(opcode, 0, 2); if (type == 2 || dtype == type) { @@ -6242,8 +6788,8 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } /* Floating-point data-processing (2 source) - single precision */ -static void handle_fp_2src_single(DisasContext *s, int opcode, - int rd, int rn, int rm) +static void handle_fp_2src_single(DisasContext *s, int opcode, int rd, int rn, + int rm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1; @@ -6296,8 +6842,8 @@ static void handle_fp_2src_single(DisasContext *s, int opcode, } /* Floating-point data-processing (2 source) - double precision */ -static void handle_fp_2src_double(DisasContext *s, int opcode, - int rd, int rn, int rm) +static void handle_fp_2src_double(DisasContext *s, int opcode, int rd, int rn, + int rm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_op1; @@ -6350,8 +6896,8 @@ static void handle_fp_2src_double(DisasContext *s, int opcode, } /* Floating-point data-processing (2 source) - half precision */ -static void handle_fp_2src_half(DisasContext *s, int opcode, - int rd, int rn, int rm) +static void handle_fp_2src_half(DisasContext *s, int opcode, int rd, int rn, + int rm) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1; @@ -6454,8 +7000,8 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn) } /* Floating-point data-processing (3 source) - single precision */ -static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) +static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, int rd, + int rn, int rm, int ra) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1, tcg_op2, tcg_op3; @@ -6493,8 +7039,8 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, } /* Floating-point data-processing (3 source) - double precision */ -static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) +static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, int rd, + int rn, int rm, int ra) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tcg_op1, tcg_op2, tcg_op3; @@ -6532,8 +7078,8 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, } /* Floating-point data-processing (3 source) - half precision */ -static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) +static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, int rd, + int rn, int rm, int ra) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tcg_op1, tcg_op2, 
tcg_op3; @@ -6559,7 +7105,8 @@ static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, tcg_gen_xori_i32(tcg_ctx, tcg_op1, tcg_op1, 0x8000); } - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, + fpst); write_fp_sreg(s, rd, tcg_res); @@ -6708,11 +7255,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, case 1: /* float64 */ tcg_double = tcg_temp_new_i64(tcg_ctx); if (is_signed) { - gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int, tcg_shift, + tcg_fpstatus); } write_fp_dreg(s, rd, tcg_double); tcg_temp_free_i64(tcg_ctx, tcg_double); @@ -6721,11 +7268,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, case 0: /* float32 */ tcg_single = tcg_temp_new_i32(tcg_ctx); if (is_signed) { - gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } write_fp_sreg(s, rd, tcg_single); tcg_temp_free_i32(tcg_ctx, tcg_single); @@ -6734,11 +7281,11 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, case 3: /* float16 */ tcg_single = tcg_temp_new_i32(tcg_ctx); if (is_signed) { - gen_helper_vfp_sqtoh(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_sqtoh(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_uqtoh(tcg_ctx, tcg_single, tcg_int, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_uqtoh(tcg_ctx, tcg_single, tcg_int, tcg_shift, + tcg_fpstatus); } write_fp_sreg(s, rd, tcg_single); tcg_temp_free_i32(tcg_ctx, tcg_single); @@ -6933,7 +7480,8 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) break; case 2: /* 64 bit to top half. 
*/ - tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rd)); + tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, rd)); clear_vec_high(s, true, rd); break; case 3: @@ -6952,19 +7500,23 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) switch (type) { case 0: /* 32 bit */ - tcg_gen_ld32u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_32)); + tcg_gen_ld32u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_offset(s, rn, MO_32)); break; case 1: /* 64 bit */ - tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_64)); + tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_offset(s, rn, MO_64)); break; case 2: /* 64 bits from top half */ - tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rn)); + tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_hi_offset(s, rn)); break; case 3: /* 16 bit */ - tcg_gen_ld16u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_16)); + tcg_gen_ld16u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + fp_reg_offset(s, rn, MO_16)); break; default: g_assert_not_reached(); @@ -7053,8 +7605,8 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) goto do_unallocated; } /* fallthru */ - case 6: // 0b00000110: /* FMOV 32-bit */ - case 7: // 0b00000111: + case 6: // 0b00000110: /* FMOV 32-bit */ + case 7: // 0b00000111: case 0xa6: // 0b10100110: /* FMOV 64-bit */ case 0xa7: // 0b10100111: case 0xce: // 0b11001110: /* FMOV top half of 128-bit */ @@ -7198,14 +7750,13 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_resh, rm, 0, MO_64); do_ext64(s, tcg_resh, tcg_resl, pos); } - tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0); } else { TCGv_i64 tcg_hh; typedef struct { int reg; int elt; } EltPosns; - EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; + EltPosns eltposns[] = {{rn, 0}, {rn, 1}, {rm, 0}, {rm, 1}}; EltPosns *elt = eltposns; if (pos >= 64) { @@ -7228,9 +7779,11 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_resl); - write_vec_element(s, tcg_resh, rd, 1, MO_64); + if (is_q) { + write_vec_element(s, tcg_resh, rd, 1, MO_64); + } tcg_temp_free_i64(tcg_ctx, tcg_resh); - clear_vec_high(s, true, rd); + clear_vec_high(s, is_q, rd); } /* TBL/TBX @@ -7268,17 +7821,21 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) * the input. 
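 * (As implemented below: TBX seeds the result with the existing Rd bytes
 * while TBL starts from zero; with Q == 0 only the low 64 bits are
 * computed and clear_vec_high() zeroes the upper half of the destination.)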
*/ tcg_resl = tcg_temp_new_i64(tcg_ctx); - tcg_resh = tcg_temp_new_i64(tcg_ctx); + tcg_resh = NULL; if (is_tblx) { read_vec_element(s, tcg_resl, rd, 0, MO_64); } else { tcg_gen_movi_i64(tcg_ctx, tcg_resl, 0); } - if (is_tblx && is_q) { - read_vec_element(s, tcg_resh, rd, 1, MO_64); - } else { - tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0); + + if (is_q) { + tcg_resh = tcg_temp_new_i64(tcg_ctx); + if (is_tblx) { + read_vec_element(s, tcg_resh, rd, 1, MO_64); + } else { + tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0); + } } tcg_idx = tcg_temp_new_i64(tcg_ctx); @@ -7289,8 +7846,8 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) tcg_regno, tcg_numregs); if (is_q) { read_vec_element(s, tcg_idx, rm, 1, MO_64); - gen_helper_simd_tbl(tcg_ctx, tcg_resh, tcg_ctx->cpu_env, tcg_resh, tcg_idx, - tcg_regno, tcg_numregs); + gen_helper_simd_tbl(tcg_ctx, tcg_resh, tcg_ctx->cpu_env, tcg_resh, + tcg_idx, tcg_regno, tcg_numregs); } tcg_temp_free_i64(tcg_ctx, tcg_idx); tcg_temp_free_i32(tcg_ctx, tcg_regno); @@ -7298,9 +7855,12 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_resl); - write_vec_element(s, tcg_resh, rd, 1, MO_64); - tcg_temp_free_i64(tcg_ctx, tcg_resh); - clear_vec_high(s, true, rd); + + if (is_q) { + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_ctx, tcg_resh); + } + clear_vec_high(s, is_q, rd); } /* ZIP/UZP/TRN @@ -7338,7 +7898,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) } tcg_resl = tcg_const_i64(tcg_ctx, 0); - tcg_resh = tcg_const_i64(tcg_ctx, 0); + tcg_resh = is_q ? tcg_const_i64(tcg_ctx, 0) : NULL; tcg_res = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < elements; i++) { @@ -7349,8 +7909,8 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) if (i < midpoint) { read_vec_element(s, tcg_res, rn, 2 * i + part, size); } else { - read_vec_element(s, tcg_res, rm, - 2 * (i - midpoint) + part, size); + read_vec_element(s, tcg_res, rm, 2 * (i - midpoint) + part, + size); } break; } @@ -7389,9 +7949,12 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_resl); - write_vec_element(s, tcg_resh, rd, 1, MO_64); - tcg_temp_free_i64(tcg_ctx, tcg_resh); - clear_vec_high(s, true, rd); + + if (is_q) { + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_ctx, tcg_resh); + } + clear_vec_high(s, is_q, rd); } /* @@ -7496,8 +8059,8 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) return; } /* fall through */ - case 0x3: /* SADDLV, UADDLV */ - case 0xa: /* SMAXV, UMAXV */ + case 0x3: /* SADDLV, UADDLV */ + case 0xa: /* SMAXV, UMAXV */ case 0x1a: /* SMINV, UMINV */ if (size == 3 || (size == 2 && !is_q)) { unallocated_encoding(s); @@ -7577,7 +8140,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) default: g_assert_not_reached(); } - } } else { /* Floating point vector reduction ops which work across 32 @@ -7650,8 +8212,8 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, index = imm5 >> (size + 1); tcg_gen_gvec_dup_mem(tcg_ctx, size, vec_full_reg_offset(s, rd), - vec_reg_offset(s, rn, index, size), - is_q ? 16 : 8, vec_full_reg_size(s)); + vec_reg_offset(s, rn, index, size), is_q ? 
16 : 8, + vec_full_reg_size(s)); } /* DUP (element, scalar) @@ -7660,8 +8222,7 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | * +-----------------------+--------+-------------+------+------+ */ -static void handle_simd_dupes(DisasContext *s, int rd, int rn, - int imm5) +static void handle_simd_dupes(DisasContext *s, int rd, int rn, int imm5) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = ctz32(imm5); @@ -7730,8 +8291,8 @@ static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, * size: encoded in imm5 (see ARM ARM LowestSetBit()) * index: encoded in imm5<4:size+1> */ -static void handle_simd_inse(DisasContext *s, int rd, int rn, - int imm4, int imm5) +static void handle_simd_inse(DisasContext *s, int rd, int rn, int imm4, + int imm5) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = ctz32(imm5); @@ -7747,7 +8308,7 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn, return; } - dst_index = extract32(imm5, 1+size, 5); + dst_index = extract32(imm5, 1 + size, 5); src_index = extract32(imm4, size, 4); tmp = tcg_temp_new_i64(tcg_ctx); @@ -7761,7 +8322,6 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn, clear_vec_high(s, true, rd); } - /* INS (General) * * 31 21 20 16 15 10 9 5 4 0 @@ -7820,9 +8380,7 @@ static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, return; } } else { - if (size > 3 - || (size < 3 && is_q) - || (size == 3 && !is_q)) { + if (size > 3 || (size < 3 && is_q) || (size == 3 && !is_q)) { unallocated_encoding(s); return; } @@ -7832,7 +8390,7 @@ static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, return; } - element = extract32(imm5, 1+size, 4); + element = extract32(imm5, 1 + size, 4); tcg_rd = cpu_reg(s, rd); read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0)); @@ -8014,8 +8572,8 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { /* MOVI or MVNI, with MVNI negation handled above. */ - tcg_gen_gvec_dup64i(tcg_ctx, vec_full_reg_offset(s, rd), is_q ? 16 : 8, - vec_full_reg_size(s), imm); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), imm); } else { /* ORR or BIC, with BIC negation to AND handled above. 
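 * (At this point `imm` already holds the expanded immediate replicated
 * across 64 bits, so the vector op below just combines it with the
 * existing Rd contents rather than broadcasting it as MOVI/MVNI do.)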
*/ if (is_neg) { @@ -8083,9 +8641,9 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) fpst = NULL; break; - case 0xc: /* FMAXNMP */ - case 0xd: /* FADDP */ - case 0xf: /* FMAXP */ + case 0xc: /* FMAXNMP */ + case 0xd: /* FADDP */ + case 0xf: /* FMAXP */ case 0x2c: /* FMINNMP */ case 0x2f: /* FMINP */ /* FP op, size[0] is 32 or 64 bit*/ @@ -8158,19 +8716,24 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) if (size == MO_16) { switch (opcode) { case 0xc: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xd: /* FADDP */ - gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xf: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x2c: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x2f: /* FMINP */ - gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -8178,7 +8741,8 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) } else { switch (opcode) { case 0xc: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xd: /* FADDP */ gen_helper_vfp_adds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -8187,7 +8751,8 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x2c: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x2f: /* FMINP */ gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -8215,9 +8780,10 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) * This code is handles the common shifting code and is used by both * the vector and scalar code. 
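 * In outline the operation is
 *     res = (src + (round ? 1 << (shift - 1) : 0)) >> shift;
 * with the shift arithmetic or logical according to is_u, the
 * intermediate widened to 128 bits where the rounding add could carry,
 * and the result added into the existing destination when `accumulate`
 * is set.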
*/ -static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_i64 tcg_src, - TCGv_i64 tcg_rnd, bool accumulate, - bool is_u, int size, int shift) +static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, + TCGv_i64 tcg_src, TCGv_i64 tcg_rnd, + bool accumulate, bool is_u, int size, + int shift) { bool extended_result = false; bool round = tcg_rnd != NULL; @@ -8243,13 +8809,11 @@ static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_ if (!is_u) { /* take care of sign extending tcg_res */ tcg_gen_sari_i64(tcg_ctx, tcg_src_hi, tcg_src, 63); - tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, - tcg_src, tcg_src_hi, - tcg_rnd, tcg_zero); + tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, tcg_src, + tcg_src_hi, tcg_rnd, tcg_zero); } else { - tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, - tcg_src, tcg_zero, - tcg_rnd, tcg_zero); + tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi, tcg_src, + tcg_zero, tcg_rnd, tcg_zero); } tcg_temp_free_i64(tcg_ctx, tcg_zero); } else { @@ -8298,9 +8862,8 @@ static void handle_shri_with_rndacc(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_ } /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ -static void handle_scalar_simd_shri(DisasContext *s, - bool is_u, int immh, int immb, - int opcode, int rn, int rd) +static void handle_scalar_simd_shri(DisasContext *s, bool is_u, int immh, + int immb, int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; const int size = 3; @@ -8345,7 +8908,8 @@ static void handle_scalar_simd_shri(DisasContext *s, } tcg_rn = read_fp_dreg(s, rn); - tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(tcg_ctx); + tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) + : tcg_temp_new_i64(tcg_ctx); if (insert) { /* shift count same as element size is valid but does nothing; @@ -8354,11 +8918,12 @@ static void handle_scalar_simd_shri(DisasContext *s, int esize = 8 << size; if (shift != esize) { tcg_gen_shri_i64(tcg_ctx, tcg_rn, tcg_rn, shift); - tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); + tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, 0, + esize - shift); } } else { - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); + handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, accumulate, + is_u, size, shift); } write_fp_dreg(s, rd, tcg_rd); @@ -8371,9 +8936,8 @@ static void handle_scalar_simd_shri(DisasContext *s, } /* SHL/SLI - Scalar shift left */ -static void handle_scalar_simd_shli(DisasContext *s, bool insert, - int immh, int immb, int opcode, - int rn, int rd) +static void handle_scalar_simd_shli(DisasContext *s, bool insert, int immh, + int immb, int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = 32 - clz32(immh) - 1; @@ -8409,9 +8973,8 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert, /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with * (signed/unsigned) narrowing */ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, - bool is_u_shift, bool is_u_narrow, - int immh, int immb, int opcode, - int rn, int rd) + bool is_u_shift, bool is_u_narrow, int immh, + int immb, int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; @@ -8425,21 +8988,15 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, TCGv_i32 tcg_rd_narrowed; TCGv_i64 tcg_final; - static NeonGenNarrowEnvFn * const 
signed_narrow_fns[4][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_unarrow_sat8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_unarrow_sat16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_unarrow_sat32 }, - { NULL, NULL }, - }; - static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { - gen_helper_neon_narrow_sat_u8, - gen_helper_neon_narrow_sat_u16, - gen_helper_neon_narrow_sat_u32, - NULL + static NeonGenNarrowEnvFn *const signed_narrow_fns[4][2] = { + {gen_helper_neon_narrow_sat_s8, gen_helper_neon_unarrow_sat8}, + {gen_helper_neon_narrow_sat_s16, gen_helper_neon_unarrow_sat16}, + {gen_helper_neon_narrow_sat_s32, gen_helper_neon_unarrow_sat32}, + {NULL, NULL}, }; + static NeonGenNarrowEnvFn *const unsigned_narrow_fns[4] = { + gen_helper_neon_narrow_sat_u8, gen_helper_neon_narrow_sat_u16, + gen_helper_neon_narrow_sat_u32, NULL}; NeonGenNarrowEnvFn *narrowfn; int i; @@ -8475,11 +9032,12 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, for (i = 0; i < elements; i++) { read_vec_element(s, tcg_rn, rn, i, ldop); - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - false, is_u_shift, size+1, shift); + handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, false, + is_u_shift, size + 1, shift); narrowfn(tcg_ctx, tcg_rd_narrowed, tcg_ctx->cpu_env, tcg_rd); tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_rd_narrowed); - tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, + esize); } if (!is_q) { @@ -8501,8 +9059,8 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, - bool src_unsigned, bool dst_unsigned, - int immh, int immb, int rn, int rd) + bool src_unsigned, bool dst_unsigned, int immh, + int immb, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; @@ -8544,9 +9102,9 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, if (size == 3) { TCGv_i64 tcg_shift = tcg_const_i64(tcg_ctx, shift); - static NeonGenTwo64OpEnvFn * const fns[2][2] = { - { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, - { NULL, gen_helper_neon_qshl_u64 }, + static NeonGenTwo64OpEnvFn *const fns[2][2] = { + {gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64}, + {NULL, gen_helper_neon_qshl_u64}, }; NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; int maxpass = is_q ? 2 : 1; @@ -8564,21 +9122,14 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, clear_vec_high(s, is_q, rd); } else { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, shift); - static NeonGenTwoOpEnvFn * const fns[2][2][3] = { - { - { gen_helper_neon_qshl_s8, - gen_helper_neon_qshl_s16, - gen_helper_neon_qshl_s32 }, - { gen_helper_neon_qshlu_s8, - gen_helper_neon_qshlu_s16, - gen_helper_neon_qshlu_s32 } - }, { - { NULL, NULL, NULL }, - { gen_helper_neon_qshl_u8, - gen_helper_neon_qshl_u16, - gen_helper_neon_qshl_u32 } - } - }; + static NeonGenTwoOpEnvFn *const fns[2][2][3] = { + {{gen_helper_neon_qshl_s8, gen_helper_neon_qshl_s16, + gen_helper_neon_qshl_s32}, + {gen_helper_neon_qshlu_s8, gen_helper_neon_qshlu_s16, + gen_helper_neon_qshlu_s32}}, + {{NULL, NULL, NULL}, + {gen_helper_neon_qshl_u8, gen_helper_neon_qshl_u16, + gen_helper_neon_qshl_u32}}}; NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; MemOp memop = scalar ? 
size : MO_32; int maxpass = scalar ? 1 : is_q ? 4 : 2; @@ -8618,8 +9169,8 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, /* Common vector code for handling integer to FP conversion */ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, - int elements, int is_signed, - int fracbits, int size) + int elements, int is_signed, int fracbits, + int size) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx, size == MO_16); @@ -8640,11 +9191,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, read_vec_element(s, tcg_int64, rn, pass, mop); if (is_signed) { - gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int64, - tcg_shift, tcg_fpst); + gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int64, tcg_shift, + tcg_fpst); } else { - gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int64, - tcg_shift, tcg_fpst); + gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int64, tcg_shift, + tcg_fpst); } if (elements == 1) { write_fp_dreg(s, rd, tcg_double); @@ -8675,9 +9226,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, } } else { if (is_signed) { - gen_helper_vfp_sitos(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_sitos(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } else { - gen_helper_vfp_uitos(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_uitos(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } } break; @@ -8692,9 +9245,11 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, } } else { if (is_signed) { - gen_helper_vfp_sitoh(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_sitoh(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } else { - gen_helper_vfp_uitoh(tcg_ctx, tcg_float, tcg_int32, tcg_fpst); + gen_helper_vfp_uitoh(tcg_ctx, tcg_float, tcg_int32, + tcg_fpst); } } break; @@ -8723,9 +9278,8 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, /* UCVTF/SCVTF - Integer to FP conversion */ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, - bool is_q, bool is_u, - int immh, int immb, int opcode, - int rn, int rd) + bool is_q, bool is_u, int immh, + int immb, int opcode, int rn, int rd) { int size, elements, fracbits; int immhb = immh << 3 | immb; @@ -8767,8 +9321,8 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, - bool is_q, bool is_u, - int immh, int immb, int rn, int rd) + bool is_q, bool is_u, int immh, + int immb, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; @@ -8817,9 +9371,11 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, read_vec_element(s, tcg_op, rn, pass, MO_64); if (is_u) { - gen_helper_vfp_touqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + gen_helper_vfp_touqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, + tcg_fpstatus); } else { - gen_helper_vfp_tosqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + gen_helper_vfp_tosqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, + tcg_fpstatus); } write_vec_element(s, tcg_op, rd, pass, MO_64); tcg_temp_free_i64(tcg_ctx, tcg_op); @@ -8910,8 +9466,8 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); break; case 0x1c: /* SCVTF, UCVTF */ - handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, - opcode, rn, rd); + handle_simd_shift_intfp_conv(s, true, false, is_u, 
immh, immb, opcode, + rn, rd); break; case 0x10: /* SQSHRUN, SQSHRUN2 */ case 0x11: /* SQRSHRUN, SQRSHRUN2 */ @@ -8919,13 +9475,13 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - handle_vec_simd_sqshrn(s, true, false, false, true, - immh, immb, opcode, rn, rd); + handle_vec_simd_sqshrn(s, true, false, false, true, immh, immb, opcode, + rn, rd); break; case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ - handle_vec_simd_sqshrn(s, true, false, is_u, is_u, - immh, immb, opcode, rn, rd); + handle_vec_simd_sqshrn(s, true, false, is_u, is_u, immh, immb, opcode, + rn, rd); break; case 0xc: /* SQSHLU */ if (!is_u) { @@ -8994,7 +9550,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); tcg_gen_mul_i64(tcg_ctx, tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res); + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, + tcg_res, tcg_res); switch (opcode) { case 0xd: /* SQDMULL, SQDMULL2 */ @@ -9004,8 +9561,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) /* fall through */ case 0x9: /* SQDMLAL, SQDMLAL2 */ read_vec_element(s, tcg_op1, rd, 0, MO_64); - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_res, tcg_op1); + gen_helper_neon_addl_saturate_s64( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_op1); break; default: g_assert_not_reached(); @@ -9022,7 +9579,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx); gen_helper_neon_mull_s16(tcg_ctx, tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res); + gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, + tcg_res, tcg_res); switch (opcode) { case 0xd: /* SQDMULL, SQDMULL2 */ @@ -9034,8 +9592,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) { TCGv_i64 tcg_op3 = tcg_temp_new_i64(tcg_ctx); read_vec_element(s, tcg_op3, rd, 0, MO_32); - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_res, tcg_op3); + gen_helper_neon_addl_saturate_s32( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_op3); tcg_temp_free_i64(tcg_ctx, tcg_op3); break; } @@ -9066,16 +9624,20 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, switch (opcode) { case 0x1: /* SQADD */ if (u) { - gen_helper_neon_qadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0x5: /* SQSUB */ if (u) { - gen_helper_neon_qsub_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qsub_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qsub_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qsub_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0x6: /* CMGT, CMHI */ @@ -9106,9 +9668,11 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0x9: /* SQSHL, UQSHL */ if (u) { - gen_helper_neon_qshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + 
gen_helper_neon_qshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0xa: /* SRSHL, URSHL */ @@ -9120,9 +9684,11 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0xb: /* SQRSHL, UQRSHL */ if (u) { - gen_helper_neon_qrshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qrshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } else { - gen_helper_neon_qrshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qrshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, + tcg_rm); } break; case 0x10: /* ADD, SUB */ @@ -9169,7 +9735,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, tcg_res, fpst); break; case 0x18: /* FMAXNM */ - gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1a: /* FADD */ gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9178,7 +9745,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mulxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_ceq_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1e: /* FMAX */ gen_helper_vfp_maxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9187,7 +9755,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_recpsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x38: /* FMINNM */ - gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x3a: /* FSUB */ gen_helper_vfp_subd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9196,16 +9765,19 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mind(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_rsqrtsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5b: /* FMUL */ gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5d: /* FACGE */ - gen_helper_neon_acge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5f: /* FDIV */ gen_helper_vfp_divd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9215,10 +9787,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_absd(tcg_ctx, tcg_res, tcg_res); break; case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -9255,7 +9829,8 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 
0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_ceq_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1e: /* FMAX */ gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9264,10 +9839,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_recpsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x18: /* FMAXNM */ - gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x38: /* FMINNM */ - gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x3a: /* FSUB */ gen_helper_vfp_subs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9276,16 +9853,19 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_rsqrtsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5b: /* FMUL */ gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); break; case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5d: /* FACGE */ - gen_helper_neon_acge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x5f: /* FDIV */ gen_helper_vfp_divs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); @@ -9295,10 +9875,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, gen_helper_vfp_abss(tcg_ctx, tcg_res, tcg_res); break; case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_cgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_neon_acgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -9376,10 +9958,10 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) case 0x9: /* SQSHL, UQSHL */ case 0xb: /* SQRSHL, UQRSHL */ break; - case 0x8: /* SSHL, USHL */ - case 0xa: /* SRSHL, URSHL */ - case 0x6: /* CMGT, CMHI */ - case 0x7: /* CMGE, CMHS */ + case 0x8: /* SSHL, USHL */ + case 0xa: /* SRSHL, URSHL */ + case 0x6: /* CMGT, CMHI */ + case 0x7: /* CMGE, CMHS */ case 0x11: /* CMTST, CMEQ */ case 0x10: /* ADD, SUB (vector) */ if (size != 3) { @@ -9429,49 +10011,49 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) switch (opcode) { case 0x1: /* SQADD, UQADD */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, - { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, - { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8}, + {gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16}, + {gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32}, }; genenvfn = fns[size][u]; break; } case 0x5: /* SQSUB, UQSUB */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, - { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, - { gen_helper_neon_qsub_s32, 
gen_helper_neon_qsub_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8}, + {gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16}, + {gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32}, }; genenvfn = fns[size][u]; break; } case 0x9: /* SQSHL, UQSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8}, + {gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16}, + {gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32}, }; genenvfn = fns[size][u]; break; } case 0xb: /* SQRSHL, UQRSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8}, + {gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16}, + {gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32}, }; genenvfn = fns[size][u]; break; } case 0x16: /* SQDMULH, SQRDMULH */ { - static NeonGenTwoOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + static NeonGenTwoOpEnvFn *const fns[2][2] = { + {gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16}, + {gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32}, }; assert(size == 1 || size == 2); genenvfn = fns[size - 1][u]; @@ -9511,7 +10093,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, int rm = extract32(insn, 16, 5); bool u = extract32(insn, 29, 1); bool a = extract32(insn, 23, 1); - int fpopcode = opcode | (a << 3) | (u << 4); + int fpopcode = opcode | (a << 3) | (u << 4); TCGv_ptr fpst; TCGv_i32 tcg_op1; TCGv_i32 tcg_op2; @@ -9582,7 +10164,6 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, write_fp_sreg(s, rd, tcg_res); - tcg_temp_free_i32(tcg_ctx, tcg_res); tcg_temp_free_i32(tcg_ctx, tcg_op1); tcg_temp_free_i32(tcg_ctx, tcg_op2); @@ -9647,16 +10228,20 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, switch (opcode) { case 0x0: /* SQRDMLAH */ if (size == 1) { - gen_helper_neon_qrdmlah_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlah_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } else { - gen_helper_neon_qrdmlah_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlah_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } break; case 0x1: /* SQRDMLSH */ if (size == 1) { - gen_helper_neon_qrdmlsh_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlsh_s16(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } else { - gen_helper_neon_qrdmlsh_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlsh_s32(tcg_ctx, ele3, tcg_ctx->cpu_env, ele1, + ele2, ele3); } break; default: @@ -9785,9 +10370,9 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, } } -static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, - bool is_scalar, bool is_u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, bool is_scalar, + bool is_u, bool is_q, int 
size, int rn, + int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; bool is_double = (size == MO_64); @@ -9803,7 +10388,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0); TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx); - NeonGenTwoDoubleOPFn *genfn = NULL; + NeonGenTwoDoubleOpFn *genfn = NULL; bool swap = false; int pass; @@ -9845,7 +10430,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx); TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0); TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx); - NeonGenTwoSingleOPFn *genfn = NULL; + NeonGenTwoSingleOpFn *genfn = NULL; bool swap = false; int pass, maxpasses; @@ -9922,9 +10507,9 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, tcg_temp_free_ptr(tcg_ctx, fpst); } -static void handle_2misc_reciprocal(DisasContext *s, int opcode, - bool is_scalar, bool is_u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_reciprocal(DisasContext *s, int opcode, bool is_scalar, + bool is_u, bool is_q, int size, int rn, + int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; bool is_double = (size == 3); @@ -9971,7 +10556,7 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, switch (opcode) { case 0x3c: /* URECPE */ - gen_helper_recpe_u32(tcg_ctx, tcg_res, tcg_op, fpst); + gen_helper_recpe_u32(tcg_ctx, tcg_res, tcg_op); break; case 0x3d: /* FRECPE */ gen_helper_recpe_f32(tcg_ctx, tcg_res, tcg_op, fpst); @@ -10001,9 +10586,8 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, tcg_temp_free_ptr(tcg_ctx, fpst); } -static void handle_2misc_narrow(DisasContext *s, bool scalar, - int opcode, bool u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_narrow(DisasContext *s, bool scalar, int opcode, + bool u, bool is_q, int size, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; /* Handle 2-reg-misc ops which are narrowing (so each 2*size element @@ -10033,12 +10617,12 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, switch (opcode) { case 0x12: /* XTN, SQXTUN */ { - static NeonGenNarrowFn * const xtnfns[3] = { + static NeonGenNarrowFn *const xtnfns[3] = { gen_helper_neon_narrow_u8, gen_helper_neon_narrow_u16, tcg_gen_extrl_i64_i32, }; - static NeonGenNarrowEnvFn * const sqxtunfns[3] = { + static NeonGenNarrowEnvFn *const sqxtunfns[3] = { gen_helper_neon_unarrow_sat8, gen_helper_neon_unarrow_sat16, gen_helper_neon_unarrow_sat32, @@ -10052,13 +10636,12 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, } case 0x14: /* SQXTN, UQXTN */ { - static NeonGenNarrowEnvFn * const fns[3][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_narrow_sat_u8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_narrow_sat_u16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_narrow_sat_u32 }, + static NeonGenNarrowEnvFn *const fns[3][2] = { + {gen_helper_neon_narrow_sat_s8, gen_helper_neon_narrow_sat_u8}, + {gen_helper_neon_narrow_sat_s16, + gen_helper_neon_narrow_sat_u16}, + {gen_helper_neon_narrow_sat_s32, + gen_helper_neon_narrow_sat_u32}, }; genenvfn = fns[size][u]; break; @@ -10066,7 +10649,8 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, case 0x16: /* FCVTN, FCVTN2 */ /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ if (size == 2) { - gen_helper_vfp_fcvtsd(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env); + gen_helper_vfp_fcvtsd(tcg_ctx, tcg_res[pass], tcg_op, + 
tcg_ctx->cpu_env); } else { TCGv_i32 tcg_lo = tcg_temp_new_i32(tcg_ctx); TCGv_i32 tcg_hi = tcg_temp_new_i32(tcg_ctx); @@ -10074,21 +10658,25 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, TCGv_i32 ahp = get_ahp_flag(tcg_ctx); tcg_gen_extr_i64_i32(tcg_ctx, tcg_lo, tcg_hi, tcg_op); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_lo, tcg_lo, fpst, ahp); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_hi, tcg_hi, fpst, ahp); - tcg_gen_deposit_i32(tcg_ctx, tcg_res[pass], tcg_lo, tcg_hi, 16, 16); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_lo, tcg_lo, fpst, + ahp); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_hi, tcg_hi, fpst, + ahp); + tcg_gen_deposit_i32(tcg_ctx, tcg_res[pass], tcg_lo, tcg_hi, 16, + 16); tcg_temp_free_i32(tcg_ctx, tcg_lo); tcg_temp_free_i32(tcg_ctx, tcg_hi); tcg_temp_free_ptr(tcg_ctx, fpst); tcg_temp_free_i32(tcg_ctx, ahp); } break; - case 0x56: /* FCVTXN, FCVTXN2 */ + case 0x56: /* FCVTXN, FCVTXN2 */ /* 64 bit to 32 bit float conversion * with von Neumann rounding (round to odd) */ assert(size == 2); - gen_helper_fcvtx_f64_to_f32(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env); + gen_helper_fcvtx_f64_to_f32(tcg_ctx, tcg_res[pass], tcg_op, + tcg_ctx->cpu_env); break; default: g_assert_not_reached(); @@ -10127,9 +10715,11 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, read_vec_element(s, tcg_rd, rd, pass, MO_64); if (is_u) { /* USQADD */ - gen_helper_neon_uqadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); } else { /* SUQADD */ - gen_helper_neon_sqadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); } write_vec_element(s, tcg_rd, rd, pass, MO_64); } @@ -10159,13 +10749,16 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, if (is_u) { /* USQADD */ switch (size) { case 0: - gen_helper_neon_uqadd_s8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 1: - gen_helper_neon_uqadd_s16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 2: - gen_helper_neon_uqadd_s32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; default: g_assert_not_reached(); @@ -10173,13 +10766,16 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, } else { /* SUQADD */ switch (size) { case 0: - gen_helper_neon_sqadd_u8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 1: - gen_helper_neon_sqadd_u16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; case 2: - gen_helper_neon_sqadd_u32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, + tcg_rn, tcg_rd); break; default: g_assert_not_reached(); @@ -10369,10 +10965,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7: /* SQABS, SQNEG */ { NeonGenOneOpEnvFn *genfn; - static NeonGenOneOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, - { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, - { 
gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, + static NeonGenOneOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8}, + {gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16}, + {gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32}, }; genfn = fns[size][u]; genfn(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn); @@ -10385,7 +10981,8 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x3b: /* FCVTZS */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_tosls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + gen_helper_vfp_tosls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -10396,7 +10993,8 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7b: /* FCVTZU */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_touls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + gen_helper_vfp_touls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -10424,16 +11022,7 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, int size = 32 - clz32(immh) - 1; int immhb = immh << 3 | immb; int shift = 2 * (8 << size) - immhb; - bool accumulate = false; - int dsize = is_q ? 128 : 64; - int esize = 8 << size; - int elements = dsize/esize; - MemOp memop = size | (is_u ? 0 : MO_SIGN); - TCGv_i64 tcg_rn = new_tmp_a64(s); - TCGv_i64 tcg_rd = new_tmp_a64(s); - TCGv_i64 tcg_round; - uint64_t round_const; - int i; + GVecGen2iFn *gvec_fn; if (extract32(immh, 3, 1) && !is_q) { unallocated_encoding(s); @@ -10447,73 +11036,44 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, switch (opcode) { case 0x02: /* SSRA / USRA (accumulate) */ - if (is_u) { - /* Shift count same as element size produces zero to add. */ - if (shift == 8 << size) { - goto done; - } - gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]); - } else { - /* Shift count same as element size produces all sign to add. */ - if (shift == 8 << size) { - shift -= 1; - } - gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]); - } - return; + gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; + break; + case 0x08: /* SRI */ - /* Shift count same as element size is valid but does nothing. */ - if (shift == 8 << size) { - goto done; - } - gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]); - return; + gvec_fn = gen_gvec_sri; + break; case 0x00: /* SSHR / USHR */ if (is_u) { if (shift == 8 << size) { /* Shift count the same size as element size produces zero. */ - tcg_gen_gvec_dup8i(tcg_ctx, vec_full_reg_offset(s, rd), - is_q ? 16 : 8, vec_full_reg_size(s), 0); - } else { - gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size); + tcg_gen_gvec_dup_imm(tcg_ctx, size, vec_full_reg_offset(s, rd), + is_q ? 16 : 8, vec_full_reg_size(s), 0); + return; } + gvec_fn = tcg_gen_gvec_shri; } else { /* Shift count the same size as element size produces all sign. */ if (shift == 8 << size) { shift -= 1; } - gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size); + gvec_fn = tcg_gen_gvec_sari; } - return; + break; case 0x04: /* SRSHR / URSHR (rounding) */ + gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; break; + case 0x06: /* SRSRA / URSRA (accum + rounding) */ - accumulate = true; + gvec_fn = is_u ? 
gen_gvec_ursra : gen_gvec_srsra; + break; default: g_assert_not_reached(); } - round_const = 1ULL << (shift - 1); - tcg_round = tcg_const_i64(tcg_ctx, round_const); - - for (i = 0; i < elements; i++) { - read_vec_element(s, tcg_rn, rn, i, memop); - if (accumulate) { - read_vec_element(s, tcg_rd, rd, i, memop); - } - - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); - - write_vec_element(s, tcg_rd, rd, i, size); - } - tcg_temp_free_i64(tcg_ctx, tcg_round); - - done: - clear_vec_high(s, is_q, rd); + gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); } /* SHL/SLI - Vector shift left */ @@ -10537,7 +11097,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, } if (insert) { - gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]); + gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); } else { gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); } @@ -10545,7 +11105,8 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, /* USHLL/SHLL - Vector shift left with widening */ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, - int immh, int immb, int opcode, int rn, int rd) + int immh, int immb, int opcode, int rn, + int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = 32 - clz32(immh) - 1; @@ -10553,7 +11114,7 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, int shift = immhb - (8 << size); int dsize = 64; int esize = 8 << size; - int elements = dsize/esize; + int elements = dsize / esize; TCGv_i64 tcg_rn = new_tmp_a64(s); TCGv_i64 tcg_rd = new_tmp_a64(s); int i; @@ -10582,15 +11143,15 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, } /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ -static void handle_vec_simd_shrn(DisasContext *s, bool is_q, - int immh, int immb, int opcode, int rn, int rd) +static void handle_vec_simd_shrn(DisasContext *s, bool is_q, int immh, int immb, + int opcode, int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int immhb = immh << 3 | immb; int size = 32 - clz32(immh) - 1; int dsize = 64; int esize = 8 << size; - int elements = dsize/esize; + int elements = dsize / esize; int shift = (2 * esize) - immhb; bool round = extract32(opcode, 0, 1); TCGv_i64 tcg_rn, tcg_rd, tcg_final; @@ -10619,11 +11180,12 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q, } for (i = 0; i < elements; i++) { - read_vec_element(s, tcg_rn, rn, i, size+1); - handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, - false, true, size+1, shift); + read_vec_element(s, tcg_rn, rn, i, size + 1); + handle_shri_with_rndacc(tcg_ctx, tcg_rd, tcg_rn, tcg_round, false, true, + size + 1, shift); - tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, + esize); } if (!is_q) { @@ -10641,7 +11203,6 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q, clear_vec_high(s, is_q, rd); } - /* AdvSIMD shift by immediate * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 * +---+---+---+-------------+------+------+--------+---+------+------+ @@ -10688,15 +11249,15 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) break; case 0x12: /* SQSHRN / UQSHRN */ case 0x13: /* SQRSHRN / UQRSHRN */ - handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, - opcode, rn, rd); + handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, opcode, + rn, rd); break; case 0x14: 
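/*
 * Aside, not part of the patch: the plain shift-by-immediate forms above now
 * expand to a single gvec call, but handle_vec_simd_shrn still narrows lane by
 * lane through handle_shri_with_rndacc. The per-lane arithmetic for the
 * rounding RSHRN case is roughly the following, shown for 32-to-16-bit lanes;
 * a sketch for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

/* Round-shift-right a 32-bit lane by 'shift' (1..32), then narrow to 16 bits. */
static uint16_t rshrn_lane_32to16(uint32_t x, unsigned shift)
{
    uint64_t rounded = (uint64_t)x + (1ull << (shift - 1)); /* add the rounding bias */
    return (uint16_t)(rounded >> shift);                    /* shift, then truncate */
}

int main(void)
{
    /* Pack two narrowed lanes the way the loop above deposits into tcg_final. */
    uint32_t final = 0;
    uint32_t src[2] = { 0x00018000u, 0x00027fffu };
    for (int i = 0; i < 2; i++) {
        final |= (uint32_t)rshrn_lane_32to16(src[i], 16) << (16 * i);
    }
    printf("0x%08x\n", (unsigned)final); /* 0x00020002: 1.5 rounds up, 2.49... rounds down */
    return 0;
}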
/* SSHLL / USHLL */ handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); break; case 0x1c: /* SCVTF / UCVTF */ - handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, - opcode, rn, rd); + handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, opcode, + rn, rd); break; case 0xc: /* SQSHLU */ if (!is_u) { @@ -10720,13 +11281,13 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) /* Generate code to do a "long" addition or subtraction, ie one done in * TCGv_i64 on vector lanes twice the width specified by size. */ -static void gen_neon_addl(TCGContext *tcg_ctx, int size, bool is_sub, TCGv_i64 tcg_res, - TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) +static void gen_neon_addl(TCGContext *tcg_ctx, int size, bool is_sub, + TCGv_i64 tcg_res, TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) { - static NeonGenTwo64OpFn * const fns[3][2] = { - { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, - { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, - { tcg_gen_add_i64, tcg_gen_sub_i64 }, + static NeonGenTwo64OpFn *const fns[3][2] = { + {gen_helper_neon_addl_u16, gen_helper_neon_subl_u16}, + {gen_helper_neon_addl_u32, gen_helper_neon_subl_u32}, + {tcg_gen_add_i64, tcg_gen_sub_i64}, }; NeonGenTwo64OpFn *genfn; assert(size < 3); @@ -10806,23 +11367,24 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, tcg_gen_sub_i64(tcg_ctx, tcg_tmp1, tcg_op1, tcg_op2); tcg_gen_sub_i64(tcg_ctx, tcg_tmp2, tcg_op2, tcg_op1); tcg_gen_movcond_i64(tcg_ctx, is_u ? TCG_COND_GEU : TCG_COND_GE, - tcg_passres, - tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); + tcg_passres, tcg_op1, tcg_op2, tcg_tmp1, + tcg_tmp2); tcg_temp_free_i64(tcg_ctx, tcg_tmp1); tcg_temp_free_i64(tcg_ctx, tcg_tmp2); break; } - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); break; - case 9: /* SQDMLAL, SQDMLAL2 */ + case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, - tcg_passres, tcg_passres); + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, + tcg_passres); break; default: g_assert_not_reached(); @@ -10833,12 +11395,15 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, if (accop < 0) { tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres); } - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], + tcg_ctx->cpu_env, tcg_res[pass], tcg_passres); } else if (accop > 0) { - tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); } else if (accop < 0) { - tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); } if (accop != 0) { @@ -10870,9 +11435,9 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ { TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(tcg_ctx); - static NeonGenWidenFn * const widenfns[2][2] = { - { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, - { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, + static NeonGenWidenFn *const 
widenfns[2][2] = { + {gen_helper_neon_widen_s8, gen_helper_neon_widen_u8}, + {gen_helper_neon_widen_s16, gen_helper_neon_widen_u16}, }; NeonGenWidenFn *widenfn = widenfns[size][is_u]; @@ -10887,42 +11452,52 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ if (size == 0) { if (is_u) { - gen_helper_neon_abdl_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_u16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_abdl_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_s16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } else { if (is_u) { - gen_helper_neon_abdl_u32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_u32(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_abdl_s32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_abdl_s32(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } break; - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ if (size == 0) { if (is_u) { - gen_helper_neon_mull_u8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_u8(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_mull_s8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_s8(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } else { if (is_u) { - gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } else { - gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); } } break; - case 9: /* SQDMLAL, SQDMLAL2 */ + case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ assert(size == 1); - gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, - tcg_passres, tcg_passres); + gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, + tcg_op2); + gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, + tcg_passres); break; default: g_assert_not_reached(); @@ -10934,11 +11509,12 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, if (opcode == 9 || opcode == 11) { /* saturating accumulate ops */ if (accop < 0) { - gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres); + gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, + tcg_passres); } - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, - tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_saturate_s32( + tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_res[pass], + tcg_passres); } else { gen_neon_addl(tcg_ctx, size, (accop < 0), tcg_res[pass], tcg_res[pass], tcg_passres); @@ -10966,10 +11542,10 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx); TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx); TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(tcg_ctx); - static NeonGenWidenFn * const widenfns[3][2] = { - { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, - { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, - { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, + static NeonGenWidenFn *const widenfns[3][2] = { + {gen_helper_neon_widen_s8, gen_helper_neon_widen_u8}, + {gen_helper_neon_widen_s16, 
gen_helper_neon_widen_u16}, + {tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64}, }; NeonGenWidenFn *widenfn = widenfns[size][is_u]; @@ -10978,8 +11554,8 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, widenfn(tcg_ctx, tcg_op2_wide, tcg_op2); tcg_temp_free_i32(tcg_ctx, tcg_op2); tcg_res[pass] = tcg_temp_new_i64(tcg_ctx); - gen_neon_addl(tcg_ctx, size, (opcode == 3), - tcg_res[pass], tcg_op1, tcg_op2_wide); + gen_neon_addl(tcg_ctx, size, (opcode == 3), tcg_res[pass], tcg_op1, + tcg_op2_wide); tcg_temp_free_i64(tcg_ctx, tcg_op1); tcg_temp_free_i64(tcg_ctx, tcg_op2_wide); } @@ -10990,7 +11566,8 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, } } -static void do_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i64 in) +static void do_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, + TCGv_i64 in) { tcg_gen_addi_i64(tcg_ctx, in, in, 1U << 31); tcg_gen_extrh_i64_i32(tcg_ctx, res, in); @@ -11008,19 +11585,20 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx); TCGv_i64 tcg_wideres = tcg_temp_new_i64(tcg_ctx); - static NeonGenNarrowFn * const narrowfns[3][2] = { - { gen_helper_neon_narrow_high_u8, - gen_helper_neon_narrow_round_high_u8 }, - { gen_helper_neon_narrow_high_u16, - gen_helper_neon_narrow_round_high_u16 }, - { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, + static NeonGenNarrowFn *const narrowfns[3][2] = { + {gen_helper_neon_narrow_high_u8, + gen_helper_neon_narrow_round_high_u8}, + {gen_helper_neon_narrow_high_u16, + gen_helper_neon_narrow_round_high_u16}, + {tcg_gen_extrh_i64_i32, do_narrow_round_high_u32}, }; NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; read_vec_element(s, tcg_op1, rn, pass, MO_64); read_vec_element(s, tcg_op2, rm, pass, MO_64); - gen_neon_addl(tcg_ctx, size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); + gen_neon_addl(tcg_ctx, size, (opcode == 6), tcg_wideres, tcg_op1, + tcg_op2); tcg_temp_free_i64(tcg_ctx, tcg_op1); tcg_temp_free_i64(tcg_ctx, tcg_op2); @@ -11121,7 +11699,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) break; } return; - case 9: /* SQDMLAL, SQDMLAL2 */ + case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ if (is_u || size == 0) { @@ -11129,11 +11707,11 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) return; } /* fall through */ - case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ - case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ - case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ - case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ - case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ + case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ /* 64 x 64 -> 128 */ @@ -11243,19 +11821,24 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2); break; case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5a: /* FADDP */ - gen_helper_vfp_addd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_addd(tcg_ctx, 
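/*
 * Aside, not part of the patch: do_narrow_round_high_u32, added above for the
 * rounding narrow-high ops, biases the 64-bit intermediate by 2^31 and keeps
 * the upper half. A plain C equivalent, for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t narrow_round_high_u32(uint64_t x)
{
    return (uint32_t)((x + (1ull << 31)) >> 32); /* round to nearest, take the high word */
}

int main(void)
{
    printf("%u %u\n", (unsigned)narrow_round_high_u32(0x00000001ffffffffull),
                      (unsigned)narrow_round_high_u32(0x000000017fffffffull));
    /* prints: 2 1 */
    return 0;
}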
tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5e: /* FMAXP */ - gen_helper_vfp_maxd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x78: /* FMINNMP */ - gen_helper_vfp_minnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x7e: /* FMINP */ - gen_helper_vfp_mind(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_mind(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -11287,7 +11870,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, switch (opcode) { case 0x17: /* ADDP */ { - static NeonGenTwoOpFn * const fns[3] = { + static NeonGenTwoOpFn *const fns[3] = { gen_helper_neon_padd_u8, gen_helper_neon_padd_u16, tcg_gen_add_i32, @@ -11297,39 +11880,44 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, } case 0x14: /* SMAXP, UMAXP */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, - { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, - { tcg_gen_smax_i32, tcg_gen_umax_i32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8}, + {gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16}, + {tcg_gen_smax_i32, tcg_gen_umax_i32}, }; genfn = fns[size][u]; break; } case 0x15: /* SMINP, UMINP */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, - { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, - { tcg_gen_smin_i32, tcg_gen_umin_i32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8}, + {gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16}, + {tcg_gen_smin_i32, tcg_gen_umin_i32}, }; genfn = fns[size][u]; break; } /* The FP operations are all on single floats (32 bit) */ case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5a: /* FADDP */ - gen_helper_vfp_adds(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_adds(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x5e: /* FMAXP */ - gen_helper_vfp_maxs(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_maxs(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x78: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_minnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; case 0x7e: /* FMINP */ - gen_helper_vfp_mins(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_vfp_mins(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, + fpst); break; default: g_assert_not_reached(); @@ -11364,9 +11952,8 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) * together indicate the operation. size[0] indicates single * or double. 
*/ - int fpopcode = extract32(insn, 11, 5) - | (extract32(insn, 23, 1) << 5) - | (extract32(insn, 29, 1) << 6); + int fpopcode = extract32(insn, 11, 5) | (extract32(insn, 23, 1) << 5) | + (extract32(insn, 29, 1) << 6); int is_q = extract32(insn, 30, 1); int size = extract32(insn, 22, 1); int rm = extract32(insn, 16, 5); @@ -11392,8 +11979,8 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, - rn, rm, rd); + handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, rn, + rm, rd); return; case 0x1b: /* FMULX */ case 0x1f: /* FRECPS */ @@ -11432,11 +12019,10 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) int is_s = extract32(insn, 23, 1); int is_2 = extract32(insn, 29, 1); int data = (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, - is_q ? 16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_a64); + tcg_gen_gvec_3_ptr( + tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, is_q ? 16 : 8, + vec_full_reg_size(s), data, gen_helper_gvec_fmlal_a64); } return; @@ -11467,13 +12053,13 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) return; } /* fall through */ - case 0x0: /* SHADD, UHADD */ - case 0x2: /* SRHADD, URHADD */ - case 0x4: /* SHSUB, UHSUB */ - case 0xc: /* SMAX, UMAX */ - case 0xd: /* SMIN, UMIN */ - case 0xe: /* SABD, UABD */ - case 0xf: /* SABA, UABA */ + case 0x0: /* SHADD, UHADD */ + case 0x2: /* SRHADD, URHADD */ + case 0x4: /* SHSUB, UHSUB */ + case 0xc: /* SMAX, UMAX */ + case 0xd: /* SMIN, UMIN */ + case 0xe: /* SABD, UABD */ + case 0xf: /* SABA, UABA */ case 0x12: /* MLA, MLS */ if (size == 3) { unallocated_encoding(s); @@ -11500,24 +12086,25 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) switch (opcode) { case 0x01: /* SQADD, UQADD */ - tcg_gen_gvec_4(tcg_ctx, vec_full_reg_offset(s, rd), - offsetof(CPUARMState, vfp.qc), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s), - (u ? uqadd_op : sqadd_op) + size); + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); + } return; case 0x05: /* SQSUB, UQSUB */ - tcg_gen_gvec_4(tcg_ctx, vec_full_reg_offset(s, rd), - offsetof(CPUARMState, vfp.qc), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s), - (u ? uqsub_op : sqsub_op) + size); + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); + } return; case 0x08: /* SSHL, USHL */ - gen_gvec_op3(s, is_q, rd, rn, rm, - u ? 
&ushl_op[size] : &sshl_op[size]); + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); + } return; case 0x0c: /* SMAX, UMAX */ if (u) { @@ -11533,6 +12120,20 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); } return; + case 0xe: /* SABD, UABD */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); + } + return; + case 0xf: /* SABA, UABA */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); + } + return; case 0x10: /* ADD, SUB */ if (u) { gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); @@ -11540,23 +12141,23 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); } return; - case 0x13: /* MUL, PMUL */ + case 0x13: /* MUL, PMUL */ if (!u) { /* MUL */ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); - } else { /* PMUL */ + } else { /* PMUL */ gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); } return; case 0x12: /* MLA, MLS */ if (u) { - gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]); + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); } else { - gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]); + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); } return; case 0x11: if (!u) { /* CMTST */ - gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]); + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); return; } /* else CMEQ */ @@ -11569,8 +12170,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) cond = u ? TCG_COND_GEU : TCG_COND_GE; do_gvec_cmp: tcg_gen_gvec_cmp(tcg_ctx, cond, size, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), is_q ? 
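/*
 * Aside, not part of the patch: the theme of this hunk is replacing open-coded
 * per-element loops (and the old GVecGen op tables) with one whole-vector
 * expander chosen during decode, e.g. gen_gvec_uabd / gen_gvec_uaba /
 * gen_gvec_mla issued through gen_gvec_fn3. A rough self-contained sketch of
 * that shape follows; every type and function name in it is made up, not
 * QEMU's API. The point is only the control flow: decode picks one function,
 * then a single call covers the full vector.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef void vec_fn3(uint8_t *d, const uint8_t *n, const uint8_t *m, size_t bytes);

static void vec_add_u8(uint8_t *d, const uint8_t *n, const uint8_t *m, size_t bytes)
{
    for (size_t i = 0; i < bytes; i++) {
        d[i] = (uint8_t)(n[i] + m[i]);
    }
}

static void vec_uabd_u8(uint8_t *d, const uint8_t *n, const uint8_t *m, size_t bytes)
{
    for (size_t i = 0; i < bytes; i++) {
        d[i] = (uint8_t)(n[i] > m[i] ? n[i] - m[i] : m[i] - n[i]); /* unsigned absolute difference */
    }
}

int main(void)
{
    int opcode_is_uabd = 1;                          /* stand-in for the decoded opcode/u bits */
    vec_fn3 *fn = opcode_is_uabd ? vec_uabd_u8 : vec_add_u8;

    uint8_t n[4] = {1, 200, 7, 9}, m[4] = {3, 100, 7, 250}, d[4];
    fn(d, n, m, sizeof(d));                          /* one call for the whole vector */
    printf("%d %d %d %d\n", d[0], d[1], d[2], d[3]); /* 2 100 0 241 */
    return 0;
}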
16 : 8, vec_full_reg_size(s)); return; } @@ -11607,80 +12207,69 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) switch (opcode) { case 0x0: /* SHADD, UHADD */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, - { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, - { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8}, + {gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16}, + {gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32}, }; genfn = fns[size][u]; break; } case 0x2: /* SRHADD, URHADD */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, - { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, - { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8}, + {gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16}, + {gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32}, }; genfn = fns[size][u]; break; } case 0x4: /* SHSUB, UHSUB */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, - { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, - { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8}, + {gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16}, + {gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32}, }; genfn = fns[size][u]; break; } case 0x9: /* SQSHL, UQSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8}, + {gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16}, + {gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32}, }; genenvfn = fns[size][u]; break; } case 0xa: /* SRSHL, URSHL */ { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, - { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, - { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, + static NeonGenTwoOpFn *const fns[3][2] = { + {gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8}, + {gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16}, + {gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32}, }; genfn = fns[size][u]; break; } case 0xb: /* SQRSHL, UQRSHL */ { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + static NeonGenTwoOpEnvFn *const fns[3][2] = { + {gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8}, + {gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16}, + {gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32}, }; genenvfn = fns[size][u]; break; } - case 0xe: /* SABD, UABD */ - case 0xf: /* SABA, UABA */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 }, - { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 }, - { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 }, - }; - genfn = fns[size][u]; - break; - } case 0x16: /* SQDMULH, SQRDMULH */ { - static NeonGenTwoOpEnvFn * const 
fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + static NeonGenTwoOpEnvFn *const fns[2][2] = { + {gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16}, + {gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32}, }; assert(size == 1 || size == 2); genenvfn = fns[size - 1][u]; @@ -11696,18 +12285,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn(tcg_ctx, tcg_res, tcg_op1, tcg_op2); } - if (opcode == 0xf) { - /* SABA, UABA: accumulating ops */ - static NeonGenTwoOpFn * const fns[3] = { - gen_helper_neon_add_u8, - gen_helper_neon_add_u16, - tcg_gen_add_i32, - }; - - read_vec_element_i32(s, tcg_op1, rd, pass, MO_32); - fns[size](tcg_ctx, tcg_res, tcg_op1, tcg_res); - } - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); tcg_temp_free_i32(tcg_ctx, tcg_res); @@ -11834,7 +12411,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) rn = extract32(insn, 5, 5); rd = extract32(insn, 0, 5); - fpopcode = opcode | (a << 3) | (u << 4); + fpopcode = opcode | (a << 3) | (u << 4); datasize = is_q ? 128 : 64; elements = datasize / 16; @@ -11866,21 +12443,24 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) switch (fpopcode) { case 0x10: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, - fpst); + gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x12: /* FADDP */ - gen_helper_advsimd_addh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_addh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x16: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x18: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, - fpst); + gen_helper_advsimd_minnumh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; case 0x1e: /* FMINP */ - gen_helper_advsimd_minh(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minh(tcg_ctx, tcg_res[pass], tcg_op1, + tcg_op2, fpst); break; default: g_assert_not_reached(); @@ -11906,68 +12486,84 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) switch (fpopcode) { case 0x0: /* FMAXNM */ - gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1: /* FMLA */ read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); break; case 0x2: /* FADD */ - gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_addh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x3: /* FMULX */ - gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x4: /* FCMEQ */ - gen_helper_advsimd_ceq_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_ceq_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x6: /* FMAX */ - gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x7: /* FRECPS */ gen_helper_recpsf_f16(tcg_ctx, tcg_res, 
tcg_op1, tcg_op2, fpst); break; case 0x8: /* FMINNM */ - gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x9: /* FMLS */ /* As usual for ARM, separate negation for fused multiply-add */ tcg_gen_xori_i32(tcg_ctx, tcg_op1, tcg_op1, 0x8000); read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); break; case 0xa: /* FSUB */ - gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xe: /* FMIN */ - gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0xf: /* FRSQRTS */ - gen_helper_rsqrtsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_rsqrtsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x13: /* FMUL */ - gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x14: /* FCMGE */ - gen_helper_advsimd_cge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_cge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x15: /* FACGE */ - gen_helper_advsimd_acge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_acge_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x17: /* FDIV */ - gen_helper_advsimd_divh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_divh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1a: /* FABD */ - gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_subh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); tcg_gen_andi_i32(tcg_ctx, tcg_res, tcg_res, 0x7fff); break; case 0x1c: /* FCMGT */ - gen_helper_advsimd_cgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_cgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; case 0x1d: /* FACGT */ - gen_helper_advsimd_acgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_advsimd_acgt_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, + fpst); break; default: fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n", @@ -12028,9 +12624,8 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) case 0x1b: /* FCMLA, #270 */ case 0x1c: /* FCADD, #90 */ case 0x1e: /* FCADD, #270 */ - if (size == 0 - || (size == 1 && !dc_isar_feature(aa64_fp16, s)) - || (size == 3 && !is_q)) { + if (size == 0 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) || + (size == 3 && !is_q)) { unallocated_encoding(s); return; } @@ -12050,29 +12645,11 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) switch (opcode) { case 0x0: /* SQRDMLAH (vector) */ - switch (size) { - case 1: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16); - break; - case 2: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32); - break; - default: - g_assert_not_reached(); - } + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); return; case 0x1: /* SQRDMLSH (vector) */ - switch (size) { - case 1: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16); - break; - case 2: - gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32); - break; - default: - 
g_assert_not_reached(); - } + gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size); return; case 0x2: /* SDOT / UDOT */ @@ -12149,7 +12726,8 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, tcg_res[pass] = tcg_temp_new_i64(tcg_ctx); read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); - gen_helper_vfp_fcvtds(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env); + gen_helper_vfp_fcvtds(tcg_ctx, tcg_res[pass], tcg_op, + tcg_ctx->cpu_env); tcg_temp_free_i32(tcg_ctx, tcg_op); } for (pass = 0; pass < 2; pass++) { @@ -12167,8 +12745,8 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, tcg_res[pass] = tcg_temp_new_i32(tcg_ctx); read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_res[pass], tcg_res[pass], - fpst, ahp); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_res[pass], + tcg_res[pass], fpst, ahp); } for (pass = 0; pass < 4; pass++) { write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); @@ -12180,8 +12758,8 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, } } -static void handle_rev(DisasContext *s, int opcode, bool u, - bool is_q, int size, int rn, int rd) +static void handle_rev(DisasContext *s, int opcode, bool u, bool is_q, int size, + int rn, int rd) { TCGContext *tcg_ctx = s->uc->tcg_ctx; int op = (opcode << 1) | u; @@ -12237,10 +12815,11 @@ static void handle_rev(DisasContext *s, int opcode, bool u, int off = e_rev * esize; read_vec_element(s, tcg_rn, rn, i, size); if (off >= 64) { - tcg_gen_deposit_i64(tcg_ctx, tcg_rd_hi, tcg_rd_hi, - tcg_rn, off - 64, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_rd_hi, tcg_rd_hi, tcg_rn, + off - 64, esize); } else { - tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, off, esize); + tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, off, + esize); } } write_vec_element(s, tcg_rd, rd, 0, MO_64); @@ -12290,10 +12869,10 @@ static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, } else { for (pass = 0; pass < maxpass; pass++) { TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx); - NeonGenOneOpFn *genfn; - static NeonGenOneOpFn * const fns[2][2] = { - { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, - { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, + NeonGenOne64OpFn *genfn; + static NeonGenOne64OpFn *const fns[2][2] = { + {gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8}, + {gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16}, }; genfn = fns[size][u]; @@ -12334,7 +12913,7 @@ static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) TCGv_i64 tcg_res[2]; for (pass = 0; pass < 2; pass++) { - static NeonGenWidenFn * const widenfns[3] = { + static NeonGenWidenFn *const widenfns[3] = { gen_helper_neon_widen_u8, gen_helper_neon_widen_u16, tcg_gen_extu_i32_i64, @@ -12477,8 +13056,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x1c: case 0x1d: case 0x1e: - case 0x1f: - { + case 0x1f: { /* Floating point: U, size[1] and opcode indicate operation; * size[0] indicates single or double precision. 
*/ @@ -12618,7 +13196,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - need_fpstatus = true; break; case 0x1e: /* FRINT32Z */ case 0x1f: /* FRINT64Z */ @@ -12667,6 +13244,23 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; + case 0x8: /* CMGT, CMGE */ + if (u) { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); + } else { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); + } + return; + case 0x9: /* CMEQ, CMLE */ + if (u) { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); + } else { + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); + } + return; + case 0xa: /* CMLT */ + gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); + return; case 0xb: if (u) { /* ABS, NEG */ gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); @@ -12690,8 +13284,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_op, rn, pass, MO_64); - handle_2misc_64(s, opcode, u, tcg_res, tcg_op, - tcg_rmode, tcg_fpstatus); + handle_2misc_64(s, opcode, u, tcg_res, tcg_op, tcg_rmode, + tcg_fpstatus); write_vec_element(s, tcg_res, rd, pass, MO_64); @@ -12704,29 +13298,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) for (pass = 0; pass < (is_q ? 4 : 2); pass++) { TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx); TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx); - TCGCond cond; read_vec_element_i32(s, tcg_op, rn, pass, MO_32); if (size == 2) { /* Special cases for 32 bit elements */ switch (opcode) { - case 0xa: /* CMLT */ - /* 32 bit integer comparison against zero, result is - * test ? (2^32 - 1) : 0. We implement via setcond(test) - * and inverting. - */ - cond = TCG_COND_LT; - do_cmop: - tcg_gen_setcondi_i32(tcg_ctx, cond, tcg_res, tcg_op, 0); - tcg_gen_neg_i32(tcg_ctx, tcg_res, tcg_res); - break; - case 0x8: /* CMGT, CMGE */ - cond = u ? TCG_COND_GE : TCG_COND_GT; - goto do_cmop; - case 0x9: /* CMEQ, CMLE */ - cond = u ? 
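/*
 * Aside, not part of the patch: the per-element code removed in this hunk
 * built an all-ones / all-zero lane mask with setcond followed by negate; the
 * gen_gvec_cgt0 / cge0 / ceq0 / cle0 / clt0 cases added just above do the same
 * across the whole register. The scalar idiom, for reference.
 */
#include <stdint.h>
#include <stdio.h>

/* CMGT (zero) on one 32-bit lane: all ones if x > 0, else all zeros. */
static uint32_t cmgt_zero_mask(int32_t x)
{
    return (uint32_t)-(uint32_t)(x > 0);
}

int main(void)
{
    printf("%08x %08x %08x\n", (unsigned)cmgt_zero_mask(5),
           (unsigned)cmgt_zero_mask(0), (unsigned)cmgt_zero_mask(-3));
    /* prints: ffffffff 00000000 00000000 */
    return 0;
}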
TCG_COND_LE : TCG_COND_EQ; - goto do_cmop; case 0x4: /* CLS */ if (u) { tcg_gen_clzi_i32(tcg_ctx, tcg_res, tcg_op, 32); @@ -12736,9 +13313,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) break; case 0x7: /* SQABS, SQNEG */ if (u) { - gen_helper_neon_qneg_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op); + gen_helper_neon_qneg_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op); } else { - gen_helper_neon_qabs_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op); + gen_helper_neon_qabs_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op); } break; case 0x2f: /* FABS */ @@ -12748,7 +13327,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_vfp_negs(tcg_ctx, tcg_res, tcg_op); break; case 0x7f: /* FSQRT */ - gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, tcg_ctx->cpu_env); + gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, + tcg_ctx->cpu_env); break; case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ @@ -12757,8 +13337,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x3b: /* FCVTZS */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_tosls(tcg_ctx, tcg_res, tcg_op, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_tosls(tcg_ctx, tcg_res, tcg_op, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -12769,8 +13349,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7b: /* FCVTZU */ { TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0); - gen_helper_vfp_touls(tcg_ctx, tcg_res, tcg_op, - tcg_shift, tcg_fpstatus); + gen_helper_vfp_touls(tcg_ctx, tcg_res, tcg_op, tcg_shift, + tcg_fpstatus); tcg_temp_free_i32(tcg_ctx, tcg_shift); break; } @@ -12783,18 +13363,21 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_rints(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); break; case 0x59: /* FRINTX */ - gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x7c: /* URSQRTE */ - gen_helper_rsqrte_u32(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_rsqrte_u32(tcg_ctx, tcg_res, tcg_op); break; case 0x1e: /* FRINT32Z */ case 0x5e: /* FRINT32X */ - gen_helper_frint32_s(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_frint32_s(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x1f: /* FRINT64Z */ case 0x5f: /* FRINT64X */ - gen_helper_frint64_s(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_frint64_s(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; default: g_assert_not_reached(); @@ -12815,44 +13398,14 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7: /* SQABS, SQNEG */ { NeonGenOneOpEnvFn *genfn; - static NeonGenOneOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, - { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + static NeonGenOneOpEnvFn *const fns[2][2] = { + {gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8}, + {gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16}, }; genfn = fns[size][u]; genfn(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op); break; } - case 0x8: /* CMGT, CMGE */ - case 0x9: /* CMEQ, CMLE */ - case 0xa: /* CMLT */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 }, - { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 }, - { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 }, - }; - NeonGenTwoOpFn *genfn; - int comp; - bool reverse; - TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0); - - /* comp = index 
into [CMGT, CMGE, CMEQ, CMLE, CMLT] */ - comp = (opcode - 0x8) * 2 + u; - /* ...but LE, LT are implemented as reverse GE, GT */ - reverse = (comp > 2); - if (reverse) { - comp = 4 - comp; - } - genfn = fns[comp][size]; - if (reverse) { - genfn(tcg_ctx, tcg_res, tcg_zero, tcg_op); - } else { - genfn(tcg_ctx, tcg_res, tcg_op, tcg_zero); - } - tcg_temp_free_i32(tcg_ctx, tcg_zero); - break; - } case 0x4: /* CLS, CLZ */ if (u) { if (size == 0) { @@ -12957,8 +13510,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) } handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); return; - } - break; + } break; case 0x2c: /* FCMGT (zero) */ case 0x2d: /* FCMEQ (zero) */ case 0x2e: /* FCMLT (zero) */ @@ -13051,7 +13603,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) g_assert_not_reached(); } - /* Check additional constraints for the scalar encoding */ if (is_scalar) { if (!is_q) { @@ -13088,7 +13639,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x1c: /* FCVTAS */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x3d: /* FRECPE */ gen_helper_recpe_f16(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); @@ -13101,7 +13653,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x5c: /* FCVTAU */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x6f: /* FNEG */ tcg_gen_xori_i32(tcg_ctx, tcg_res, tcg_op, 0x8000); @@ -13132,7 +13685,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x1c: /* FCVTAS */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16tosinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x3d: /* FRECPE */ gen_helper_recpe_f16(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); @@ -13142,7 +13696,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x5c: /* FCVTAU */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_f16touinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x18: /* FRINTN */ case 0x19: /* FRINTM */ @@ -13150,10 +13705,12 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x39: /* FRINTZ */ case 0x58: /* FRINTA */ case 0x79: /* FRINTI */ - gen_helper_advsimd_rinth(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_rinth(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x59: /* FRINTX */ - gen_helper_advsimd_rinth_exact(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus); + gen_helper_advsimd_rinth_exact(tcg_ctx, tcg_res, tcg_op, + tcg_fpstatus); break; case 0x2f: /* FABS */ tcg_gen_andi_i32(tcg_ctx, tcg_res, tcg_op, 0x7fff); @@ -13390,40 +13947,38 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x1e: /* UDOT */ gen_gvec_op3_ool(s, is_q, rd, rn, rm, index, u ? 
gen_helper_gvec_udot_idx_b - : gen_helper_gvec_sdot_idx_b); + : gen_helper_gvec_sdot_idx_b); return; case 0x11: /* FCMLA #0 */ case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ case 0x17: /* FCMLA #270 */ - { - int rot = extract32(insn, 13, 2); - int data = (index << 2) | rot; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), fpst, - is_q ? 16 : 8, vec_full_reg_size(s), data, - size == MO_64 - ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); - tcg_temp_free_ptr(tcg_ctx, fpst); - } + { + int rot = extract32(insn, 13, 2); + int data = (index << 2) | rot; + tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), fpst, is_q ? 16 : 8, + vec_full_reg_size(s), data, + size == MO_64 ? gen_helper_gvec_fcmlas_idx + : gen_helper_gvec_fcmlah_idx); + tcg_temp_free_ptr(tcg_ctx, fpst); + } return; case 0x00: /* FMLAL */ case 0x04: /* FMLSL */ case 0x18: /* FMLAL2 */ case 0x1c: /* FMLSL2 */ - { - int is_s = extract32(opcode, 2, 1); - int is_2 = u; - int data = (index << 2) | (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, - is_q ? 16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_idx_a64); - } + { + int is_s = extract32(opcode, 2, 1); + int is_2 = u; + int data = (index << 2) | (is_2 << 1) | is_s; + tcg_gen_gvec_3_ptr( + tcg_ctx, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), tcg_ctx->cpu_env, is_q ? 16 : 8, + vec_full_reg_size(s), data, gen_helper_gvec_fmlal_idx_a64); + } return; } @@ -13448,7 +14003,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) /* fall through */ case 0x01: /* FMLA */ read_vec_element(s, tcg_res, rd, pass, MO_64); - gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op, tcg_idx, tcg_res, fpst); + gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op, tcg_idx, + tcg_res, fpst); break; case 0x09: /* FMUL */ gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst); @@ -13502,9 +14058,9 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x10: /* MLA */ case 0x14: /* MLS */ { - static NeonGenTwoOpFn * const fns[2][2] = { - { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, - { tcg_gen_add_i32, tcg_gen_sub_i32 }, + static NeonGenTwoOpFn *const fns[2][2] = { + {gen_helper_neon_add_u16, gen_helper_neon_sub_u16}, + {tcg_gen_add_i32, tcg_gen_sub_i32}, }; NeonGenTwoOpFn *genfn; bool is_sub = opcode == 0x4; @@ -13534,11 +14090,11 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) tcg_gen_xori_i32(tcg_ctx, tcg_op, tcg_op, 0x80008000); } if (is_scalar) { - gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); + gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op, + tcg_idx, tcg_res, fpst); } else { - gen_helper_advsimd_muladd2h(tcg_ctx, tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); + gen_helper_advsimd_muladd2h(tcg_ctx, tcg_res, tcg_op, + tcg_idx, tcg_res, fpst); } break; case 2: @@ -13566,7 +14122,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; case 2: - gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst); + gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op, tcg_idx, + fpst); break; default: g_assert_not_reached(); @@ -13584,7 +14141,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; case 2: - gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst); + 
gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op, tcg_idx, + fpst); break; default: g_assert_not_reached(); @@ -13592,42 +14150,46 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) break; case 0x0c: /* SQDMULH */ if (size == 1) { - gen_helper_neon_qdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qdmulh_s16( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } else { - gen_helper_neon_qdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qdmulh_s32( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } break; case 0x0d: /* SQRDMULH */ if (size == 1) { - gen_helper_neon_qrdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qrdmulh_s16( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } else { - gen_helper_neon_qrdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx); + gen_helper_neon_qrdmulh_s32( + tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op, tcg_idx); } break; case 0x1d: /* SQRDMLAH */ read_vec_element_i32(s, tcg_res, rd, pass, is_scalar ? size : MO_32); if (size == 1) { - gen_helper_neon_qrdmlah_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlah_s16(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } else { - gen_helper_neon_qrdmlah_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlah_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } break; case 0x1f: /* SQRDMLSH */ read_vec_element_i32(s, tcg_res, rd, pass, is_scalar ? size : MO_32); if (size == 1) { - gen_helper_neon_qrdmlsh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlsh_s16(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } else { - gen_helper_neon_qrdmlsh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, - tcg_op, tcg_idx, tcg_res); + gen_helper_neon_qrdmlsh_s32(tcg_ctx, tcg_res, + tcg_ctx->cpu_env, tcg_op, + tcg_idx, tcg_res); } break; default: @@ -13689,7 +14251,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) if (satop) { /* saturating, doubling */ - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, tcg_passres); } @@ -13702,18 +14265,20 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) switch (opcode) { case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); break; case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres); + tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], + tcg_passres); break; case 0x7: /* SQDMLSL, SQDMLSL2 */ tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres); /* fall through */ case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, - tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_saturate_s64( + tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_res[pass], + tcg_passres); break; default: g_assert_not_reached(); @@ -13744,8 +14309,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) if (is_scalar) { read_vec_element_i32(s, tcg_op, rn, pass, size); } else { - read_vec_element_i32(s, tcg_op, rn, - pass + (is_q * 2), MO_32); + read_vec_element_i32(s, tcg_op, rn, pass + (is_q * 2), + 
MO_32); } tcg_res[pass] = tcg_temp_new_i64(tcg_ctx); @@ -13758,12 +14323,15 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } if (memop & MO_SIGN) { - gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op, tcg_idx); + gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op, + tcg_idx); } else { - gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op, tcg_idx); + gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op, + tcg_idx); } if (satop) { - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, + gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, + tcg_ctx->cpu_env, tcg_passres, tcg_passres); } tcg_temp_free_i32(tcg_ctx, tcg_op); @@ -13777,20 +14345,20 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) switch (opcode) { case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ - gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass], + tcg_res[pass], tcg_passres); break; case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ - gen_helper_neon_subl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass], - tcg_passres); + gen_helper_neon_subl_u32(tcg_ctx, tcg_res[pass], + tcg_res[pass], tcg_passres); break; case 0x7: /* SQDMLSL, SQDMLSL2 */ gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres); /* fall through */ case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, - tcg_res[pass], - tcg_passres); + gen_helper_neon_addl_saturate_s32( + tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_res[pass], + tcg_passres); break; default: g_assert_not_reached(); @@ -13827,15 +14395,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) */ static void disas_crypto_aes(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); int decrypt; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; - TCGv_i32 tcg_decrypt; - CryptoThreeOpIntFn *genfn; + gen_helper_gvec_2 *genfn2 = NULL; + gen_helper_gvec_3 *genfn3 = NULL; if (!dc_isar_feature(aa64_aes, s) || size != 0) { unallocated_encoding(s); @@ -13845,19 +14411,19 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) switch (opcode) { case 0x4: /* AESE */ decrypt = 0; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x6: /* AESMC */ decrypt = 0; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; case 0x5: /* AESD */ decrypt = 1; - genfn = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aese; break; case 0x7: /* AESIMC */ decrypt = 1; - genfn = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesmc; break; default: unallocated_encoding(s); @@ -13868,15 +14434,11 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_decrypt = tcg_const_i32(tcg_ctx, decrypt); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_i32(tcg_ctx, tcg_decrypt); + if (genfn2) { + gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2); + } else { + gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3); + } } /* Crypto three-reg SHA @@ -13887,14 +14449,12 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) { - TCGContext 
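/*
 * Aside, not part of the patch: in the AES rewrite above, the decrypt flag is
 * no longer materialised as a runtime tcg_const_i32 operand; it travels as the
 * immediate data value handed to gen_gvec_op2_ool / gen_gvec_op3_ool and is
 * recovered inside the out-of-line helper. A toy sketch of that pattern only;
 * the "AES" body is a placeholder and none of the names below are QEMU's.
 */
#include <stdint.h>
#include <stdio.h>

typedef void vec2_ool_fn(uint8_t *rd, const uint8_t *rn, uint32_t data);

static void toy_aes_step(uint8_t *rd, const uint8_t *rn, uint32_t data)
{
    int decrypt = data & 1;                              /* flag recovered from the immediate */
    for (int i = 0; i < 16; i++) {
        rd[i] = (uint8_t)(rn[i] ^ (decrypt ? 0xff : 0x0f)); /* placeholder transform */
    }
}

static void emit_op2_ool(uint8_t *rd, const uint8_t *rn, uint32_t data, vec2_ool_fn *fn)
{
    fn(rd, rn, data);                                    /* flag baked in, no extra operand */
}

int main(void)
{
    uint8_t in[16] = {0}, out[16];
    emit_op2_ool(out, in, /* decrypt = */ 1, toy_aes_step);
    printf("%02x\n", out[0]);                            /* ff */
    return 0;
}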
*tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 3); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoThreeOpFn *genfn; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; + gen_helper_gvec_3 *genfn; bool feature; if (size != 0) { @@ -13904,10 +14464,19 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA1C */ + genfn = gen_helper_crypto_sha1c; + feature = dc_isar_feature(aa64_sha1, s); + break; case 1: /* SHA1P */ + genfn = gen_helper_crypto_sha1p; + feature = dc_isar_feature(aa64_sha1, s); + break; case 2: /* SHA1M */ + genfn = gen_helper_crypto_sha1m; + feature = dc_isar_feature(aa64_sha1, s); + break; case 3: /* SHA1SU0 */ - genfn = NULL; + genfn = gen_helper_crypto_sha1su0; feature = dc_isar_feature(aa64_sha1, s); break; case 4: /* SHA256H */ @@ -13936,23 +14505,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - if (genfn) { - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - } else { - TCGv_i32 tcg_opcode = tcg_const_i32(tcg_ctx, opcode); - - gen_helper_crypto_sha1_3reg(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, - tcg_rm_ptr, tcg_opcode); - tcg_temp_free_i32(tcg_ctx, tcg_opcode); - } - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); } /* Crypto two-reg SHA @@ -13963,14 +14516,12 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) */ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int size = extract32(insn, 22, 2); int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - CryptoTwoOpFn *genfn; + gen_helper_gvec_2 *genfn; bool feature; - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; if (size != 0) { unallocated_encoding(s); @@ -14003,14 +14554,36 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) if (!fp_access_check(s)) { return; } + gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); +} - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); +static void gen_rax1_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 n, + TCGv_i64 m) +{ + tcg_gen_rotli_i64(tcg_ctx, d, m, 1); + tcg_gen_xor_i64(tcg_ctx, d, d, n); +} - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr); +static void gen_rax1_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, + TCGv_vec n, TCGv_vec m) +{ + tcg_gen_rotli_vec(tcg_ctx, vece, d, m, 1); + tcg_gen_xor_vec(tcg_ctx, vece, d, d, n); +} - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); +void gen_gvec_rax1(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, + uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = {INDEX_op_rotli_vec, 0}; + static const GVecGen3 op = { + .fni8 = gen_rax1_i64, + .fniv = gen_rax1_vec, + .opt_opc = vecop_list, + .fno = gen_helper_crypto_rax1, + .vece = MO_64, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); } /* Crypto three-reg SHA512 @@ -14021,32 +14594,32 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int opcode = 
extract32(insn, 10, 2); - int o = extract32(insn, 14, 1); + int o = extract32(insn, 14, 1); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); bool feature; - CryptoThreeOpFn *genfn; + gen_helper_gvec_3 *oolfn = NULL; + GVecGen3Fn *gvecfn = NULL; if (o == 0) { switch (opcode) { case 0: /* SHA512H */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h; + oolfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h2; + oolfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su1; + oolfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ feature = dc_isar_feature(aa64_sha3, s); - genfn = NULL; + gvecfn = gen_gvec_rax1; break; default: g_assert_not_reached(); @@ -14055,15 +14628,15 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SM3PARTW1 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw1; + oolfn = gen_helper_crypto_sm3partw1; break; case 1: /* SM3PARTW2 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw2; + oolfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4ekey; + oolfn = gen_helper_crypto_sm4ekey; break; default: unallocated_encoding(s); @@ -14080,41 +14653,10 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (genfn) { - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); + if (oolfn) { + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); } else { - TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; - int pass; - - tcg_op1 = tcg_temp_new_i64(tcg_ctx); - tcg_op2 = tcg_temp_new_i64(tcg_ctx); - tcg_res[0] = tcg_temp_new_i64(tcg_ctx); - tcg_res[1] = tcg_temp_new_i64(tcg_ctx); - - for (pass = 0; pass < 2; pass++) { - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - tcg_gen_rotli_i64(tcg_ctx, tcg_res[pass], tcg_op2, 1); - tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_op1); - } - write_vec_element(s, tcg_res[0], rd, 0, MO_64); - write_vec_element(s, tcg_res[1], rd, 1, MO_64); - - tcg_temp_free_i64(tcg_ctx, tcg_op1); - tcg_temp_free_i64(tcg_ctx, tcg_op2); - tcg_temp_free_i64(tcg_ctx, tcg_res[0]); - tcg_temp_free_i64(tcg_ctx, tcg_res[1]); + gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); } } @@ -14126,22 +14668,17 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) */ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int opcode = extract32(insn, 10, 2); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; bool feature; - CryptoTwoOpFn *genfn; switch (opcode) { case 0: /* SHA512SU0 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ feature = dc_isar_feature(aa64_sm4, s); - genfn = gen_helper_crypto_sm4e; break; default: unallocated_encoding(s); @@ -14157,13 +14694,16 @@ static 
void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); + switch (opcode) { + case 0: /* SHA512SU0 */ + gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); + break; + case 1: /* SM4E */ + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); + break; + default: + g_assert_not_reached(); + } } /* Crypto four-register @@ -14321,14 +14861,17 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn) */ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; + static gen_helper_gvec_3 *const fns[4] = { + gen_helper_crypto_sm3tt1a, + gen_helper_crypto_sm3tt1b, + gen_helper_crypto_sm3tt2a, + gen_helper_crypto_sm3tt2b, + }; int opcode = extract32(insn, 10, 2); int imm2 = extract32(insn, 12, 2); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - TCGv_i32 tcg_imm2, tcg_opcode; if (!dc_isar_feature(aa64_sm3, s)) { unallocated_encoding(s); @@ -14339,20 +14882,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) return; } - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - tcg_imm2 = tcg_const_i32(tcg_ctx, imm2); - tcg_opcode = tcg_const_i32(tcg_ctx, opcode); - - gen_helper_crypto_sm3tt(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2, - tcg_opcode); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); - tcg_temp_free_i32(tcg_ctx, tcg_imm2); - tcg_temp_free_i32(tcg_ctx, tcg_opcode); + gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); } /* C3.6 Data processing - SIMD, inc Crypto @@ -14362,40 +14892,39 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) */ static const AArch64DecodeTable data_proc_simd[] = { /* pattern , mask , fn */ - { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, - { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, - { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, - { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, - { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, - { 0x0e000400, 0x9fe08400, disas_simd_copy }, - { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ + {0x0e200400, 0x9f200400, disas_simd_three_reg_same}, + {0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra}, + {0x0e200000, 0x9f200c00, disas_simd_three_reg_diff}, + {0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc}, + {0x0e300800, 0x9f3e0c00, disas_simd_across_lanes}, + {0x0e000400, 0x9fe08400, disas_simd_copy}, + {0x0f000000, 0x9f000400, disas_simd_indexed}, /* vector indexed */ /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ - { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, - { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, - { 0x0e000000, 0xbf208c00, disas_simd_tb }, - { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, - { 0x2e000000, 0xbf208400, disas_simd_ext }, - { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, - { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, - { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, - { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, - { 
0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, - { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, - { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ - { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, - { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, - { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, - { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, - { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, - { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, - { 0xce000000, 0xff808000, disas_crypto_four_reg }, - { 0xce800000, 0xffe00000, disas_crypto_xar }, - { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, - { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 }, - { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, - { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 }, - { 0x00000000, 0x00000000, NULL } -}; + {0x0f000400, 0x9ff80400, disas_simd_mod_imm}, + {0x0f000400, 0x9f800400, disas_simd_shift_imm}, + {0x0e000000, 0xbf208c00, disas_simd_tb}, + {0x0e000800, 0xbf208c00, disas_simd_zip_trn}, + {0x2e000000, 0xbf208400, disas_simd_ext}, + {0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same}, + {0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra}, + {0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff}, + {0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc}, + {0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise}, + {0x5e000400, 0xdfe08400, disas_simd_scalar_copy}, + {0x5f000000, 0xdf000400, disas_simd_indexed}, /* scalar indexed */ + {0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm}, + {0x4e280800, 0xff3e0c00, disas_crypto_aes}, + {0x5e000000, 0xff208c00, disas_crypto_three_reg_sha}, + {0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha}, + {0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512}, + {0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512}, + {0xce000000, 0xff808000, disas_crypto_four_reg}, + {0xce800000, 0xffe00000, disas_crypto_xar}, + {0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2}, + {0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16}, + {0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16}, + {0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16}, + {0x00000000, 0x00000000, NULL}}; static void disas_data_proc_simd(DisasContext *s, uint32_t insn) { @@ -14447,7 +14976,7 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s) * table entry even for that case. */ return (tlb_hit(s->uc, entry->addr_code, addr) && - env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0); + arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs)); } /** @@ -14551,9 +15080,8 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) * everything else. This allows us to handle this now * instead of waiting until the insn is otherwise decoded. 
*/ - if (s->btype != 0 - && s->guarded_page - && !btype_destination_ok(insn, s->bt, s->btype)) { + if (s->btype != 0 && s->guarded_page && + !btype_destination_ok(insn, s->bt, s->btype)) { gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_btitrap(s->btype), default_exception_el(s)); @@ -14566,7 +15094,9 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) } switch (extract32(insn, 25, 4)) { - case 0x0: case 0x1: case 0x3: /* UNALLOCATED */ + case 0x0: + case 0x1: + case 0x3: /* UNALLOCATED */ unallocated_encoding(s); break; case 0x2: @@ -14574,24 +15104,26 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s) unallocated_encoding(s); } break; - case 0x8: case 0x9: /* Data processing - immediate */ + case 0x8: + case 0x9: /* Data processing - immediate */ disas_data_proc_imm(s, insn); break; - case 0xa: case 0xb: /* Branch, exception generation and system insns */ + case 0xa: + case 0xb: /* Branch, exception generation and system insns */ disas_b_exc_sys(s, insn); break; case 0x4: case 0x6: case 0xc: - case 0xe: /* Loads and stores */ + case 0xe: /* Loads and stores */ disas_ldst(s, insn); break; case 0x5: - case 0xd: /* Data processing - register */ + case 0xd: /* Data processing - register */ disas_data_proc_reg(s, insn); break; case 0x7: - case 0xf: /* Data processing - SIMD and floating point */ + case 0xf: /* Data processing - SIMD and floating point */ disas_data_proc_simd_fp(s, insn); break; default: @@ -14630,8 +15162,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, /* If we are coming from secure EL0 in a system with a 32-bit EL3, then * there is no secure EL1, so we route exceptions to EL3. */ - dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) && - !arm_el_is_aa64(env, 3); + dc->secure_routed_to_el3 = + arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3); dc->thumb = 0; dc->sctlr_b = 0; dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE; @@ -14640,7 +15172,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX); dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII); - dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID); + dc->tcma = FIELD_EX32(tb_flags, TBFLAG_A64, TCMA); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); dc->user = (dc->current_el == 0); dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL); @@ -14650,10 +15182,14 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT); dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE); dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV); + dc->ata = FIELD_EX32(tb_flags, TBFLAG_A64, ATA); + dc->mte_active[0] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE); + dc->mte_active[1] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE0_ACTIVE); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; dc->features = env->features; + dc->dcz_blocksize = arm_cpu->dcz_blocksize; /* Single step state. 
The code-generation logic here is: * SS_ACTIVE == 0: @@ -14691,9 +15227,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, init_tmp_a64_array(dc); } -static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) -{ -} +static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) {} static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) { @@ -14794,12 +15328,15 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_goto_tb(dc, 1, dc->base.pc_next); break; default: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_a64_set_pc_im(tcg_ctx, dc->base.pc_next); /* fall through */ case DISAS_EXIT: tcg_gen_exit_tb(tcg_ctx, NULL, 0); break; + case DISAS_UPDATE_NOCHAIN: + gen_a64_set_pc_im(tcg_ctx, dc->base.pc_next); + /* fall through */ case DISAS_JUMP: tcg_gen_lookup_and_goto_ptr(tcg_ctx); break; @@ -14814,8 +15351,7 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_a64_set_pc_im(tcg_ctx, dc->base.pc_next); gen_helper_yield(tcg_ctx, tcg_ctx->cpu_env); break; - case DISAS_WFI: - { + case DISAS_WFI: { /* This is a special case because we don't want to just halt the CPU * if trying to debug across a WFI. */ @@ -14842,10 +15378,9 @@ static void aarch64_sync_pc(DisasContextBase *db, CPUState *cpu) const TranslatorOps aarch64_translator_ops = { .init_disas_context = aarch64_tr_init_disas_context, - .tb_start = aarch64_tr_tb_start, - .insn_start = aarch64_tr_insn_start, - .breakpoint_check = aarch64_tr_breakpoint_check, - .translate_insn = aarch64_tr_translate_insn, - .tb_stop = aarch64_tr_tb_stop, - .pc_sync = aarch64_sync_pc -}; + .tb_start = aarch64_tr_tb_start, + .insn_start = aarch64_tr_insn_start, + .breakpoint_check = aarch64_tr_breakpoint_check, + .translate_insn = aarch64_tr_translate_insn, + .tb_stop = aarch64_tr_tb_stop, + .pc_sync = aarch64_sync_pc}; diff --git a/qemu/target/arm/translate-a64.h b/qemu/target/arm/translate-a64.h index 6092d1b02c..23bb6d490d 100644 --- a/qemu/target/arm/translate-a64.h +++ b/qemu/target/arm/translate-a64.h @@ -31,6 +31,7 @@ typedef struct TCGContext TCGContext; } while (0) TCGv_i64 new_tmp_a64(DisasContext *s); +TCGv_i64 new_tmp_a64_local(DisasContext *s); TCGv_i64 new_tmp_a64_zero(DisasContext *s); TCGv_i64 cpu_reg(DisasContext *s, int reg); TCGv_i64 cpu_reg_sp(DisasContext *s, int reg); @@ -41,6 +42,11 @@ TCGv_ptr get_fpstatus_ptr(TCGContext *tcg_ctx, bool); bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, unsigned int imms, unsigned int immr); bool sve_access_check(DisasContext *s); +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size); +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int count, int log2_esize); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that @@ -117,13 +123,7 @@ static inline int vec_full_reg_size(DisasContext *s) bool disas_sve(DisasContext *, uint32_t); -/* Note that the gvec expanders operate on offsets + sizes. 
*/ -typedef void GVecGen2Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, uint32_t); -typedef void GVecGen2iFn(TCGContext *, unsigned, uint32_t, uint32_t, int64_t, - uint32_t, uint32_t); -typedef void GVecGen3Fn(TCGContext *, unsigned, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t); -typedef void GVecGen4Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t); +void gen_gvec_rax1(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c new file mode 100644 index 0000000000..d3e353a2a4 --- /dev/null +++ b/qemu/target/arm/translate-neon.inc.c @@ -0,0 +1,4276 @@ +/* + * ARM translation: AArch32 Neon instructions + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2005-2007 CodeSourcery + * Copyright (c) 2007 OpenedHand, Ltd. + * Copyright (c) 2020 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +/* + * This file is intended to be included from translate.c; it uses + * some macros and definitions provided by that file. + * It might be possible to convert it to a standalone .c file eventually. + */ + +static inline int plus1(DisasContext *s, int x) +{ + return x + 1; +} + +static inline int rsub_64(DisasContext *s, int x) +{ + return 64 - x; +} + +static inline int rsub_32(DisasContext *s, int x) +{ + return 32 - x; +} +static inline int rsub_16(DisasContext *s, int x) +{ + return 16 - x; +} +static inline int rsub_8(DisasContext *s, int x) +{ + return 8 - x; +} + +/* Include the generated Neon decoder */ +#include "decode-neon-dp.inc.c" +#include "decode-neon-ls.inc.c" +#include "decode-neon-shared.inc.c" + +/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, + * where 0 is the least significant end of the register. + */ +static inline long +neon_element_offset(int reg, int element, MemOp size) +{ + int element_size = 1 << size; + int ofs = element * element_size; +#ifdef HOST_WORDS_BIGENDIAN + /* Calculate the offset assuming fully little-endian, + * then XOR to account for the order of the 8-byte units. 
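+ * Illustrative example: for MO_16 elements, element_size == 2 and the
+ * XOR constant is 8 - 2 == 6, so elements 0,1,2,3 sit at byte offsets
+ * 0,2,4,6 on a little-endian host but at 6,4,2,0 on a big-endian one;
+ * the 2-byte chunks inside each 8-byte unit are simply visited in
+ * reverse order.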
+ */ + if (element_size < 8) { + ofs ^= 8 - element_size; + } +#endif + return neon_reg_offset(reg, 0) + ofs; +} + +static void neon_load_element(TCGContext *tcg_ctx, TCGv_i32 var, int reg, int ele, MemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_load_element64(TCGContext *tcg_ctx, TCGv_i64 var, int reg, int ele, MemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld32u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_Q: + tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i32 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_32: + tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static void neon_store_element64(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i64 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_32: + tcg_gen_st32_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + case MO_64: + tcg_gen_st_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + +static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + TCGv_ptr fpst; + gen_helper_gvec_3_ptr *fn_gvec_ptr; + + if (!dc_isar_feature(aa32_vcma, s) + || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, a->rot, + fn_gvec_ptr); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VCADD(DisasContext *s, arg_VCADD *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + TCGv_ptr fpst; + gen_helper_gvec_3_ptr *fn_gvec_ptr; + + if (!dc_isar_feature(aa32_vcma, s) + || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
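+ * Illustrative note: bit 4 of a D-register number is set exactly for
+ * d16..d31, so OR-ing the operand numbers together and masking with
+ * 0x10 flags any operand in the upper register bank, and the
+ * aa32_simd_r32 test says whether that bank is implemented at all.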
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, a->rot, + fn_gvec_ptr); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VDOT(DisasContext *s, arg_VDOT *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + gen_helper_gvec_3 *fn_gvec; + + if (!dc_isar_feature(aa32_dp, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; + tcg_gen_gvec_3_ool(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + opr_sz, opr_sz, 0, fn_gvec); + return true; +} + +static bool trans_VFML(DisasContext *s, arg_VFML *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + + if (!dc_isar_feature(aa32_fhm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + (a->vd & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(a->q, a->vn), + vfp_reg_offset(a->q, a->vm), + tcg_ctx->cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ + gen_helper_gvec_fmlal_a32); + return true; +} + +static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + gen_helper_gvec_3_ptr *fn_gvec_ptr; + int opr_sz; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_vcma, s)) { + return false; + } + if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vn) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx + : gen_helper_gvec_fcmlah_idx); + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->vm), + fpst, opr_sz, opr_sz, + (a->index << 2) | a->rot, fn_gvec_ptr); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + gen_helper_gvec_3 *fn_gvec; + int opr_sz; + TCGv_ptr fpst; + + if (!dc_isar_feature(aa32_dp, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn) & 0x10)) { + return false; + } + + if ((a->vd | a->vn) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn_gvec = a->u ? 
gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; + opr_sz = (1 + a->q) * 8; + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_gen_gvec_3_ool(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(1, a->vn), + vfp_reg_offset(1, a->rm), + opr_sz, opr_sz, a->index, fn_gvec); + tcg_temp_free_ptr(tcg_ctx, fpst); + return true; +} + +static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int opr_sz; + + if (!dc_isar_feature(aa32_fhm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + opr_sz = (1 + a->q) * 8; + tcg_gen_gvec_3_ptr(tcg_ctx, + vfp_reg_offset(1, a->vd), + vfp_reg_offset(a->q, a->vn), + vfp_reg_offset(a->q, a->rm), + tcg_ctx->cpu_env, opr_sz, opr_sz, + (a->index << 2) | a->s, /* is_2 == 0 */ + gen_helper_gvec_fmlal_idx_a32); + return true; +} + +static struct { + int nregs; + int interleave; + int spacing; +} const neon_ls_element_type[11] = { + {1, 4, 1}, + {1, 4, 2}, + {4, 1, 1}, + {2, 2, 2}, + {1, 3, 1}, + {1, 3, 2}, + {3, 1, 1}, + {1, 1, 1}, + {1, 2, 1}, + {1, 2, 2}, + {2, 1, 1} +}; + +static void gen_neon_ldst_base_update(TCGContext *tcg_ctx, DisasContext *s, int rm, int rn, + int stride) +{ + if (rm != 15) { + TCGv_i32 base; + + base = load_reg(s, rn); + if (rm == 13) { + tcg_gen_addi_i32(tcg_ctx, base, base, stride); + } else { + TCGv_i32 index; + index = load_reg(s, rm); + tcg_gen_add_i32(tcg_ctx, base, base, index); + tcg_temp_free_i32(tcg_ctx, index); + } + store_reg(s, rn, base); + } +} + +static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Neon load/store multiple structures */ + int nregs, interleave, spacing, reg, n; + MemOp endian = s->be_data; + int mmu_idx = get_mem_index(s); + int size = a->size; + TCGv_i64 tmp64; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + if (a->itype > 10) { + return false; + } + /* Catch UNDEF cases for bad values of align field */ + switch (a->itype & 0xc) { + case 4: + if (a->align >= 2) { + return false; + } + break; + case 8: + if (a->align == 3) { + return false; + } + break; + default: + break; + } + nregs = neon_ls_element_type[a->itype].nregs; + interleave = neon_ls_element_type[a->itype].interleave; + spacing = neon_ls_element_type[a->itype].spacing; + if (size == 3 && (interleave | spacing) != 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* For our purposes, bytes are always little-endian. */ + if (size == 0) { + endian = MO_LE; + } + /* + * Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. 
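+ * Illustrative example: a VLD1.16 that fills a whole D register could
+ * be done as four 2-byte loads, but with a little-endian memory
+ * stream and register layout the same bytes land in the same places
+ * if we issue one 8-byte load instead, so size is bumped to 3 (MO_64)
+ * and the inner loop below runs once per 8 bytes rather than once per
+ * element.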
+ */ + if (interleave == 1 && endian == MO_LE) { + size = 3; + } + tmp64 = tcg_temp_new_i64(tcg_ctx); + addr = tcg_temp_new_i32(tcg_ctx); + tmp = tcg_const_i32(tcg_ctx, 1 << size); + load_reg_var(s, addr, a->rn); + for (reg = 0; reg < nregs; reg++) { + for (n = 0; n < 8 >> size; n++) { + int xs; + for (xs = 0; xs < interleave; xs++) { + int tt = a->vd + reg + spacing * xs; + + if (a->l) { + gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); + neon_store_element64(tcg_ctx, tt, n, size, tmp64); + } else { + neon_load_element64(tcg_ctx, tmp64, tt, n, size); + gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); + } + tcg_gen_add_i32(tcg_ctx, addr, addr, tmp); + } + } + } + tcg_temp_free_i32(tcg_ctx, addr); + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i64(tcg_ctx, tmp64); + + gen_neon_ldst_base_update( + tcg_ctx, s, a->rm, a->rn, nregs * interleave * 8); + return true; +} + +static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Neon load single structure to all lanes */ + int reg, stride, vec_size; + int vd = a->vd; + int size = a->size; + int nregs = a->n + 1; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + if (size == 3) { + if (nregs != 4 || a->a == 0) { + return false; + } + /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ + size = 2; + } + if (nregs == 1 && a->a == 1 && size == 0) { + return false; + } + if (nregs == 3 && a->a == 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * VLD1 to all lanes: T bit indicates how many Dregs to write. + * VLD2/3/4 to all lanes: T bit indicates register stride. + */ + stride = a->t ? 2 : 1; + vec_size = nregs == 1 ? stride * 8 : 8; + + tmp = tcg_temp_new_i32(tcg_ctx); + addr = tcg_temp_new_i32(tcg_ctx); + load_reg_var(s, addr, a->rn); + for (reg = 0; reg < nregs; reg++) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + if ((vd & 1) && vec_size == 16) { + /* + * We cannot write 16 bytes at once because the + * destination is unaligned. + */ + tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0), + 8, 8, tmp); + tcg_gen_gvec_mov(tcg_ctx, 0, neon_reg_offset(vd + 1, 0), + neon_reg_offset(vd, 0), 8, 8); + } else { + tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0), + vec_size, vec_size, tmp); + } + tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size); + vd += stride; + } + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, addr); + + gen_neon_ldst_base_update(tcg_ctx, s, a->rm, a->rn, (1 << size) * nregs); + + return true; +} + +static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Neon load/store single structure to one lane */ + int reg; + int nregs = a->n + 1; + int vd = a->vd; + TCGv_i32 addr, tmp; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + /* Catch the UNDEF cases. This is unavoidably a bit messy. 
*/ + switch (nregs) { + case 1: + if (((a->align & (1 << a->size)) != 0) || + (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) { + return false; + } + break; + case 3: + if ((a->align & 1) != 0) { + return false; + } + /* fall through */ + case 2: + if (a->size == 2 && (a->align & 2) != 0) { + return false; + } + break; + case 4: + if ((a->size == 2) && ((a->align & 3) == 3)) { + return false; + } + break; + default: + abort(); + } + if ((vd + a->stride * (nregs - 1)) > 31) { + /* + * Attempts to write off the end of the register file are + * UNPREDICTABLE; we choose to UNDEF because otherwise we would + * access off the end of the array that holds the register data. + */ + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(tcg_ctx); + addr = tcg_temp_new_i32(tcg_ctx); + load_reg_var(s, addr, a->rn); + /* + * TODO: if we implemented alignment exceptions, we should check + * addr against the alignment encoded in a->align here. + */ + for (reg = 0; reg < nregs; reg++) { + if (a->l) { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | a->size); + neon_store_element(tcg_ctx, vd, a->reg_idx, a->size, tmp); + } else { /* Store */ + neon_load_element(tcg_ctx, tmp, vd, a->reg_idx, a->size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), + s->be_data | a->size); + } + vd += a->stride; + tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << a->size); + } + tcg_temp_free_i32(tcg_ctx, addr); + tcg_temp_free_i32(tcg_ctx, tmp); + + gen_neon_ldst_base_update(tcg_ctx, s, a->rm, a->rn, (1 << a->size) * nregs); + + return true; +} + +static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rn_ofs = neon_reg_offset(a->vn, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(tcg_ctx, a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); + return true; +} + +#define DO_3SAME(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + return do_3same(s, a, FUNC); \ + } + +DO_3SAME(VADD, tcg_gen_gvec_add) +DO_3SAME(VSUB, tcg_gen_gvec_sub) +DO_3SAME(VAND, tcg_gen_gvec_and) +DO_3SAME(VBIC, tcg_gen_gvec_andc) +DO_3SAME(VORR, tcg_gen_gvec_or) +DO_3SAME(VORN, tcg_gen_gvec_orc) +DO_3SAME(VEOR, tcg_gen_gvec_xor) +DO_3SAME(VSHL_S, gen_gvec_sshl) +DO_3SAME(VSHL_U, gen_gvec_ushl) +DO_3SAME(VQADD_S, gen_gvec_sqadd_qc) +DO_3SAME(VQADD_U, gen_gvec_uqadd_qc) +DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc) +DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc) + +/* These insns are all gvec_bitsel but with the inputs in various orders. 
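+ * Illustrative note (assuming bitsel(sel, a, b) = (a & sel) | (b & ~sel)):
+ * the argument orders used below give the architectural results
+ *   VBSL: Vd = (Vn & Vd) | (Vm & ~Vd)   (old Vd is the selector)
+ *   VBIT: Vd = (Vn & Vm) | (Vd & ~Vm)   (Vm is the selector)
+ *   VBIF: Vd = (Vd & Vm) | (Vn & ~Vm)   (Vm is the selector)
+ * so only the operand order differs between the three insns.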
*/ +#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, unsigned vece, \ + uint32_t rd_ofs, \ + uint32_t rn_ofs, \ + uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_bitsel(tcg_ctx, vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \ + } \ + DO_3SAME(INSN, gen_##INSN##_3s) + +DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs) +DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs) +DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs) + +#define DO_3SAME_NO_SZ_3(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size == 3) { \ + return false; \ + } \ + return do_3same(s, a, FUNC); \ + } + +DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax) +DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax) +DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin) +DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin) +DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul) +DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla) +DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls) +DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst) +DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd) +DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba) +DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd) +DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba) + +#define DO_3SAME_CMP(INSN, COND) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_cmp(tcg_ctx, COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \ + } \ + DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s) + +DO_3SAME_CMP(VCGT_S, TCG_COND_GT) +DO_3SAME_CMP(VCGT_U, TCG_COND_GTU) +DO_3SAME_CMP(VCGE_S, TCG_COND_GE) +DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) +DO_3SAME_CMP(VCEQ, TCG_COND_EQ) + +#define WRAP_OOL_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \ + { \ + tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \ + } + +WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b) + +static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + return false; + } + return do_3same(s, a, gen_VMUL_p_3s); +} + +#define DO_VQRDMLAH(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_rdm, s)) { \ + return false; \ + } \ + if (a->size != 1 && a->size != 2) { \ + return false; \ + } \ + return do_3same(s, a, FUNC); \ + } + +DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc) +DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc) + +#define DO_SHA1(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha1, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ + } + +DO_SHA1(SHA1C, gen_helper_crypto_sha1c) +DO_SHA1(SHA1P, gen_helper_crypto_sha1p) +DO_SHA1(SHA1M, gen_helper_crypto_sha1m) +DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0) + +#define DO_SHA2(NAME, FUNC) \ + WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ + static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (!dc_isar_feature(aa32_sha2, s)) { \ + return false; \ + } \ + return do_3same(s, a, gen_##NAME##_3s); \ + } + +DO_SHA2(SHA256H, gen_helper_crypto_sha256h) +DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) +DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) + +#define DO_3SAME_64(INSN, FUNC) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, 
uint32_t maxsz) \ + { \ + static const GVecGen3 op = { .fni8 = FUNC }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \ + } \ + DO_3SAME(INSN, gen_##INSN##_3s) + +#define DO_3SAME_64_ENV(INSN, FUNC) \ + static void gen_##INSN##_elt(TCGContext *tcg_ctx, \ + TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ + { \ + FUNC(tcg_ctx, d, tcg_ctx->cpu_env, n, m); \ + } \ + DO_3SAME_64(INSN, gen_##INSN##_elt) + +DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) +DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) +DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) +DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) +DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) +DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) + +#define DO_3SAME_32(INSN, FUNC) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[4] = { \ + { .fni4 = gen_helper_neon_##FUNC##8 }, \ + { .fni4 = gen_helper_neon_##FUNC##16 }, \ + { .fni4 = gen_helper_neon_##FUNC##32 }, \ + { 0 }, \ + }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + +/* + * Some helper functions need to be passed the tcg_ctx->cpu_env. In order + * to use those with the gvec APIs like tcg_gen_gvec_3(tcg_ctx, ) we need + * to create wrapper functions whose prototype is a NeonGenTwoopfn(tcg_ctx, ) + * and which call a NeonGenTwoOpEnvFn(). + */ +#define WRAP_ENV_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ + { \ + FUNC(tcg_ctx, d, tcg_ctx->cpu_env, n, m); \ + } + +#define DO_3SAME_32_ENV(INSN, FUNC) \ + WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ + WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ + WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[4] = { \ + { .fni4 = gen_##INSN##_tramp8 }, \ + { .fni4 = gen_##INSN##_tramp16 }, \ + { .fni4 = gen_##INSN##_tramp32 }, \ + { 0 }, \ + }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + +DO_3SAME_32(VHADD_S, hadd_s) +DO_3SAME_32(VHADD_U, hadd_u) +DO_3SAME_32(VHSUB_S, hsub_s) +DO_3SAME_32(VHSUB_U, hsub_u) +DO_3SAME_32(VRHADD_S, rhadd_s) +DO_3SAME_32(VRHADD_U, rhadd_u) +DO_3SAME_32(VRSHL_S, rshl_s) +DO_3SAME_32(VRSHL_U, rshl_u) + +DO_3SAME_32_ENV(VQSHL_S, qshl_s) +DO_3SAME_32_ENV(VQSHL_U, qshl_u) +DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) +DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) + +static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + /* Operations handled pairwise 32 bits at a time */ + TCGv_i32 tmp, tmp2, tmp3; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + assert(a->q == 0); /* enforced by decode patterns */ + + /* + * Note that we have to be careful not to clobber the source operands + * in the "vm == vd" case by storing the result of the first pass too + * early. Since Q is 0 there are always just two passes, so instead + * of a complicated loop over each pass we just unroll. + */ + tmp = neon_load_reg(tcg_ctx, a->vn, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vn, 1); + fn(tcg_ctx, tmp, tmp, tmp2); + tcg_temp_free_i32(tcg_ctx, tmp2); + + tmp3 = neon_load_reg(tcg_ctx, a->vm, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + fn(tcg_ctx, tmp3, tmp3, tmp2); + tcg_temp_free_i32(tcg_ctx, tmp2); + + neon_store_reg(tcg_ctx, a->vd, 0, tmp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + return true; +} + +#define DO_3SAME_PAIR(INSN, func) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + static NeonGenTwoOpFn * const fns[] = { \ + gen_helper_neon_##func##8, \ + gen_helper_neon_##func##16, \ + gen_helper_neon_##func##32, \ + }; \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same_pair(s, a, fns[a->size]); \ + } + +/* 32-bit pairwise ops end up the same as the elementwise versions. */ +#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 +#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 +#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 +#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 +#define gen_helper_neon_padd_u32 tcg_gen_add_i32 + +DO_3SAME_PAIR(VPMAX_S, pmax_s) +DO_3SAME_PAIR(VPMIN_S, pmin_s) +DO_3SAME_PAIR(VPMAX_U, pmax_u) +DO_3SAME_PAIR(VPMIN_U, pmin_u) +DO_3SAME_PAIR(VPADD, padd_u) + +#define DO_3SAME_VQDMULH(INSN, FUNC) \ + WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ + WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + static const GVecGen3 ops[2] = { \ + { .fni4 = gen_##INSN##_tramp16 }, \ + { .fni4 = gen_##INSN##_tramp32 }, \ + }; \ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \ + } \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 1 && a->size != 2) { \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + +DO_3SAME_VQDMULH(VQDMULH, qdmulh) +DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) + +static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn, + bool reads_vd) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * FP operations handled elementwise 32 bits at a time. + * If reads_vd is true then the old value of Vd will be + * loaded before calling the callback function. This is + * used for multiply-accumulate type operations. + */ + TCGv_i32 tmp, tmp2; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); + for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { + tmp = neon_load_reg(tcg_ctx, a->vn, pass); + tmp2 = neon_load_reg(tcg_ctx, a->vm, pass); + if (reads_vd) { + TCGv_i32 tmp_rd = neon_load_reg(tcg_ctx, a->vd, pass); + fn(tcg_ctx, tmp_rd, tmp, tmp2, fpstatus); + neon_store_reg(tcg_ctx, a->vd, pass, tmp_rd); + tcg_temp_free_i32(tcg_ctx, tmp); + } else { + fn(tcg_ctx, tmp, tmp, tmp2, fpstatus); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_i32(tcg_ctx, tmp2); + } + tcg_temp_free_ptr(tcg_ctx, fpstatus); + return true; +} + +/* + * For all the functions using this macro, size == 1 means fp16, + * which is an architecture extension we don't implement yet. + */ +#define DO_3S_FP_GVEC(INSN,FUNC) \ + static void gen_##INSN##_3s(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rn_ofs, uint32_t rm_ofs, \ + uint32_t oprsz, uint32_t maxsz) \ + { \ + TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, 1); \ + tcg_gen_gvec_3_ptr(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, fpst, \ + oprsz, maxsz, 0, FUNC); \ + tcg_temp_free_ptr(tcg_ctx, fpst); \ + } \ + static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 0) { \ + /* TODO fp16 support */ \ + return false; \ + } \ + return do_3same(s, a, gen_##INSN##_3s); \ + } + + +DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s) +DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s) +DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s) +DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s) + +/* + * For all the functions using this macro, size == 1 means fp16, + * which is an architecture extension we don't implement yet. + */ +#define DO_3S_FP(INSN,FUNC,READS_VD) \ + static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 0) { \ + /* TODO fp16 support */ \ + return false; \ + } \ + return do_3same_fp(s, a, FUNC, READS_VD); \ + } + +DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false) +DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false) +DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false) +DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false) +DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false) +DO_3S_FP(VMAX, gen_helper_vfp_maxs, false) +DO_3S_FP(VMIN, gen_helper_vfp_mins, false) + +static void gen_VMLA_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, + TCGv_i32 vm, TCGv_ptr fpstatus) +{ + gen_helper_vfp_muls(tcg_ctx, vn, vn, vm, fpstatus); + gen_helper_vfp_adds(tcg_ctx, vd, vd, vn, fpstatus); +} + +static void gen_VMLS_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, + TCGv_i32 vm, TCGv_ptr fpstatus) +{ + gen_helper_vfp_muls(tcg_ctx, vn, vn, vm, fpstatus); + gen_helper_vfp_subs(tcg_ctx, vd, vd, vn, fpstatus); +} + +DO_3S_FP(VMLA, gen_VMLA_fp_3s, true) +DO_3S_FP(VMLS, gen_VMLS_fp_3s, true) + +static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_helper_vfp_maxnums, false); +} + +static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_helper_vfp_minnums, false); +} + +WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32) + +static void gen_VRECPS_fp_3s(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops); +} + +static 
bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same(s, a, gen_VRECPS_fp_3s); +} + +WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32) + +static void gen_VRSQRTS_fp_3s(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops); +} + +static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a) +{ + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same(s, a, gen_VRSQRTS_fp_3s); +} + +static void gen_VFMA_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, + TCGv_ptr fpstatus) +{ + gen_helper_vfp_muladds(tcg_ctx, vd, vn, vm, vd, fpstatus); +} + +static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!dc_isar_feature(aa32_simdfmac, s)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_VFMA_fp_3s, true); +} + +static void gen_VFMS_fp_3s(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, + TCGv_ptr fpstatus) +{ + gen_helper_vfp_negs(tcg_ctx, vn, vn); + gen_helper_vfp_muladds(tcg_ctx, vd, vn, vm, vd, fpstatus); +} + +static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a) +{ + if (!dc_isar_feature(aa32_simdfmac, s)) { + return false; + } + + if (a->size != 0) { + /* TODO fp16 support */ + return false; + } + + return do_3same_fp(s, a, gen_VFMS_fp_3s, true); +} + +static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* FP operations handled pairwise 32 bits at a time */ + TCGv_i32 tmp, tmp2, tmp3; + TCGv_ptr fpstatus; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + assert(a->q == 0); /* enforced by decode patterns */ + + /* + * Note that we have to be careful not to clobber the source operands + * in the "vm == vd" case by storing the result of the first pass too + * early. Since Q is 0 there are always just two passes, so instead + * of a complicated loop over each pass we just unroll. + */ + fpstatus = get_fpstatus_ptr(tcg_ctx, 1); + tmp = neon_load_reg(tcg_ctx, a->vn, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vn, 1); + fn(tcg_ctx, tmp, tmp, tmp2, fpstatus); + tcg_temp_free_i32(tcg_ctx, tmp2); + + tmp3 = neon_load_reg(tcg_ctx, a->vm, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + fn(tcg_ctx, tmp3, tmp3, tmp2, fpstatus); + tcg_temp_free_i32(tcg_ctx, tmp2); + tcg_temp_free_ptr(tcg_ctx, fpstatus); + + neon_store_reg(tcg_ctx, a->vd, 0, tmp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + return true; +} + +/* + * For all the functions using this macro, size == 1 means fp16, + * which is an architecture extension we don't implement yet. 
+ */ +#define DO_3S_FP_PAIR(INSN,FUNC) \ + static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ + { \ + if (a->size != 0) { \ + /* TODO fp16 support */ \ + return false; \ + } \ + return do_3same_fp_pair(s, a, FUNC); \ + } + +DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds) +DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs) +DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins) + +static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Handle a 2-reg-shift insn which can be vectorized. */ + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(tcg_ctx, a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size); + return true; +} + +#define DO_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_vector_2sh(s, a, FUNC); \ + } \ + +DO_2SH(VSHL, tcg_gen_gvec_shli) +DO_2SH(VSLI, gen_gvec_sli) +DO_2SH(VSRI, gen_gvec_sri) +DO_2SH(VSRA_S, gen_gvec_ssra) +DO_2SH(VSRA_U, gen_gvec_usra) +DO_2SH(VRSHR_S, gen_gvec_srshr) +DO_2SH(VRSHR_U, gen_gvec_urshr) +DO_2SH(VRSRA_S, gen_gvec_srsra) +DO_2SH(VRSRA_U, gen_gvec_ursra) + +static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Signed shift out of range results in all-sign-bits */ + a->shift = MIN(a->shift, (8 << a->size) - 1); + return do_vector_2sh(s, a, tcg_gen_gvec_sari); +} + +static void gen_zero_rd_2sh(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(tcg_ctx, vece, rd_ofs, oprsz, maxsz, 0); +} + +static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Shift out of range is architecturally valid and results in zero. */ + if (a->shift >= (8 << a->size)) { + return do_vector_2sh(s, a, gen_zero_rd_2sh); + } else { + return do_vector_2sh(s, a, tcg_gen_gvec_shri); + } +} + +static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpEnvFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * 2-reg-and-shift operations, size == 3 case, where the + * function needs to be passed tcg_ctx->cpu_env. + */ + TCGv_i64 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. 
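+ * Illustrative note: e.g. a 64-bit VQSHL #imm ends up calling the same
+ * gen_helper_neon_qshl_u64/_s64 helper that handles register-specified
+ * shifts, just with the count held in a constant; the size < 3 variant
+ * below does likewise but uses dup_const() to replicate the count into
+ * every lane of the constant.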
+ */ + constimm = tcg_const_i64(tcg_ctx, dup_const(a->size, a->shift)); + + for (pass = 0; pass < a->q + 1; pass++) { + TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); + + neon_load_reg64(tcg_ctx, tmp, a->vm + pass); + fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, constimm); + neon_store_reg64(tcg_ctx, tmp, a->vd + pass); + tcg_temp_free_i64(tcg_ctx, tmp); + } + tcg_temp_free_i64(tcg_ctx, constimm); + return true; +} + +static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpEnvFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * 2-reg-and-shift operations, size < 3 case, where the + * helper needs to be passed tcg_ctx->cpu_env. + */ + TCGv_i32 constimm; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * To avoid excessive duplication of ops we implement shift + * by immediate using the variable shift operations. + */ + constimm = tcg_const_i32(tcg_ctx, dup_const(a->size, a->shift)); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, constimm); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_i32(tcg_ctx, constimm); + return true; +} + +#define DO_2SHIFT_ENV(INSN, FUNC) \ + static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \ + } \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + static NeonGenTwoOpEnvFn * const fns[] = { \ + gen_helper_neon_##FUNC##8, \ + gen_helper_neon_##FUNC##16, \ + gen_helper_neon_##FUNC##32, \ + }; \ + assert(a->size < ARRAY_SIZE(fns)); \ + return do_2shift_env_32(s, a, fns[a->size]); \ + } + +DO_2SHIFT_ENV(VQSHLU, qshlu_s) +DO_2SHIFT_ENV(VQSHL_U, qshl_u) +DO_2SHIFT_ENV(VQSHL_S, qshl_s) + +static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, + NeonGenTwo64OpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 2-reg-and-shift narrowing-shift operations, size == 3 case */ + TCGv_i64 constimm, rm1, rm2; + TCGv_i32 rd; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count. 
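+     * For example, VSHRN by #8 passes constimm = -8 to gen_ushl_i64(),
+     * whose USHL semantics turn the negative count into the required
+     * right shift.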
+ */ + constimm = tcg_const_i64(tcg_ctx, -a->shift); + rm1 = tcg_temp_new_i64(tcg_ctx); + rm2 = tcg_temp_new_i64(tcg_ctx); + + /* Load both inputs first to avoid potential overwrite if rm == rd */ + neon_load_reg64(tcg_ctx, rm1, a->vm); + neon_load_reg64(tcg_ctx, rm2, a->vm + 1); + + shiftfn(tcg_ctx, rm1, rm1, constimm); + rd = tcg_temp_new_i32(tcg_ctx); + narrowfn(tcg_ctx, rd, tcg_ctx->cpu_env, rm1); + neon_store_reg(tcg_ctx, a->vd, 0, rd); + + shiftfn(tcg_ctx, rm2, rm2, constimm); + rd = tcg_temp_new_i32(tcg_ctx); + narrowfn(tcg_ctx, rd, tcg_ctx->cpu_env, rm2); + neon_store_reg(tcg_ctx, a->vd, 1, rd); + + tcg_temp_free_i64(tcg_ctx, rm1); + tcg_temp_free_i64(tcg_ctx, rm2); + tcg_temp_free_i64(tcg_ctx, constimm); + + return true; +} + +static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoOpFn *shiftfn, + NeonGenNarrowEnvFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 2-reg-and-shift narrowing-shift operations, size < 3 case */ + TCGv_i32 constimm, rm1, rm2, rm3, rm4; + TCGv_i64 rtmp; + uint32_t imm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is always a right shift, and the shiftfn is always a + * left-shift helper, which thus needs the negated shift count + * duplicated into each lane of the immediate value. + */ + if (a->size == 1) { + imm = (uint16_t)(-a->shift); + imm |= imm << 16; + } else { + /* size == 2 */ + imm = -a->shift; + } + constimm = tcg_const_i32(tcg_ctx, imm); + + /* Load all inputs first to avoid potential overwrite */ + rm1 = neon_load_reg(tcg_ctx, a->vm, 0); + rm2 = neon_load_reg(tcg_ctx, a->vm, 1); + rm3 = neon_load_reg(tcg_ctx, a->vm + 1, 0); + rm4 = neon_load_reg(tcg_ctx, a->vm + 1, 1); + rtmp = tcg_temp_new_i64(tcg_ctx); + + shiftfn(tcg_ctx, rm1, rm1, constimm); + shiftfn(tcg_ctx, rm2, rm2, constimm); + + tcg_gen_concat_i32_i64(tcg_ctx, rtmp, rm1, rm2); + tcg_temp_free_i32(tcg_ctx, rm2); + + narrowfn(tcg_ctx, rm1, tcg_ctx->cpu_env, rtmp); + neon_store_reg(tcg_ctx, a->vd, 0, rm1); + + shiftfn(tcg_ctx, rm3, rm3, constimm); + shiftfn(tcg_ctx, rm4, rm4, constimm); + tcg_temp_free_i32(tcg_ctx, constimm); + + tcg_gen_concat_i32_i64(tcg_ctx, rtmp, rm3, rm4); + tcg_temp_free_i32(tcg_ctx, rm4); + + narrowfn(tcg_ctx, rm3, tcg_ctx->cpu_env, rtmp); + tcg_temp_free_i64(tcg_ctx, rtmp); + neon_store_reg(tcg_ctx, a->vd, 1, rm3); + return true; +} + +#define DO_2SN_64(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \ + } +#define DO_2SN_32(INSN, FUNC, NARROWFUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \ + } + +static void gen_neon_narrow_u32(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + tcg_gen_extrl_i64_i32(tcg_ctx, dest, src); +} + +static void gen_neon_narrow_u16(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u16(tcg_ctx, dest, src); +} + +static void gen_neon_narrow_u8(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src) +{ + gen_helper_neon_narrow_u8(tcg_ctx, dest, src); +} + +DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32) +DO_2SN_32(VSHRN_32, gen_ushl_i32, 
gen_neon_narrow_u16) +DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8) + +DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32) +DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16) +DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8) + +DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8) + +DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32) +DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16) +DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8) +DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32) +DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16) +DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8) + +DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8) + +DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32) +DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16) +DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8) + +static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenWidenFn *widenfn, bool u) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i64 tmp; + TCGv_i32 rm0, rm1; + uint64_t widen_mask = 0; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + /* + * This is a widen-and-shift operation. The shift is always less + * than the width of the source type, so after widening the input + * vector we can simply shift the whole 64-bit widened register, + * and then clear the potential overflow bits resulting from left + * bits of the narrow input appearing as right bits of the left + * neighbour narrow input. Calculate a mask of bits to clear. 
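+     * For example, with size == 0 and shift == 3: esize is 8, so
+     * widen_mask = 0xff >> 5 = 0x07, duplicated into every 16-bit lane
+     * as 0x0007000700070007; the low three bits of each lane, which
+     * after the 64-bit left shift hold stray top bits of the lane to
+     * their right (its sign extension, for a signed widen), are then
+     * cleared by the andi with ~widen_mask.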
+ */ + if ((a->shift != 0) && (a->size < 2 || u)) { + int esize = 8 << a->size; + widen_mask = MAKE_64BIT_MASK(0, esize); + widen_mask >>= esize - a->shift; + widen_mask = dup_const(a->size + 1, widen_mask); + } + + rm0 = neon_load_reg(tcg_ctx, a->vm, 0); + rm1 = neon_load_reg(tcg_ctx, a->vm, 1); + tmp = tcg_temp_new_i64(tcg_ctx); + + widenfn(tcg_ctx, tmp, rm0); + tcg_temp_free_i32(tcg_ctx, rm0); + if (a->shift != 0) { + tcg_gen_shli_i64(tcg_ctx, tmp, tmp, a->shift); + tcg_gen_andi_i64(tcg_ctx, tmp, tmp, ~widen_mask); + } + neon_store_reg64(tcg_ctx, tmp, a->vd); + + widenfn(tcg_ctx, tmp, rm1); + tcg_temp_free_i32(tcg_ctx, rm1); + if (a->shift != 0) { + tcg_gen_shli_i64(tcg_ctx, tmp, tmp, a->shift); + tcg_gen_andi_i64(tcg_ctx, tmp, tmp, ~widen_mask); + } + neon_store_reg64(tcg_ctx, tmp, a->vd + 1); + tcg_temp_free_i64(tcg_ctx, tmp); + return true; +} + +static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], false); +} + +static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + }; + return do_vshll_2sh(s, a, widenfn[a->size], true); +} + +static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, + NeonGenTwoSingleOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* FP operations in 2-reg-and-shift group */ + TCGv_i32 tmp, shiftv; + TCGv_ptr fpstatus; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpstatus = get_fpstatus_ptr(tcg_ctx, 1); + shiftv = tcg_const_i32(tcg_ctx, a->shift); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tmp, shiftv, fpstatus); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_ptr(tcg_ctx, fpstatus); + tcg_temp_free_i32(tcg_ctx, shiftv); + return true; +} + +#define DO_FP_2SH(INSN, FUNC) \ + static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \ + { \ + return do_fp_2sh(s, a, FUNC); \ + } + +DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos) +DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos) +DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero) +DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero) + +static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op) +{ + /* + * Expand the encoded constant. + * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. + * We choose to not special-case this and will behave as if a + * valid constant encoding of 0 had been given. + * cmode = 15 op = 1 must UNDEF; we assume decode has handled that. + */ + switch (cmode) { + case 0: case 1: + /* no-op */ + break; + case 2: case 3: + imm <<= 8; + break; + case 4: case 5: + imm <<= 16; + break; + case 6: case 7: + imm <<= 24; + break; + case 8: case 9: + imm |= imm << 16; + break; + case 10: case 11: + imm = (imm << 8) | (imm << 24); + break; + case 12: + imm = (imm << 8) | 0xff; + break; + case 13: + imm = (imm << 16) | 0xffff; + break; + case 14: + if (op) { + /* + * This is the only case where the top and bottom 32 bits + * of the encoded constant differ. 
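+             * For example, imm = 0xa5 (binary 10100101) with op set
+             * expands each set bit to an 0xff byte, giving the 64-bit
+             * constant 0xff00ff0000ff00ff.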
+ */ + uint64_t imm64 = 0; + int n; + + for (n = 0; n < 8; n++) { + if (imm & (1 << n)) { + imm64 |= (0xffULL << (n * 8)); + } + } + return imm64; + } + imm |= (imm << 8) | (imm << 16) | (imm << 24); + break; + case 15: + imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) + | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); + break; + } + if (op) { + imm = ~imm; + } + return dup_const(MO_32, imm); +} + +static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a, + GVecGen2iFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + uint64_t imm; + int reg_ofs, vec_size; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + reg_ofs = neon_reg_offset(a->vd, 0); + vec_size = a->q ? 16 : 8; + imm = asimd_imm_const(a->imm, a->cmode, a->op); + + fn(tcg_ctx, MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size); + return true; +} + +static void gen_VMOV_1r(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, c); +} + +static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a) +{ + /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ + GVecGen2iFn *fn; + + if ((a->cmode & 1) && a->cmode < 12) { + /* for op=1, the imm will be inverted, so BIC becomes AND. */ + fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; + } else { + /* There is one unallocated cmode/op combination in this space */ + if (a->cmode == 15 && a->op == 1) { + return false; + } + fn = gen_VMOV_1r; + } + return do_1reg_imm(s, a, fn); +} + +static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, + NeonGenWidenFn *widenfn, + NeonGenTwo64OpFn *opfn, + bool src1_wide) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */ + TCGv_i64 rn0_64, rn1_64, rm_64; + TCGv_i32 rm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!widenfn || !opfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if ((a->vd & 1) || (src1_wide && (a->vn & 1))) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rn0_64 = tcg_temp_new_i64(tcg_ctx); + rn1_64 = tcg_temp_new_i64(tcg_ctx); + rm_64 = tcg_temp_new_i64(tcg_ctx); + + if (src1_wide) { + neon_load_reg64(tcg_ctx, rn0_64, a->vn); + } else { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vn, 0); + widenfn(tcg_ctx, rn0_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + } + rm = neon_load_reg(tcg_ctx, a->vm, 0); + + widenfn(tcg_ctx, rm_64, rm); + tcg_temp_free_i32(tcg_ctx, rm); + opfn(tcg_ctx, rn0_64, rn0_64, rm_64); + + /* + * Load second pass inputs before storing the first pass result, to + * avoid incorrect results if a narrow input overlaps with the result. 
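+     * For example, if vd equals vn or vm, writing the pass-0 result to
+     * d[vd] first would corrupt the second half of that narrow input
+     * before it is read for pass 1.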
+ */ + if (src1_wide) { + neon_load_reg64(tcg_ctx, rn1_64, a->vn + 1); + } else { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vn, 1); + widenfn(tcg_ctx, rn1_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + } + rm = neon_load_reg(tcg_ctx, a->vm, 1); + + neon_store_reg64(tcg_ctx, rn0_64, a->vd); + + widenfn(tcg_ctx, rm_64, rm); + tcg_temp_free_i32(tcg_ctx, rm); + opfn(tcg_ctx, rn1_64, rn1_64, rm_64); + neon_store_reg64(tcg_ctx, rn1_64, a->vd + 1); + + tcg_temp_free_i64(tcg_ctx, rn0_64); + tcg_temp_free_i64(tcg_ctx, rn1_64); + tcg_temp_free_i64(tcg_ctx, rm_64); + + return true; +} + +#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenWidenFn * const widenfn[] = { \ + gen_helper_neon_widen_##S##8, \ + gen_helper_neon_widen_##S##16, \ + tcg_gen_##EXT##_i32_i64, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const addfn[] = { \ + gen_helper_neon_##OP##l_u16, \ + gen_helper_neon_##OP##l_u32, \ + tcg_gen_##OP##_i64, \ + NULL, \ + }; \ + return do_prewiden_3d(s, a, widenfn[a->size], \ + addfn[a->size], SRC1WIDE); \ + } + +DO_PREWIDEN(VADDL_S, s, ext, add, false) +DO_PREWIDEN(VADDL_U, u, extu, add, false) +DO_PREWIDEN(VSUBL_S, s, ext, sub, false) +DO_PREWIDEN(VSUBL_U, u, extu, sub, false) +DO_PREWIDEN(VADDW_S, s, ext, add, true) +DO_PREWIDEN(VADDW_U, u, extu, add, true) +DO_PREWIDEN(VSUBW_S, s, ext, sub, true) +DO_PREWIDEN(VSUBW_U, u, extu, sub, true) + +static bool do_narrow_3d(DisasContext *s, arg_3diff *a, + NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */ + TCGv_i64 rn_64, rm_64; + TCGv_i32 rd0, rd1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn || !narrowfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if ((a->vn | a->vm) & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rn_64 = tcg_temp_new_i64(tcg_ctx); + rm_64 = tcg_temp_new_i64(tcg_ctx); + rd0 = tcg_temp_new_i32(tcg_ctx); + rd1 = tcg_temp_new_i32(tcg_ctx); + + neon_load_reg64(tcg_ctx, rn_64, a->vn); + neon_load_reg64(tcg_ctx, rm_64, a->vm); + + opfn(tcg_ctx, rn_64, rn_64, rm_64); + + narrowfn(tcg_ctx, rd0, rn_64); + + neon_load_reg64(tcg_ctx, rn_64, a->vn + 1); + neon_load_reg64(tcg_ctx, rm_64, a->vm + 1); + + opfn(tcg_ctx, rn_64, rn_64, rm_64); + + narrowfn(tcg_ctx, rd1, rn_64); + + neon_store_reg(tcg_ctx, a->vd, 0, rd0); + neon_store_reg(tcg_ctx, a->vd, 1, rd1); + + tcg_temp_free_i64(tcg_ctx, rn_64); + tcg_temp_free_i64(tcg_ctx, rm_64); + + return true; +} + +#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenTwo64OpFn * const addfn[] = { \ + gen_helper_neon_##OP##l_u16, \ + gen_helper_neon_##OP##l_u32, \ + tcg_gen_##OP##_i64, \ + NULL, \ + }; \ + static NeonGenNarrowFn * const narrowfn[] = { \ + gen_helper_neon_##NARROWTYPE##_high_u8, \ + gen_helper_neon_##NARROWTYPE##_high_u16, \ + EXTOP, \ + NULL, \ + }; \ + return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \ + } + +static void gen_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 rd, TCGv_i64 rn) +{ + tcg_gen_addi_i64(tcg_ctx, rn, rn, 1u << 31); + tcg_gen_extrh_i64_i32(tcg_ctx, rd, rn); +} + +DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32) +DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32) +DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32) +DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32) + +static bool do_long_3d(DisasContext *s, arg_3diff *a, + NeonGenTwoOpWidenFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * 3-regs different lengths, long operations. + * These perform an operation on two inputs that returns a double-width + * result, and then possibly perform an accumulation operation of + * that result into the double-width destination. + */ + TCGv_i64 rd0, rd1, tmp; + TCGv_i32 rn, rm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* size == 3 case, which is an entirely different insn group */ + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rd0 = tcg_temp_new_i64(tcg_ctx); + rd1 = tcg_temp_new_i64(tcg_ctx); + + rn = neon_load_reg(tcg_ctx, a->vn, 0); + rm = neon_load_reg(tcg_ctx, a->vm, 0); + opfn(tcg_ctx, rd0, rn, rm); + tcg_temp_free_i32(tcg_ctx, rn); + tcg_temp_free_i32(tcg_ctx, rm); + + rn = neon_load_reg(tcg_ctx, a->vn, 1); + rm = neon_load_reg(tcg_ctx, a->vm, 1); + opfn(tcg_ctx, rd1, rn, rm); + tcg_temp_free_i32(tcg_ctx, rn); + tcg_temp_free_i32(tcg_ctx, rm); + + /* Don't store results until after all loads: they might overlap */ + if (accfn) { + tmp = tcg_temp_new_i64(tcg_ctx); + neon_load_reg64(tcg_ctx, tmp, a->vd); + accfn(tcg_ctx, tmp, tmp, rd0); + neon_store_reg64(tcg_ctx, tmp, a->vd); + neon_load_reg64(tcg_ctx, tmp, a->vd + 1); + accfn(tcg_ctx, tmp, tmp, rd1); + neon_store_reg64(tcg_ctx, tmp, a->vd + 1); + tcg_temp_free_i64(tcg_ctx, tmp); + } else { + neon_store_reg64(tcg_ctx, rd0, a->vd); + neon_store_reg64(tcg_ctx, rd1, a->vd + 1); + } + + tcg_temp_free_i64(tcg_ctx, rd0); + tcg_temp_free_i64(tcg_ctx, rd1); + + return true; +} + +static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_s16, + gen_helper_neon_abdl_s32, + gen_helper_neon_abdl_s64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_u16, + gen_helper_neon_abdl_u32, + gen_helper_neon_abdl_u64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_s16, + gen_helper_neon_abdl_s32, + gen_helper_neon_abdl_s64, + NULL, + }; + static NeonGenTwo64OpFn * const addfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); +} + +static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_abdl_u16, + gen_helper_neon_abdl_u32, + gen_helper_neon_abdl_u64, + NULL, + }; + static NeonGenTwo64OpFn * const addfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], addfn[a->size]); +} + +static void gen_mull_s32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_muls2_i32(tcg_ctx, lo, hi, rn, rm); + tcg_gen_concat_i32_i64(tcg_ctx, rd, lo, hi); + + tcg_temp_free_i32(tcg_ctx, lo); + tcg_temp_free_i32(tcg_ctx, hi); +} + +static void gen_mull_u32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_mulu2_i32(tcg_ctx, lo, hi, rn, rm); + tcg_gen_concat_i32_i64(tcg_ctx, rd, lo, hi); + + tcg_temp_free_i32(tcg_ctx, lo); + tcg_temp_free_i32(tcg_ctx, hi); +} + +static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_mull_s8, + gen_helper_neon_mull_s16, + gen_mull_s32, + NULL, + }; 
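+    /*
+     * Sizes 0 and 1 use the widening-multiply helpers, size 2 uses
+     * gen_mull_s32() above, and the NULL entry makes do_long_3d()
+     * reject size == 3 (it belongs to a different insn group).
+     */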
+ + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + gen_helper_neon_mull_u8, + gen_helper_neon_mull_u16, + gen_mull_u32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +#define DO_VMLAL(INSN,MULL,ACC) \ + static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \ + { \ + static NeonGenTwoOpWidenFn * const opfn[] = { \ + gen_helper_neon_##MULL##8, \ + gen_helper_neon_##MULL##16, \ + gen_##MULL##32, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const accfn[] = { \ + gen_helper_neon_##ACC##l_u16, \ + gen_helper_neon_##ACC##l_u32, \ + tcg_gen_##ACC##_i64, \ + NULL, \ + }; \ + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \ + } + +DO_VMLAL(VMLAL_S,mull_s,add) +DO_VMLAL(VMLAL_U,mull_u,add) +DO_VMLAL(VMLSL_S,mull_s,sub) +DO_VMLAL(VMLSL_U,mull_u,sub) + +static void gen_VQDMULL_16(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + gen_helper_neon_mull_s16(tcg_ctx, rd, rn, rm); + gen_helper_neon_addl_saturate_s32(tcg_ctx, rd, tcg_ctx->cpu_env, rd, rd); +} + +static void gen_VQDMULL_32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) +{ + gen_mull_s32(tcg_ctx, rd, rn, rm); + gen_helper_neon_addl_saturate_s64(tcg_ctx, rd, tcg_ctx->cpu_env, rd, rd); +} + +static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], NULL); +} + +static void gen_VQDMLAL_acc_16(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_addl_saturate_s32(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static void gen_VQDMLAL_acc_32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_addl_saturate_s64(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLAL_acc_16, + gen_VQDMLAL_acc_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); +} + +static void gen_VQDMLSL_acc_16(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + gen_helper_neon_negl_u32(tcg_ctx, rm, rm); + gen_helper_neon_addl_saturate_s32(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static void gen_VQDMLSL_acc_32(TCGContext *tcg_ctx, TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) +{ + tcg_gen_neg_i64(tcg_ctx, rm, rm); + gen_helper_neon_addl_saturate_s64(tcg_ctx, rd, tcg_ctx->cpu_env, rn, rm); +} + +static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLSL_acc_16, + gen_VQDMLSL_acc_32, + NULL, + }; + + return do_long_3d(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + gen_helper_gvec_3 *fn_gvec; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + switch (a->size) { + case 0: + fn_gvec = gen_helper_neon_pmull_h; + break; + case 2: + if (!dc_isar_feature(aa32_pmull, s)) { + return false; + } + fn_gvec = gen_helper_gvec_pmull_q; + break; + default: + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tcg_gen_gvec_3_ool(tcg_ctx, neon_reg_offset(a->vd, 0), + neon_reg_offset(a->vn, 0), + neon_reg_offset(a->vm, 0), + 16, 16, 0, fn_gvec); + return true; +} + +static void gen_neon_dup_low16(TCGContext *tcg_ctx, TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_ext16u_i32(tcg_ctx, var, var); + tcg_gen_shli_i32(tcg_ctx, tmp, var, 16); + tcg_gen_or_i32(tcg_ctx, var, var, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); +} + +static void gen_neon_dup_high16(TCGContext *tcg_ctx, TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_andi_i32(tcg_ctx, var, var, 0xffff0000); + tcg_gen_shri_i32(tcg_ctx, tmp, var, 16); + tcg_gen_or_i32(tcg_ctx, var, var, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); +} + +static inline TCGv_i32 neon_get_scalar(TCGContext *tcg_ctx, int size, int reg) +{ + TCGv_i32 tmp; + if (size == 1) { + tmp = neon_load_reg(tcg_ctx, reg & 7, reg >> 4); + if (reg & 8) { + gen_neon_dup_high16(tcg_ctx, tmp); + } else { + gen_neon_dup_low16(tcg_ctx, tmp); + } + } else { + tmp = neon_load_reg(tcg_ctx, reg & 15, reg >> 4); + } + return tmp; +} + +static bool do_2scalar(DisasContext *s, arg_2scalar *a, + NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Two registers and a scalar: perform an operation between + * the input elements and the scalar, and then possibly + * perform an accumulation operation of that result into the + * destination. + */ + TCGv_i32 scalar; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->q && ((a->vd | a->vn) & 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(tcg_ctx, a->size, a->vm); + + for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vn, pass); + opfn(tcg_ctx, tmp, tmp, scalar); + if (accfn) { + TCGv_i32 rd = neon_load_reg(tcg_ctx, a->vd, pass); + accfn(tcg_ctx, tmp, rd, tmp); + tcg_temp_free_i32(tcg_ctx, rd); + } + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_i32(tcg_ctx, scalar); + return true; +} + +static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + gen_helper_neon_add_u16, + tcg_gen_add_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + gen_helper_neon_sub_u16, + tcg_gen_sub_i32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +/* + * Rather than have a float-specific version of do_2scalar just for + * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into + * a NeonGenTwoOpFn. + */ +#define WRAP_FP_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 rd, \ + TCGv_i32 rn, TCGv_i32 rm) \ + { \ + TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); \ + FUNC(tcg_ctx, rd, rn, rm, fpstatus); \ + tcg_temp_free_ptr(tcg_ctx, fpstatus); \ + } + +WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls) +WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds) +WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs) + +static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_add, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_mul, + NULL, + }; + static NeonGenTwoOpFn * const accfn[] = { + NULL, + NULL, /* TODO: fp16 support */ + gen_VMUL_F_sub, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], accfn[a->size]); +} + +WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16) +WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32) +WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16) +WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32) + +static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_VQDMULH_16, + gen_VQDMULH_32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpFn * const opfn[] = { + NULL, + gen_VQRDMULH_16, + gen_VQRDMULH_32, + NULL, + }; + + return do_2scalar(s, a, opfn[a->size], NULL); +} + +static bool do_vqrdmlah_2sc(DisasContext 
*s, arg_2scalar *a, + NeonGenThreeOpEnvFn *opfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn + * performs a kind of fused op-then-accumulate using a helper + * function that takes all of rd, rn and the scalar at once. + */ + TCGv_i32 scalar; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + if (!dc_isar_feature(aa32_rdm, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->q && ((a->vd | a->vn) & 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(tcg_ctx, a->size, a->vm); + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 rn = neon_load_reg(tcg_ctx, a->vn, pass); + TCGv_i32 rd = neon_load_reg(tcg_ctx, a->vd, pass); + opfn(tcg_ctx, rd, tcg_ctx->cpu_env, rn, scalar, rd); + tcg_temp_free_i32(tcg_ctx, rn); + neon_store_reg(tcg_ctx, a->vd, pass, rd); + } + tcg_temp_free_i32(tcg_ctx, scalar); + + return true; +} + +static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenThreeOpEnvFn *opfn[] = { + NULL, + gen_helper_neon_qrdmlah_s16, + gen_helper_neon_qrdmlah_s32, + NULL, + }; + return do_vqrdmlah_2sc(s, a, opfn[a->size]); +} + +static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenThreeOpEnvFn *opfn[] = { + NULL, + gen_helper_neon_qrdmlsh_s16, + gen_helper_neon_qrdmlsh_s32, + NULL, + }; + return do_vqrdmlah_2sc(s, a, opfn[a->size]); +} + +static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, + NeonGenTwoOpWidenFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Two registers and a scalar, long operations: perform an + * operation on the input elements and the scalar which produces + * a double-width result, and then possibly perform an accumulation + * operation of that result into the destination. + */ + TCGv_i32 scalar, rn; + TCGv_i64 rn0_64, rn1_64; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if (!opfn) { + /* Bad size (including size == 3, which is a different insn group) */ + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + scalar = neon_get_scalar(tcg_ctx, a->size, a->vm); + + /* Load all inputs before writing any outputs, in case of overlap */ + rn = neon_load_reg(tcg_ctx, a->vn, 0); + rn0_64 = tcg_temp_new_i64(tcg_ctx); + opfn(tcg_ctx, rn0_64, rn, scalar); + tcg_temp_free_i32(tcg_ctx, rn); + + rn = neon_load_reg(tcg_ctx, a->vn, 1); + rn1_64 = tcg_temp_new_i64(tcg_ctx); + opfn(tcg_ctx, rn1_64, rn, scalar); + tcg_temp_free_i32(tcg_ctx, rn); + tcg_temp_free_i32(tcg_ctx, scalar); + + if (accfn) { + TCGv_i64 t64 = tcg_temp_new_i64(tcg_ctx); + neon_load_reg64(tcg_ctx, t64, a->vd); + accfn(tcg_ctx, t64, t64, rn0_64); + neon_store_reg64(tcg_ctx, t64, a->vd); + neon_load_reg64(tcg_ctx, t64, a->vd + 1); + accfn(tcg_ctx, t64, t64, rn1_64); + neon_store_reg64(tcg_ctx, t64, a->vd + 1); + tcg_temp_free_i64(tcg_ctx, t64); + } else { + neon_store_reg64(tcg_ctx, rn0_64, a->vd); + neon_store_reg64(tcg_ctx, rn1_64, a->vd + 1); + } + tcg_temp_free_i64(tcg_ctx, rn0_64); + tcg_temp_free_i64(tcg_ctx, rn1_64); + return true; +} + +static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_helper_neon_mull_s16, + gen_mull_s32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_helper_neon_mull_u16, + gen_mull_u32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +#define DO_VMLAL_2SC(INSN, MULL, ACC) \ + static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \ + { \ + static NeonGenTwoOpWidenFn * const opfn[] = { \ + NULL, \ + gen_helper_neon_##MULL##16, \ + gen_##MULL##32, \ + NULL, \ + }; \ + static NeonGenTwo64OpFn * const accfn[] = { \ + NULL, \ + gen_helper_neon_##ACC##l_u32, \ + tcg_gen_##ACC##_i64, \ + NULL, \ + }; \ + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \ + } + +DO_VMLAL_2SC(VMLAL_S, mull_s, add) +DO_VMLAL_2SC(VMLAL_U, mull_u, add) +DO_VMLAL_2SC(VMLSL_S, mull_s, sub) +DO_VMLAL_2SC(VMLSL_U, mull_u, sub) + +static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], NULL); +} + +static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLAL_acc_16, + gen_VQDMLAL_acc_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a) +{ + static NeonGenTwoOpWidenFn * const opfn[] = { + NULL, + gen_VQDMULL_16, + gen_VQDMULL_32, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + NULL, + gen_VQDMLSL_acc_16, + gen_VQDMLSL_acc_32, + NULL, + }; + + return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); +} + +static bool trans_VEXT(DisasContext *s, arg_VEXT *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
+     */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if ((a->vn | a->vm | a->vd) & a->q) {
+        return false;
+    }
+
+    if (a->imm > 7 && !a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    if (!a->q) {
+        /* Extract 64 bits from Vm:Vn */
+        TCGv_i64 left, right, dest;
+
+        left = tcg_temp_new_i64(tcg_ctx);
+        right = tcg_temp_new_i64(tcg_ctx);
+        dest = tcg_temp_new_i64(tcg_ctx);
+
+        neon_load_reg64(tcg_ctx, right, a->vn);
+        neon_load_reg64(tcg_ctx, left, a->vm);
+        tcg_gen_extract2_i64(tcg_ctx, dest, right, left, a->imm * 8);
+        neon_store_reg64(tcg_ctx, dest, a->vd);
+
+        tcg_temp_free_i64(tcg_ctx, left);
+        tcg_temp_free_i64(tcg_ctx, right);
+        tcg_temp_free_i64(tcg_ctx, dest);
+    } else {
+        /* Extract 128 bits from Vm+1:Vm:Vn+1:Vn */
+        TCGv_i64 left, middle, right, destleft, destright;
+
+        left = tcg_temp_new_i64(tcg_ctx);
+        middle = tcg_temp_new_i64(tcg_ctx);
+        right = tcg_temp_new_i64(tcg_ctx);
+        destleft = tcg_temp_new_i64(tcg_ctx);
+        destright = tcg_temp_new_i64(tcg_ctx);
+
+        if (a->imm < 8) {
+            neon_load_reg64(tcg_ctx, right, a->vn);
+            neon_load_reg64(tcg_ctx, middle, a->vn + 1);
+            tcg_gen_extract2_i64(tcg_ctx, destright, right, middle, a->imm * 8);
+            neon_load_reg64(tcg_ctx, left, a->vm);
+            tcg_gen_extract2_i64(tcg_ctx, destleft, middle, left, a->imm * 8);
+        } else {
+            neon_load_reg64(tcg_ctx, right, a->vn + 1);
+            neon_load_reg64(tcg_ctx, middle, a->vm);
+            tcg_gen_extract2_i64(tcg_ctx, destright, right, middle, (a->imm - 8) * 8);
+            neon_load_reg64(tcg_ctx, left, a->vm + 1);
+            tcg_gen_extract2_i64(tcg_ctx, destleft, middle, left, (a->imm - 8) * 8);
+        }
+
+        neon_store_reg64(tcg_ctx, destright, a->vd);
+        neon_store_reg64(tcg_ctx, destleft, a->vd + 1);
+
+        tcg_temp_free_i64(tcg_ctx, destright);
+        tcg_temp_free_i64(tcg_ctx, destleft);
+        tcg_temp_free_i64(tcg_ctx, right);
+        tcg_temp_free_i64(tcg_ctx, middle);
+        tcg_temp_free_i64(tcg_ctx, left);
+    }
+    return true;
+}
+
+static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
+{
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+
+    int n;
+    TCGv_i32 tmp, tmp2, tmp3, tmp4;
+    TCGv_ptr ptr1;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    n = a->len + 1;
+    if ((a->vn + n) > 32) {
+        /*
+         * This is UNPREDICTABLE; we choose to UNDEF to avoid the
+         * helper function running off the end of the register file.
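+         * For example, len = 2 with vn = 30 would need the list
+         * d30..d32, which runs past d31.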
+ */ + return false; + } + n <<= 3; + if (a->op) { + tmp = neon_load_reg(tcg_ctx, a->vd, 0); + } else { + tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + } + tmp2 = neon_load_reg(tcg_ctx, a->vm, 0); + ptr1 = vfp_reg_ptr(tcg_ctx, true, a->vn); + tmp4 = tcg_const_i32(tcg_ctx, n); + gen_helper_neon_tbl(tcg_ctx, tmp2, tmp2, tmp, ptr1, tmp4); + tcg_temp_free_i32(tcg_ctx, tmp); + if (a->op) { + tmp = neon_load_reg(tcg_ctx, a->vd, 1); + } else { + tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_movi_i32(tcg_ctx, tmp, 0); + } + tmp3 = neon_load_reg(tcg_ctx, a->vm, 1); + gen_helper_neon_tbl(tcg_ctx, tmp3, tmp3, tmp, ptr1, tmp4); + tcg_temp_free_i32(tcg_ctx, tmp4); + tcg_temp_free_ptr(tcg_ctx, ptr1); + neon_store_reg(tcg_ctx, a->vd, 0, tmp2); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + tcg_temp_free_i32(tcg_ctx, tmp); + return true; +} + +static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tcg_gen_gvec_dup_mem(tcg_ctx, a->size, neon_reg_offset(a->vd, 0), + neon_element_offset(a->vm, a->index, a->size), + a->q ? 16 : 8, a->q ? 16 : 8); + return true; +} + +static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int pass, half; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < (a->q ? 2 : 1); pass++) { + TCGv_i32 tmp[2]; + + for (half = 0; half < 2; half++) { + tmp[half] = neon_load_reg(tcg_ctx, a->vm, pass * 2 + half); + switch (a->size) { + case 0: + tcg_gen_bswap32_i32(tcg_ctx, tmp[half], tmp[half]); + break; + case 1: + gen_swap_half(tcg_ctx, tmp[half], tmp[half]); + break; + case 2: + break; + default: + g_assert_not_reached(); + } + } + neon_store_reg(tcg_ctx, a->vd, pass * 2, tmp[1]); + neon_store_reg(tcg_ctx, a->vd, pass * 2 + 1, tmp[0]); + } + return true; +} + +static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, + NeonGenWidenFn *widenfn, + NeonGenTwo64OpFn *opfn, + NeonGenTwo64OpFn *accfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Pairwise long operations: widen both halves of the pair, + * combine the pairs with the opfn, and then possibly accumulate + * into the destination with the accfn. + */ + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!widenfn) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < a->q + 1; pass++) { + TCGv_i32 tmp; + TCGv_i64 rm0_64, rm1_64, rd_64; + + rm0_64 = tcg_temp_new_i64(tcg_ctx); + rm1_64 = tcg_temp_new_i64(tcg_ctx); + rd_64 = tcg_temp_new_i64(tcg_ctx); + tmp = neon_load_reg(tcg_ctx, a->vm, pass * 2); + widenfn(tcg_ctx, rm0_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + tmp = neon_load_reg(tcg_ctx, a->vm, pass * 2 + 1); + widenfn(tcg_ctx, rm1_64, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + opfn(tcg_ctx, rd_64, rm0_64, rm1_64); + tcg_temp_free_i64(tcg_ctx, rm0_64); + tcg_temp_free_i64(tcg_ctx, rm1_64); + + if (accfn) { + TCGv_i64 tmp64 = tcg_temp_new_i64(tcg_ctx); + neon_load_reg64(tcg_ctx, tmp64, a->vd + pass); + accfn(tcg_ctx, rd_64, tmp64, rd_64); + tcg_temp_free_i64(tcg_ctx, tmp64); + } + neon_store_reg64(tcg_ctx, rd_64, a->vd + pass); + tcg_temp_free_i64(tcg_ctx, rd_64); + } + return true; +} + +static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); +} + +static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL); +} + +static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_s8, + gen_helper_neon_widen_s16, + tcg_gen_ext_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], + accfn[a->size]); +} + +static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a) +{ + static NeonGenWidenFn * const widenfn[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + NULL, + }; + static NeonGenTwo64OpFn * const opfn[] = { + gen_helper_neon_paddl_u16, + gen_helper_neon_paddl_u32, + tcg_gen_add_i64, + NULL, + }; + static NeonGenTwo64OpFn * const accfn[] = { + gen_helper_neon_addl_u16, + gen_helper_neon_addl_u32, + tcg_gen_add_i64, + NULL, + }; + + return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], + accfn[a->size]); +} + +typedef void ZipFn(TCGContext *, TCGv_ptr, TCGv_ptr); + +static bool do_zip_uzp(DisasContext *s, arg_2misc *a, + ZipFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_ptr pd, pm; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!fn) { + /* Bad size or size/q combination */ + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + pd = vfp_reg_ptr(tcg_ctx, true, a->vd); + pm = vfp_reg_ptr(tcg_ctx, true, a->vm); + fn(tcg_ctx, pd, pm); + tcg_temp_free_ptr(tcg_ctx, pd); + tcg_temp_free_ptr(tcg_ctx, pm); + return true; +} + +static bool trans_VUZP(DisasContext *s, arg_2misc *a) +{ + static ZipFn * const fn[2][4] = { + { + gen_helper_neon_unzip8, + gen_helper_neon_unzip16, + NULL, + NULL, + }, { + gen_helper_neon_qunzip8, + gen_helper_neon_qunzip16, + gen_helper_neon_qunzip32, + NULL, + } + }; + return do_zip_uzp(s, a, fn[a->q][a->size]); +} + +static bool trans_VZIP(DisasContext *s, arg_2misc *a) +{ + static ZipFn * const fn[2][4] = { + { + gen_helper_neon_zip8, + gen_helper_neon_zip16, + NULL, + NULL, + }, { + gen_helper_neon_qzip8, + gen_helper_neon_qzip16, + gen_helper_neon_qzip32, + NULL, + } + }; + return do_zip_uzp(s, a, fn[a->q][a->size]); +} + +static bool do_vmovn(DisasContext *s, arg_2misc *a, + NeonGenNarrowEnvFn *narrowfn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i64 rm; + TCGv_i32 rd0, rd1; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vm & 1) { + return false; + } + + if (!narrowfn) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rm = tcg_temp_new_i64(tcg_ctx); + rd0 = tcg_temp_new_i32(tcg_ctx); + rd1 = tcg_temp_new_i32(tcg_ctx); + + neon_load_reg64(tcg_ctx, rm, a->vm); + narrowfn(tcg_ctx, rd0, tcg_ctx->cpu_env, rm); + neon_load_reg64(tcg_ctx, rm, a->vm + 1); + narrowfn(tcg_ctx, rd1, tcg_ctx->cpu_env, rm); + neon_store_reg(tcg_ctx, a->vd, 0, rd0); + neon_store_reg(tcg_ctx, a->vd, 1, rd1); + tcg_temp_free_i64(tcg_ctx, rm); + return true; +} + +#define DO_VMOVN(INSN, FUNC) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + static NeonGenNarrowEnvFn * const narrowfn[] = { \ + FUNC##8, \ + FUNC##16, \ + FUNC##32, \ + NULL, \ + }; \ + return do_vmovn(s, a, narrowfn[a->size]); \ + } + +DO_VMOVN(VMOVN, gen_neon_narrow_u) +DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat) +DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s) +DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u) + +static bool trans_VSHLL(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i32 rm0, rm1; + TCGv_i64 rd; + static NeonGenWidenFn * const widenfns[] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + NULL, + }; + NeonGenWidenFn *widenfn = widenfns[a->size]; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->vd & 1) { + return false; + } + + if (!widenfn) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rd = tcg_temp_new_i64(tcg_ctx); + + rm0 = neon_load_reg(tcg_ctx, a->vm, 0); + rm1 = neon_load_reg(tcg_ctx, a->vm, 1); + + widenfn(tcg_ctx, rd, rm0); + tcg_gen_shli_i64(tcg_ctx, rd, rd, 8 << a->size); + neon_store_reg64(tcg_ctx, rd, a->vd); + widenfn(tcg_ctx, rd, rm1); + tcg_gen_shli_i64(tcg_ctx, rd, rd, 8 << a->size); + neon_store_reg64(tcg_ctx, rd, a->vd + 1); + + tcg_temp_free_i64(tcg_ctx, rd); + tcg_temp_free_i32(tcg_ctx, rm0); + tcg_temp_free_i32(tcg_ctx, rm1); + return true; +} + +static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_ptr fpst; + TCGv_i32 ahp, tmp, tmp2, tmp3; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !dc_isar_feature(aa32_fp16_spconv, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vm & 1) || (a->size != 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, true); + ahp = get_ahp_flag(tcg_ctx); + tmp = neon_load_reg(tcg_ctx, a->vm, 0); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp); + tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16); + tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + tmp = neon_load_reg(tcg_ctx, a->vm, 2); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); + tmp3 = neon_load_reg(tcg_ctx, a->vm, 3); + neon_store_reg(tcg_ctx, a->vd, 0, tmp2); + gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp); + tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16); + tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp3); + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, ahp); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_ptr fpst; + TCGv_i32 ahp, tmp, tmp2, tmp3; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !dc_isar_feature(aa32_fp16_spconv, s)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd & 1) || (a->size != 1)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, true); + ahp = get_ahp_flag(tcg_ctx); + tmp3 = tcg_temp_new_i32(tcg_ctx); + tmp = neon_load_reg(tcg_ctx, a->vm, 0); + tmp2 = neon_load_reg(tcg_ctx, a->vm, 1); + tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 0, tmp3); + tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 1, tmp); + tmp3 = tcg_temp_new_i32(tcg_ctx); + tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 2, tmp3); + tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp); + neon_store_reg(tcg_ctx, a->vd, 3, tmp2); + tcg_temp_free_i32(tcg_ctx, ahp); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int vec_size = a->q ? 16 : 8; + int rd_ofs = neon_reg_offset(a->vd, 0); + int rm_ofs = neon_reg_offset(a->vm, 0); + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size == 3) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fn(tcg_ctx, a->size, rd_ofs, rm_ofs, vec_size, vec_size); + + return true; +} + +#define DO_2MISC_VEC(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_2misc_vec(s, a, FN); \ + } + +DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg) +DO_2MISC_VEC(VABS, tcg_gen_gvec_abs) +DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0) +DO_2MISC_VEC(VCGT0, gen_gvec_cgt0) +DO_2MISC_VEC(VCLE0, gen_gvec_cle0) +DO_2MISC_VEC(VCGE0, gen_gvec_cge0) +DO_2MISC_VEC(VCLT0, gen_gvec_clt0) + +static bool trans_VMVN(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc_vec(s, a, tcg_gen_gvec_not); +} + +#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, \ + uint32_t maxsz) \ + { \ + tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \ + DATA, FUNC); \ + } + +#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \ + static void WRAPNAME(TCGContext *tcg_ctx, \ + unsigned vece, uint32_t rd_ofs, \ + uint32_t rm_ofs, uint32_t oprsz, \ + uint32_t maxsz) \ + { \ + tcg_gen_gvec_2_ool(tcg_ctx, rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \ + } + +WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0) +WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1) +WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0) +WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1) +WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0) +WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0) +WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0) + +#define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \ + return false; \ + } \ + return 
do_2misc_vec(s, a, gen_##INSN); \ + } + +DO_2M_CRYPTO(AESE, aa32_aes, 0) +DO_2M_CRYPTO(AESD, aa32_aes, 0) +DO_2M_CRYPTO(AESMC, aa32_aes, 0) +DO_2M_CRYPTO(AESIMC, aa32_aes, 0) +DO_2M_CRYPTO(SHA1H, aa32_sha1, 2) +DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2) +DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2) + +static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int pass; + + /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (!fn) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tmp); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + + return true; +} + +static bool trans_VREV32(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + tcg_gen_bswap32_i32, + gen_swap_half, + NULL, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool trans_VREV16(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc(s, a, gen_rev16); +} + +static bool trans_VCLS(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_helper_neon_cls_s8, + gen_helper_neon_cls_s16, + gen_helper_neon_cls_s32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static void do_VCLZ_32(TCGContext *tcg_ctx, TCGv_i32 rd, TCGv_i32 rm) +{ + tcg_gen_clzi_i32(tcg_ctx, rd, rm, 32); +} + +static bool trans_VCLZ(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_helper_neon_clz_u8, + gen_helper_neon_clz_u16, + do_VCLZ_32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool trans_VCNT(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { + return false; + } + return do_2misc(s, a, gen_helper_neon_cnt_u8); +} + +static bool trans_VABS_F(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + /* TODO: FP16 : size == 1 */ + return do_2misc(s, a, gen_helper_vfp_abss); +} + +static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + /* TODO: FP16 : size == 1 */ + return do_2misc(s, a, gen_helper_vfp_negs); +} + +static bool trans_VRECPE(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + return do_2misc(s, a, gen_helper_recpe_u32); +} + +static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) +{ + if (a->size != 2) { + return false; + } + return do_2misc(s, a, gen_helper_rsqrte_u32); +} + +#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 m) \ + { \ + FUNC(tcg_ctx, d, tcg_ctx->cpu_env, m); \ + } + +WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8) +WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16) +WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32) +WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8) +WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16) +WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32) + +static bool trans_VQABS(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_VQABS_s8, + gen_VQABS_s16, + gen_VQABS_s32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool 
trans_VQNEG(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { + gen_VQNEG_s8, + gen_VQNEG_s16, + gen_VQNEG_s32, + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool do_2misc_fp(DisasContext *s, arg_2misc *a, + NeonGenOneSingleOpFn *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + int pass; + TCGv_ptr fpst; + + /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, 1); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + fn(tcg_ctx, tmp, tmp, fpst); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +#define DO_2MISC_FP(INSN, FUNC) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_2misc_fp(s, a, FUNC); \ + } + +DO_2MISC_FP(VRECPE_F, gen_helper_recpe_f32) +DO_2MISC_FP(VRSQRTE_F, gen_helper_rsqrte_f32) +DO_2MISC_FP(VCVT_FS, gen_helper_vfp_sitos) +DO_2MISC_FP(VCVT_FU, gen_helper_vfp_uitos) +DO_2MISC_FP(VCVT_SF, gen_helper_vfp_tosizs) +DO_2MISC_FP(VCVT_UF, gen_helper_vfp_touizs) + +static bool trans_VRINTX(DisasContext *s, arg_2misc *a) +{ + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + return do_2misc_fp(s, a, gen_helper_rints_exact); +} + +#define WRAP_FP_CMP0_FWD(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 d, \ + TCGv_i32 m, TCGv_ptr fpst) \ + { \ + TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); \ + FUNC(tcg_ctx, d, m, zero, fpst); \ + tcg_temp_free_i32(tcg_ctx, zero); \ + } +#define WRAP_FP_CMP0_REV(WRAPNAME, FUNC) \ + static void WRAPNAME(TCGContext *tcg_ctx, TCGv_i32 d, \ + TCGv_i32 m, TCGv_ptr fpst) \ + { \ + TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); \ + FUNC(tcg_ctx, d, zero, m, fpst); \ + tcg_temp_free_i32(tcg_ctx, zero); \ + } + +#define DO_FP_CMP0(INSN, FUNC, REV) \ + WRAP_FP_CMP0_##REV(gen_##INSN, FUNC) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_2misc_fp(s, a, gen_##INSN); \ + } + +DO_FP_CMP0(VCGT0_F, gen_helper_neon_cgt_f32, FWD) +DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD) +DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD) +DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV) +DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV) + +static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Handle a VRINT* operation by iterating 32 bits at a time, + * with a specified rounding mode in operation. + */ + int pass; + TCGv_ptr fpst; + TCGv_i32 tcg_rmode; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + gen_helper_rints(tcg_ctx, tmp, tmp, fpst); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + tcg_temp_free_i32(tcg_ctx, tcg_rmode); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +#define DO_VRINT(INSN, RMODE) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_vrint(s, a, RMODE); \ + } + +DO_VRINT(VRINTN, FPROUNDING_TIEEVEN) +DO_VRINT(VRINTA, FPROUNDING_TIEAWAY) +DO_VRINT(VRINTZ, FPROUNDING_ZERO) +DO_VRINT(VRINTM, FPROUNDING_NEGINF) +DO_VRINT(VRINTP, FPROUNDING_POSINF) + +static bool do_vcvt(DisasContext *s, arg_2misc *a, int rmode, bool is_signed) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Handle a VCVT* operation by iterating 32 bits at a time, + * with a specified rounding mode in operation. + */ + int pass; + TCGv_ptr fpst; + TCGv_i32 tcg_rmode, tcg_shift; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON) || + !arm_dc_feature(s, ARM_FEATURE_V8)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 2) { + /* TODO: FP16 will be the size == 1 case */ + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + fpst = get_fpstatus_ptr(tcg_ctx, 1); + tcg_shift = tcg_const_i32(tcg_ctx, 0); + tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(tcg_ctx, a->vm, pass); + if (is_signed) { + gen_helper_vfp_tosls(tcg_ctx, tmp, tmp, tcg_shift, fpst); + } else { + gen_helper_vfp_touls(tcg_ctx, tmp, tmp, tcg_shift, fpst); + } + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env); + tcg_temp_free_i32(tcg_ctx, tcg_rmode); + tcg_temp_free_i32(tcg_ctx, tcg_shift); + tcg_temp_free_ptr(tcg_ctx, fpst); + + return true; +} + +#define DO_VCVT(INSN, RMODE, SIGNED) \ + static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ + { \ + return do_vcvt(s, a, RMODE, SIGNED); \ + } + +DO_VCVT(VCVTAU, FPROUNDING_TIEAWAY, false) +DO_VCVT(VCVTAS, FPROUNDING_TIEAWAY, true) +DO_VCVT(VCVTNU, FPROUNDING_TIEEVEN, false) +DO_VCVT(VCVTNS, FPROUNDING_TIEEVEN, true) +DO_VCVT(VCVTPU, FPROUNDING_POSINF, false) +DO_VCVT(VCVTPS, FPROUNDING_POSINF, true) +DO_VCVT(VCVTMU, FPROUNDING_NEGINF, false) +DO_VCVT(VCVTMS, FPROUNDING_NEGINF, true) + +static bool trans_VSWP(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i64 rm, rd; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. 
*/ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (a->size != 0) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rm = tcg_temp_new_i64(tcg_ctx); + rd = tcg_temp_new_i64(tcg_ctx); + for (pass = 0; pass < (a->q ? 2 : 1); pass++) { + neon_load_reg64(tcg_ctx, rm, a->vm + pass); + neon_load_reg64(tcg_ctx, rd, a->vd + pass); + neon_store_reg64(tcg_ctx, rm, a->vd + pass); + neon_store_reg64(tcg_ctx, rd, a->vm + pass); + } + tcg_temp_free_i64(tcg_ctx, rm); + tcg_temp_free_i64(tcg_ctx, rd); + + return true; +} +static void gen_neon_trn_u8(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 rd, tmp; + + rd = tcg_temp_new_i32(tcg_ctx); + tmp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_shli_i32(tcg_ctx, rd, t0, 8); + tcg_gen_andi_i32(tcg_ctx, rd, rd, 0xff00ff00); + tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0x00ff00ff); + tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); + + tcg_gen_shri_i32(tcg_ctx, t1, t1, 8); + tcg_gen_andi_i32(tcg_ctx, t1, t1, 0x00ff00ff); + tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xff00ff00); + tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, t0, rd); + + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, rd); +} + +static void gen_neon_trn_u16(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 rd, tmp; + + rd = tcg_temp_new_i32(tcg_ctx); + tmp = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_shli_i32(tcg_ctx, rd, t0, 16); + tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0xffff); + tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); + tcg_gen_shri_i32(tcg_ctx, t1, t1, 16); + tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xffff0000); + tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); + tcg_gen_mov_i32(tcg_ctx, t0, rd); + + tcg_temp_free_i32(tcg_ctx, tmp); + tcg_temp_free_i32(tcg_ctx, rd); +} + +static bool trans_VTRN(DisasContext *s, arg_2misc *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + TCGv_i32 tmp, tmp2; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if ((a->vd | a->vm) & a->q) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + if (a->size == 2) { + for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) { + tmp = neon_load_reg(tcg_ctx, a->vm, pass); + tmp2 = neon_load_reg(tcg_ctx, a->vd, pass + 1); + neon_store_reg(tcg_ctx, a->vm, pass, tmp2); + neon_store_reg(tcg_ctx, a->vd, pass + 1, tmp); + } + } else { + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + tmp = neon_load_reg(tcg_ctx, a->vm, pass); + tmp2 = neon_load_reg(tcg_ctx, a->vd, pass); + if (a->size == 0) { + gen_neon_trn_u8(tcg_ctx, tmp, tmp2); + } else { + gen_neon_trn_u16(tcg_ctx, tmp, tmp2); + } + neon_store_reg(tcg_ctx, a->vm, pass, tmp2); + neon_store_reg(tcg_ctx, a->vd, pass, tmp); + } + } + return true; +} diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c index ff2f8ff323..fefbc3a202 100644 --- a/qemu/target/arm/translate-sve.c +++ b/qemu/target/arm/translate-sve.c @@ -178,7 +178,7 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_dup64i(tcg_ctx, vec_full_reg_offset(s, rd), vsz, vsz, word); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); } /* Invoke a vector expander on two Pregs. 
*/ @@ -1481,7 +1481,7 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) unsigned oprsz = size_for_gvec(setsz / 8); if (oprsz * 8 == setsz) { - tcg_gen_gvec_dup64i(tcg_ctx, ofs, oprsz, maxsz, word); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, ofs, oprsz, maxsz, word); goto done; } } @@ -2088,7 +2088,11 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) unsigned nofs = vec_reg_offset(s, a->rn, index, esz); tcg_gen_gvec_dup_mem(tcg_ctx, esz, dofs, nofs, vsz, vsz); } else { - tcg_gen_gvec_dup64i(tcg_ctx, dofs, vsz, vsz, 0); + /* + * While dup_mem handles 128-bit elements, dup_imm does not. + * Thankfully element size doesn't matter for splatting zero. + */ + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, vsz, vsz, 0); } } return true; @@ -3341,7 +3345,7 @@ static bool trans_FDUP(DisasContext *s, arg_FDUP *a) imm = vfp_expand_imm(a->esz, a->imm); imm = dup_const(a->esz, imm); - tcg_gen_gvec_dup64i(tcg_ctx, dofs, vsz, vsz, imm); + tcg_gen_gvec_dup_imm(tcg_ctx, a->esz, dofs, vsz, vsz, imm); } return true; } @@ -3356,7 +3360,7 @@ static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) unsigned vsz = vec_full_reg_size(s); int dofs = vec_full_reg_offset(s, a->rd); - tcg_gen_gvec_dup64i(tcg_ctx, dofs, vsz, vsz, dup_const(a->esz, a->imm)); + tcg_gen_gvec_dup_imm(tcg_ctx, a->esz, dofs, vsz, vsz, a->imm); } return true; } @@ -4050,41 +4054,33 @@ static bool trans_FCADD(DisasContext *s, arg_FCADD *a) typedef void gen_helper_sve_fmla(TCGContext *, TCGv_env, TCGv_ptr, TCGv_i32); -static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn) +static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, + gen_helper_gvec_5_ptr *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (fn == NULL) { + if (a->esz == 0) { return false; } - if (!sve_access_check(s)) { - return true; + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16); + tcg_gen_gvec_5_ptr(tcg_ctx, + vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, 0, fn); + tcg_temp_free_ptr(tcg_ctx, status); } - unsigned vsz = vec_full_reg_size(s); - unsigned desc; - TCGv_i32 t_desc; - TCGv_ptr pg = tcg_temp_new_ptr(tcg_ctx); - - /* We would need 7 operands to pass these arguments "properly". - * So we encode all the register numbers into the descriptor. 
- */ - desc = deposit32(a->rd, 5, 5, a->rn); - desc = deposit32(desc, 10, 5, a->rm); - desc = deposit32(desc, 15, 5, a->ra); - desc = simd_desc(vsz, vsz, desc); - - t_desc = tcg_const_i32(tcg_ctx, desc); - tcg_gen_addi_ptr(tcg_ctx, pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, a->pg)); - fn(tcg_ctx, tcg_ctx->cpu_env, pg, t_desc); - tcg_temp_free_i32(tcg_ctx, t_desc); - tcg_temp_free_ptr(tcg_ctx, pg); return true; } #define DO_FMLA(NAME, name) \ static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \ { \ - static gen_helper_sve_fmla * const fns[4] = { \ + static gen_helper_gvec_5_ptr * const fns[4] = { \ NULL, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ @@ -4101,7 +4097,8 @@ DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - static gen_helper_sve_fmla * const fns[3] = { + static gen_helper_gvec_5_ptr * const fns[4] = { + NULL, gen_helper_sve_fcmla_zpzzz_h, gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, @@ -4112,25 +4109,15 @@ static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - unsigned desc; - TCGv_i32 t_desc; - TCGv_ptr pg = tcg_temp_new_ptr(tcg_ctx); - - /* We would need 7 operands to pass these arguments "properly". - * So we encode all the register numbers into the descriptor. - */ - desc = deposit32(a->rd, 5, 5, a->rn); - desc = deposit32(desc, 10, 5, a->rm); - desc = deposit32(desc, 15, 5, a->ra); - desc = deposit32(desc, 20, 2, a->rot); - desc = sextract32(desc, 0, 22); - desc = simd_desc(vsz, vsz, desc); - - t_desc = tcg_const_i32(tcg_ctx, desc); - tcg_gen_addi_ptr(tcg_ctx, pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, a->pg)); - fns[a->esz - 1](tcg_ctx, tcg_ctx->cpu_env, pg, t_desc); - tcg_temp_free_i32(tcg_ctx, t_desc); - tcg_temp_free_ptr(tcg_ctx, pg); + TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16); + tcg_gen_gvec_5_ptr(tcg_ctx, + vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, a->rot, fns[a->esz]); + tcg_temp_free_ptr(tcg_ctx, status); } return true; } @@ -4468,15 +4455,17 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0, t1; + TCGv_i64 dirty_addr, clean_addr, t0, t1; - addr = tcg_temp_new_i64(tcg_ctx); - t0 = tcg_temp_new_i64(tcg_ctx); + dirty_addr = tcg_temp_new_i64(tcg_ctx); + tcg_gen_addi_i64(tcg_ctx, dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(tcg_ctx, dirty_addr); - /* Note that unpredicated load/store of vector/predicate registers + /* + * Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian - * operations on larger quantities. There is no nice way to force - * a little-endian load for aarch64_be-linux-user out of line. + * operations on larger quantities. * * Attempt to keep code expansion to a minimum by limiting the * amount of unrolling done. 
@@ -4484,56 +4473,58 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) if (nparts <= 4) { int i; + t0 = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < len_align; i += 8) { - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + i); - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); tcg_gen_st_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + i); + tcg_gen_addi_i64(tcg_ctx, clean_addr, cpu_reg_sp(s, rn), 8); } + tcg_temp_free_i64(tcg_ctx, t0); } else { TCGLabel *loop = gen_new_label(tcg_ctx); TCGv_ptr tp, i = tcg_const_local_ptr(tcg_ctx, 0); - gen_set_label(tcg_ctx, loop); + /* Copy the clean address into a local temp, live across the loop. */ + t0 = clean_addr; + clean_addr = new_tmp_a64_local(s); + tcg_gen_mov_i64(tcg_ctx, clean_addr, t0); - /* Minimize the number of local temps that must be re-read from - * the stack each iteration. Instead, re-compute values other - * than the loop counter. - */ - tp = tcg_temp_new_ptr(tcg_ctx); - tcg_gen_addi_ptr(tcg_ctx, tp, i, imm); - tcg_gen_extu_ptr_i64(tcg_ctx, addr, tp); - tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, rn)); + gen_set_label(tcg_ctx, loop); - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + t0 = tcg_temp_new_i64(tcg_ctx); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tp = tcg_temp_new_ptr(tcg_ctx); tcg_gen_add_ptr(tcg_ctx, tp, tcg_ctx->cpu_env, i); tcg_gen_addi_ptr(tcg_ctx, i, i, 8); tcg_gen_st_i64(tcg_ctx, t0, tp, vofs); tcg_temp_free_ptr(tcg_ctx, tp); + tcg_temp_free_i64(tcg_ctx, t0); tcg_gen_brcondi_ptr(tcg_ctx, TCG_COND_LTU, i, len_align, loop); tcg_temp_free_ptr(tcg_ctx, i); } - /* Predicate register loads can be any multiple of 2. + /* + * Predicate register loads can be any multiple of 2. * Note that we still store the entire 64-bit unit into cpu_env. */ if (len_remain) { - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + len_align); - + t0 = tcg_temp_new_i64(tcg_ctx); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: t1 = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(tcg_ctx, t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(tcg_ctx, addr, addr, 4); - tcg_gen_qemu_ld_i64(tcg_ctx, t1, addr, midx, MO_LEUW); - tcg_gen_deposit_i64(tcg_ctx, t0, t0, t1, 32, 32); + tcg_gen_qemu_ld_i64(tcg_ctx, t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 4); + tcg_gen_qemu_ld_i64(tcg_ctx, t1, clean_addr, midx, MO_LEUW); tcg_gen_deposit_i64(tcg_ctx, t0, t0, t1, 32, 32); tcg_temp_free_i64(tcg_ctx, t1); break; @@ -4541,9 +4532,8 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) g_assert_not_reached(); } tcg_gen_st_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + len_align); + tcg_temp_free_i64(tcg_ctx, t0); } - tcg_temp_free_i64(tcg_ctx, addr); - tcg_temp_free_i64(tcg_ctx, t0); } /* Similarly for stores. 
*/ @@ -4554,10 +4544,12 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0; + TCGv_i64 dirty_addr, clean_addr, t0; - addr = tcg_temp_new_i64(tcg_ctx); - t0 = tcg_temp_new_i64(tcg_ctx); + dirty_addr = tcg_temp_new_i64(tcg_ctx); + tcg_gen_addi_i64(tcg_ctx, dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(tcg_ctx, dirty_addr); /* Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian @@ -4570,33 +4562,34 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) if (nparts <= 4) { int i; + t0 = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < len_align; i += 8) { tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + i); - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + i); - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, cpu_reg_sp(s, rn), 8); } } else { TCGLabel *loop = gen_new_label(tcg_ctx); - TCGv_ptr t2, i = tcg_const_local_ptr(tcg_ctx, 0); - - gen_set_label(tcg_ctx, loop); + TCGv_ptr tp, i = tcg_const_local_ptr(tcg_ctx, 0); - t2 = tcg_temp_new_ptr(tcg_ctx); - tcg_gen_add_ptr(tcg_ctx, t2, tcg_ctx->cpu_env, i); - tcg_gen_ld_i64(tcg_ctx, t0, t2, vofs); - /* Minimize the number of local temps that must be re-read from - * the stack each iteration. Instead, re-compute values other - * than the loop counter. - */ - tcg_gen_addi_ptr(tcg_ctx, t2, i, imm); - tcg_gen_extu_ptr_i64(tcg_ctx, addr, t2); - tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, rn)); - tcg_temp_free_ptr(tcg_ctx, t2); + /* Copy the clean address into a local temp, live across the loop. */ + t0 = clean_addr; + clean_addr = new_tmp_a64_local(s); + tcg_gen_mov_i64(tcg_ctx, clean_addr, t0); - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEQ); + gen_set_label(tcg_ctx, loop); + t0 = tcg_temp_new_i64(tcg_ctx); + tp = tcg_temp_new_ptr(tcg_ctx); + tcg_gen_add_ptr(tcg_ctx, tp, tcg_ctx->cpu_env, i); + tcg_gen_ld_i64(tcg_ctx, t0, tp, vofs); tcg_gen_addi_ptr(tcg_ctx, i, i, 8); + tcg_temp_free_ptr(tcg_ctx, tp); + + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tcg_temp_free_i64(tcg_ctx, t0); tcg_gen_brcondi_ptr(tcg_ctx, TCG_COND_LTU, i, len_align, loop); tcg_temp_free_ptr(tcg_ctx, i); @@ -4604,29 +4597,29 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) /* Predicate register stores can be any multiple of 2. 
*/ if (len_remain) { + t0 = tcg_temp_new_i64(tcg_ctx); tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + len_align); - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + len_align); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(tcg_ctx, addr, addr, 4); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 4); tcg_gen_shri_i64(tcg_ctx, t0, t0, 32); - tcg_gen_qemu_st_i64(tcg_ctx, t0, addr, midx, MO_LEUW); + tcg_gen_qemu_st_i64(tcg_ctx, t0, clean_addr, midx, MO_LEUW); break; default: g_assert_not_reached(); } + tcg_temp_free_i64(tcg_ctx, t0); } - tcg_temp_free_i64(tcg_ctx, addr); - tcg_temp_free_i64(tcg_ctx, t0); } static bool trans_LDR_zri(DisasContext *s, arg_rri *a) @@ -4691,27 +4684,36 @@ static const uint8_t dtype_esz[16] = { 3, 2, 1, 3 }; -static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype) -{ - return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s)); -} - static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, - int dtype, gen_helper_gvec_mem *fn) + int dtype, uint32_t mte_n, bool is_write, + gen_helper_gvec_mem *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; TCGv_i32 t_desc; - int desc; + int desc = 0; - /* For e.g. LD4, there are not enough arguments to pass all 4 + /* + * For e.g. LD4, there are not enough arguments to pass all 4 * registers as pointers, so encode the regno into the data field. * For consistency, do this even for LD1. */ - desc = sve_memopidx(s, dtype); - desc |= zt << MEMOPIDX_SHIFT; - desc = simd_desc(vsz, vsz, desc); + if (s->mte_active[0]) { + int msz = dtype_msz(dtype); + + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz, desc); + FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz, desc); + desc <<= SVE_MTEDESC_SHIFT; + } else { + addr = clean_data_tbi(s, addr); + } + + desc = simd_desc(vsz, vsz, zt | desc); t_desc = tcg_const_i32(tcg_ctx, desc); t_pg = tcg_temp_new_ptr(tcg_ctx); @@ -4725,64 +4727,132 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, static void do_ld_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype, int nreg) { - static gen_helper_gvec_mem * const fns[2][16][4] = { - /* Little-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, - gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, - { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, - gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, - { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, 
NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, - gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, - - /* Big-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, - gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, - { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, - gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, - { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, - gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } + static gen_helper_gvec_mem * const fns[2][2][16][4] = { + { /* mte inactive, little-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, + gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, + { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, + gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, + { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, + gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, + + /* mte inactive, big-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, + gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, + { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, + gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, + { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, + gen_helper_sve_ld3dd_be_r, 
gen_helper_sve_ld4dd_be_r } } }, + + { /* mte active, little-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r_mte, + gen_helper_sve_ld2hh_le_r_mte, + gen_helper_sve_ld3hh_le_r_mte, + gen_helper_sve_ld4hh_le_r_mte }, + { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r_mte, + gen_helper_sve_ld2ss_le_r_mte, + gen_helper_sve_ld3ss_le_r_mte, + gen_helper_sve_ld4ss_le_r_mte }, + { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r_mte, + gen_helper_sve_ld2dd_le_r_mte, + gen_helper_sve_ld3dd_le_r_mte, + gen_helper_sve_ld4dd_le_r_mte } }, + + /* mte active, big-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r_mte, + gen_helper_sve_ld2hh_be_r_mte, + gen_helper_sve_ld3hh_be_r_mte, + gen_helper_sve_ld4hh_be_r_mte }, + { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r_mte, + gen_helper_sve_ld2ss_be_r_mte, + gen_helper_sve_ld3ss_be_r_mte, + gen_helper_sve_ld4ss_be_r_mte }, + { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r_mte, + gen_helper_sve_ld2dd_be_r_mte, + gen_helper_sve_ld3dd_be_r_mte, + gen_helper_sve_ld4dd_be_r_mte } } }, }; - gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg]; + gen_helper_gvec_mem *fn + = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; - /* While there are holes in the table, they are not + /* + * While there are holes in the table, they are not * accessible via the instruction encoding. 
*/ assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, dtype, fn); + do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); } static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) @@ -4819,56 +4889,98 @@ static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - static gen_helper_gvec_mem * const fns[2][16] = { - /* Little-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_le_r, - gen_helper_sve_ldff1hh_le_r, - gen_helper_sve_ldff1hsu_le_r, - gen_helper_sve_ldff1hdu_le_r, - - gen_helper_sve_ldff1hds_le_r, - gen_helper_sve_ldff1hss_le_r, - gen_helper_sve_ldff1ss_le_r, - gen_helper_sve_ldff1sdu_le_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_le_r }, - - /* Big-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_be_r, - gen_helper_sve_ldff1hh_be_r, - gen_helper_sve_ldff1hsu_be_r, - gen_helper_sve_ldff1hdu_be_r, - - gen_helper_sve_ldff1hds_be_r, - gen_helper_sve_ldff1hss_be_r, - gen_helper_sve_ldff1ss_be_r, - gen_helper_sve_ldff1sdu_be_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_be_r }, + static gen_helper_gvec_mem * const fns[2][2][16] = { + { /* mte inactive, little-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_le_r, + gen_helper_sve_ldff1hh_le_r, + gen_helper_sve_ldff1hsu_le_r, + gen_helper_sve_ldff1hdu_le_r, + + gen_helper_sve_ldff1hds_le_r, + gen_helper_sve_ldff1hss_le_r, + gen_helper_sve_ldff1ss_le_r, + gen_helper_sve_ldff1sdu_le_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_le_r }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_be_r, + gen_helper_sve_ldff1hh_be_r, + gen_helper_sve_ldff1hsu_be_r, + gen_helper_sve_ldff1hdu_be_r, + + gen_helper_sve_ldff1hds_be_r, + gen_helper_sve_ldff1hss_be_r, + gen_helper_sve_ldff1ss_be_r, + gen_helper_sve_ldff1sdu_be_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_be_r } }, + + { /* mte active, little-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_le_r_mte, + gen_helper_sve_ldff1hh_le_r_mte, + gen_helper_sve_ldff1hsu_le_r_mte, + gen_helper_sve_ldff1hdu_le_r_mte, + + gen_helper_sve_ldff1hds_le_r_mte, + gen_helper_sve_ldff1hss_le_r_mte, + gen_helper_sve_ldff1ss_le_r_mte, + gen_helper_sve_ldff1sdu_le_r_mte, + + gen_helper_sve_ldff1bds_r_mte, + gen_helper_sve_ldff1bss_r_mte, + gen_helper_sve_ldff1bhs_r_mte, + gen_helper_sve_ldff1dd_le_r_mte }, + + /* mte active, big-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_be_r_mte, + gen_helper_sve_ldff1hh_be_r_mte, + gen_helper_sve_ldff1hsu_be_r_mte, + gen_helper_sve_ldff1hdu_be_r_mte, + + gen_helper_sve_ldff1hds_be_r_mte, + 
gen_helper_sve_ldff1hss_be_r_mte, + gen_helper_sve_ldff1ss_be_r_mte, + gen_helper_sve_ldff1sdu_be_r_mte, + + gen_helper_sve_ldff1bds_r_mte, + gen_helper_sve_ldff1bss_r_mte, + gen_helper_sve_ldff1bhs_r_mte, + gen_helper_sve_ldff1dd_be_r_mte } }, }; if (sve_access_check(s)) { TCGv_i64 addr = new_tmp_a64(s); tcg_gen_shli_i64(tcg_ctx, addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, a->rn)); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, - fns[s->be_data == MO_BE][a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, + fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); } return true; } @@ -4876,48 +4988,90 @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - static gen_helper_gvec_mem * const fns[2][16] = { - /* Little-endian */ - { gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, - - gen_helper_sve_ldnf1sds_le_r, - gen_helper_sve_ldnf1hh_le_r, - gen_helper_sve_ldnf1hsu_le_r, - gen_helper_sve_ldnf1hdu_le_r, - - gen_helper_sve_ldnf1hds_le_r, - gen_helper_sve_ldnf1hss_le_r, - gen_helper_sve_ldnf1ss_le_r, - gen_helper_sve_ldnf1sdu_le_r, - - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_le_r }, - - /* Big-endian */ - { gen_helper_sve_ldnf1bb_r, - gen_helper_sve_ldnf1bhu_r, - gen_helper_sve_ldnf1bsu_r, - gen_helper_sve_ldnf1bdu_r, - - gen_helper_sve_ldnf1sds_be_r, - gen_helper_sve_ldnf1hh_be_r, - gen_helper_sve_ldnf1hsu_be_r, - gen_helper_sve_ldnf1hdu_be_r, - - gen_helper_sve_ldnf1hds_be_r, - gen_helper_sve_ldnf1hss_be_r, - gen_helper_sve_ldnf1ss_be_r, - gen_helper_sve_ldnf1sdu_be_r, - - gen_helper_sve_ldnf1bds_r, - gen_helper_sve_ldnf1bss_r, - gen_helper_sve_ldnf1bhs_r, - gen_helper_sve_ldnf1dd_be_r }, + static gen_helper_gvec_mem * const fns[2][2][16] = { + { /* mte inactive, little-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_le_r, + gen_helper_sve_ldnf1hh_le_r, + gen_helper_sve_ldnf1hsu_le_r, + gen_helper_sve_ldnf1hdu_le_r, + + gen_helper_sve_ldnf1hds_le_r, + gen_helper_sve_ldnf1hss_le_r, + gen_helper_sve_ldnf1ss_le_r, + gen_helper_sve_ldnf1sdu_le_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_le_r }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldnf1bb_r, + gen_helper_sve_ldnf1bhu_r, + gen_helper_sve_ldnf1bsu_r, + gen_helper_sve_ldnf1bdu_r, + + gen_helper_sve_ldnf1sds_be_r, + gen_helper_sve_ldnf1hh_be_r, + gen_helper_sve_ldnf1hsu_be_r, + gen_helper_sve_ldnf1hdu_be_r, + + gen_helper_sve_ldnf1hds_be_r, + gen_helper_sve_ldnf1hss_be_r, + gen_helper_sve_ldnf1ss_be_r, + gen_helper_sve_ldnf1sdu_be_r, + + gen_helper_sve_ldnf1bds_r, + gen_helper_sve_ldnf1bss_r, + gen_helper_sve_ldnf1bhs_r, + gen_helper_sve_ldnf1dd_be_r } }, + + { /* mte inactive, little-endian */ + { gen_helper_sve_ldnf1bb_r_mte, + gen_helper_sve_ldnf1bhu_r_mte, + gen_helper_sve_ldnf1bsu_r_mte, + gen_helper_sve_ldnf1bdu_r_mte, + + gen_helper_sve_ldnf1sds_le_r_mte, + gen_helper_sve_ldnf1hh_le_r_mte, + gen_helper_sve_ldnf1hsu_le_r_mte, + gen_helper_sve_ldnf1hdu_le_r_mte, + + gen_helper_sve_ldnf1hds_le_r_mte, + gen_helper_sve_ldnf1hss_le_r_mte, + gen_helper_sve_ldnf1ss_le_r_mte, + gen_helper_sve_ldnf1sdu_le_r_mte, + + 
gen_helper_sve_ldnf1bds_r_mte, + gen_helper_sve_ldnf1bss_r_mte, + gen_helper_sve_ldnf1bhs_r_mte, + gen_helper_sve_ldnf1dd_le_r_mte }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldnf1bb_r_mte, + gen_helper_sve_ldnf1bhu_r_mte, + gen_helper_sve_ldnf1bsu_r_mte, + gen_helper_sve_ldnf1bdu_r_mte, + + gen_helper_sve_ldnf1sds_be_r_mte, + gen_helper_sve_ldnf1hh_be_r_mte, + gen_helper_sve_ldnf1hsu_be_r_mte, + gen_helper_sve_ldnf1hdu_be_r_mte, + + gen_helper_sve_ldnf1hds_be_r_mte, + gen_helper_sve_ldnf1hss_be_r_mte, + gen_helper_sve_ldnf1ss_be_r_mte, + gen_helper_sve_ldnf1sdu_be_r_mte, + + gen_helper_sve_ldnf1bds_r_mte, + gen_helper_sve_ldnf1bss_r_mte, + gen_helper_sve_ldnf1bhs_r_mte, + gen_helper_sve_ldnf1dd_be_r_mte } }, }; if (sve_access_check(s)) { @@ -4927,8 +5081,8 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) TCGv_i64 addr = new_tmp_a64(s); tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, a->rn), off); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, - fns[s->be_data == MO_BE][a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, + fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); } return true; } @@ -4948,9 +5102,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz) int desc, poff; /* Load the first quadword using the normal predicated load helpers. */ - desc = sve_memopidx(s, msz_dtype(s, msz)); - desc |= zt << MEMOPIDX_SHIFT; - desc = simd_desc(16, 16, desc); + desc = simd_desc(16, 16, zt); t_desc = tcg_const_i32(tcg_ctx, desc); poff = pred_full_reg_offset(s, pg); @@ -5026,8 +5178,14 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) unsigned psz = pred_full_reg_size(s); unsigned esz = dtype_esz[a->dtype]; unsigned msz = dtype_msz(a->dtype); - TCGLabel *over = gen_new_label(tcg_ctx); - TCGv_i64 temp; + TCGLabel *over; + TCGv_i64 temp, clean_addr; + + if (!sve_access_check(s)) { + return true; + } + + over = gen_new_label(tcg_ctx); /* If the guarding predicate has no bits set, no load occurs. */ if (psz <= 8) { @@ -5050,7 +5208,9 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) /* Load the data. */ temp = tcg_temp_new_i64(tcg_ctx); tcg_gen_addi_i64(tcg_ctx, temp, cpu_reg_sp(s, a->rn), a->imm << msz); - tcg_gen_qemu_ld_i64(tcg_ctx, temp, temp, get_mem_index(s), + clean_addr = gen_mte_check1(s, temp, false, true, msz); + + tcg_gen_qemu_ld_i64(tcg_ctx, temp, clean_addr, get_mem_index(s), s->be_data | dtype_mop[a->dtype]); /* Broadcast to *all* elements. 
*/ @@ -5067,73 +5227,125 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) { - static gen_helper_gvec_mem * const fn_single[2][4][4] = { - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_le_r, - gen_helper_sve_st1hs_le_r, - gen_helper_sve_st1hd_le_r }, - { NULL, NULL, - gen_helper_sve_st1ss_le_r, - gen_helper_sve_st1sd_le_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_le_r } }, - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_be_r, - gen_helper_sve_st1hs_be_r, - gen_helper_sve_st1hd_be_r }, - { NULL, NULL, - gen_helper_sve_st1ss_be_r, - gen_helper_sve_st1sd_be_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_be_r } }, + static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { + { { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_le_r, + gen_helper_sve_st1hs_le_r, + gen_helper_sve_st1hd_le_r }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r, + gen_helper_sve_st1sd_le_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r } }, + { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_be_r, + gen_helper_sve_st1hs_be_r, + gen_helper_sve_st1hd_be_r }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r, + gen_helper_sve_st1sd_be_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r } } }, + + { { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_le_r_mte, + gen_helper_sve_st1hs_le_r_mte, + gen_helper_sve_st1hd_le_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r_mte, + gen_helper_sve_st1sd_le_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r_mte } }, + { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_be_r_mte, + gen_helper_sve_st1hs_be_r_mte, + gen_helper_sve_st1hd_be_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r_mte, + gen_helper_sve_st1sd_be_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r_mte } } }, }; - static gen_helper_gvec_mem * const fn_multiple[2][3][4] = { - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_le_r, - gen_helper_sve_st2ss_le_r, - gen_helper_sve_st2dd_le_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_le_r, - gen_helper_sve_st3ss_le_r, - gen_helper_sve_st3dd_le_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_le_r, - gen_helper_sve_st4ss_le_r, - gen_helper_sve_st4dd_le_r } }, - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_be_r, - gen_helper_sve_st2ss_be_r, - gen_helper_sve_st2dd_be_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_be_r, - gen_helper_sve_st3ss_be_r, - gen_helper_sve_st3dd_be_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_be_r, - gen_helper_sve_st4ss_be_r, - gen_helper_sve_st4dd_be_r } }, + static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { + { { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_le_r, + gen_helper_sve_st2ss_le_r, + gen_helper_sve_st2dd_le_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_le_r, + gen_helper_sve_st3ss_le_r, + gen_helper_sve_st3dd_le_r }, + { gen_helper_sve_st4bb_r, + 
gen_helper_sve_st4hh_le_r, + gen_helper_sve_st4ss_le_r, + gen_helper_sve_st4dd_le_r } }, + { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_be_r, + gen_helper_sve_st2ss_be_r, + gen_helper_sve_st2dd_be_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_be_r, + gen_helper_sve_st3ss_be_r, + gen_helper_sve_st3dd_be_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_be_r, + gen_helper_sve_st4ss_be_r, + gen_helper_sve_st4dd_be_r } } }, + { { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_le_r_mte, + gen_helper_sve_st2ss_le_r_mte, + gen_helper_sve_st2dd_le_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_le_r_mte, + gen_helper_sve_st3ss_le_r_mte, + gen_helper_sve_st3dd_le_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_le_r_mte, + gen_helper_sve_st4ss_le_r_mte, + gen_helper_sve_st4dd_le_r_mte } }, + { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_be_r_mte, + gen_helper_sve_st2ss_be_r_mte, + gen_helper_sve_st2dd_be_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_be_r_mte, + gen_helper_sve_st3ss_be_r_mte, + gen_helper_sve_st3dd_be_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_be_r_mte, + gen_helper_sve_st4ss_be_r_mte, + gen_helper_sve_st4dd_be_r_mte } } }, }; gen_helper_gvec_mem *fn; int be = s->be_data == MO_BE; if (nreg == 0) { /* ST1 */ - fn = fn_single[be][msz][esz]; + fn = fn_single[s->mte_active[0]][be][msz][esz]; + nreg = 1; } else { /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ assert(msz == esz); - fn = fn_multiple[be][nreg - 1][msz]; + fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; } assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) @@ -5174,7 +5386,7 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) */ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, - int scale, TCGv_i64 scalar, int msz, + int scale, TCGv_i64 scalar, int msz, bool is_write, gen_helper_gvec_mem_scatter *fn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; @@ -5183,11 +5395,17 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, TCGv_ptr t_pg = tcg_temp_new_ptr(tcg_ctx); TCGv_ptr t_zt = tcg_temp_new_ptr(tcg_ctx); TCGv_i32 t_desc; - int desc; + int desc = 0; - desc = sve_memopidx(s, msz_dtype(s, msz)); - desc |= scale << MEMOPIDX_SHIFT; - desc = simd_desc(vsz, vsz, desc); + if (s->mte_active[0]) { + FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s), desc); + FIELD_DP32(desc, MTEDESC, TBI, s->tbid, desc); + FIELD_DP32(desc, MTEDESC, TCMA, s->tcma, desc); + FIELD_DP32(desc, MTEDESC, WRITE, is_write, desc); + FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz, desc); + desc <<= SVE_MTEDESC_SHIFT; + } + desc = simd_desc(vsz, vsz, desc | scale); t_desc = tcg_const_i32(tcg_ctx, desc); tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, pg)); @@ -5201,176 +5419,339 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, tcg_temp_free_i32(tcg_ctx, t_desc); } -/* Indexed by [be][ff][xs][u][msz]. 
*/ -static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_le_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_le_zsu, - gen_helper_sve_ldss_le_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_le_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_le_zss, - gen_helper_sve_ldss_le_zss, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_le_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_le_zsu, - gen_helper_sve_ldffss_le_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_le_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_le_zss, - gen_helper_sve_ldffss_le_zss, } } } }, - - /* Big-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_be_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_be_zsu, - gen_helper_sve_ldss_be_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_be_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_be_zss, - gen_helper_sve_ldss_be_zss, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_be_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_be_zsu, - gen_helper_sve_ldffss_be_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_be_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_be_zss, - gen_helper_sve_ldffss_be_zss, } } } }, +/* Indexed by [mte][be][ff][xs][u][msz]. */ +static gen_helper_gvec_mem_scatter * const +gather_load_fn32[2][2][2][2][2][3] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_le_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_le_zsu, + gen_helper_sve_ldss_le_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_le_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_le_zss, + gen_helper_sve_ldss_le_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_le_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_le_zsu, + gen_helper_sve_ldffss_le_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_le_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_le_zss, + gen_helper_sve_ldffss_le_zss, } } } }, + + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_be_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_be_zsu, + gen_helper_sve_ldss_be_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_be_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_be_zss, + gen_helper_sve_ldss_be_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_be_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_be_zsu, + gen_helper_sve_ldffss_be_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_be_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_be_zss, + gen_helper_sve_ldffss_be_zss, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_le_zsu_mte, + gen_helper_sve_ldss_le_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + 
gen_helper_sve_ldhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_le_zss_mte, + gen_helper_sve_ldss_le_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_le_zsu_mte, + gen_helper_sve_ldffss_le_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_le_zss_mte, + gen_helper_sve_ldffss_le_zss_mte, } } } }, + + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_be_zsu_mte, + gen_helper_sve_ldss_be_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + gen_helper_sve_ldhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_be_zss_mte, + gen_helper_sve_ldss_be_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_be_zsu_mte, + gen_helper_sve_ldffss_be_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_be_zss_mte, + gen_helper_sve_ldffss_be_zss_mte, } } } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_le_zsu, - gen_helper_sve_ldsds_le_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_le_zsu, - gen_helper_sve_ldsdu_le_zsu, - gen_helper_sve_lddd_le_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_le_zss, - gen_helper_sve_ldsds_le_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_le_zss, - gen_helper_sve_ldsdu_le_zss, - gen_helper_sve_lddd_le_zss, } }, - { { gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_le_zd, - gen_helper_sve_ldsds_le_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_le_zd, - gen_helper_sve_ldsdu_le_zd, - gen_helper_sve_lddd_le_zd, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_le_zsu, - gen_helper_sve_ldffsds_le_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_le_zsu, - gen_helper_sve_ldffsdu_le_zsu, - gen_helper_sve_ldffdd_le_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_le_zss, - gen_helper_sve_ldffsds_le_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_le_zss, - gen_helper_sve_ldffsdu_le_zss, - gen_helper_sve_ldffdd_le_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_le_zd, - gen_helper_sve_ldffsds_le_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_le_zd, - gen_helper_sve_ldffsdu_le_zd, - gen_helper_sve_ldffdd_le_zd, } } } }, - - /* Big-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_be_zsu, - gen_helper_sve_ldsds_be_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_be_zsu, - gen_helper_sve_ldsdu_be_zsu, - gen_helper_sve_lddd_be_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_be_zss, - gen_helper_sve_ldsds_be_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_be_zss, - gen_helper_sve_ldsdu_be_zss, - gen_helper_sve_lddd_be_zss, } }, - { { 
gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_be_zd, - gen_helper_sve_ldsds_be_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_be_zd, - gen_helper_sve_ldsdu_be_zd, - gen_helper_sve_lddd_be_zd, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_be_zsu, - gen_helper_sve_ldffsds_be_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_be_zsu, - gen_helper_sve_ldffsdu_be_zsu, - gen_helper_sve_ldffdd_be_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_be_zss, - gen_helper_sve_ldffsds_be_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_be_zss, - gen_helper_sve_ldffsdu_be_zss, - gen_helper_sve_ldffdd_be_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_be_zd, - gen_helper_sve_ldffsds_be_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_be_zd, - gen_helper_sve_ldffsdu_be_zd, - gen_helper_sve_ldffdd_be_zd, } } } }, +static gen_helper_gvec_mem_scatter * const +gather_load_fn64[2][2][2][3][2][4] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_le_zsu, + gen_helper_sve_ldsds_le_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_le_zsu, + gen_helper_sve_ldsdu_le_zsu, + gen_helper_sve_lddd_le_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_le_zss, + gen_helper_sve_ldsds_le_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_le_zss, + gen_helper_sve_ldsdu_le_zss, + gen_helper_sve_lddd_le_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_le_zd, + gen_helper_sve_ldsds_le_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_le_zd, + gen_helper_sve_ldsdu_le_zd, + gen_helper_sve_lddd_le_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_le_zsu, + gen_helper_sve_ldffsds_le_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_le_zsu, + gen_helper_sve_ldffsdu_le_zsu, + gen_helper_sve_ldffdd_le_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_le_zss, + gen_helper_sve_ldffsds_le_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_le_zss, + gen_helper_sve_ldffsdu_le_zss, + gen_helper_sve_ldffdd_le_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_le_zd, + gen_helper_sve_ldffsds_le_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_le_zd, + gen_helper_sve_ldffsdu_le_zd, + gen_helper_sve_ldffdd_le_zd, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_be_zsu, + gen_helper_sve_ldsds_be_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_be_zsu, + gen_helper_sve_ldsdu_be_zsu, + gen_helper_sve_lddd_be_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_be_zss, + gen_helper_sve_ldsds_be_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_be_zss, + gen_helper_sve_ldsdu_be_zss, + gen_helper_sve_lddd_be_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_be_zd, + gen_helper_sve_ldsds_be_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_be_zd, + gen_helper_sve_ldsdu_be_zd, + gen_helper_sve_lddd_be_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_be_zsu, + gen_helper_sve_ldffsds_be_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_be_zsu, + gen_helper_sve_ldffsdu_be_zsu, + gen_helper_sve_ldffdd_be_zsu, } }, + { { 
gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_be_zss, + gen_helper_sve_ldffsds_be_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_be_zss, + gen_helper_sve_ldffsdu_be_zss, + gen_helper_sve_ldffdd_be_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_be_zd, + gen_helper_sve_ldffsds_be_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_be_zd, + gen_helper_sve_ldffsdu_be_zd, + gen_helper_sve_ldffdd_be_zd, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_le_zsu_mte, + gen_helper_sve_ldsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_le_zsu_mte, + gen_helper_sve_ldsdu_le_zsu_mte, + gen_helper_sve_lddd_le_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_le_zss_mte, + gen_helper_sve_ldsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_le_zss_mte, + gen_helper_sve_ldsdu_le_zss_mte, + gen_helper_sve_lddd_le_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_le_zd_mte, + gen_helper_sve_ldsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_le_zd_mte, + gen_helper_sve_ldsdu_le_zd_mte, + gen_helper_sve_lddd_le_zd_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_le_zsu_mte, + gen_helper_sve_ldffsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_le_zsu_mte, + gen_helper_sve_ldffsdu_le_zsu_mte, + gen_helper_sve_ldffdd_le_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_le_zss_mte, + gen_helper_sve_ldffsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_le_zss_mte, + gen_helper_sve_ldffsdu_le_zss_mte, + gen_helper_sve_ldffdd_le_zss_mte, } }, + { { gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_le_zd_mte, + gen_helper_sve_ldffsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_le_zd_mte, + gen_helper_sve_ldffsdu_le_zd_mte, + gen_helper_sve_ldffdd_le_zd_mte, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_be_zsu_mte, + gen_helper_sve_ldsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_be_zsu_mte, + gen_helper_sve_ldsdu_be_zsu_mte, + gen_helper_sve_lddd_be_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_be_zss_mte, + gen_helper_sve_ldsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_be_zss_mte, + gen_helper_sve_ldsdu_be_zss_mte, + gen_helper_sve_lddd_be_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_be_zd_mte, + gen_helper_sve_ldsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_be_zd_mte, + gen_helper_sve_ldsdu_be_zd_mte, + gen_helper_sve_lddd_be_zd_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_be_zsu_mte, + gen_helper_sve_ldffsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_be_zsu_mte, + gen_helper_sve_ldffsdu_be_zsu_mte, + gen_helper_sve_ldffdd_be_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_be_zss_mte, + gen_helper_sve_ldffsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_be_zss_mte, + gen_helper_sve_ldffsdu_be_zss_mte, + gen_helper_sve_ldffdd_be_zss_mte, } }, + { { 
gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_be_zd_mte, + gen_helper_sve_ldffsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_be_zd_mte, + gen_helper_sve_ldffsdu_be_zd_mte, + gen_helper_sve_ldffdd_be_zd_mte, } } } } }, }; static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (!sve_access_check(s)) { return true; @@ -5378,16 +5759,16 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; break; } assert(fn != NULL); do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, false, fn); return true; } @@ -5395,7 +5776,8 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { @@ -5407,10 +5789,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][0][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][2][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; break; } assert(fn != NULL); @@ -5419,63 +5801,108 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(tcg_ctx, a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn); tcg_temp_free_i64(tcg_ctx, imm); return true; } -/* Indexed by [be][xs][msz]. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = { - /* Little-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_le_zsu, - gen_helper_sve_stss_le_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_le_zss, - gen_helper_sve_stss_le_zss, } }, - /* Big-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_be_zsu, - gen_helper_sve_stss_be_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_be_zss, - gen_helper_sve_stss_be_zss, } }, +/* Indexed by [mte][be][xs][msz]. 
*/ +static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = { + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_le_zsu, + gen_helper_sve_stss_le_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_le_zss, + gen_helper_sve_stss_le_zss, } }, + { /* Big-endian */ + { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_be_zsu, + gen_helper_sve_stss_be_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_be_zss, + gen_helper_sve_stss_be_zss, } } }, + { /* MTE Active */ + { /* Little-endian */ + { gen_helper_sve_stbs_zsu_mte, + gen_helper_sve_sths_le_zsu_mte, + gen_helper_sve_stss_le_zsu_mte, }, + { gen_helper_sve_stbs_zss_mte, + gen_helper_sve_sths_le_zss_mte, + gen_helper_sve_stss_le_zss_mte, } }, + { /* Big-endian */ + { gen_helper_sve_stbs_zsu_mte, + gen_helper_sve_sths_be_zsu_mte, + gen_helper_sve_stss_be_zsu_mte, }, + { gen_helper_sve_stbs_zss_mte, + gen_helper_sve_sths_be_zss_mte, + gen_helper_sve_stss_be_zss_mte, } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = { - /* Little-endian */ - { { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_le_zsu, - gen_helper_sve_stsd_le_zsu, - gen_helper_sve_stdd_le_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_le_zss, - gen_helper_sve_stsd_le_zss, - gen_helper_sve_stdd_le_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_le_zd, - gen_helper_sve_stsd_le_zd, - gen_helper_sve_stdd_le_zd, } }, - /* Big-endian */ - { { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_be_zsu, - gen_helper_sve_stsd_be_zsu, - gen_helper_sve_stdd_be_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_be_zss, - gen_helper_sve_stsd_be_zss, - gen_helper_sve_stdd_be_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_be_zd, - gen_helper_sve_stsd_be_zd, - gen_helper_sve_stdd_be_zd, } }, +static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { + { /* MTE Inactive */ + { /* Little-endian */ + { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_le_zsu, + gen_helper_sve_stsd_le_zsu, + gen_helper_sve_stdd_le_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_le_zss, + gen_helper_sve_stsd_le_zss, + gen_helper_sve_stdd_le_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_le_zd, + gen_helper_sve_stsd_le_zd, + gen_helper_sve_stdd_le_zd, } }, + { /* Big-endian */ + { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_be_zsu, + gen_helper_sve_stsd_be_zsu, + gen_helper_sve_stdd_be_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_be_zss, + gen_helper_sve_stsd_be_zss, + gen_helper_sve_stdd_be_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_be_zd, + gen_helper_sve_stsd_be_zd, + gen_helper_sve_stdd_be_zd, } } }, + { /* MTE Active */ + { /* Little-endian */ + { gen_helper_sve_stbd_zsu_mte, + gen_helper_sve_sthd_le_zsu_mte, + gen_helper_sve_stsd_le_zsu_mte, + gen_helper_sve_stdd_le_zsu_mte, }, + { gen_helper_sve_stbd_zss_mte, + gen_helper_sve_sthd_le_zss_mte, + gen_helper_sve_stsd_le_zss_mte, + gen_helper_sve_stdd_le_zss_mte, }, + { gen_helper_sve_stbd_zd_mte, + gen_helper_sve_sthd_le_zd_mte, + gen_helper_sve_stsd_le_zd_mte, + gen_helper_sve_stdd_le_zd_mte, } }, + { /* Big-endian */ + { gen_helper_sve_stbd_zsu_mte, + gen_helper_sve_sthd_be_zsu_mte, + gen_helper_sve_stsd_be_zsu_mte, + gen_helper_sve_stdd_be_zsu_mte, }, + { gen_helper_sve_stbd_zss_mte, + gen_helper_sve_sthd_be_zss_mte, + gen_helper_sve_stsd_be_zss_mte, + gen_helper_sve_stdd_be_zss_mte, 
}, + { gen_helper_sve_stbd_zd_mte, + gen_helper_sve_sthd_be_zd_mte, + gen_helper_sve_stsd_be_zd_mte, + gen_helper_sve_stdd_be_zd_mte, } } }, }; static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (a->esz < a->msz || (a->msz == 0 && a->scale)) { return false; @@ -5485,16 +5912,16 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) } switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][a->xs][a->msz]; + fn = scatter_store_fn32[mte][be][a->xs][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][a->xs][a->msz]; + fn = scatter_store_fn64[mte][be][a->xs][a->msz]; break; default: g_assert_not_reached(); } do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, true, fn); return true; } @@ -5502,7 +5929,8 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz) { @@ -5514,10 +5942,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][0][a->msz]; + fn = scatter_store_fn32[mte][be][0][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][2][a->msz]; + fn = scatter_store_fn64[mte][be][2][a->msz]; break; } assert(fn != NULL); @@ -5526,7 +5954,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(tcg_ctx, a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn); tcg_temp_free_i64(tcg_ctx, imm); return true; } diff --git a/qemu/target/arm/translate-vfp.inc.c b/qemu/target/arm/translate-vfp.inc.c index 4773efb9b2..e0fd1dfda9 100644 --- a/qemu/target/arm/translate-vfp.inc.c +++ b/qemu/target/arm/translate-vfp.inc.c @@ -122,15 +122,14 @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled) if (s->v7m_lspact) { /* * Lazy state saving affects external memory and also the NVIC, - * so we must mark it as an IO operation for icount. + * so we must mark it as an IO operation for icount (and cause + * this to be the last insn in the TB). */ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { + s->base.is_jmp = DISAS_UPDATE_EXIT; gen_io_start(tcg_ctx); } gen_helper_v7m_preserve_fp_state(tcg_ctx, tcg_ctx->cpu_env); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); - } /* * If the preserve_fp_state helper doesn't throw an exception * then it will clear LSPACT; we don't need to repeat this for @@ -1911,12 +1910,6 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) return false; } - /* UNDEF accesses to D16-D31 if they don't exist. 
*/ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - if (!vfp_access_check(s)) { return true; } @@ -2930,6 +2923,6 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) tcg_temp_free_i32(tcg_ctx, fptr); /* End the TB, because we have updated FP control bits */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; return true; } diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 744d8ff709..489db79713 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -368,47 +368,10 @@ static void gen_revsh(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var) tcg_gen_ext16s_i32(tcg_ctx, dest, var); } -/* 32x32->64 multiply. Marks inputs as dead. */ -static TCGv_i64 gen_mulu_i64_i32(TCGContext *tcg_ctx, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); - TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); - TCGv_i64 ret; - - tcg_gen_mulu2_i32(tcg_ctx, lo, hi, a, b); - tcg_temp_free_i32(tcg_ctx, a); - tcg_temp_free_i32(tcg_ctx, b); - - ret = tcg_temp_new_i64(tcg_ctx); - tcg_gen_concat_i32_i64(tcg_ctx, ret, lo, hi); - tcg_temp_free_i32(tcg_ctx, lo); - tcg_temp_free_i32(tcg_ctx, hi); - - return ret; -} - -static TCGv_i64 gen_muls_i64_i32(TCGContext *tcg_ctx, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(tcg_ctx); - TCGv_i32 hi = tcg_temp_new_i32(tcg_ctx); - TCGv_i64 ret; - - tcg_gen_muls2_i32(tcg_ctx, lo, hi, a, b); - tcg_temp_free_i32(tcg_ctx, a); - tcg_temp_free_i32(tcg_ctx, b); - - ret = tcg_temp_new_i64(tcg_ctx); - tcg_gen_concat_i32_i64(tcg_ctx, ret, lo, hi); - tcg_temp_free_i32(tcg_ctx, lo); - tcg_temp_free_i32(tcg_ctx, hi); - - return ret; -} - /* Swap low and high halfwords. */ -static void gen_swap_half(TCGContext *tcg_ctx, TCGv_i32 var) +static void gen_swap_half(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var) { - tcg_gen_rotri_i32(tcg_ctx, var, var, 16); + tcg_gen_rotri_i32(tcg_ctx, dest, var, 16); } /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead. @@ -1197,25 +1160,6 @@ neon_reg_offset (int reg, int n) return vfp_reg_offset(0, sreg); } -/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE, - * where 0 is the least significant end of the register. - */ -static inline long -neon_element_offset(int reg, int element, MemOp size) -{ - int element_size = 1 << size; - int ofs = element * element_size; -#ifdef HOST_WORDS_BIGENDIAN - /* Calculate the offset assuming fully little-endian, - * then XOR to account for the order of the 8-byte units. 
- */ - if (element_size < 8) { - ofs ^= 8 - element_size; - } -#endif - return neon_reg_offset(reg, 0) + ofs; -} - static TCGv_i32 neon_load_reg(TCGContext *tcg_ctx, int reg, int pass) { TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); @@ -1223,98 +1167,12 @@ static TCGv_i32 neon_load_reg(TCGContext *tcg_ctx, int reg, int pass) return tmp; } -static void neon_load_element(TCGContext *tcg_ctx, TCGv_i32 var, int reg, int ele, MemOp mop) -{ - long offset = neon_element_offset(reg, ele, mop & MO_SIZE); - - switch (mop) { - case MO_UB: - tcg_gen_ld8u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UW: - tcg_gen_ld16u_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UL: - tcg_gen_ld_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - -static void neon_load_element64(TCGContext *tcg_ctx, TCGv_i64 var, int reg, int ele, MemOp mop) -{ - long offset = neon_element_offset(reg, ele, mop & MO_SIZE); - - switch (mop) { - case MO_UB: - tcg_gen_ld8u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UW: - tcg_gen_ld16u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_UL: - tcg_gen_ld32u_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_Q: - tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - static void neon_store_reg(TCGContext *tcg_ctx, int reg, int pass, TCGv_i32 var) { tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, neon_reg_offset(reg, pass)); tcg_temp_free_i32(tcg_ctx, var); } -static void neon_store_element(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i32 var) -{ - long offset = neon_element_offset(reg, ele, size); - - switch (size) { - case MO_8: - tcg_gen_st8_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_16: - tcg_gen_st16_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_32: - tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - -static void neon_store_element64(TCGContext *tcg_ctx, int reg, int ele, MemOp size, TCGv_i64 var) -{ - long offset = neon_element_offset(reg, ele, size); - - switch (size) { - case MO_8: - tcg_gen_st8_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_16: - tcg_gen_st16_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_32: - tcg_gen_st32_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - case MO_64: - tcg_gen_st_i64(tcg_ctx, var, tcg_ctx->cpu_env, offset); - break; - default: - g_assert_not_reached(); - break; - } -} - static inline void neon_load_reg64(TCGContext *tcg_ctx, TCGv_i64 var, int reg) { tcg_gen_ld_i64(tcg_ctx, var, tcg_ctx->cpu_env, vfp_reg_offset(1, reg)); @@ -1344,8 +1202,9 @@ static TCGv_ptr vfp_reg_ptr(TCGContext *tcg_ctx, bool dp, int reg) #define ARM_CP_RW_BIT (1 << 20) -/* Include the VFP decoder */ +/* Include the VFP and Neon decoder */ #include "translate-vfp.inc.c" +#include "translate-neon.inc.c" static inline void iwmmxt_load_reg(TCGContext *tcg_ctx, TCGv_i64 var, int reg) { @@ -2660,8 +2519,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) ((VFP_REG_SHR_POS(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1)) #else #define VFP_REG_SHR(x, n) (((n) > 0) ? 
(x) >> (n) : (x) << -(n)) -#define VFP_SREG(insn, bigbit, smallbit) \ - ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1)) #endif #define VFP_DREG(reg, insn, bigbit, smallbit) do { \ if (dc_isar_feature(aa32_simd_r32, s)) { \ @@ -2674,39 +2531,15 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) }} while (0) #ifdef _MSC_VER -#define VFP_SREG_D(insn) VFP_SREG_POS(insn, 12, 22) #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22) -#define VFP_SREG_N(insn) VFP_SREG_POS(insn, 16, 7) #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) -#define VFP_SREG_M(insn) VFP_SREG_NEG(insn, 0, 5) #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) #else -#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22) #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22) -#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7) #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) -#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5) #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) #endif -static void gen_neon_dup_low16(TCGContext *tcg_ctx, TCGv_i32 var) -{ - TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ext16u_i32(tcg_ctx, var, var); - tcg_gen_shli_i32(tcg_ctx, tmp, var, 16); - tcg_gen_or_i32(tcg_ctx, var, var, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); -} - -static void gen_neon_dup_high16(TCGContext *tcg_ctx, TCGv_i32 var) -{ - TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_andi_i32(tcg_ctx, var, var, 0xffff0000); - tcg_gen_shri_i32(tcg_ctx, tmp, var, 16); - tcg_gen_or_i32(tcg_ctx, var, var, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); -} - static inline bool use_goto_tb(DisasContext *s, target_ulong dest) { struct uc_struct *uc = s->uc; @@ -3015,7 +2848,7 @@ static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_ctx, tcg_tgtmode); tcg_temp_free_i32(tcg_ctx, tcg_regno); tcg_temp_free_i32(tcg_ctx, tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) @@ -3038,7 +2871,7 @@ static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_ctx, tcg_tgtmode); tcg_temp_free_i32(tcg_ctx, tcg_regno); store_reg(s, rn, tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Store value to PC as for an exception return (ie don't @@ -3077,1047 +2910,663 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc) gen_rfe(s, pc, load_cpu_field(tcg_ctx, spsr)); } -#define CPU_V001 tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1 +static void gen_gvec_fn3_qc(TCGContext *tcg_ctx, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, + uint32_t max_sz, gen_helper_gvec_3_ptr *fn) +{ + TCGv_ptr qc_ptr = tcg_temp_new_ptr(tcg_ctx); + + tcg_gen_addi_ptr(tcg_ctx, qc_ptr, tcg_ctx->cpu_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_gvec_3_ptr(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, qc_ptr, + opr_sz, max_sz, 0, fn); + tcg_temp_free_ptr(tcg_ctx, qc_ptr); +} -static inline void gen_neon_add(TCGContext *tcg_ctx, int size, TCGv_i32 t0, TCGv_i32 t1) +void gen_gvec_sqrdmlah_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { - switch (size) { - case 0: gen_helper_neon_add_u8(tcg_ctx, t0, t0, t1); break; - case 1: gen_helper_neon_add_u16(tcg_ctx, t0, t0, t1); break; - case 2: tcg_gen_add_i32(tcg_ctx, t0, t0, t1); break; - default: abort(); - } + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlah_s16, 
gen_helper_gvec_qrdmlah_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); } -static inline void gen_neon_rsb(TCGContext *tcg_ctx, int size, TCGv_i32 t0, TCGv_i32 t1) +void gen_gvec_sqrdmlsh_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { - switch (size) { - case 0: gen_helper_neon_sub_u8(tcg_ctx, t0, t1, t0); break; - case 1: gen_helper_neon_sub_u16(tcg_ctx, t0, t1, t0); break; - case 2: tcg_gen_sub_i32(tcg_ctx, t0, t1, t0); break; - default: return; - } + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +#define GEN_CMP0(NAME, COND) \ + static void gen_##NAME##0_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a) \ + { \ + tcg_gen_setcondi_i32(tcg_ctx, COND, d, a, 0); \ + tcg_gen_neg_i32(tcg_ctx, d, d); \ + } \ + static void gen_##NAME##0_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a) \ + { \ + tcg_gen_setcondi_i64(tcg_ctx, COND, d, a, 0); \ + tcg_gen_neg_i64(tcg_ctx, d, d); \ + } \ + static void gen_##NAME##0_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a) \ + { \ + TCGv_vec zero = tcg_const_zeros_vec_matching(tcg_ctx, d); \ + tcg_gen_cmp_vec(tcg_ctx, COND, vece, d, a, zero); \ + tcg_temp_free_vec(tcg_ctx, zero); \ + } \ + void gen_gvec_##NAME##0(TCGContext *tcg_ctx, unsigned vece, uint32_t d, uint32_t m, \ + uint32_t opr_sz, uint32_t max_sz) \ + { \ + const GVecGen2 op[4] = { \ + { .fno = gen_helper_gvec_##NAME##0_b, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .vece = MO_8 }, \ + { .fno = gen_helper_gvec_##NAME##0_h, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .vece = MO_16 }, \ + { .fni4 = gen_##NAME##0_i32, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .vece = MO_32 }, \ + { .fni8 = gen_##NAME##0_i64, \ + .fniv = gen_##NAME##0_vec, \ + .opt_opc = vecop_list_cmp, \ + .prefer_i64 = TCG_TARGET_REG_BITS == 64, \ + .vece = MO_64 }, \ + }; \ + tcg_gen_gvec_2(tcg_ctx, d, m, opr_sz, max_sz, &op[vece]); \ + } + +static const TCGOpcode vecop_list_cmp[] = { + INDEX_op_cmp_vec, 0 +}; + +GEN_CMP0(ceq, TCG_COND_EQ) +GEN_CMP0(cle, TCG_COND_LE) +GEN_CMP0(cge, TCG_COND_GE) +GEN_CMP0(clt, TCG_COND_LT) +GEN_CMP0(cgt, TCG_COND_GT) + +#undef GEN_CMP0 + +static void gen_ssra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar8i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); } -/* 32-bit pairwise ops end up the same as the elementwise versions. 
*/ -#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 -#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 -#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 -#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 +static void gen_ssra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar16i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); +} -#define GEN_NEON_INTEGER_OP_ENV(name) do { \ - switch ((size << 1) | u) { \ - case 0: \ - gen_helper_neon_##name##_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 1: \ - gen_helper_neon_##name##_u8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 2: \ - gen_helper_neon_##name##_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 3: \ - gen_helper_neon_##name##_u16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 4: \ - gen_helper_neon_##name##_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - case 5: \ - gen_helper_neon_##name##_u32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); \ - break; \ - default: return 1; \ - }} while (0) +static void gen_ssra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_sari_i32(tcg_ctx, a, a, shift); + tcg_gen_add_i32(tcg_ctx, d, d, a); +} -#define GEN_NEON_INTEGER_OP(name) do { \ - switch ((size << 1) | u) { \ - case 0: \ - gen_helper_neon_##name##_s8(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 1: \ - gen_helper_neon_##name##_u8(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 2: \ - gen_helper_neon_##name##_s16(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 3: \ - gen_helper_neon_##name##_u16(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 4: \ - gen_helper_neon_##name##_s32(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - case 5: \ - gen_helper_neon_##name##_u32(tcg_ctx, tmp, tmp, tmp2); \ - break; \ - default: return 1; \ - }} while (0) +static void gen_ssra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_sari_i64(tcg_ctx, a, a, shift); + tcg_gen_add_i64(tcg_ctx, d, d, a); +} -static TCGv_i32 neon_load_scratch(TCGContext *tcg_ctx, int scratch) +static void gen_ssra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ld_i32(tcg_ctx, tmp, tcg_ctx->cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); - return tmp; + tcg_gen_sari_vec(tcg_ctx, vece, a, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, a); } -static void neon_store_scratch(TCGContext *tcg_ctx, int scratch, TCGv_i32 var) +void gen_gvec_ssra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); - tcg_temp_free_i32(tcg_ctx, var); + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ssra8_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_ssra16_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ssra32_i32, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ssra64_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest 
= true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. + */ + shift = MIN(shift, (8 << vece) - 1); + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } -static inline TCGv_i32 neon_get_scalar(TCGContext *tcg_ctx, int size, int reg) +static void gen_usra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { - TCGv_i32 tmp; - if (size == 1) { - tmp = neon_load_reg(tcg_ctx, reg & 7, reg >> 4); - if (reg & 8) { - gen_neon_dup_high16(tcg_ctx, tmp); - } else { - gen_neon_dup_low16(tcg_ctx, tmp); - } - } else { - tmp = neon_load_reg(tcg_ctx, reg & 15, reg >> 4); - } - return tmp; + tcg_gen_vec_shr8i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); } -static int gen_neon_unzip(TCGContext *tcg_ctx, int rd, int rm, int size, int q) +static void gen_usra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { - TCGv_ptr pd, pm; - - if (!q && size == 2) { - return 1; - } - pd = vfp_reg_ptr(tcg_ctx, true, rd); - pm = vfp_reg_ptr(tcg_ctx, true, rm); - if (q) { - switch (size) { - case 0: - gen_helper_neon_qunzip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_qunzip16(tcg_ctx, pd, pm); - break; - case 2: - gen_helper_neon_qunzip32(tcg_ctx, pd, pm); - break; - default: - abort(); - } - } else { - switch (size) { - case 0: - gen_helper_neon_unzip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_unzip16(tcg_ctx, pd, pm); - break; - default: - abort(); - } - } - tcg_temp_free_ptr(tcg_ctx, pd); - tcg_temp_free_ptr(tcg_ctx, pm); - return 0; + tcg_gen_vec_shr16i_i64(tcg_ctx, a, a, shift); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); } -static int gen_neon_zip(TCGContext *tcg_ctx, int rd, int rm, int size, int q) +static void gen_usra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) { - TCGv_ptr pd, pm; + tcg_gen_shri_i32(tcg_ctx, a, a, shift); + tcg_gen_add_i32(tcg_ctx, d, d, a); +} - if (!q && size == 2) { - return 1; - } - pd = vfp_reg_ptr(tcg_ctx, true, rd); - pm = vfp_reg_ptr(tcg_ctx, true, rm); - if (q) { - switch (size) { - case 0: - gen_helper_neon_qzip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_qzip16(tcg_ctx, pd, pm); - break; - case 2: - gen_helper_neon_qzip32(tcg_ctx, pd, pm); - break; - default: - abort(); - } +static void gen_usra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(tcg_ctx, a, a, shift); + tcg_gen_add_i64(tcg_ctx, d, d, a); +} + +static void gen_usra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_shri_vec(tcg_ctx, vece, a, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, a); +} + +void gen_gvec_usra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_usra8_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8, }, + { .fni8 = gen_usra16_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16, }, + { .fni4 = gen_usra32_i32, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_s, + .load_dest = true, + .opt_opc = vecop_list, 
+ .vece = MO_32, }, + { .fni8 = gen_usra64_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64, }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in all zeros as input to accumulate: nop. + */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } else { - switch (size) { - case 0: - gen_helper_neon_zip8(tcg_ctx, pd, pm); - break; - case 1: - gen_helper_neon_zip16(tcg_ctx, pd, pm); - break; - default: - abort(); - } + /* Nop, but we do need to clear the tail. */ + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rd_ofs, opr_sz, max_sz); } - tcg_temp_free_ptr(tcg_ctx, pd); - tcg_temp_free_ptr(tcg_ctx, pm); - return 0; } -static void gen_neon_trn_u8(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +/* + * Shift one less than the requested amount, and the low bit is + * the rounding bit. For the 8 and 16-bit operations, because we + * mask the low bit, we can perform a normal integer shift instead + * of a vector shift. + */ +static void gen_srshr8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - TCGv_i32 rd, tmp; + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - rd = tcg_temp_new_i32(tcg_ctx); - tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_8, 1)); + tcg_gen_vec_sar8i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - tcg_gen_shli_i32(tcg_ctx, rd, t0, 8); - tcg_gen_andi_i32(tcg_ctx, rd, rd, 0xff00ff00); - tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0x00ff00ff); - tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); +static void gen_srshr16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - tcg_gen_shri_i32(tcg_ctx, t1, t1, 8); - tcg_gen_andi_i32(tcg_ctx, t1, t1, 0x00ff00ff); - tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xff00ff00); - tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); - tcg_gen_mov_i32(tcg_ctx, t0, rd); + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_16, 1)); + tcg_gen_vec_sar16i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, rd); +static void gen_srshr32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_extract_i32(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_sari_i32(tcg_ctx, d, a, sh); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static void gen_neon_trn_u16(TCGContext *tcg_ctx, TCGv_i32 t0, TCGv_i32 t1) +static void gen_srshr64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - TCGv_i32 rd, tmp; + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - rd = tcg_temp_new_i32(tcg_ctx); - tmp = tcg_temp_new_i32(tcg_ctx); + tcg_gen_extract_i64(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_sari_i64(tcg_ctx, d, a, sh); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - tcg_gen_shli_i32(tcg_ctx, rd, t0, 16); - tcg_gen_andi_i32(tcg_ctx, tmp, t1, 0xffff); - tcg_gen_or_i32(tcg_ctx, rd, rd, tmp); - tcg_gen_shri_i32(tcg_ctx, t1, t1, 16); - tcg_gen_andi_i32(tcg_ctx, tmp, t0, 0xffff0000); - tcg_gen_or_i32(tcg_ctx, t1, t1, tmp); 
- tcg_gen_mov_i32(tcg_ctx, t0, rd); +static void gen_srshr_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec ones = tcg_temp_new_vec_matching(tcg_ctx, d); - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, rd); -} - - -static struct { - int nregs; - int interleave; - int spacing; -} const neon_ls_element_type[11] = { - {1, 4, 1}, - {1, 4, 2}, - {4, 1, 1}, - {2, 2, 2}, - {1, 3, 1}, - {1, 3, 2}, - {3, 1, 1}, - {1, 1, 1}, - {1, 2, 1}, - {1, 2, 2}, - {2, 1, 1} -}; + tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh - 1); + tcg_gen_dupi_vec(tcg_ctx, vece, ones, 1); + tcg_gen_and_vec(tcg_ctx, vece, t, t, ones); + tcg_gen_sari_vec(tcg_ctx, vece, d, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, ones); +} -/* Translate a NEON load/store element instruction. Return nonzero if the - instruction is invalid. */ -static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) +void gen_gvec_srshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - int rd, rn, rm; - int op; - int nregs; - int interleave; - int spacing; - int stride; - int size; - int reg; - int load; - int n; - int vec_size; - int mmu_idx; - MemOp endian; - TCGv_i32 addr; - TCGv_i32 tmp; - TCGv_i32 tmp2; - TCGv_i64 tmp64; + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srshr8_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_srshr16_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_srshr32_i32, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_srshr64_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; - /* FIXME: this access check should not take precedence over UNDEF - * for invalid encodings; we will generate incorrect syndrome information - * for attempts to execute invalid vfp/neon encodings with FP disabled. - */ - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); - if (!s->vfp_enabled) - return 1; - VFP_DREG_D(rd, insn); - rn = (insn >> 16) & 0xf; - rm = insn & 0xf; - load = (insn & (1 << 21)) != 0; - endian = s->be_data; - mmu_idx = get_mem_index(s); - if ((insn & (1 << 23)) == 0) { - /* Load store all elements. */ - op = (insn >> 8) & 0xf; - size = (insn >> 6) & 3; - if (op > 10) - return 1; - /* Catch UNDEF cases for bad values of align field */ - switch (op & 0xc) { - case 4: - if (((insn >> 5) & 1) == 1) { - return 1; - } - break; - case 8: - if (((insn >> 4) & 3) == 3) { - return 1; - } - break; - default: - break; - } - nregs = neon_ls_element_type[op].nregs; - interleave = neon_ls_element_type[op].interleave; - spacing = neon_ls_element_type[op].spacing; - if (size == 3 && (interleave | spacing) != 1) { - return 1; - } - /* For our purposes, bytes are always little-endian. 
*/ - if (size == 0) { - endian = MO_LE; - } - /* Consecutive little-endian elements from a single register - * can be promoted to a larger little-endian operation. + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. */ - if (interleave == 1 && endian == MO_LE) { - size = 3; - } - tmp64 = tcg_temp_new_i64(tcg_ctx); - addr = tcg_temp_new_i32(tcg_ctx); - tmp2 = tcg_const_i32(tcg_ctx, 1 << size); - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - for (n = 0; n < 8 >> size; n++) { - int xs; - for (xs = 0; xs < interleave; xs++) { - int tt = rd + reg + spacing * xs; - - if (load) { - gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); - neon_store_element64(tcg_ctx, tt, n, size, tmp64); - } else { - neon_load_element64(tcg_ctx, tmp64, tt, n, size); - gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); - } - tcg_gen_add_i32(tcg_ctx, addr, addr, tmp2); - } - } - } - tcg_temp_free_i32(tcg_ctx, addr); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_i64(tcg_ctx, tmp64); - stride = nregs * interleave * 8; + tcg_gen_gvec_dup_imm(tcg_ctx, vece, rd_ofs, opr_sz, max_sz, 0); } else { - size = (insn >> 10) & 3; - if (size == 3) { - /* Load single element to all lanes. */ - int a = (insn >> 4) & 1; - if (!load) { - return 1; - } - size = (insn >> 6) & 3; - nregs = ((insn >> 8) & 3) + 1; - - if (size == 3) { - if (nregs != 4 || a == 0) { - return 1; - } - /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */ - size = 2; - } - if (nregs == 1 && a == 1 && size == 0) { - return 1; - } - if (nregs == 3 && a == 1) { - return 1; - } - addr = tcg_temp_new_i32(tcg_ctx); - load_reg_var(s, addr, rn); - - /* VLD1 to all lanes: bit 5 indicates how many Dregs to write. - * VLD2/3/4 to all lanes: bit 5 indicates register stride. - */ - stride = (insn & (1 << 5)) ? 2 : 1; - vec_size = nregs == 1 ? stride * 8 : 8; - - tmp = tcg_temp_new_i32(tcg_ctx); - for (reg = 0; reg < nregs; reg++) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - if ((rd & 1) && vec_size == 16) { - /* We cannot write 16 bytes at once because the - * destination is unaligned. - */ - tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(rd, 0), - 8, 8, tmp); - tcg_gen_gvec_mov(tcg_ctx, 0, neon_reg_offset(rd + 1, 0), - neon_reg_offset(rd, 0), 8, 8); - } else { - tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(rd, 0), - vec_size, vec_size, tmp); - } - tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size); - rd += stride; - } - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, addr); - stride = (1 << size) * nregs; - } else { - /* Single element. */ - int idx = (insn >> 4) & 0xf; - int reg_idx; - switch (size) { - case 0: - reg_idx = (insn >> 5) & 7; - stride = 1; - break; - case 1: - reg_idx = (insn >> 6) & 3; - stride = (insn & (1 << 5)) ? 2 : 1; - break; - case 2: - reg_idx = (insn >> 7) & 1; - stride = (insn & (1 << 6)) ? 2 : 1; - break; - default: - abort(); - } - nregs = ((insn >> 8) & 3) + 1; - /* Catch the UNDEF cases. This is unavoidably a bit messy. 
*/ - switch (nregs) { - case 1: - if (((idx & (1 << size)) != 0) || - (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) { - return 1; - } - break; - case 3: - if ((idx & 1) != 0) { - return 1; - } - /* fall through */ - case 2: - if (size == 2 && (idx & 2) != 0) { - return 1; - } - break; - case 4: - if ((size == 2) && ((idx & 3) == 3)) { - return 1; - } - break; - default: - abort(); - } - if ((rd + stride * (nregs - 1)) > 31) { - /* Attempts to write off the end of the register file - * are UNPREDICTABLE; we choose to UNDEF because otherwise - * the neon_load_reg() would write off the end of the array. - */ - return 1; - } - tmp = tcg_temp_new_i32(tcg_ctx); - addr = tcg_temp_new_i32(tcg_ctx); - load_reg_var(s, addr, rn); - for (reg = 0; reg < nregs; reg++) { - if (load) { - gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - neon_store_element(tcg_ctx, rd, reg_idx, size, tmp); - } else { /* Store */ - neon_load_element(tcg_ctx, tmp, rd, reg_idx, size); - gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), - s->be_data | size); - } - rd += stride; - tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size); - } - tcg_temp_free_i32(tcg_ctx, addr); - tcg_temp_free_i32(tcg_ctx, tmp); - stride = nregs * (1 << size); - } + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } - if (rm != 15) { - TCGv_i32 base; - - base = load_reg(s, rn); - if (rm == 13) { - tcg_gen_addi_i32(tcg_ctx, base, base, stride); - } else { - TCGv_i32 index; - index = load_reg(s, rm); - tcg_gen_add_i32(tcg_ctx, base, base, index); - tcg_temp_free_i32(tcg_ctx, index); - } - store_reg(s, rn, base); - } - return 0; } -static inline void gen_neon_narrow(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_narrow_u8(tcg_ctx, dest, src); break; - case 1: gen_helper_neon_narrow_u16(tcg_ctx, dest, src); break; - case 2: tcg_gen_extrl_i64_i32(tcg_ctx, dest, src); break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + gen_srshr8_i64(tcg_ctx, t, a, sh); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_narrow_sats(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_narrow_sat_s8(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 1: gen_helper_neon_narrow_sat_s16(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 2: gen_helper_neon_narrow_sat_s32(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + gen_srshr16_i64(tcg_ctx, t, a, sh); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_narrow_satu(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) { - switch (size) { - case 0: gen_helper_neon_narrow_sat_u8(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 1: gen_helper_neon_narrow_sat_u16(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 2: gen_helper_neon_narrow_sat_u32(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - default: abort(); - } + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + gen_srshr32_i32(tcg_ctx, t, a, sh); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static inline void 
gen_neon_unarrow_sats(TCGContext *tcg_ctx, int size, TCGv_i32 dest, TCGv_i64 src) +static void gen_srsra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_unarrow_sat8(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 1: gen_helper_neon_unarrow_sat16(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - case 2: gen_helper_neon_unarrow_sat32(tcg_ctx, dest, tcg_ctx->cpu_env, src); break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + gen_srshr64_i64(tcg_ctx, t, a, sh); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_shift_narrow(TCGContext *tcg_ctx, int size, TCGv_i32 var, TCGv_i32 shift, - int q, int u) +static void gen_srsra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - if (q) { - if (u) { - switch (size) { - case 1: gen_helper_neon_rshl_u16(tcg_ctx, var, var, shift); break; - case 2: gen_helper_neon_rshl_u32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_rshl_s16(tcg_ctx, var, var, shift); break; - case 2: gen_helper_neon_rshl_s32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } - } else { - if (u) { - switch (size) { - case 1: gen_helper_neon_shl_u16(tcg_ctx, var, var, shift); break; - case 2: gen_ushl_i32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } else { - switch (size) { - case 1: gen_helper_neon_shl_s16(tcg_ctx, var, var, shift); break; - case 2: gen_sshl_i32(tcg_ctx, var, var, shift); break; - default: abort(); - } - } - } + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + gen_srshr_vec(tcg_ctx, vece, t, a, sh); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); } -static inline void gen_neon_widen(TCGContext *tcg_ctx, TCGv_i64 dest, TCGv_i32 src, int size, int u) +void gen_gvec_srsra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - if (u) { - switch (size) { - case 0: gen_helper_neon_widen_u8(tcg_ctx, dest, src); break; - case 1: gen_helper_neon_widen_u16(tcg_ctx, dest, src); break; - case 2: tcg_gen_extu_i32_i64(tcg_ctx, dest, src); break; - default: abort(); - } + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srsra8_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_srsra16_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_srsra32_i32, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_srsra64_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. With accumulation, this leaves D unchanged. + */ + if (shift == (8 << vece)) { + /* Nop, but we do need to clear the tail. 
*/ + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rd_ofs, opr_sz, max_sz); } else { - switch (size) { - case 0: gen_helper_neon_widen_s8(tcg_ctx, dest, src); break; - case 1: gen_helper_neon_widen_s16(tcg_ctx, dest, src); break; - case 2: tcg_gen_ext_i32_i64(tcg_ctx, dest, src); break; - default: abort(); - } + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } - tcg_temp_free_i32(tcg_ctx, src); } -static inline void gen_neon_addl(TCGContext *tcg_ctx, int size) +static void gen_urshr8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_addl_u16(tcg_ctx, CPU_V001); break; - case 1: gen_helper_neon_addl_u32(tcg_ctx, CPU_V001); break; - case 2: tcg_gen_add_i64(tcg_ctx, CPU_V001); break; - default: abort(); - } -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static inline void gen_neon_subl(TCGContext *tcg_ctx, int size) -{ - switch (size) { - case 0: gen_helper_neon_subl_u16(tcg_ctx, CPU_V001); break; - case 1: gen_helper_neon_subl_u32(tcg_ctx, CPU_V001); break; - case 2: tcg_gen_sub_i64(tcg_ctx, CPU_V001); break; - default: abort(); - } + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_8, 1)); + tcg_gen_vec_shr8i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_negl(TCGContext *tcg_ctx, TCGv_i64 var, int size) +static void gen_urshr16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - switch (size) { - case 0: gen_helper_neon_negl_u16(tcg_ctx, var, var); break; - case 1: gen_helper_neon_negl_u32(tcg_ctx, var, var); break; - case 2: - tcg_gen_neg_i64(tcg_ctx, var, var); - break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_shri_i64(tcg_ctx, t, a, sh - 1); + tcg_gen_andi_i64(tcg_ctx, t, t, dup_const(MO_16, 1)); + tcg_gen_vec_shr16i_i64(tcg_ctx, d, a, sh); + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static inline void gen_neon_addl_saturate(TCGContext *tcg_ctx, TCGv_i64 op0, TCGv_i64 op1, int size) +static void gen_urshr32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) { - switch (size) { - case 1: gen_helper_neon_addl_saturate_s32(tcg_ctx, op0, tcg_ctx->cpu_env, op0, op1); break; - case 2: gen_helper_neon_addl_saturate_s64(tcg_ctx, op0, tcg_ctx->cpu_env, op0, op1); break; - default: abort(); - } + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_extract_i32(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_shri_i32(tcg_ctx, d, a, sh); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static inline void gen_neon_mull(TCGContext *tcg_ctx, TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b, - int size, int u) +static void gen_urshr64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - TCGv_i64 tmp; - - switch ((size << 1) | u) { - case 0: gen_helper_neon_mull_s8(tcg_ctx, dest, a, b); break; - case 1: gen_helper_neon_mull_u8(tcg_ctx, dest, a, b); break; - case 2: gen_helper_neon_mull_s16(tcg_ctx, dest, a, b); break; - case 3: gen_helper_neon_mull_u16(tcg_ctx, dest, a, b); break; - case 4: - tmp = gen_muls_i64_i32(tcg_ctx, a, b); - tcg_gen_mov_i64(tcg_ctx, dest, tmp); - tcg_temp_free_i64(tcg_ctx, tmp); - break; - case 5: - tmp = gen_mulu_i64_i32(tcg_ctx, a, b); - tcg_gen_mov_i64(tcg_ctx, dest, tmp); - tcg_temp_free_i64(tcg_ctx, tmp); - break; - default: abort(); - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - /* gen_helper_neon_mull_[su]{8|16} do not free their parameters. 
- Don't forget to clean them now. */ - if (size < 2) { - tcg_temp_free_i32(tcg_ctx, a); - tcg_temp_free_i32(tcg_ctx, b); - } + tcg_gen_extract_i64(tcg_ctx, t, a, sh - 1, 1); + tcg_gen_shri_i64(tcg_ctx, d, a, sh); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void gen_neon_narrow_op(TCGContext *tcg_ctx, int op, int u, int size, - TCGv_i32 dest, TCGv_i64 src) +static void gen_urshr_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) { - if (op) { - if (u) { - gen_neon_unarrow_sats(tcg_ctx, size, dest, src); - } else { - gen_neon_narrow(tcg_ctx, size, dest, src); - } - } else { - if (u) { - gen_neon_narrow_satu(tcg_ctx, size, dest, src); - } else { - gen_neon_narrow_sats(tcg_ctx, size, dest, src); - } - } -} + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec ones = tcg_temp_new_vec_matching(tcg_ctx, d); -/* Symbolic constants for op fields for Neon 3-register same-length. - * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B - * table A7-9. - */ -#define NEON_3R_VHADD 0 -#define NEON_3R_VQADD 1 -#define NEON_3R_VRHADD 2 -#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */ -#define NEON_3R_VHSUB 4 -#define NEON_3R_VQSUB 5 -#define NEON_3R_VCGT 6 -#define NEON_3R_VCGE 7 -#define NEON_3R_VSHL 8 -#define NEON_3R_VQSHL 9 -#define NEON_3R_VRSHL 10 -#define NEON_3R_VQRSHL 11 -#define NEON_3R_VMAX 12 -#define NEON_3R_VMIN 13 -#define NEON_3R_VABD 14 -#define NEON_3R_VABA 15 -#define NEON_3R_VADD_VSUB 16 -#define NEON_3R_VTST_VCEQ 17 -#define NEON_3R_VML 18 /* VMLA, VMLS */ -#define NEON_3R_VMUL 19 -#define NEON_3R_VPMAX 20 -#define NEON_3R_VPMIN 21 -#define NEON_3R_VQDMULH_VQRDMULH 22 -#define NEON_3R_VPADD_VQRDMLAH 23 -#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */ -#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */ -#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */ -#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */ -#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */ -#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */ -#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */ -#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */ - -static const uint8_t neon_3r_sizes[] = { - [NEON_3R_VHADD] = 0x7, - [NEON_3R_VQADD] = 0xf, - [NEON_3R_VRHADD] = 0x7, - [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */ - [NEON_3R_VHSUB] = 0x7, - [NEON_3R_VQSUB] = 0xf, - [NEON_3R_VCGT] = 0x7, - [NEON_3R_VCGE] = 0x7, - [NEON_3R_VSHL] = 0xf, - [NEON_3R_VQSHL] = 0xf, - [NEON_3R_VRSHL] = 0xf, - [NEON_3R_VQRSHL] = 0xf, - [NEON_3R_VMAX] = 0x7, - [NEON_3R_VMIN] = 0x7, - [NEON_3R_VABD] = 0x7, - [NEON_3R_VABA] = 0x7, - [NEON_3R_VADD_VSUB] = 0xf, - [NEON_3R_VTST_VCEQ] = 0x7, - [NEON_3R_VML] = 0x7, - [NEON_3R_VMUL] = 0x7, - [NEON_3R_VPMAX] = 0x7, - [NEON_3R_VPMIN] = 0x7, - [NEON_3R_VQDMULH_VQRDMULH] = 0x6, - [NEON_3R_VPADD_VQRDMLAH] = 0x7, - [NEON_3R_SHA] = 0xf, /* size field encodes op type */ - [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */ - [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */ - [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */ -}; + tcg_gen_shri_vec(tcg_ctx, vece, t, a, shift - 1); + tcg_gen_dupi_vec(tcg_ctx, vece, ones, 1); + 
tcg_gen_and_vec(tcg_ctx, vece, t, t, ones); + tcg_gen_shri_vec(tcg_ctx, vece, d, a, shift); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); -/* Symbolic constants for op fields for Neon 2-register miscellaneous. - * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B - * table A7-13. - */ -#define NEON_2RM_VREV64 0 -#define NEON_2RM_VREV32 1 -#define NEON_2RM_VREV16 2 -#define NEON_2RM_VPADDL 4 -#define NEON_2RM_VPADDL_U 5 -#define NEON_2RM_AESE 6 /* Includes AESD */ -#define NEON_2RM_AESMC 7 /* Includes AESIMC */ -#define NEON_2RM_VCLS 8 -#define NEON_2RM_VCLZ 9 -#define NEON_2RM_VCNT 10 -#define NEON_2RM_VMVN 11 -#define NEON_2RM_VPADAL 12 -#define NEON_2RM_VPADAL_U 13 -#define NEON_2RM_VQABS 14 -#define NEON_2RM_VQNEG 15 -#define NEON_2RM_VCGT0 16 -#define NEON_2RM_VCGE0 17 -#define NEON_2RM_VCEQ0 18 -#define NEON_2RM_VCLE0 19 -#define NEON_2RM_VCLT0 20 -#define NEON_2RM_SHA1H 21 -#define NEON_2RM_VABS 22 -#define NEON_2RM_VNEG 23 -#define NEON_2RM_VCGT0_F 24 -#define NEON_2RM_VCGE0_F 25 -#define NEON_2RM_VCEQ0_F 26 -#define NEON_2RM_VCLE0_F 27 -#define NEON_2RM_VCLT0_F 28 -#define NEON_2RM_VABS_F 30 -#define NEON_2RM_VNEG_F 31 -#define NEON_2RM_VSWP 32 -#define NEON_2RM_VTRN 33 -#define NEON_2RM_VUZP 34 -#define NEON_2RM_VZIP 35 -#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ -#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ -#define NEON_2RM_VSHLL 38 -#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */ -#define NEON_2RM_VRINTN 40 -#define NEON_2RM_VRINTX 41 -#define NEON_2RM_VRINTA 42 -#define NEON_2RM_VRINTZ 43 -#define NEON_2RM_VCVT_F16_F32 44 -#define NEON_2RM_VRINTM 45 -#define NEON_2RM_VCVT_F32_F16 46 -#define NEON_2RM_VRINTP 47 -#define NEON_2RM_VCVTAU 48 -#define NEON_2RM_VCVTAS 49 -#define NEON_2RM_VCVTNU 50 -#define NEON_2RM_VCVTNS 51 -#define NEON_2RM_VCVTPU 52 -#define NEON_2RM_VCVTPS 53 -#define NEON_2RM_VCVTMU 54 -#define NEON_2RM_VCVTMS 55 -#define NEON_2RM_VRECPE 56 -#define NEON_2RM_VRSQRTE 57 -#define NEON_2RM_VRECPE_F 58 -#define NEON_2RM_VRSQRTE_F 59 -#define NEON_2RM_VCVT_FS 60 -#define NEON_2RM_VCVT_FU 61 -#define NEON_2RM_VCVT_SF 62 -#define NEON_2RM_VCVT_UF 63 - -static bool neon_2rm_is_v8_op(int op) -{ - /* Return true if this neon 2reg-misc op is ARMv8 and up */ - switch (op) { - case NEON_2RM_VRINTN: - case NEON_2RM_VRINTA: - case NEON_2RM_VRINTM: - case NEON_2RM_VRINTP: - case NEON_2RM_VRINTZ: - case NEON_2RM_VRINTX: - case NEON_2RM_VCVTAU: - case NEON_2RM_VCVTAS: - case NEON_2RM_VCVTNU: - case NEON_2RM_VCVTNS: - case NEON_2RM_VCVTPU: - case NEON_2RM_VCVTPS: - case NEON_2RM_VCVTMU: - case NEON_2RM_VCVTMS: - return true; - default: - return false; - } + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, ones); } -/* Each entry in this array has bit n set if the insn allows - * size value n (otherwise it will UNDEF). Since unallocated - * op values will have no bits set they always UNDEF. 
- */ -static const uint8_t neon_2rm_sizes[] = { - [NEON_2RM_VREV64] = 0x7, - [NEON_2RM_VREV32] = 0x3, - [NEON_2RM_VREV16] = 0x1, - [NEON_2RM_VPADDL] = 0x7, - [NEON_2RM_VPADDL_U] = 0x7, - [NEON_2RM_AESE] = 0x1, - [NEON_2RM_AESMC] = 0x1, - [NEON_2RM_VCLS] = 0x7, - [NEON_2RM_VCLZ] = 0x7, - [NEON_2RM_VCNT] = 0x1, - [NEON_2RM_VMVN] = 0x1, - [NEON_2RM_VPADAL] = 0x7, - [NEON_2RM_VPADAL_U] = 0x7, - [NEON_2RM_VQABS] = 0x7, - [NEON_2RM_VQNEG] = 0x7, - [NEON_2RM_VCGT0] = 0x7, - [NEON_2RM_VCGE0] = 0x7, - [NEON_2RM_VCEQ0] = 0x7, - [NEON_2RM_VCLE0] = 0x7, - [NEON_2RM_VCLT0] = 0x7, - [NEON_2RM_SHA1H] = 0x4, - [NEON_2RM_VABS] = 0x7, - [NEON_2RM_VNEG] = 0x7, - [NEON_2RM_VCGT0_F] = 0x4, - [NEON_2RM_VCGE0_F] = 0x4, - [NEON_2RM_VCEQ0_F] = 0x4, - [NEON_2RM_VCLE0_F] = 0x4, - [NEON_2RM_VCLT0_F] = 0x4, - [NEON_2RM_VABS_F] = 0x4, - [NEON_2RM_VNEG_F] = 0x4, - [NEON_2RM_VSWP] = 0x1, - [NEON_2RM_VTRN] = 0x7, - [NEON_2RM_VUZP] = 0x7, - [NEON_2RM_VZIP] = 0x7, - [NEON_2RM_VMOVN] = 0x7, - [NEON_2RM_VQMOVN] = 0x7, - [NEON_2RM_VSHLL] = 0x7, - [NEON_2RM_SHA1SU1] = 0x4, - [NEON_2RM_VRINTN] = 0x4, - [NEON_2RM_VRINTX] = 0x4, - [NEON_2RM_VRINTA] = 0x4, - [NEON_2RM_VRINTZ] = 0x4, - [NEON_2RM_VCVT_F16_F32] = 0x2, - [NEON_2RM_VRINTM] = 0x4, - [NEON_2RM_VCVT_F32_F16] = 0x2, - [NEON_2RM_VRINTP] = 0x4, - [NEON_2RM_VCVTAU] = 0x4, - [NEON_2RM_VCVTAS] = 0x4, - [NEON_2RM_VCVTNU] = 0x4, - [NEON_2RM_VCVTNS] = 0x4, - [NEON_2RM_VCVTPU] = 0x4, - [NEON_2RM_VCVTPS] = 0x4, - [NEON_2RM_VCVTMU] = 0x4, - [NEON_2RM_VCVTMS] = 0x4, - [NEON_2RM_VRECPE] = 0x4, - [NEON_2RM_VRSQRTE] = 0x4, - [NEON_2RM_VRECPE_F] = 0x4, - [NEON_2RM_VRSQRTE_F] = 0x4, - [NEON_2RM_VCVT_FS] = 0x4, - [NEON_2RM_VCVT_FU] = 0x4, - [NEON_2RM_VCVT_SF] = 0x4, - [NEON_2RM_VCVT_UF] = 0x4, -}; +void gen_gvec_urshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_urshr8_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_urshr16_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_urshr32_i32, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_urshr64_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); -/* Expand v8.1 simd helper. */ -static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, - int q, int rd, int rn, int rm) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (dc_isar_feature(aa32_rdm, s)) { - int opr_sz = (1 + q) * 8; - tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), - vfp_reg_offset(1, rn), - vfp_reg_offset(1, rm), tcg_ctx->cpu_env, - opr_sz, opr_sz, 0, fn); - return 0; + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in zero. With rounding, this produces a + * copy of the most significant bit. 
+ */ + tcg_gen_gvec_shri(tcg_ctx, vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); + } else { + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } - return 1; } -static void gen_ssra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +static void gen_ursra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - tcg_gen_vec_sar8i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static void gen_ssra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar16i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); + if (sh == 8) { + tcg_gen_vec_shr8i_i64(tcg_ctx, t, a, 7); + } else { + gen_urshr8_i64(tcg_ctx, t, a, sh); + } + tcg_gen_vec_add8_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void gen_ssra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) +static void gen_ursra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - tcg_gen_sari_i32(tcg_ctx, a, a, shift); - tcg_gen_add_i32(tcg_ctx, d, d, a); -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static void gen_ssra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_sari_i64(tcg_ctx, a, a, shift); - tcg_gen_add_i64(tcg_ctx, d, d, a); + if (sh == 16) { + tcg_gen_vec_shr16i_i64(tcg_ctx, t, a, 15); + } else { + gen_urshr16_i64(tcg_ctx, t, a, sh); + } + tcg_gen_vec_add16_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void gen_ssra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +static void gen_ursra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t sh) { - tcg_gen_sari_vec(tcg_ctx, vece, a, a, sh); - tcg_gen_add_vec(tcg_ctx, vece, d, d, a); -} - -static const TCGOpcode vecop_list_ssra[] = { - INDEX_op_sari_vec, INDEX_op_add_vec, 0 -}; - -const GVecGen2i ssra_op[4] = { - { .fni8 = gen_ssra8_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opt_opc = vecop_list_ssra, - .vece = MO_8 }, - { .fni8 = gen_ssra16_i64, - .fniv = gen_ssra_vec, - .load_dest = true, - .opt_opc = vecop_list_ssra, - .vece = MO_16 }, - { .fni4 = gen_ssra32_i32, - .fniv = gen_ssra_vec, - .load_dest = true, - .opt_opc = vecop_list_ssra, - .vece = MO_32 }, - { .fni8 = gen_ssra64_i64, - .fniv = gen_ssra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list_ssra, - .load_dest = true, - .vece = MO_64 }, -}; + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); -static void gen_usra8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr8i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add8_i64(tcg_ctx, d, d, a); + if (sh == 32) { + tcg_gen_shri_i32(tcg_ctx, t, a, 31); + } else { + gen_urshr32_i32(tcg_ctx, t, a, sh); + } + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); } -static void gen_usra16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +static void gen_ursra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t sh) { - tcg_gen_vec_shr16i_i64(tcg_ctx, a, a, shift); - tcg_gen_vec_add16_i64(tcg_ctx, d, d, a); -} + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); -static void gen_usra32_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(tcg_ctx, a, a, shift); - tcg_gen_add_i32(tcg_ctx, d, d, a); + if (sh == 64) { + tcg_gen_shri_i64(tcg_ctx, t, a, 63); + } else { + gen_urshr64_i64(tcg_ctx, t, a, sh); + } + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); } -static void 
gen_usra64_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) +static void gen_ursra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - tcg_gen_shri_i64(tcg_ctx, a, a, shift); - tcg_gen_add_i64(tcg_ctx, d, d, a); + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + if (sh == (8 << vece)) { + tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh - 1); + } else { + gen_urshr_vec(tcg_ctx, vece, t, a, sh); + } + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); } -static void gen_usra_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +void gen_gvec_ursra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) { - tcg_gen_shri_vec(tcg_ctx, vece, a, a, sh); - tcg_gen_add_vec(tcg_ctx, vece, d, d, a); -} + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ursra8_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_ursra16_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_ursra32_i32, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_ursra64_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; -static const TCGOpcode vecop_list_usra[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 -}; + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); -const GVecGen2i usra_op[4] = { - { .fni8 = gen_usra8_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_8, }, - { .fni8 = gen_usra16_i64, - .fniv = gen_usra_vec, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_16, }, - { .fni4 = gen_usra32_i32, - .fniv = gen_usra_vec, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_32, }, - { .fni8 = gen_usra64_i64, - .fniv = gen_usra_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_usra, - .vece = MO_64, }, -}; + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); +} static void gen_shr8_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { @@ -4157,47 +3606,62 @@ static void gen_shr64_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64 static void gen_shr_ins_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - if (sh == 0) { - tcg_gen_mov_vec(tcg_ctx, d, a); + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); + tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh); + tcg_gen_and_vec(tcg_ctx, vece, d, d, m); + tcg_gen_or_vec(tcg_ctx, vece, d, d, t); + + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, m); +} + +void gen_gvec_sri(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shr8_ins_i64, + .fniv = gen_shr_ins_vec, 
+ .fno = gen_helper_gvec_sri_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shr16_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shr32_ins_i32, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shr64_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* Shift of esize leaves destination unchanged. */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); } else { - TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); - - tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); - tcg_gen_shri_vec(tcg_ctx, vece, t, a, sh); - tcg_gen_and_vec(tcg_ctx, vece, d, d, m); - tcg_gen_or_vec(tcg_ctx, vece, d, d, t); - - tcg_temp_free_vec(tcg_ctx, t); - tcg_temp_free_vec(tcg_ctx, m); - } -} - -static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 }; - -const GVecGen2i sri_op[4] = { - { .fni8 = gen_shr8_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_8 }, - { .fni8 = gen_shr16_ins_i64, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_16 }, - { .fni4 = gen_shr32_ins_i32, - .fniv = gen_shr_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_32 }, - { .fni8 = gen_shr64_ins_i64, - .fniv = gen_shr_ins_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_sri, - .vece = MO_64 }, -}; + /* Nop, but we do need to clear the tail. 
*/ + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rd_ofs, opr_sz, max_sz); + } +} static void gen_shl8_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t shift) { @@ -4235,47 +3699,60 @@ static void gen_shl64_ins_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64 static void gen_shl_ins_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) { - if (sh == 0) { - tcg_gen_mov_vec(tcg_ctx, d, a); + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_shli_vec(tcg_ctx, vece, t, a, sh); + tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK(0, sh)); + tcg_gen_and_vec(tcg_ctx, vece, d, d, m); + tcg_gen_or_vec(tcg_ctx, vece, d, d, t); + + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_vec(tcg_ctx, m); +} + +void gen_gvec_sli(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shl8_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shl16_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shl32_ins_i32, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shl64_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [0..esize-1]. */ + tcg_debug_assert(shift >= 0); + tcg_debug_assert(shift < (8 << vece)); + + if (shift == 0) { + tcg_gen_gvec_mov(tcg_ctx, vece, rd_ofs, rm_ofs, opr_sz, max_sz); } else { - TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - TCGv_vec m = tcg_temp_new_vec_matching(tcg_ctx, d); - - tcg_gen_dupi_vec(tcg_ctx, vece, m, MAKE_64BIT_MASK(0, sh)); - tcg_gen_shli_vec(tcg_ctx, vece, t, a, sh); - tcg_gen_and_vec(tcg_ctx, vece, d, d, m); - tcg_gen_or_vec(tcg_ctx, vece, d, d, t); - - tcg_temp_free_vec(tcg_ctx, t); - tcg_temp_free_vec(tcg_ctx, m); - } -} - -static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 }; - -const GVecGen2i sli_op[4] = { - { .fni8 = gen_shl8_ins_i64, - .fniv = gen_shl_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_8 }, - { .fni8 = gen_shl16_ins_i64, - .fniv = gen_shl_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_16 }, - { .fni4 = gen_shl32_ins_i32, - .fniv = gen_shl_ins_vec, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_32 }, - { .fni8 = gen_shl64_ins_i64, - .fniv = gen_shl_ins_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_sli, - .vece = MO_64 }, -}; + tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} static void gen_mla8_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { @@ -4340,62 +3817,69 @@ static void gen_mls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec /* Note that while NEON does not support VMLA and VMLS as 64-bit ops, * these tables are shared with AArch64 which does support them. 
*/ +void gen_gvec_mla(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} -static const TCGOpcode vecop_list_mla[] = { - INDEX_op_mul_vec, INDEX_op_add_vec, 0 -}; - -static const TCGOpcode vecop_list_mls[] = { - INDEX_op_mul_vec, INDEX_op_sub_vec, 0 -}; - -const GVecGen3 mla_op[4] = { - { .fni4 = gen_mla8_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_8 }, - { .fni4 = gen_mla16_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_16 }, - { .fni4 = gen_mla32_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_32 }, - { .fni8 = gen_mla64_i64, - .fniv = gen_mla_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_mla, - .vece = MO_64 }, -}; - -const GVecGen3 mls_op[4] = { - { .fni4 = gen_mls8_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_8 }, - { .fni4 = gen_mls16_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_16 }, - { .fni4 = gen_mls32_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_32 }, - { .fni8 = gen_mls64_i64, - .fniv = gen_mls_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list_mls, - .vece = MO_64 }, -}; +void gen_gvec_mls(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} /* CMTST : test is "if (X & Y != 0)". 
*/ static void gen_cmtst_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) @@ -4419,27 +3903,31 @@ static void gen_cmtst_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_v tcg_gen_cmp_vec(tcg_ctx, TCG_COND_NE, vece, d, d, a); } -static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 }; - -const GVecGen3 cmtst_op[4] = { - { .fni4 = gen_helper_neon_tst_u8, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list_cmtst, - .vece = MO_8 }, - { .fni4 = gen_helper_neon_tst_u16, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list_cmtst, - .vece = MO_16 }, - { .fni4 = gen_cmtst_i32, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list_cmtst, - .vece = MO_32 }, - { .fni8 = gen_cmtst_i64, - .fniv = gen_cmtst_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list_cmtst, - .vece = MO_64 }, -}; +void gen_gvec_cmtst(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_helper_neon_tst_u8, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_tst_u16, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_cmtst_i32, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_cmtst_i64, + .fniv = gen_cmtst_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} void gen_ushl_i32(TCGContext *tcg_ctx, TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) { @@ -4557,29 +4045,33 @@ static void gen_ushl_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec dst, tcg_temp_free_vec(tcg_ctx, rsh); } -static const TCGOpcode ushl_list[] = { - INDEX_op_neg_vec, INDEX_op_shlv_vec, - INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 -}; - -const GVecGen3 ushl_op[4] = { - { .fniv = gen_ushl_vec, - .fno = gen_helper_gvec_ushl_b, - .opt_opc = ushl_list, - .vece = MO_8 }, - { .fniv = gen_ushl_vec, - .fno = gen_helper_gvec_ushl_h, - .opt_opc = ushl_list, - .vece = MO_16 }, - { .fni4 = gen_ushl_i32, - .fniv = gen_ushl_vec, - .opt_opc = ushl_list, - .vece = MO_32 }, - { .fni8 = gen_ushl_i64, - .fniv = gen_ushl_vec, - .opt_opc = ushl_list, - .vece = MO_64 }, -}; +void gen_gvec_ushl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_shlv_vec, + INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ushl_i32, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ushl_i64, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} void gen_sshl_i32(TCGContext *tcg_ctx, TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) { @@ -4691,29 +4183,33 @@ static void gen_sshl_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec dst, tcg_temp_free_vec(tcg_ctx, tmp); } -static const TCGOpcode sshl_list[] = { - INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, - INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 -}; - -const 
GVecGen3 sshl_op[4] = { - { .fniv = gen_sshl_vec, - .fno = gen_helper_gvec_sshl_b, - .opt_opc = sshl_list, - .vece = MO_8 }, - { .fniv = gen_sshl_vec, - .fno = gen_helper_gvec_sshl_h, - .opt_opc = sshl_list, - .vece = MO_16 }, - { .fni4 = gen_sshl_i32, - .fniv = gen_sshl_vec, - .opt_opc = sshl_list, - .vece = MO_32 }, - { .fni8 = gen_sshl_i64, - .fniv = gen_sshl_vec, - .opt_opc = sshl_list, - .vece = MO_64 }, -}; +void gen_gvec_sshl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, + INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sshl_i32, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sshl_i64, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_uqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4726,32 +4222,37 @@ static void gen_uqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_uqadd[] = { - INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 -}; - -const GVecGen4 uqadd_op[4] = { - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_b, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_8 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_h, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_16 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_s, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_32 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_d, - .write_aofs = true, - .opt_opc = vecop_list_uqadd, - .vece = MO_64 }, -}; +void gen_gvec_uqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_b, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_h, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_s, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_d, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_sqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4764,32 +4265,37 @@ static void gen_sqadd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_sqadd[] = { - INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 -}; - -const GVecGen4 sqadd_op[4] = { - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_b, - 
.opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_h, - .opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_s, - .opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_d, - .opt_opc = vecop_list_sqadd, - .write_aofs = true, - .vece = MO_64 }, -}; +void gen_gvec_sqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_uqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4802,32 +4308,37 @@ static void gen_uqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_uqsub[] = { - INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 -}; - -const GVecGen4 uqsub_op[4] = { - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_b, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_h, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_s, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_d, - .opt_opc = vecop_list_uqsub, - .write_aofs = true, - .vece = MO_64 }, -}; +void gen_gvec_uqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} static void gen_sqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) @@ -4840,2321 +4351,274 @@ static void gen_sqsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec t, TCGv_v tcg_temp_free_vec(tcg_ctx, x); } -static const TCGOpcode vecop_list_sqsub[] = { - 
INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 -}; - -const GVecGen4 sqsub_op[4] = { - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_b, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_h, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_s, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_d, - .opt_opc = vecop_list_sqsub, - .write_aofs = true, - .vece = MO_64 }, -}; - -/* Translate a NEON data processing instruction. Return nonzero if the - instruction is invalid. - We process data in a mixture of 32-bit and 64-bit chunks. - Mostly we use 32-bit chunks so we can use normal scalar instructions. */ - -static int disas_neon_data_insn(DisasContext *s, uint32_t insn) +void gen_gvec_sqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - int op; - int q; - int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs; - int size; - int shift; - int pass; - int count; - int pairwise; - int u; - int vec_size; - uint32_t imm; - TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; - TCGv_ptr ptr1, ptr2, ptr3; - TCGv_i64 tmp64; - - /* FIXME: this access check should not take precedence over UNDEF - * for invalid encodings; we will generate incorrect syndrome information - * for attempts to execute invalid vfp/neon encodings with FP disabled. - */ - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - - if (!s->vfp_enabled) - return 1; - q = (insn & (1 << 6)) != 0; - u = (insn >> 24) & 1; - VFP_DREG_D(rd, insn); - VFP_DREG_N(rn, insn); - VFP_DREG_M(rm, insn); - size = (insn >> 20) & 3; - vec_size = q ? 16 : 8; - rd_ofs = neon_reg_offset(rd, 0); - rn_ofs = neon_reg_offset(rn, 0); - rm_ofs = neon_reg_offset(rm, 0); - - if ((insn & (1 << 23)) == 0) { - /* Three register same length. */ - op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); - /* Catch invalid op and bad size combinations: UNDEF */ - if ((neon_3r_sizes[op] & (1 << size)) == 0) { - return 1; - } - /* All insns of this form UNDEF for either this condition or the - * superset of cases "Q==1"; we catch the latter later. - */ - if (q && ((rd | rn | rm) & 1)) { - return 1; - } - switch (op) { - case NEON_3R_SHA: - /* The SHA-1/SHA-256 3-register instructions require special - * treatment here, as their size field is overloaded as an - * op type selector, and they all consume their input in a - * single pass. 
- */ - if (!q) { - return 1; - } - if (!u) { /* SHA-1 */ - if (!dc_isar_feature(aa32_sha1, s)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rn); - ptr3 = vfp_reg_ptr(tcg_ctx, true, rm); - tmp4 = tcg_const_i32(tcg_ctx, size); - gen_helper_crypto_sha1_3reg(tcg_ctx, ptr1, ptr2, ptr3, tmp4); - tcg_temp_free_i32(tcg_ctx, tmp4); - } else { /* SHA-256 */ - if (!dc_isar_feature(aa32_sha2, s) || size == 3) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rn); - ptr3 = vfp_reg_ptr(tcg_ctx, true, rm); - switch (size) { - case 0: - gen_helper_crypto_sha256h(tcg_ctx, ptr1, ptr2, ptr3); - break; - case 1: - gen_helper_crypto_sha256h2(tcg_ctx, ptr1, ptr2, ptr3); - break; - case 2: - gen_helper_crypto_sha256su1(tcg_ctx, ptr1, ptr2, ptr3); - break; - } - } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - tcg_temp_free_ptr(tcg_ctx, ptr3); - return 0; - - case NEON_3R_VPADD_VQRDMLAH: - if (!u) { - break; /* VPADD */ - } - /* VQRDMLAH */ - switch (size) { - case 1: - return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16, - q, rd, rn, rm); - case 2: - return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32, - q, rd, rn, rm); - } - return 1; - - case NEON_3R_VFM_VQRDMLSH: - if (!u) { - /* VFM, VFMS */ - if (size == 1) { - return 1; - } - break; - } - /* VQRDMLSH */ - switch (size) { - case 1: - return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16, - q, rd, rn, rm); - case 2: - return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32, - q, rd, rn, rm); - } - return 1; - - case NEON_3R_LOGIC: /* Logic ops. */ - switch ((u << 2) | size) { - case 0: /* VAND */ - tcg_gen_gvec_and(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 1: /* VBIC */ - tcg_gen_gvec_andc(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 2: /* VORR */ - tcg_gen_gvec_or(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 3: /* VORN */ - tcg_gen_gvec_orc(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 4: /* VEOR */ - tcg_gen_gvec_xor(tcg_ctx, 0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 5: /* VBSL */ - tcg_gen_gvec_bitsel(tcg_ctx, MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - break; - case 6: /* VBIT */ - tcg_gen_gvec_bitsel(tcg_ctx, MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs, - vec_size, vec_size); - break; - case 7: /* VBIF */ - tcg_gen_gvec_bitsel(tcg_ctx, MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs, - vec_size, vec_size); - break; - } - return 0; - - case NEON_3R_VADD_VSUB: - if (u) { - tcg_gen_gvec_sub(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_add(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VQADD: - tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, vec_size, vec_size, - (u ? uqadd_op : sqadd_op) + size); - return 0; - - case NEON_3R_VQSUB: - tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, vec_size, vec_size, - (u ? uqsub_op : sqsub_op) + size); - return 0; - - case NEON_3R_VMUL: /* VMUL */ - if (u) { - /* Polynomial case allows only P8. 
*/ - if (size != 0) { - return 1; - } - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, - 0, gen_helper_gvec_pmul_b); - } else { - tcg_gen_gvec_mul(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VML: /* VMLA, VMLS */ - tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size, - u ? &mls_op[size] : &mla_op[size]); - return 0; - - case NEON_3R_VTST_VCEQ: - if (u) { /* VCEQ */ - tcg_gen_gvec_cmp(tcg_ctx, TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { /* VTST */ - tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size, &cmtst_op[size]); - } - return 0; - - case NEON_3R_VCGT: - tcg_gen_gvec_cmp(tcg_ctx, u ? TCG_COND_GTU : TCG_COND_GT, size, - rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); - return 0; - - case NEON_3R_VCGE: - tcg_gen_gvec_cmp(tcg_ctx, u ? TCG_COND_GEU : TCG_COND_GE, size, - rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); - return 0; - - case NEON_3R_VMAX: - if (u) { - tcg_gen_gvec_umax(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_smax(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - case NEON_3R_VMIN: - if (u) { - tcg_gen_gvec_umin(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } else { - tcg_gen_gvec_smin(tcg_ctx, size, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } - return 0; - - case NEON_3R_VSHL: - /* Note the operation is vshl vd,vm,vn */ - tcg_gen_gvec_3(tcg_ctx, rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size, - u ? &ushl_op[size] : &sshl_op[size]); - return 0; - } - - if (size == 3) { - /* 64-bit element instructions. */ - for (pass = 0; pass < (q ? 2 : 1); pass++) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + pass); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + pass); - switch (op) { - case NEON_3R_VQSHL: - if (u) { - gen_helper_neon_qshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } else { - gen_helper_neon_qshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } - break; - case NEON_3R_VRSHL: - if (u) { - gen_helper_neon_rshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } else { - gen_helper_neon_rshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } - break; - case NEON_3R_VQRSHL: - if (u) { - gen_helper_neon_qrshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } else { - gen_helper_neon_qrshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V1, tcg_ctx->cpu_V0); - } - break; - default: - abort(); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - return 0; - } - pairwise = 0; - switch (op) { - case NEON_3R_VQSHL: - case NEON_3R_VRSHL: - case NEON_3R_VQRSHL: - { - int rtmp; - /* Shift instruction operands are reversed. 
*/ - rtmp = rn; - rn = rm; - rm = rtmp; - } - break; - case NEON_3R_VPADD_VQRDMLAH: - case NEON_3R_VPMAX: - case NEON_3R_VPMIN: - pairwise = 1; - break; - case NEON_3R_FLOAT_ARITH: - pairwise = (u && size < 2); /* if VPADD (float) */ - break; - case NEON_3R_FLOAT_MINMAX: - pairwise = u; /* if VPMIN/VPMAX (float) */ - break; - case NEON_3R_FLOAT_CMP: - if (!u && size) { - /* no encoding for U=0 C=1x */ - return 1; - } - break; - case NEON_3R_FLOAT_ACMP: - if (!u) { - return 1; - } - break; - case NEON_3R_FLOAT_MISC: - /* VMAXNM/VMINNM in ARMv8 */ - if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) { - return 1; - } - break; - case NEON_3R_VFM_VQRDMLSH: - if (!dc_isar_feature(aa32_simdfmac, s)) { - return 1; - } - break; - default: - break; - } - - if (pairwise && q) { - /* All the pairwise insns UNDEF if Q is set */ - return 1; - } - - for (pass = 0; pass < (q ? 4 : 2); pass++) { - - if (pairwise) { - /* Pairwise. */ - if (pass < 1) { - tmp = neon_load_reg(tcg_ctx, rn, 0); - tmp2 = neon_load_reg(tcg_ctx, rn, 1); - } else { - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - } - } else { - /* Elementwise. */ - tmp = neon_load_reg(tcg_ctx, rn, pass); - tmp2 = neon_load_reg(tcg_ctx, rm, pass); - } - switch (op) { - case NEON_3R_VHADD: - GEN_NEON_INTEGER_OP(hadd); - break; - case NEON_3R_VRHADD: - GEN_NEON_INTEGER_OP(rhadd); - break; - case NEON_3R_VHSUB: - GEN_NEON_INTEGER_OP(hsub); - break; - case NEON_3R_VQSHL: - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - case NEON_3R_VRSHL: - GEN_NEON_INTEGER_OP(rshl); - break; - case NEON_3R_VQRSHL: - GEN_NEON_INTEGER_OP_ENV(qrshl); - break; - case NEON_3R_VABD: - GEN_NEON_INTEGER_OP(abd); - break; - case NEON_3R_VABA: - GEN_NEON_INTEGER_OP(abd); - tcg_temp_free_i32(tcg_ctx, tmp2); - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - gen_neon_add(tcg_ctx, size, tmp, tmp2); - break; - case NEON_3R_VPMAX: - GEN_NEON_INTEGER_OP(pmax); - break; - case NEON_3R_VPMIN: - GEN_NEON_INTEGER_OP(pmin); - break; - case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */ - if (!u) { /* VQDMULH */ - switch (size) { - case 1: - gen_helper_neon_qdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - case 2: - gen_helper_neon_qdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - default: abort(); - } - } else { /* VQRDMULH */ - switch (size) { - case 1: - gen_helper_neon_qrdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - case 2: - gen_helper_neon_qrdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - break; - default: abort(); - } - } - break; - case NEON_3R_VPADD_VQRDMLAH: - switch (size) { - case 0: gen_helper_neon_padd_u8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_padd_u16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: tcg_gen_add_i32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - break; - case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. 
*/ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - switch ((u << 2) | size) { - case 0: /* VADD */ - case 4: /* VPADD */ - gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus); - break; - case 2: /* VSUB */ - gen_helper_vfp_subs(tcg_ctx, tmp, tmp, tmp2, fpstatus); - break; - case 6: /* VABD */ - gen_helper_neon_abd_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - break; - default: - abort(); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_MULTIPLY: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus); - if (!u) { - tcg_temp_free_i32(tcg_ctx, tmp2); - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - if (size == 0) { - gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_vfp_subs(tcg_ctx, tmp, tmp2, tmp, fpstatus); - } - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_CMP: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (!u) { - gen_helper_neon_ceq_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - if (size == 0) { - gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_ACMP: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (size == 0) { - gen_helper_neon_acge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_neon_acgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_MINMAX: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (size == 0) { - gen_helper_vfp_maxs(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_vfp_mins(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_3R_FLOAT_MISC: - if (u) { - /* VMAXNM/VMINNM */ - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - if (size == 0) { - gen_helper_vfp_maxnums(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } else { - gen_helper_vfp_minnums(tcg_ctx, tmp, tmp, tmp2, fpstatus); - } - tcg_temp_free_ptr(tcg_ctx, fpstatus); - } else { - if (size == 0) { - gen_helper_recps_f32(tcg_ctx, tmp, tmp, tmp2, tcg_ctx->cpu_env); - } else { - gen_helper_rsqrts_f32(tcg_ctx, tmp, tmp, tmp2, tcg_ctx->cpu_env); - } - } - break; - case NEON_3R_VFM_VQRDMLSH: - { - /* VFMA, VFMS: fused multiply-add */ - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - TCGv_i32 tmp3 = neon_load_reg(tcg_ctx, rd, pass); - if (size) { - /* VFMS */ - gen_helper_vfp_negs(tcg_ctx, tmp, tmp); - } - gen_helper_vfp_muladds(tcg_ctx, tmp, tmp, tmp2, tmp3, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp3); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - default: - abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - - /* Save the result. For elementwise operations we can put it - straight into the destination register. For pairwise operations - we have to be careful to avoid clobbering the source operands. */ - if (pairwise && rd == rm) { - neon_store_scratch(tcg_ctx, pass, tmp); - } else { - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - - } /* for pass */ - if (pairwise && rd == rm) { - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp = neon_load_scratch(tcg_ctx, pass); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - } - /* End of 3 register same size operations. */ - } else if (insn & (1 << 4)) { - if ((insn & 0x00380080) != 0) { - /* Two registers and shift. 
*/ - op = (insn >> 8) & 0xf; - if (insn & (1 << 7)) { - /* 64-bit shift. */ - if (op > 7) { - return 1; - } - size = 3; - } else { - size = 2; - while ((insn & (1 << (size + 19))) == 0) - size--; - } - shift = (insn >> 16) & ((1 << (3 + size)) - 1); - if (op < 8) { - /* Shift by immediate: - VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ - if (q && ((rd | rm) & 1)) { - return 1; - } - if (!u && (op == 4 || op == 6)) { - return 1; - } - /* Right shifts are encoded as N - shift, where N is the - element size in bits. */ - if (op <= 4) { - shift = shift - (1 << (size + 3)); - } - - switch (op) { - case 0: /* VSHR */ - /* Right shift comes here negative. */ - shift = -shift; - /* Shifts larger than the element size are architecturally - * valid. Unsigned results in all zeros; signed results - * in all sign bits. - */ - if (!u) { - tcg_gen_gvec_sari(tcg_ctx, size, rd_ofs, rm_ofs, - MIN(shift, (8 << size) - 1), - vec_size, vec_size); - } else if (shift >= 8 << size) { - tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0); - } else { - tcg_gen_gvec_shri(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 1: /* VSRA */ - /* Right shift comes here negative. */ - shift = -shift; - /* Shifts larger than the element size are architecturally - * valid. Unsigned results in all zeros; signed results - * in all sign bits. - */ - if (!u) { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size, - MIN(shift, (8 << size) - 1), - &ssra_op[size]); - } else if (shift >= 8 << size) { - /* rd += 0 */ - } else { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size, - shift, &usra_op[size]); - } - return 0; - - case 4: /* VSRI */ - if (!u) { - return 1; - } - /* Right shift comes here negative. */ - shift = -shift; - /* Shift out of range leaves destination unchanged. */ - if (shift < 8 << size) { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size, - shift, &sri_op[size]); - } - return 0; - - case 5: /* VSHL, VSLI */ - if (u) { /* VSLI */ - /* Shift out of range leaves destination unchanged. */ - if (shift < 8 << size) { - tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, - vec_size, shift, &sli_op[size]); - } - } else { /* VSHL */ - /* Shifts larger than the element size are - * architecturally valid and results in zero. - */ - if (shift >= 8 << size) { - tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0); - } else { - tcg_gen_gvec_shli(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - } - return 0; - } - - if (size == 3) { - count = q + 1; - } else { - count = q ? 4: 2; - } - - /* To avoid excessive duplication of ops we implement shift - * by immediate using the variable shift operations. 
- */ - imm = dup_const(size, shift); - - for (pass = 0; pass < count; pass++) { - if (size == 3) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rm + pass); - tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_V1, imm); - switch (op) { - case 2: /* VRSHR */ - case 3: /* VRSRA */ - if (u) - gen_helper_neon_rshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - else - gen_helper_neon_rshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - break; - case 6: /* VQSHLU */ - gen_helper_neon_qshlu_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - break; - case 7: /* VQSHL */ - if (u) { - gen_helper_neon_qshl_u64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } else { - gen_helper_neon_qshl_s64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_env, - tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } - break; - default: - g_assert_not_reached(); - break; - } - if (op == 3) { - /* Accumulate. */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - tcg_gen_add_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } else { /* size < 3 */ - /* Operands in T0 and T1. */ - tmp = neon_load_reg(tcg_ctx, rm, pass); - tmp2 = tcg_temp_new_i32(tcg_ctx); - tcg_gen_movi_i32(tcg_ctx, tmp2, imm); - switch (op) { - case 2: /* VRSHR */ - case 3: /* VRSRA */ - GEN_NEON_INTEGER_OP(rshl); - break; - case 6: /* VQSHLU */ - switch (size) { - case 0: - gen_helper_neon_qshlu_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, - tmp, tmp2); - break; - case 1: - gen_helper_neon_qshlu_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, - tmp, tmp2); - break; - case 2: - gen_helper_neon_qshlu_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, - tmp, tmp2); - break; - default: - abort(); - } - break; - case 7: /* VQSHL */ - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - default: - g_assert_not_reached(); - break; - } - tcg_temp_free_i32(tcg_ctx, tmp2); - - if (op == 3) { - /* Accumulate. */ - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - gen_neon_add(tcg_ctx, size, tmp, tmp2); - tcg_temp_free_i32(tcg_ctx, tmp2); - } - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - } /* for pass */ - } else if (op < 10) { - /* Shift by immediate and narrow: - VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ - int input_unsigned = (op == 8) ? 
!u : u; - if (rm & 1) { - return 1; - } - shift = shift - (1 << (size + 3)); - size++; - if (size == 3) { - tmp64 = tcg_const_i64(tcg_ctx, shift); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rm); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + 1); - for (pass = 0; pass < 2; pass++) { - TCGv_i64 in; - if (pass == 0) { - in = tcg_ctx->cpu_V0; - } else { - in = tcg_ctx->cpu_V1; - } - if (q) { - if (input_unsigned) { - gen_helper_neon_rshl_u64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } else { - gen_helper_neon_rshl_s64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } - } else { - if (input_unsigned) { - gen_ushl_i64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } else { - gen_sshl_i64(tcg_ctx, tcg_ctx->cpu_V0, in, tmp64); - } - } - tmp = tcg_temp_new_i32(tcg_ctx); - gen_neon_narrow_op(tcg_ctx, op == 8, u, size - 1, tmp, tcg_ctx->cpu_V0); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i64(tcg_ctx, tmp64); - } else { - if (size == 1) { - imm = (uint16_t)shift; - imm |= imm << 16; - } else { - /* size == 2 */ - imm = (uint32_t)shift; - } - tmp2 = tcg_const_i32(tcg_ctx, imm); - tmp4 = neon_load_reg(tcg_ctx, rm + 1, 0); - tmp5 = neon_load_reg(tcg_ctx, rm + 1, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(tcg_ctx, rm, 0); - } else { - tmp = tmp4; - } - gen_neon_shift_narrow(tcg_ctx, size, tmp, tmp2, q, - input_unsigned); - if (pass == 0) { - tmp3 = neon_load_reg(tcg_ctx, rm, 1); - } else { - tmp3 = tmp5; - } - gen_neon_shift_narrow(tcg_ctx, size, tmp3, tmp2, q, - input_unsigned); - tcg_gen_concat_i32_i64(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, tmp3); - tmp = tcg_temp_new_i32(tcg_ctx); - gen_neon_narrow_op(tcg_ctx, op == 8, u, size - 1, tmp, tcg_ctx->cpu_V0); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } /* for pass */ - tcg_temp_free_i32(tcg_ctx, tmp2); - } - } else if (op == 10) { - /* VSHLL, VMOVL */ - if (q || (rd & 1)) { - return 1; - } - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 1) - tmp = tmp2; - - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, u); - - if (shift != 0) { - /* The shift is less than the width of the source - type, so we can just shift the whole register. */ - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, shift); - /* Widen the result of shift: we need to clear - * the potential overflow bits resulting from - * left bits of the narrow input appearing as - * right bits of left the neighbour narrow - * input. */ - if (size < 2 || !u) { - uint64_t imm64; - if (size == 0) { - imm = (0xffu >> (8 - shift)); - imm |= imm << 16; - } else if (size == 1) { - imm = 0xffff >> (16 - shift); - } else { - /* size == 2 */ - imm = 0xffffffff >> (32 - shift); - } - if (size < 2) { - imm64 = imm | (((uint64_t)imm) << 32); - } else { - imm64 = imm; - } - tcg_gen_andi_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, ~imm64); - } - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - } else if (op >= 14) { - /* VCVT fixed-point. 
*/ - TCGv_ptr fpst; - TCGv_i32 shiftv; - VFPGenFixPointFn *fn; - - if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) { - return 1; - } + static const TCGOpcode vecop_list[] = { + INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_4(tcg_ctx, rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} - if (!(op & 1)) { - if (u) { - fn = gen_helper_vfp_ultos; - } else { - fn = gen_helper_vfp_sltos; - } - } else { - if (u) { - fn = gen_helper_vfp_touls_round_to_zero; - } else { - fn = gen_helper_vfp_tosls_round_to_zero; - } - } +static void gen_sabd_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); - /* We have already masked out the must-be-1 top bit of imm6, - * hence this 32-shift where the ARM ARM has 64-imm6. - */ - shift = 32 - shift; - fpst = get_fpstatus_ptr(tcg_ctx, 1); - shiftv = tcg_const_i32(tcg_ctx, shift); - for (pass = 0; pass < (q ? 4 : 2); pass++) { - TCGv_i32 tmpf = neon_load_reg(tcg_ctx, rm, pass); - fn(tcg_ctx, tmpf, tmpf, shiftv, fpst); - neon_store_reg(tcg_ctx, rd, pass, tmpf); - } - tcg_temp_free_ptr(tcg_ctx, fpst); - tcg_temp_free_i32(tcg_ctx, shiftv); - } else { - return 1; - } - } else { /* (insn & 0x00380080) == 0 */ - int invert, reg_ofs, vec_size; + tcg_gen_sub_i32(tcg_ctx, t, a, b); + tcg_gen_sub_i32(tcg_ctx, d, b, a); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LT, d, a, b, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} - if (q && (rd & 1)) { - return 1; - } +static void gen_sabd_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - op = (insn >> 8) & 0xf; - /* One register and immediate. */ - imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf); - invert = (insn & (1 << 5)) != 0; - /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. - * We choose to not special-case this and will behave as if a - * valid constant encoding of 0 had been given. - */ - switch (op) { - case 0: case 1: - /* no-op */ - break; - case 2: case 3: - imm <<= 8; - break; - case 4: case 5: - imm <<= 16; - break; - case 6: case 7: - imm <<= 24; - break; - case 8: case 9: - imm |= imm << 16; - break; - case 10: case 11: - imm = (imm << 8) | (imm << 24); - break; - case 12: - imm = (imm << 8) | 0xff; - break; - case 13: - imm = (imm << 16) | 0xffff; - break; - case 14: - imm |= (imm << 8) | (imm << 16) | (imm << 24); - if (invert) { - imm = ~imm; - } - break; - case 15: - if (invert) { - return 1; - } - imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) - | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); - break; - } - if (invert) { - imm = ~imm; - } + tcg_gen_sub_i64(tcg_ctx, t, a, b); + tcg_gen_sub_i64(tcg_ctx, d, b, a); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LT, d, a, b, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - reg_ofs = neon_reg_offset(rd, 0); - vec_size = q ? 
16 : 8; +static void gen_sabd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - if (op & 1 && op < 12) { - if (invert) { - /* The immediate value has already been inverted, - * so BIC becomes AND. - */ - tcg_gen_gvec_andi(tcg_ctx, MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } else { - tcg_gen_gvec_ori(tcg_ctx, MO_32, reg_ofs, reg_ofs, imm, - vec_size, vec_size); - } - } else { - /* VMOV, VMVN. */ - if (op == 14 && invert) { - TCGv_i64 t64 = tcg_temp_new_i64(tcg_ctx); - - for (pass = 0; pass <= q; ++pass) { - uint64_t val = 0; - int n; - - for (n = 0; n < 8; n++) { - if (imm & (1 << (n + pass * 8))) { - val |= 0xffull << (n * 8); - } - } - tcg_gen_movi_i64(tcg_ctx, t64, val); - neon_store_reg64(tcg_ctx, t64, rd + pass); - } - tcg_temp_free_i64(tcg_ctx, t64); - } else { - tcg_gen_gvec_dup32i(tcg_ctx, reg_ofs, vec_size, vec_size, imm); - } - } - } - } else { /* (insn & 0x00800010 == 0x00800000) */ - if (size != 3) { - op = (insn >> 8) & 0xf; - if ((insn & (1 << 6)) == 0) { - /* Three registers of different lengths. */ - int src1_wide; - int src2_wide; - int prewiden; - /* undefreq: bit 0 : UNDEF if size == 0 - * bit 1 : UNDEF if size == 1 - * bit 2 : UNDEF if size == 2 - * bit 3 : UNDEF if U == 1 - * Note that [2:0] set implies 'always UNDEF' - */ - int undefreq; - /* prewiden, src1_wide, src2_wide, undefreq */ - static const int neon_3reg_wide[16][4] = { - {1, 0, 0, 0}, /* VADDL */ - {1, 1, 0, 0}, /* VADDW */ - {1, 0, 0, 0}, /* VSUBL */ - {1, 1, 0, 0}, /* VSUBW */ - {0, 1, 1, 0}, /* VADDHN */ - {0, 0, 0, 0}, /* VABAL */ - {0, 1, 1, 0}, /* VSUBHN */ - {0, 0, 0, 0}, /* VABDL */ - {0, 0, 0, 0}, /* VMLAL */ - {0, 0, 0, 9}, /* VQDMLAL */ - {0, 0, 0, 0}, /* VMLSL */ - {0, 0, 0, 9}, /* VQDMLSL */ - {0, 0, 0, 0}, /* Integer VMULL */ - {0, 0, 0, 1}, /* VQDMULL */ - {0, 0, 0, 0xa}, /* Polynomial VMULL */ - {0, 0, 0, 7}, /* Reserved: always UNDEF */ - }; - - prewiden = neon_3reg_wide[op][0]; - src1_wide = neon_3reg_wide[op][1]; - src2_wide = neon_3reg_wide[op][2]; - undefreq = neon_3reg_wide[op][3]; - - if ((undefreq & (1 << size)) || - ((undefreq & 8) && u)) { - return 1; - } - if ((src1_wide && (rn & 1)) || - (src2_wide && (rm & 1)) || - (!src2_wide && (rd & 1))) { - return 1; - } + tcg_gen_smin_vec(tcg_ctx, vece, t, a, b); + tcg_gen_smax_vec(tcg_ctx, vece, d, a, b); + tcg_gen_sub_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - /* Handle polynomial VMULL in a single pass. 
*/ - if (op == 14) { - if (size == 0) { - /* VMULL.P8 */ - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, 16, 16, - 0, gen_helper_neon_pmull_h); - } else { - /* VMULL.P64 */ - if (!dc_isar_feature(aa32_pmull, s)) { - return 1; - } - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, 16, 16, - 0, gen_helper_gvec_pmull_q); - } - return 0; - } +void gen_gvec_sabd(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sabd_i32, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sabd_i64, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} - /* Avoid overlapping operands. Wide source operands are - always aligned so will never overlap with wide - destinations in problematic ways. */ - if (rd == rm && !src2_wide) { - tmp = neon_load_reg(tcg_ctx, rm, 1); - neon_store_scratch(tcg_ctx, 2, tmp); - } else if (rd == rn && !src1_wide) { - tmp = neon_load_reg(tcg_ctx, rn, 1); - neon_store_scratch(tcg_ctx, 2, tmp); - } - tmp3 = NULL; - for (pass = 0; pass < 2; pass++) { - if (src1_wide) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + pass); - tmp = NULL; - } else { - if (pass == 1 && rd == rn) { - tmp = neon_load_scratch(tcg_ctx, 2); - } else { - tmp = neon_load_reg(tcg_ctx, rn, pass); - } - if (prewiden) { - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, u); - } - } - if (src2_wide) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + pass); - tmp2 = NULL; - } else { - if (pass == 1 && rd == rm) { - tmp2 = neon_load_scratch(tcg_ctx, 2); - } else { - tmp2 = neon_load_reg(tcg_ctx, rm, pass); - } - if (prewiden) { - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V1, tmp2, size, u); - } - } - switch (op) { - case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */ - gen_neon_addl(tcg_ctx, size); - break; - case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */ - gen_neon_subl(tcg_ctx, size); - break; - case 5: case 7: /* VABAL, VABDL */ - switch ((size << 1) | u) { - case 0: - gen_helper_neon_abdl_s16(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 1: - gen_helper_neon_abdl_u16(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 2: - gen_helper_neon_abdl_s32(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 3: - gen_helper_neon_abdl_u32(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 4: - gen_helper_neon_abdl_s64(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - case 5: - gen_helper_neon_abdl_u64(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2); - break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_i32(tcg_ctx, tmp); - break; - case 8: case 9: case 10: case 11: case 12: case 13: - /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ - gen_neon_mull(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2, size, u); - break; - default: /* 15 is RESERVED: caught earlier */ - abort(); - } - if (op == 13) { - /* VQDMULL */ - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - 
neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } else if (op == 5 || (op >= 8 && op <= 11)) { - /* Accumulate. */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - switch (op) { - case 10: /* VMLSL */ - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - /* Fall through */ - case 5: case 8: /* VABAL, VMLAL */ - gen_neon_addl(tcg_ctx, size); - break; - case 9: case 11: /* VQDMLAL, VQDMLSL */ - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - if (op == 11) { - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - } - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, size); - break; - default: - abort(); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } else if (op == 4 || op == 6) { - /* Narrowing operation. */ - tmp = tcg_temp_new_i32(tcg_ctx); - if (!u) { - switch (size) { - case 0: - gen_helper_neon_narrow_high_u8(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 1: - gen_helper_neon_narrow_high_u16(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 2: - tcg_gen_extrh_i64_i32(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - default: abort(); - } - } else { - switch (size) { - case 0: - gen_helper_neon_narrow_round_high_u8(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 1: - gen_helper_neon_narrow_round_high_u16(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - case 2: - tcg_gen_addi_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, 1u << 31); - tcg_gen_extrh_i64_i32(tcg_ctx, tmp, tcg_ctx->cpu_V0); - break; - default: abort(); - } - } - if (pass == 0) { - tmp3 = tmp; - } else { - neon_store_reg(tcg_ctx, rd, 0, tmp3); - neon_store_reg(tcg_ctx, rd, 1, tmp); - } - } else { - /* Write back the result. */ - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - } - } else { - /* Two registers and a scalar. NB that for ops of this form - * the ARM ARM labels bit 24 as Q, but it is in our variable - * 'u', not 'q'. - */ - if (size == 0) { - return 1; - } - switch (op) { - case 1: /* Float VMLA scalar */ - case 5: /* Floating point VMLS scalar */ - case 9: /* Floating point VMUL scalar */ - if (size == 1) { - return 1; - } - /* fall through */ - case 0: /* Integer VMLA scalar */ - case 4: /* Integer VMLS scalar */ - case 8: /* Integer VMUL scalar */ - case 12: /* VQDMULH scalar */ - case 13: /* VQRDMULH scalar */ - if (u && ((rd | rn) & 1)) { - return 1; - } - tmp = neon_get_scalar(tcg_ctx, size, rm); - neon_store_scratch(tcg_ctx, 0, tmp); - for (pass = 0; pass < (u ? 4 : 2); pass++) { - tmp = neon_load_scratch(tcg_ctx, 0); - tmp2 = neon_load_reg(tcg_ctx, rn, pass); - if (op == 12) { - if (size == 1) { - gen_helper_neon_qdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } - } else if (op == 13) { - if (size == 1) { - gen_helper_neon_qrdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } else { - gen_helper_neon_qrdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2); - } - } else if (op & 1) { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - } else { - switch (size) { - case 0: gen_helper_neon_mul_u8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_mul_u16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: tcg_gen_mul_i32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - } - tcg_temp_free_i32(tcg_ctx, tmp2); - if (op < 8) { - /* Accumulate. 
*/ - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - switch (op) { - case 0: - gen_neon_add(tcg_ctx, size, tmp, tmp2); - break; - case 1: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case 4: - gen_neon_rsb(tcg_ctx, size, tmp, tmp2); - break; - case 5: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_subs(tcg_ctx, tmp, tmp2, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - default: - abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - } - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - break; - case 3: /* VQDMLAL scalar */ - case 7: /* VQDMLSL scalar */ - case 11: /* VQDMULL scalar */ - if (u == 1) { - return 1; - } - /* fall through */ - case 2: /* VMLAL sclar */ - case 6: /* VMLSL scalar */ - case 10: /* VMULL scalar */ - if (rd & 1) { - return 1; - } - tmp2 = neon_get_scalar(tcg_ctx, size, rm); - /* We need a copy of tmp2 because gen_neon_mull - * deletes it during pass 0. */ - tmp4 = tcg_temp_new_i32(tcg_ctx); - tcg_gen_mov_i32(tcg_ctx, tmp4, tmp2); - tmp3 = neon_load_reg(tcg_ctx, rn, 1); - - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(tcg_ctx, rn, 0); - } else { - tmp = tmp3; - tmp2 = tmp4; - } - gen_neon_mull(tcg_ctx, tcg_ctx->cpu_V0, tmp, tmp2, size, u); - if (op != 11) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - } - switch (op) { - case 6: - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - /* Fall through */ - case 2: - gen_neon_addl(tcg_ctx, size); - break; - case 3: case 7: - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - if (op == 7) { - gen_neon_negl(tcg_ctx, tcg_ctx->cpu_V0, size); - } - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1, size); - break; - case 10: - /* no-op */ - break; - case 11: - gen_neon_addl_saturate(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, size); - break; - default: - abort(); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - break; - case 14: /* VQRDMLAH scalar */ - case 15: /* VQRDMLSH scalar */ - { - NeonGenThreeOpEnvFn *fn; - - if (!dc_isar_feature(aa32_rdm, s)) { - return 1; - } - if (u && ((rd | rn) & 1)) { - return 1; - } - if (op == 14) { - if (size == 1) { - fn = gen_helper_neon_qrdmlah_s16; - } else { - fn = gen_helper_neon_qrdmlah_s32; - } - } else { - if (size == 1) { - fn = gen_helper_neon_qrdmlsh_s16; - } else { - fn = gen_helper_neon_qrdmlsh_s32; - } - } - - tmp2 = neon_get_scalar(tcg_ctx, size, rm); - for (pass = 0; pass < (u ? 4 : 2); pass++) { - tmp = neon_load_reg(tcg_ctx, rn, pass); - tmp3 = neon_load_reg(tcg_ctx, rd, pass); - fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp3); - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - } - break; - default: - g_assert_not_reached(); - break; - } - } - } else { /* size == 3 */ - if (!u) { - /* Extract. 
*/ - imm = (insn >> 8) & 0xf; +static void gen_uabd_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); - if (imm > 7 && !q) - return 1; + tcg_gen_sub_i32(tcg_ctx, t, a, b); + tcg_gen_sub_i32(tcg_ctx, d, b, a); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LTU, d, a, b, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} - if (q && ((rd | rn | rm) & 1)) { - return 1; - } +static void gen_uabd_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); - if (imm == 0) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn); - if (q) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rn + 1); - } - } else if (imm == 8) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + 1); - if (q) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm); - } - } else if (q) { - tmp64 = tcg_temp_new_i64(tcg_ctx); - if (imm < 8) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn); - neon_load_reg64(tcg_ctx, tmp64, rn + 1); - } else { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn + 1); - neon_load_reg64(tcg_ctx, tmp64, rm); - } - tcg_gen_shri_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, (imm & 7) * 8); - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V1, tmp64, 64 - ((imm & 7) * 8)); - tcg_gen_or_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - if (imm < 8) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm); - } else { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm + 1); - imm -= 8; - } - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V1, tcg_ctx->cpu_V1, 64 - (imm * 8)); - tcg_gen_shri_i64(tcg_ctx, tmp64, tmp64, imm * 8); - tcg_gen_or_i64(tcg_ctx, tcg_ctx->cpu_V1, tcg_ctx->cpu_V1, tmp64); - tcg_temp_free_i64(tcg_ctx, tmp64); - } else { - /* BUGFIX */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rn); - tcg_gen_shri_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, imm * 8); - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rm); - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V1, tcg_ctx->cpu_V1, 64 - (imm * 8)); - tcg_gen_or_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, tcg_ctx->cpu_V1); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd); - if (q) { - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + 1); - } - } else if ((insn & (1 << 11)) == 0) { - /* Two register misc. */ - op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); - size = (insn >> 18) & 3; - /* UNDEF for unknown op values and bad op-size combinations */ - if ((neon_2rm_sizes[op] & (1 << size)) == 0) { - return 1; - } - if (neon_2rm_is_v8_op(op) && - !arm_dc_feature(s, ARM_FEATURE_V8)) { - return 1; - } - if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) && - q && ((rm | rd) & 1)) { - return 1; - } - switch (op) { - case NEON_2RM_VREV64: - for (pass = 0; pass < (q ? 
2 : 1); pass++) { - tmp = neon_load_reg(tcg_ctx, rm, pass * 2); - tmp2 = neon_load_reg(tcg_ctx, rm, pass * 2 + 1); - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp, tmp); break; - case 1: gen_swap_half(tcg_ctx, tmp); break; - case 2: /* no-op */ break; - default: abort(); - } - neon_store_reg(tcg_ctx, rd, pass * 2 + 1, tmp); - if (size == 2) { - neon_store_reg(tcg_ctx, rd, pass * 2, tmp2); - } else { - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp2, tmp2); break; - case 1: gen_swap_half(tcg_ctx, tmp2); break; - default: abort(); - } - neon_store_reg(tcg_ctx, rd, pass * 2, tmp2); - } - } - break; - case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U: - case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U: - for (pass = 0; pass < q + 1; pass++) { - tmp = neon_load_reg(tcg_ctx, rm, pass * 2); - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, op & 1); - tmp = neon_load_reg(tcg_ctx, rm, pass * 2 + 1); - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V1, tmp, size, op & 1); - switch (size) { - case 0: gen_helper_neon_paddl_u16(tcg_ctx, CPU_V001); break; - case 1: gen_helper_neon_paddl_u32(tcg_ctx, CPU_V001); break; - case 2: tcg_gen_add_i64(tcg_ctx, CPU_V001); break; - default: abort(); - } - if (op >= NEON_2RM_VPADAL) { - /* Accumulate. */ - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V1, rd + pass); - gen_neon_addl(tcg_ctx, size); - } - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - break; - case NEON_2RM_VTRN: - if (size == 2) { - int n; - for (n = 0; n < (q ? 4 : 2); n += 2) { - tmp = neon_load_reg(tcg_ctx, rm, n); - tmp2 = neon_load_reg(tcg_ctx, rd, n + 1); - neon_store_reg(tcg_ctx, rm, n, tmp2); - neon_store_reg(tcg_ctx, rd, n + 1, tmp); - } - } else { - goto elementwise; - } - break; - case NEON_2RM_VUZP: - if (gen_neon_unzip(tcg_ctx, rd, rm, size, q)) { - return 1; - } - break; - case NEON_2RM_VZIP: - if (gen_neon_zip(tcg_ctx, rd, rm, size, q)) { - return 1; - } - break; - case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN: - /* also VQMOVUN; op field and mnemonics don't line up */ - if (rm & 1) { - return 1; - } - tmp2 = NULL; - for (pass = 0; pass < 2; pass++) { - neon_load_reg64(tcg_ctx, tcg_ctx->cpu_V0, rm + pass); - tmp = tcg_temp_new_i32(tcg_ctx); - gen_neon_narrow_op(tcg_ctx, op == NEON_2RM_VMOVN, q, size, - tmp, tcg_ctx->cpu_V0); - if (pass == 0) { - tmp2 = tmp; - } else { - neon_store_reg(tcg_ctx, rd, 0, tmp2); - neon_store_reg(tcg_ctx, rd, 1, tmp); - } - } - break; - case NEON_2RM_VSHLL: - if (q || (rd & 1)) { - return 1; - } - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - for (pass = 0; pass < 2; pass++) { - if (pass == 1) - tmp = tmp2; - gen_neon_widen(tcg_ctx, tcg_ctx->cpu_V0, tmp, size, 1); - tcg_gen_shli_i64(tcg_ctx, tcg_ctx->cpu_V0, tcg_ctx->cpu_V0, 8 << size); - neon_store_reg64(tcg_ctx, tcg_ctx->cpu_V0, rd + pass); - } - break; - case NEON_2RM_VCVT_F16_F32: - { - TCGv_ptr fpst; - TCGv_i32 ahp; - - if (!dc_isar_feature(aa32_fp16_spconv, s) || - q || (rm & 1)) { - return 1; - } - fpst = get_fpstatus_ptr(tcg_ctx, true); - ahp = get_ahp_flag(tcg_ctx); - tmp = neon_load_reg(tcg_ctx, rm, 0); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp); - tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16); - tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); - tmp = neon_load_reg(tcg_ctx, rm, 2); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp); - tmp3 = neon_load_reg(tcg_ctx, rm, 3); - 
neon_store_reg(tcg_ctx, rd, 0, tmp2); - gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp); - tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16); - tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp); - neon_store_reg(tcg_ctx, rd, 1, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp); - tcg_temp_free_i32(tcg_ctx, ahp); - tcg_temp_free_ptr(tcg_ctx, fpst); - break; - } - case NEON_2RM_VCVT_F32_F16: - { - TCGv_ptr fpst; - TCGv_i32 ahp; - if (!dc_isar_feature(aa32_fp16_spconv, s) || - q || (rd & 1)) { - return 1; - } - fpst = get_fpstatus_ptr(tcg_ctx, true); - ahp = get_ahp_flag(tcg_ctx); - tmp3 = tcg_temp_new_i32(tcg_ctx); - tmp = neon_load_reg(tcg_ctx, rm, 0); - tmp2 = neon_load_reg(tcg_ctx, rm, 1); - tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 0, tmp3); - tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 1, tmp); - tmp3 = tcg_temp_new_i32(tcg_ctx); - tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 2, tmp3); - tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16); - gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp); - neon_store_reg(tcg_ctx, rd, 3, tmp2); - tcg_temp_free_i32(tcg_ctx, ahp); - tcg_temp_free_ptr(tcg_ctx, fpst); - break; - } - case NEON_2RM_AESE: case NEON_2RM_AESMC: - if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rm); - - /* Bit 6 is the lowest opcode bit; it distinguishes between - * encryption (AESE/AESMC) and decryption (AESD/AESIMC) - */ - tmp3 = tcg_const_i32(tcg_ctx, extract32(insn, 6, 1)); - - if (op == NEON_2RM_AESE) { - gen_helper_crypto_aese(tcg_ctx, ptr1, ptr2, tmp3); - } else { - gen_helper_crypto_aesmc(tcg_ctx, ptr1, ptr2, tmp3); - } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - tcg_temp_free_i32(tcg_ctx, tmp3); - break; - case NEON_2RM_SHA1H: - if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rm); + tcg_gen_sub_i64(tcg_ctx, t, a, b); + tcg_gen_sub_i64(tcg_ctx, d, b, a); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LTU, d, a, b, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - gen_helper_crypto_sha1h(tcg_ctx, ptr1, ptr2); +static void gen_uabd_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - break; - case NEON_2RM_SHA1SU1: - if ((rm | rd) & 1) { - return 1; - } - /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */ - if (q) { - if (!dc_isar_feature(aa32_sha2, s)) { - return 1; - } - } else if (!dc_isar_feature(aa32_sha1, s)) { - return 1; - } - ptr1 = vfp_reg_ptr(tcg_ctx, true, rd); - ptr2 = vfp_reg_ptr(tcg_ctx, true, rm); - if (q) { - gen_helper_crypto_sha256su0(tcg_ctx, ptr1, ptr2); - } else { - gen_helper_crypto_sha1su1(tcg_ctx, ptr1, ptr2); - } - tcg_temp_free_ptr(tcg_ctx, ptr1); - tcg_temp_free_ptr(tcg_ctx, ptr2); - break; - - case NEON_2RM_VMVN: - tcg_gen_gvec_not(tcg_ctx, 0, rd_ofs, rm_ofs, vec_size, vec_size); - break; - case NEON_2RM_VNEG: - tcg_gen_gvec_neg(tcg_ctx, size, rd_ofs, rm_ofs, vec_size, vec_size); - break; - case NEON_2RM_VABS: - tcg_gen_gvec_abs(tcg_ctx, size, rd_ofs, rm_ofs, vec_size, vec_size); - break; - - default: - 
elementwise: - for (pass = 0; pass < (q ? 4 : 2); pass++) { - tmp = neon_load_reg(tcg_ctx, rm, pass); - switch (op) { - case NEON_2RM_VREV32: - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp, tmp); break; - case 1: gen_swap_half(tcg_ctx, tmp); break; - default: abort(); - } - break; - case NEON_2RM_VREV16: - gen_rev16(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VCLS: - switch (size) { - case 0: gen_helper_neon_cls_s8(tcg_ctx, tmp, tmp); break; - case 1: gen_helper_neon_cls_s16(tcg_ctx, tmp, tmp); break; - case 2: gen_helper_neon_cls_s32(tcg_ctx, tmp, tmp); break; - default: abort(); - } - break; - case NEON_2RM_VCLZ: - switch (size) { - case 0: gen_helper_neon_clz_u8(tcg_ctx, tmp, tmp); break; - case 1: gen_helper_neon_clz_u16(tcg_ctx, tmp, tmp); break; - case 2: tcg_gen_clzi_i32(tcg_ctx, tmp, tmp, 32); break; - default: abort(); - } - break; - case NEON_2RM_VCNT: - gen_helper_neon_cnt_u8(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VQABS: - switch (size) { - case 0: - gen_helper_neon_qabs_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 1: - gen_helper_neon_qabs_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 2: - gen_helper_neon_qabs_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - default: abort(); - } - break; - case NEON_2RM_VQNEG: - switch (size) { - case 0: - gen_helper_neon_qneg_s8(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 1: - gen_helper_neon_qneg_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - case 2: - gen_helper_neon_qneg_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp); - break; - default: abort(); - } - break; - case NEON_2RM_VCGT0: case NEON_2RM_VCLE0: - tmp2 = tcg_const_i32(tcg_ctx, 0); - switch(size) { - case 0: gen_helper_neon_cgt_s8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_cgt_s16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: gen_helper_neon_cgt_s32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - if (op == NEON_2RM_VCLE0) { - tcg_gen_not_i32(tcg_ctx, tmp, tmp); - } - break; - case NEON_2RM_VCGE0: case NEON_2RM_VCLT0: - tmp2 = tcg_const_i32(tcg_ctx, 0); - switch(size) { - case 0: gen_helper_neon_cge_s8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_cge_s16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: gen_helper_neon_cge_s32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - if (op == NEON_2RM_VCLT0) { - tcg_gen_not_i32(tcg_ctx, tmp, tmp); - } - break; - case NEON_2RM_VCEQ0: - tmp2 = tcg_const_i32(tcg_ctx, 0); - switch(size) { - case 0: gen_helper_neon_ceq_u8(tcg_ctx, tmp, tmp, tmp2); break; - case 1: gen_helper_neon_ceq_u16(tcg_ctx, tmp, tmp, tmp2); break; - case 2: gen_helper_neon_ceq_u32(tcg_ctx, tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tcg_ctx, tmp2); - break; - case NEON_2RM_VCGT0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCGE0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCEQ0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_ceq_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus); - 
tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCLE0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp2, tmp, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCLT0_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - tmp2 = tcg_const_i32(tcg_ctx, 0); - gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp2, tmp, fpstatus); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VABS_F: - gen_helper_vfp_abss(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VNEG_F: - gen_helper_vfp_negs(tcg_ctx, tmp, tmp); - break; - case NEON_2RM_VSWP: - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - neon_store_reg(tcg_ctx, rm, pass, tmp2); - break; - case NEON_2RM_VTRN: - tmp2 = neon_load_reg(tcg_ctx, rd, pass); - switch (size) { - case 0: gen_neon_trn_u8(tcg_ctx, tmp, tmp2); break; - case 1: gen_neon_trn_u16(tcg_ctx, tmp, tmp2); break; - default: abort(); - } - neon_store_reg(tcg_ctx, rm, pass, tmp2); - break; - case NEON_2RM_VRINTN: - case NEON_2RM_VRINTA: - case NEON_2RM_VRINTM: - case NEON_2RM_VRINTP: - case NEON_2RM_VRINTZ: - { - TCGv_i32 tcg_rmode; - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - int rmode; - - if (op == NEON_2RM_VRINTZ) { - rmode = FPROUNDING_ZERO; - } else { - rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1]; - } - - tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - gen_helper_rints(tcg_ctx, tmp, tmp, fpstatus); - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - tcg_temp_free_i32(tcg_ctx, tcg_rmode); - break; - } - case NEON_2RM_VRINTX: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_rints_exact(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVTAU: - case NEON_2RM_VCVTAS: - case NEON_2RM_VCVTNU: - case NEON_2RM_VCVTNS: - case NEON_2RM_VCVTPU: - case NEON_2RM_VCVTPS: - case NEON_2RM_VCVTMU: - case NEON_2RM_VCVTMS: - { - bool is_signed = !extract32(insn, 7, 1); - TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, 1); - TCGv_i32 tcg_rmode, tcg_shift; - int rmode = fp_decode_rm[extract32(insn, 8, 2)]; - - tcg_shift = tcg_const_i32(tcg_ctx, 0); - tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode)); - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - - if (is_signed) { - gen_helper_vfp_tosls(tcg_ctx, tmp, tmp, - tcg_shift, fpst); - } else { - gen_helper_vfp_touls(tcg_ctx, tmp, tmp, - tcg_shift, fpst); - } - - gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, - tcg_ctx->cpu_env); - tcg_temp_free_i32(tcg_ctx, tcg_rmode); - tcg_temp_free_i32(tcg_ctx, tcg_shift); - tcg_temp_free_ptr(tcg_ctx, fpst); - break; - } - case NEON_2RM_VRECPE: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_recpe_u32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VRSQRTE: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_rsqrte_u32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VRECPE_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_recpe_f32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case 
NEON_2RM_VRSQRTE_F: - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_rsqrte_f32(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_sitos(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_uitos(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_tosizs(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ - { - TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1); - gen_helper_vfp_touizs(tcg_ctx, tmp, tmp, fpstatus); - tcg_temp_free_ptr(tcg_ctx, fpstatus); - break; - } - default: - /* Reserved op values were caught by the - * neon_2rm_sizes[] check earlier. - */ - abort(); - } - neon_store_reg(tcg_ctx, rd, pass, tmp); - } - break; - } - } else if ((insn & (1 << 10)) == 0) { - /* VTBL, VTBX. */ - int n = ((insn >> 8) & 3) + 1; - if ((rn + n) > 32) { - /* This is UNPREDICTABLE; we choose to UNDEF to avoid the - * helper function running off the end of the register file. - */ - return 1; - } - n <<= 3; - if (insn & (1 << 6)) { - tmp = neon_load_reg(tcg_ctx, rd, 0); - } else { - tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_movi_i32(tcg_ctx, tmp, 0); - } - tmp2 = neon_load_reg(tcg_ctx, rm, 0); - ptr1 = vfp_reg_ptr(tcg_ctx, true, rn); - tmp5 = tcg_const_i32(tcg_ctx, n); - gen_helper_neon_tbl(tcg_ctx, tmp2, tmp2, tmp, ptr1, tmp5); - tcg_temp_free_i32(tcg_ctx, tmp); - if (insn & (1 << 6)) { - tmp = neon_load_reg(tcg_ctx, rd, 1); - } else { - tmp = tcg_temp_new_i32(tcg_ctx); - tcg_gen_movi_i32(tcg_ctx, tmp, 0); - } - tmp3 = neon_load_reg(tcg_ctx, rm, 1); - gen_helper_neon_tbl(tcg_ctx, tmp3, tmp3, tmp, ptr1, tmp5); - tcg_temp_free_i32(tcg_ctx, tmp5); - tcg_temp_free_ptr(tcg_ctx, ptr1); - neon_store_reg(tcg_ctx, rd, 0, tmp2); - neon_store_reg(tcg_ctx, rd, 1, tmp3); - tcg_temp_free_i32(tcg_ctx, tmp); - } else if ((insn & 0x380) == 0) { - /* VDUP */ - int element; - MemOp size; + tcg_gen_umin_vec(tcg_ctx, vece, t, a, b); + tcg_gen_umax_vec(tcg_ctx, vece, d, a, b); + tcg_gen_sub_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { - return 1; - } - if (insn & (1 << 16)) { - size = MO_8; - element = (insn >> 17) & 7; - } else if (insn & (1 << 17)) { - size = MO_16; - element = (insn >> 18) & 3; - } else { - size = MO_32; - element = (insn >> 19) & 1; - } - tcg_gen_gvec_dup_mem(tcg_ctx, size, neon_reg_offset(rd, 0), - neon_element_offset(rm, element, size), - q ? 16 : 8, q ? 
16 : 8); - } else { - return 1; - } - } - } - return 0; +void gen_gvec_uabd(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_uabd_i32, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_uabd_i64, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -/* Advanced SIMD three registers of the same length extension. - * 31 25 23 22 20 16 12 11 10 9 8 3 0 - * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+ - * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm | - * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+ - */ -static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) +static void gen_saba_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - gen_helper_gvec_3 *fn_gvec = NULL; - gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; - int rd, rn, rm, opr_sz; - int data = 0; - int off_rn, off_rm; - bool is_long = false, q = extract32(insn, 6, 1); - bool ptr_is_env = false; - - if ((insn & 0xfe200f10) == 0xfc200800) { - /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */ - int size = extract32(insn, 20, 1); - data = extract32(insn, 23, 2); /* rot */ - if (!dc_isar_feature(aa32_vcma, s) - || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { - return 1; - } - fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; - } else if ((insn & 0xfea00f10) == 0xfc800800) { - /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */ - int size = extract32(insn, 20, 1); - data = extract32(insn, 24, 1); /* rot */ - if (!dc_isar_feature(aa32_vcma, s) - || (!size && !dc_isar_feature(aa32_fp16_arith, s))) { - return 1; - } - fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; - } else if ((insn & 0xfeb00f00) == 0xfc200d00) { - /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */ - bool u = extract32(insn, 4, 1); - if (!dc_isar_feature(aa32_dp, s)) { - return 1; - } - fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; - } else if ((insn & 0xff300f10) == 0xfc200810) { - /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... 
*/ - int is_s = extract32(insn, 23, 1); - if (!dc_isar_feature(aa32_fhm, s)) { - return 1; - } - is_long = true; - data = is_s; /* is_2 == 0 */ - fn_gvec_ptr = gen_helper_gvec_fmlal_a32; - ptr_is_env = true; - } else { - return 1; - } + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + gen_sabd_i32(tcg_ctx, t, a, b); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} - VFP_DREG_D(rd, insn); - if (rd & q) { - return 1; - } - if (q || !is_long) { - VFP_DREG_N(rn, insn); - VFP_DREG_M(rm, insn); - if ((rn | rm) & q & !is_long) { - return 1; - } - off_rn = vfp_reg_offset(1, rn); - off_rm = vfp_reg_offset(1, rm); - } else { - rn = VFP_SREG_N(insn); - rm = VFP_SREG_M(insn); - off_rn = vfp_reg_offset(0, rn); - off_rm = vfp_reg_offset(0, rm); - } +static void gen_saba_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + gen_sabd_i64(tcg_ctx, t, a, b); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - if (!s->vfp_enabled) { - return 1; - } +static void gen_saba_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + gen_sabd_vec(tcg_ctx, vece, t, a, b); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - opr_sz = (1 + q) * 8; - if (fn_gvec_ptr) { - TCGv_ptr ptr; - if (ptr_is_env) { - ptr = tcg_ctx->cpu_env; - } else { - ptr = get_fpstatus_ptr(tcg_ctx, 1); - } - tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, ptr, - opr_sz, opr_sz, data, fn_gvec_ptr); - if (!ptr_is_env) { - tcg_temp_free_ptr(tcg_ctx, ptr); - } - } else { - tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, - opr_sz, opr_sz, data, fn_gvec); - } - return 0; +void gen_gvec_saba(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_saba_i32, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_saba_i64, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -/* Advanced SIMD two registers and a scalar extension. 
- * 31 24 23 22 20 16 12 11 10 9 8 3 0 - * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+ - * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm | - * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+ - * - */ +static void gen_uaba_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + gen_uabd_i32(tcg_ctx, t, a, b); + tcg_gen_add_i32(tcg_ctx, d, d, t); + tcg_temp_free_i32(tcg_ctx, t); +} -static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) +static void gen_uaba_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - gen_helper_gvec_3 *fn_gvec = NULL; - gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL; - int rd, rn, rm, opr_sz, data; - int off_rn, off_rm; - bool is_long = false, q = extract32(insn, 6, 1); - bool ptr_is_env = false; - - if ((insn & 0xff000f10) == 0xfe000800) { - /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */ - int rot = extract32(insn, 20, 2); - int size = extract32(insn, 23, 1); - int index; - - if (!dc_isar_feature(aa32_vcma, s)) { - return 1; - } - if (size == 0) { - if (!dc_isar_feature(aa32_fp16_arith, s)) { - return 1; - } - /* For fp16, rm is just Vm, and index is M. */ - rm = extract32(insn, 0, 4); - index = extract32(insn, 5, 1); - } else { - /* For fp32, rm is the usual M:Vm, and index is 0. */ - VFP_DREG_M(rm, insn); - index = 0; - } - data = (index << 2) | rot; - fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); - } else if ((insn & 0xffb00f00) == 0xfe200d00) { - /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */ - int u = extract32(insn, 4, 1); - - if (!dc_isar_feature(aa32_dp, s)) { - return 1; - } - fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; - /* rm is just Vm, and index is M. */ - data = extract32(insn, 5, 1); /* index */ - rm = extract32(insn, 0, 4); - } else if ((insn & 0xffa00f10) == 0xfe000810) { - /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... 
*/ - int is_s = extract32(insn, 20, 1); - int vm20 = extract32(insn, 0, 3); - int vm3 = extract32(insn, 3, 1); - int m = extract32(insn, 5, 1); - int index; - - if (!dc_isar_feature(aa32_fhm, s)) { - return 1; - } - if (q) { - rm = vm20; - index = m * 2 + vm3; - } else { - rm = vm20 * 2 + m; - index = vm3; - } - is_long = true; - data = (index << 2) | is_s; /* is_2 == 0 */ - fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32; - ptr_is_env = true; - } else { - return 1; - } + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + gen_uabd_i64(tcg_ctx, t, a, b); + tcg_gen_add_i64(tcg_ctx, d, d, t); + tcg_temp_free_i64(tcg_ctx, t); +} - VFP_DREG_D(rd, insn); - if (rd & q) { - return 1; - } - if (q || !is_long) { - VFP_DREG_N(rn, insn); - if (rn & q & !is_long) { - return 1; - } - off_rn = vfp_reg_offset(1, rn); - off_rm = vfp_reg_offset(1, rm); - } else { - rn = VFP_SREG_N(insn); - off_rn = vfp_reg_offset(0, rn); - off_rm = vfp_reg_offset(0, rm); - } - if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_simd_access_trap(1, 0xe, false), s->fp_excp_el); - return 0; - } - if (!s->vfp_enabled) { - return 1; - } +static void gen_uaba_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + gen_uabd_vec(tcg_ctx, vece, t, a, b); + tcg_gen_add_vec(tcg_ctx, vece, d, d, t); + tcg_temp_free_vec(tcg_ctx, t); +} - opr_sz = (1 + q) * 8; - if (fn_gvec_ptr) { - TCGv_ptr ptr; - if (ptr_is_env) { - ptr = tcg_ctx->cpu_env; - } else { - ptr = get_fpstatus_ptr(tcg_ctx, 1); - } - tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, ptr, - opr_sz, opr_sz, data, fn_gvec_ptr); - if (!ptr_is_env) { - tcg_temp_free_ptr(tcg_ctx, ptr); - } - } else { - tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, rd), off_rn, off_rm, - opr_sz, opr_sz, data, fn_gvec); - } - return 0; +void gen_gvec_uaba(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_uaba_i32, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_uaba_i64, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } static int disas_coproc_insn(DisasContext *s, uint32_t insn) @@ -7734,7 +5198,7 @@ static void gen_srs(DisasContext *s, tcg_temp_free_i32(tcg_ctx, tmp); } tcg_temp_free_i32(tcg_ctx, addr); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Generate a label used for skipping this instruction */ @@ -10076,7 +7540,7 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); if (m_swap) { - gen_swap_half(tcg_ctx, t2); + gen_swap_half(tcg_ctx, t2, t2); } gen_smul_dual(tcg_ctx, t1, t2); @@ -10135,7 +7599,7 @@ static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) t1 = load_reg(s, a->rn); t2 = 
load_reg(s, a->rm); if (m_swap) { - gen_swap_half(tcg_ctx, t2); + gen_swap_half(tcg_ctx, t2, t2); } gen_smul_dual(tcg_ctx, t1, t2); @@ -10490,9 +7954,6 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) // gen_io_start(tcg_ctx); } gen_helper_cpsr_write_eret(tcg_ctx, tcg_ctx->cpu_env, tmp); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - // gen_io_end(tcg_ctx); - } tcg_temp_free_i32(tcg_ctx, tmp); /* Must exit loop to check un-masked IRQs */ s->base.is_jmp = DISAS_EXIT; @@ -10878,7 +8339,7 @@ static bool trans_SETEND(DisasContext *s, arg_SETEND *a) } if (a->E != (s->be_data == MO_BE)) { gen_helper_setend(tcg_ctx, tcg_ctx->cpu_env); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } return true; } @@ -10964,33 +8425,14 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) /* Unconditional instructions. */ /* TODO: Perhaps merge these into one decodetree output file. */ if (disas_a32_uncond(s, insn) || - disas_vfp_uncond(s, insn)) { + disas_vfp_uncond(s, insn) || + disas_neon_dp(s, insn) || + disas_neon_ls(s, insn) || + disas_neon_shared(s, insn)) { return; } /* fall back to legacy decoder */ - if (((insn >> 25) & 7) == 1) { - /* NEON Data processing. */ - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - goto illegal_op; - } - - if (disas_neon_data_insn(s, insn)) { - goto illegal_op; - } - return; - } - if ((insn & 0x0f100000) == 0x04000000) { - /* NEON load/store. */ - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - goto illegal_op; - } - - if (disas_neon_ls_insn(s, insn)) { - goto illegal_op; - } - return; - } if ((insn & 0x0e000f00) == 0x0c000100) { if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { /* iWMMXt register transfer. */ @@ -11000,18 +8442,6 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) } } } - } else if ((insn & 0x0e000a00) == 0x0c000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - if (disas_neon_insn_3same_ext(s, insn)) { - goto illegal_op; - } - return; - } else if ((insn & 0x0f000a00) == 0x0e000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - if (disas_neon_insn_2reg_scalar_ext(s, insn)) { - goto illegal_op; - } - return; } goto illegal_op; } @@ -11126,6 +8556,33 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) ARCH(6T2); } + if ((insn & 0xef000000) == 0xef000000) { + /* + * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq + * transform into + * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq + */ + uint32_t a32_insn = (insn & 0xe2ffffff) | + ((insn & (1 << 28)) >> 4) | (1 << 28); + + if (disas_neon_dp(s, a32_insn)) { + return; + } + } + + if ((insn & 0xff100000) == 0xf9000000) { + /* + * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq + * transform into + * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq + */ + uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000; + + if (disas_neon_ls(s, a32_insn)) { + return; + } + } + /* * TODO: Perhaps merge these into one decodetree output file. * Note disas_vfp is written for a32 with cond field in the @@ -11133,6 +8590,7 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) */ if (disas_t32(s, insn) || disas_vfp_uncond(s, insn) || + disas_neon_shared(s, insn) || ((insn >> 28) == 0xe && disas_vfp(s, insn))) { return; } @@ -11162,24 +8620,9 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } break; } - if ((insn & 0xfe000a00) == 0xfc000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - /* The Thumb2 and ARM encodings are identical. 
*/ - if (disas_neon_insn_3same_ext(s, insn)) { - goto illegal_op; - } - } else if ((insn & 0xff000a00) == 0xfe000800 - && arm_dc_feature(s, ARM_FEATURE_V8)) { - /* The Thumb2 and ARM encodings are identical. */ - if (disas_neon_insn_2reg_scalar_ext(s, insn)) { - goto illegal_op; - } - } else if (((insn >> 24) & 3) == 3) { - /* Translate into the equivalent ARM encoding. */ - insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28); - if (disas_neon_data_insn(s, insn)) { - goto illegal_op; - } + if (((insn >> 24) & 3) == 3) { + /* Neon DP, but failed disas_neon_dp() */ + goto illegal_op; } else if (((insn >> 8) & 0xe) == 10) { /* VFP, but failed disas_vfp. */ goto illegal_op; @@ -11192,12 +8635,6 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn) } break; case 12: - if ((insn & 0x01100000) == 0x01000000) { - if (disas_neon_ls_insn(s, insn)) { - goto illegal_op; - } - break; - } goto illegal_op; default: illegal_op: @@ -11685,7 +9122,8 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) break; case DISAS_NEXT: case DISAS_TOO_MANY: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: + case DISAS_UPDATE_NOCHAIN: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: @@ -11709,10 +9147,13 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_TOO_MANY: gen_goto_tb(dc, 1, dc->base.pc_next); break; + case DISAS_UPDATE_NOCHAIN: + gen_set_pc_im(dc, dc->base.pc_next); + /* fall through */ case DISAS_JUMP: gen_goto_ptr(tcg_ctx); break; - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h index 62ea7a5277..b0c4539267 100644 --- a/qemu/target/arm/translate.h +++ b/qemu/target/arm/translate.h @@ -29,6 +29,7 @@ typedef struct DisasContext { ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */ uint8_t tbii; /* TBI1|TBI0 for insns */ uint8_t tbid; /* TBI1|TBI0 for data */ + uint8_t tcma; /* TCMA1|TCMA0 for MTE */ bool ns; /* Use non-secure CPREG bank on access */ int fp_excp_el; /* FP exception EL or 0 if enabled */ int sve_excp_el; /* SVE exception EL or 0 if enabled */ @@ -76,6 +77,10 @@ typedef struct DisasContext { bool unpriv; /* True if v8.3-PAuth is active. */ bool pauth_active; + /* True if v8.5-MTE access to tags is enabled. */ + bool ata; + /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */ + bool mte_active[2]; /* True with v8.5-BTI and SCTLR_ELx.BT* set. */ bool bt; /* True if any CP15 access is trapped by HSTR_EL2 */ @@ -85,6 +90,8 @@ typedef struct DisasContext { * < 0, set by the current instruction. */ int8_t btype; + /* A copy of cpu->dcz_blocksize. */ + uint8_t dcz_blocksize; /* True if this page is guarded. */ bool guarded_page; /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ @@ -145,7 +152,8 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) /* is_jmp field values */ #define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */ -#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */ +/* CPU state was modified dynamically; exit to main loop for interrupts. */ +#define DISAS_UPDATE_EXIT DISAS_TARGET_1 /* These instructions trap after executing, so the A32/T32 decoder must * defer them until after the conditional execution state has been updated. * WFI also needs special handling when single-stepping. 
@@ -161,13 +169,16 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) * custom end-of-TB code) */ #define DISAS_BX_EXCRET DISAS_TARGET_8 -/* For instructions which want an immediate exit to the main loop, - * as opposed to attempting to use lookup_and_goto_ptr. Unlike - * DISAS_UPDATE this doesn't write the PC on exiting the translation - * loop so you need to ensure something (gen_a64_set_pc_im or runtime - * helper) has done so before we reach return from cpu_tb_exec. +/* + * For instructions which want an immediate exit to the main loop, as opposed + * to attempting to use lookup_and_goto_ptr. Unlike DISAS_UPDATE_EXIT, this + * doesn't write the PC on exiting the translation loop so you need to ensure + * something (gen_a64_set_pc_im or runtime helper) has done so before we reach + * return from cpu_tb_exec. */ #define DISAS_EXIT DISAS_TARGET_9 +/* CPU state was modified dynamically; no need to exit, but do not chain. */ +#define DISAS_UPDATE_NOCHAIN DISAS_TARGET_10 #ifdef TARGET_AARCH64 void a64_translate_init(struct uc_struct *uc); @@ -274,28 +285,110 @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex) uint64_t vfp_expand_imm(int size, uint8_t imm8); /* Vector operations shared between ARM and AArch64. */ -extern const GVecGen3 mla_op[4]; -extern const GVecGen3 mls_op[4]; -extern const GVecGen3 cmtst_op[4]; -extern const GVecGen3 sshl_op[4]; -extern const GVecGen3 ushl_op[4]; -extern const GVecGen2i ssra_op[4]; -extern const GVecGen2i usra_op[4]; -extern const GVecGen2i sri_op[4]; -extern const GVecGen2i sli_op[4]; -extern const GVecGen4 uqadd_op[4]; -extern const GVecGen4 sqadd_op[4]; -extern const GVecGen4 uqsub_op[4]; -extern const GVecGen4 sqsub_op[4]; +void gen_gvec_ceq0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_clt0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cgt0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cle0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_cge0(TCGContext *, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_mla(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_mls(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_cmtst(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sshl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_ushl(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + void gen_cmtst_i64(TCGContext *, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_ushl_i32(TCGContext *, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void gen_sshl_i32(TCGContext *, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void gen_ushl_i64(TCGContext *, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_sshl_i64(TCGContext *, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void gen_gvec_uqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void 
gen_gvec_sqadd_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqsub_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_ssra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_usra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_srshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_urshr(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_srsra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_ursra(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_sri(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sli(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_sqrdmlah_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqrdmlsh_qc(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_sabd(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uabd(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_saba(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uaba(TCGContext *,unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + /* * Forward to the isar_feature_* tests given a DisasContext pointer. */ #define dc_isar_feature(name, ctx) isar_feature_##name(ctx->isar) +/* Note that the gvec expanders operate on offsets + sizes. 
*/ +typedef void GVecGen2Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, uint32_t); +typedef void GVecGen2iFn(TCGContext *, unsigned, uint32_t, uint32_t, int64_t, + uint32_t, uint32_t); +typedef void GVecGen3Fn(TCGContext *, unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); +typedef void GVecGen4Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +/* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenOneOpFn(TCGContext *, TCGv_i32, TCGv_i32); +typedef void NeonGenOneOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32); +typedef void NeonGenTwoOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32); +typedef void NeonGenTwoOpEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); +typedef void NeonGenTwo64OpFn(TCGContext*, TCGv_i64, TCGv_i64, TCGv_i64); +typedef void NeonGenTwo64OpEnvFn(TCGContext *, TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); +typedef void NeonGenNarrowFn(TCGContext *, TCGv_i32, TCGv_i64); +typedef void NeonGenNarrowEnvFn(TCGContext *, TCGv_i32, TCGv_ptr, TCGv_i64); +typedef void NeonGenWidenFn(TCGContext *, TCGv_i64, TCGv_i32); +typedef void NeonGenTwoOpWidenFn(TCGContext *, TCGv_i64, TCGv_i32, TCGv_i32); +typedef void NeonGenOneSingleOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_ptr); +typedef void NeonGenTwoSingleOpFn(TCGContext *, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); +typedef void NeonGenTwoDoubleOpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); +typedef void NeonGenOne64OpFn(TCGContext *, TCGv_i64, TCGv_i64); +typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr); +typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); +typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); +typedef void AtomicThreeOpFn(TCGContext *, TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp); + #endif /* TARGET_ARM_TRANSLATE_H */ diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c index a1839eed81..84a54cee05 100644 --- a/qemu/target/arm/vec_helper.c +++ b/qemu/target/arm/vec_helper.c @@ -22,7 +22,7 @@ #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" - +#include "vec_internal.h" /* Note that vector data is stored in host-endian 64-bit chunks, so addressing units smaller than that needs a host-endian fixup. */ @@ -36,21 +36,9 @@ #define H4(x) (x) #endif -#define SET_QC() env->vfp.qc[0] = 1 - -static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) -{ - uint64_t *d = (uint64_t *)((char *)vd + opr_sz); - uintptr_t i; - - for (i = opr_sz; i < max_sz; i += 8) { - *d++ = 0; - } -} - /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ -static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, - int16_t src2, int16_t src3) +static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2, + int16_t src3, uint32_t *sat) { /* Simplify: * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16 @@ -60,7 +48,7 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, ret = ((int32_t)src3 << 15) + ret + (1 << 14); ret >>= 15; if (ret != (int16_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? 
-0x8000 : 0x7fff); } return ret; @@ -69,30 +57,30 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1, uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { - uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3); - uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16); + uint32_t *sat = &env->vfp.qc[0]; + uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat); + uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); return deposit32(e1, 16, 16, e2); } void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int16_t *d = vd; int16_t *n = vn; int16_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-subtract high half, 16-bit */ -static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, - int16_t src2, int16_t src3) +static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2, + int16_t src3, uint32_t *sat) { /* Similarly, using subtraction: * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16 @@ -102,7 +90,7 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, ret = ((int32_t)src3 << 15) - ret + (1 << 14); ret >>= 15; if (ret != (int16_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? -0x8000 : 0x7fff); } return ret; @@ -111,85 +99,97 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1, uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1, uint32_t src2, uint32_t src3) { - uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3); - uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16); + uint32_t *sat = &env->vfp.qc[0]; + uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat); + uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat); return deposit32(e1, 16, 16, e2); } void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int16_t *d = vd; int16_t *n = vn; int16_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 2; ++i) { - d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */ -uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, - int32_t src2, int32_t src3) +static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2, + int32_t src3, uint32_t *sat) { /* Simplify similarly to int_qrdmlah_s16 above. */ int64_t ret = (int64_t)src1 * src2; ret = ((int64_t)src3 << 31) + ret + (1 << 30); ret >>= 31; if (ret != (int32_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? 
INT32_MIN : INT32_MAX); } return ret; } +uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1, + int32_t src2, int32_t src3) +{ + uint32_t *sat = &env->vfp.qc[0]; + return inl_qrdmlah_s32(src1, src2, src3, sat); +} + void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int32_t *d = vd; int32_t *n = vn; int32_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } /* Signed saturating rounding doubling multiply-subtract high half, 32-bit */ -uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, - int32_t src2, int32_t src3) +static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2, + int32_t src3, uint32_t *sat) { /* Simplify similarly to int_qrdmlsh_s16 above. */ int64_t ret = (int64_t)src1 * src2; ret = ((int64_t)src3 << 31) - ret + (1 << 30); ret >>= 31; if (ret != (int32_t)ret) { - SET_QC(); + *sat = 1; ret = (ret < 0 ? INT32_MIN : INT32_MAX); } return ret; } +uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1, + int32_t src2, int32_t src3) +{ + uint32_t *sat = &env->vfp.qc[0]; + return inl_qrdmlsh_s32(src1, src2, src3, sat); +} + void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, - void *ve, uint32_t desc) + void *vq, uint32_t desc) { uintptr_t opr_sz = simd_oprsz(desc); int32_t *d = vd; int32_t *n = vn; int32_t *m = vm; - CPUARMState *env = ve; uintptr_t i; for (i = 0; i < opr_sz / 4; ++i) { - d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]); + d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq); } clear_tail(d, opr_sz, simd_maxsz(desc)); } @@ -681,6 +681,11 @@ static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat) return result; } +static float32 float32_abd(float32 op1, float32 op2, float_status *stat) +{ + return float32_abs(float32_sub(op1, op2, stat)); +} + #define DO_3OP(NAME, FUNC, TYPE) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ @@ -708,6 +713,8 @@ DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16) DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32) DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64) +DO_3OP(gvec_fabd_s, float32_abd, float32) + #ifdef TARGET_AARCH64 DO_3OP(gvec_recps_h, helper_recpsf_f16, float16) @@ -901,6 +908,118 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, clear_tail(d, oprsz, simd_maxsz(desc)); } +#define DO_SRA(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] += n[i] >> shift; \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SRA(gvec_ssra_b, int8_t) +DO_SRA(gvec_ssra_h, int16_t) +DO_SRA(gvec_ssra_s, int32_t) +DO_SRA(gvec_ssra_d, int64_t) + +DO_SRA(gvec_usra_b, uint8_t) +DO_SRA(gvec_usra_h, uint16_t) +DO_SRA(gvec_usra_s, uint32_t) +DO_SRA(gvec_usra_d, uint64_t) + +#undef DO_SRA + +#define DO_RSHR(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + TYPE tmp = n[i] >> (shift - 1); \ + d[i] = (tmp >> 1) + (tmp & 1); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_RSHR(gvec_srshr_b, int8_t) 
+DO_RSHR(gvec_srshr_h, int16_t) +DO_RSHR(gvec_srshr_s, int32_t) +DO_RSHR(gvec_srshr_d, int64_t) + +DO_RSHR(gvec_urshr_b, uint8_t) +DO_RSHR(gvec_urshr_h, uint16_t) +DO_RSHR(gvec_urshr_s, uint32_t) +DO_RSHR(gvec_urshr_d, uint64_t) + +#undef DO_RSHR + +#define DO_RSRA(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + TYPE tmp = n[i] >> (shift - 1); \ + d[i] += (tmp >> 1) + (tmp & 1); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_RSRA(gvec_srsra_b, int8_t) +DO_RSRA(gvec_srsra_h, int16_t) +DO_RSRA(gvec_srsra_s, int32_t) +DO_RSRA(gvec_srsra_d, int64_t) + +DO_RSRA(gvec_ursra_b, uint8_t) +DO_RSRA(gvec_ursra_h, uint16_t) +DO_RSRA(gvec_ursra_s, uint32_t) +DO_RSRA(gvec_ursra_d, uint64_t) + +#undef DO_RSRA + +#define DO_SRI(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SRI(gvec_sri_b, uint8_t) +DO_SRI(gvec_sri_h, uint16_t) +DO_SRI(gvec_sri_s, uint32_t) +DO_SRI(gvec_sri_d, uint64_t) + +#undef DO_SRI + +#define DO_SLI(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + int shift = simd_data(desc); \ + TYPE *d = vd, *n = vn; \ + for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ + d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SLI(gvec_sli_b, uint8_t) +DO_SLI(gvec_sli_h, uint16_t) +DO_SLI(gvec_sli_s, uint32_t) +DO_SLI(gvec_sli_d, uint64_t) + +#undef DO_SLI + /* * Convert float16 to float32, raising no exceptions and * preserving exceptional values, including SNaN. @@ -1263,3 +1382,76 @@ void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) } } #endif + +#define DO_CMP0(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \ + TYPE nn = *(TYPE *)((char*)vn + i); \ + *(TYPE *)((char*)vd + i) = -(nn OP 0); \ + } \ + clear_tail(vd, opr_sz, simd_maxsz(desc)); \ +} + +DO_CMP0(gvec_ceq0_b, int8_t, ==) +DO_CMP0(gvec_clt0_b, int8_t, <) +DO_CMP0(gvec_cle0_b, int8_t, <=) +DO_CMP0(gvec_cgt0_b, int8_t, >) +DO_CMP0(gvec_cge0_b, int8_t, >=) + +DO_CMP0(gvec_ceq0_h, int16_t, ==) +DO_CMP0(gvec_clt0_h, int16_t, <) +DO_CMP0(gvec_cle0_h, int16_t, <=) +DO_CMP0(gvec_cgt0_h, int16_t, >) +DO_CMP0(gvec_cge0_h, int16_t, >=) + +#undef DO_CMP0 + +#define DO_ABD(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + \ + for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \ + d[i] = n[i] < m[i] ? 
m[i] - n[i] : n[i] - m[i]; \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +DO_ABD(gvec_sabd_b, int8_t) +DO_ABD(gvec_sabd_h, int16_t) +DO_ABD(gvec_sabd_s, int32_t) +DO_ABD(gvec_sabd_d, int64_t) + +DO_ABD(gvec_uabd_b, uint8_t) +DO_ABD(gvec_uabd_h, uint16_t) +DO_ABD(gvec_uabd_s, uint32_t) +DO_ABD(gvec_uabd_d, uint64_t) + +#undef DO_ABD + +#define DO_ABA(NAME, TYPE) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + TYPE *d = vd, *n = vn, *m = vm; \ + \ + for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \ + d[i] += n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +DO_ABA(gvec_saba_b, int8_t) +DO_ABA(gvec_saba_h, int16_t) +DO_ABA(gvec_saba_s, int32_t) +DO_ABA(gvec_saba_d, int64_t) + +DO_ABA(gvec_uaba_b, uint8_t) +DO_ABA(gvec_uaba_h, uint16_t) +DO_ABA(gvec_uaba_s, uint32_t) +DO_ABA(gvec_uaba_d, uint64_t) + +#undef DO_ABA diff --git a/qemu/target/arm/vec_internal.h b/qemu/target/arm/vec_internal.h new file mode 100644 index 0000000000..3aa74b0151 --- /dev/null +++ b/qemu/target/arm/vec_internal.h @@ -0,0 +1,33 @@ +/* + * ARM AdvSIMD / SVE Vector Helpers + * + * Copyright (c) 2020 Linaro + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef TARGET_ARM_VEC_INTERNALS_H +#define TARGET_ARM_VEC_INTERNALS_H + +static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) +{ + uint64_t *d = (uint64_t *)((char*)vd + opr_sz); + uintptr_t i; + + for (i = opr_sz; i < max_sz; i += 8) { + *d++ = 0; + } +} + +#endif /* TARGET_ARM_VEC_INTERNALS_H */ diff --git a/qemu/target/arm/vfp_helper.c b/qemu/target/arm/vfp_helper.c index 55bce5957a..753b5ed5e2 100644 --- a/qemu/target/arm/vfp_helper.c +++ b/qemu/target/arm/vfp_helper.c @@ -262,7 +262,7 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) return float64_sqrt(a, &env->vfp.fp_status); } -static void softfloat_to_vfp_compare(CPUARMState *env, int cmp) +static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp) { uint32_t flags = 0; switch (cmp) { @@ -536,7 +536,7 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode) * it would affect flushing input denormals. */ float_status *fpst = fpstp; - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); set_flush_inputs_to_zero(false, fpst); float32 r = float16_to_float32(a, !ahp_mode, fpst); set_flush_inputs_to_zero(save, fpst); @@ -549,7 +549,7 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode) * it would affect flushing output denormals. 
*/ float_status *fpst = fpstp; - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); set_flush_to_zero(false, fpst); float16 r = float32_to_float16(a, !ahp_mode, fpst); set_flush_to_zero(save, fpst); @@ -562,7 +562,7 @@ float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode) * it would affect flushing input denormals. */ float_status *fpst = fpstp; - flag save = get_flush_inputs_to_zero(fpst); + bool save = get_flush_inputs_to_zero(fpst); set_flush_inputs_to_zero(false, fpst); float64 r = float16_to_float64(a, !ahp_mode, fpst); set_flush_inputs_to_zero(save, fpst); @@ -575,7 +575,7 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode) * it would affect flushing output denormals. */ float_status *fpst = fpstp; - flag save = get_flush_to_zero(fpst); + bool save = get_flush_to_zero(fpst); set_flush_to_zero(false, fpst); float16 r = float64_to_float16(a, !ahp_mode, fpst); set_flush_to_zero(save, fpst); @@ -586,7 +586,7 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode) #define float32_three make_float32(0x40400000) #define float32_one_point_five make_float32(0x3fc00000) -float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env) +float32 HELPER(recps_f32)(CPUARMState *env, float32 a, float32 b) { float_status *s = &env->vfp.standard_fp_status; if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || @@ -599,7 +599,7 @@ float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env) return float32_sub(float32_two, float32_mul(a, b, s), s); } -float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env) +float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b) { float_status *s = &env->vfp.standard_fp_status; float32 product; @@ -702,11 +702,9 @@ static bool round_to_inf(float_status *fpst, bool sign_bit) return sign_bit; case float_round_to_zero: /* Round to Zero */ return false; + default: + g_assert_not_reached(); } - - g_assert_not_reached(); - // never reach here - return false; } uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp) @@ -1030,9 +1028,8 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) return make_float64(val); } -uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) +uint32_t HELPER(recpe_u32)(uint32_t a) { - /* float_status *s = fpstp; */ int input, estimate; if ((a & 0x80000000) == 0) { @@ -1045,7 +1042,7 @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) return deposit32(0, (32 - 9), 9, estimate); } -uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp) +uint32_t HELPER(rsqrte_u32)(uint32_t a) { int estimate; diff --git a/qemu/target/i386/cpu.c b/qemu/target/i386/cpu.c index 86103b09e3..0cdd7a1ed5 100644 --- a/qemu/target/i386/cpu.c +++ b/qemu/target/i386/cpu.c @@ -852,10 +852,10 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .type = CPUID_FEATURE_WORD, .feat_names = { NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", - NULL, NULL, NULL, NULL, - NULL, NULL, "md-clear", NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL /* pconfig */, NULL, + "fsrm", NULL, NULL, NULL, + "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, "serialize", NULL, + "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "spec-ctrl", "stibp", NULL, "arch-capabilities", "core-capability", "ssbd", @@ -1001,6 +1001,22 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .index = MSR_IA32_CORE_CAPABILITY, }, }, + [FEAT_PERF_CAPABILITIES] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, 
NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, "full-width-write", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_PERF_CAPABILITIES, + }, + }, [FEAT_VMX_PROCBASED_CTLS] = { .type = MSR_FEATURE_WORD, @@ -2722,6 +2738,13 @@ static X86CPUDefinition builtin_x86_defs[] = { { NULL /* end of list */ } } }, + { + .version = 4, + .props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } + } + }, { 0 /* end of list */ } } }, @@ -2827,6 +2850,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .versions = (X86CPUVersionDefinition[]) { { .version = 1 }, { .version = 2, + .note = "ARCH_CAPABILITIES", .props = (PropValue[]) { { "arch-capabilities", "on" }, { "rdctl-no", "on" }, @@ -2838,12 +2862,20 @@ static X86CPUDefinition builtin_x86_defs[] = { }, { .version = 3, .alias = "Cascadelake-Server-noTSX", + .note = "ARCH_CAPABILITIES, no TSX", .props = (PropValue[]) { { "hle", "off" }, { "rtm", "off" }, { NULL /* end of list */ } }, }, + { .version = 4, + .note = "ARCH_CAPABILITIES, no TSX", + .props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } + }, + }, { 0 /* end of list */ } } }, @@ -3059,6 +3091,7 @@ static X86CPUDefinition builtin_x86_defs[] = { { .version = 1 }, { .version = 2, + .note = "no TSX", .alias = "Icelake-Client-noTSX", .props = (PropValue[]) { { "hle", "off" }, @@ -3196,6 +3229,20 @@ static X86CPUDefinition builtin_x86_defs[] = { { NULL /* end of list */ } }, }, + { + .version = 4, + .props = (PropValue[]) { + { "sha-ni", "on" }, + { "avx512ifma", "on" }, + { "rdpid", "on" }, + { "fsrm", "on" }, + { "vmx-rdseed-exit", "on" }, + { "vmx-pml", "on" }, + { "vmx-eptp-switching", "on" }, + { "model", "106" }, + { /* end of list */ } + }, + }, { 0 /* end of list */ } } }, @@ -3296,6 +3343,7 @@ static X86CPUDefinition builtin_x86_defs[] = { { .version = 1 }, { .version = 2, + .note = "no MPX, no MONITOR", .props = (PropValue[]) { { "monitor", "off" }, { "mpx", "off" }, @@ -3828,16 +3876,6 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, } } -/* Convert all '_' in a feature string option name to '-', to make feature - * name conform to QOM property naming rule, which uses '-' instead of '_'. - */ -static inline void feat2prop(char *s) -{ - while ((s = strchr(s, '_'))) { - *s = '-'; - } -} - static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, @@ -3888,6 +3926,13 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) } x86_cpuid_set_vendor(cpu, def->vendor); + + /* + * Properties in versioned CPU model are not user specified features. + * We can simply clear env->user_features here since it will be filled later + * in x86_cpu_expand_features() based on plus_features and minus_features. 
+ */ + memset(&env->user_features, 0, sizeof(env->user_features)); } void cpu_clear_apic_feature(CPUX86State *env) @@ -4042,6 +4087,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ebx |= (cs->nr_cores * cs->nr_threads) << 16; *edx |= CPUID_HT; } + if (!cpu->enable_pmu) { + *ecx &= ~CPUID_EXT_PDCM; + } break; case 2: /* cache info: needed for Pentium Pro compatibility */ @@ -4330,9 +4378,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, host_cpuid(index, 0, eax, ebx, ecx, edx); break; } - *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | \ + *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); - *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | \ + *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache); *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache); @@ -4343,13 +4391,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, host_cpuid(index, 0, eax, ebx, ecx, edx); break; } - *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | \ + *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | (L2_DTLB_2M_ENTRIES << 16) | \ - (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | \ + (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | (L2_ITLB_2M_ENTRIES); - *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | \ + *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | (L2_DTLB_4K_ENTRIES << 16) | \ - (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | \ + (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | (L2_ITLB_4K_ENTRIES); encode_cache_cpuid80000006(env->cache_info_amd.l2_cache, cpu->enable_l3_cache ? @@ -4376,11 +4424,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = cpu->phys_bits; } *ebx = env->features[FEAT_8000_0008_EBX]; - *ecx = 0; - *edx = 0; if (cs->nr_cores * cs->nr_threads > 1) { - *ecx |= (cs->nr_cores * cs->nr_threads) - 1; + /* + * Bits 15:12 is "The number of bits in the initial + * Core::X86::Apic::ApicId[ApicId] value that indicate + * thread ID within a package". This is already stored at + * CPUX86State::pkg_offset. + * Bits 7:0 is "The number of threads in the package is NC+1" + */ + *ecx = (env->pkg_offset << 12) | + ((cs->nr_cores * cs->nr_threads) - 1); + } else { + *ecx = 0; } + *edx = 0; break; case 0x8000000A: if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { @@ -4478,6 +4535,7 @@ static void x86_cpu_reset(CPUState *dev) /* init to reset state */ env->hflags2 |= HF2_GIF_MASK; + env->hflags &= ~HF_GUEST_MASK; cpu_x86_update_cr0(env, 0x60000010); env->a20_mask = ~0x0; @@ -4707,7 +4765,7 @@ static void x86_cpu_expand_features(X86CPU *cpu) */ env->features[w] |= x86_cpu_get_supported_feature_word(w, cpu->migratable) & - ~env->user_features[w] & \ + ~env->user_features[w] & ~feature_word_info[w].no_autoenable_flags; } } @@ -4740,7 +4798,7 @@ static void x86_cpu_expand_features(X86CPU *cpu) // TODO: Add a warning? 
// mark_unavailable_features(cpu, FEAT_7_0_EBX, // CPUID_7_0_EBX_INTEL_PT, - // "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,level=0x14\""); + // "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,min-level=0x14\""); } } diff --git a/qemu/target/i386/cpu.h b/qemu/target/i386/cpu.h index 10d93b89ac..bc0664ffde 100644 --- a/qemu/target/i386/cpu.h +++ b/qemu/target/i386/cpu.h @@ -352,6 +352,8 @@ typedef enum X86Seg { #define MSR_IA32_ARCH_CAPABILITIES 0x10a #define ARCH_CAP_TSX_CTRL_MSR (1<<7) +#define MSR_IA32_PERF_CAPABILITIES 0x345 + #define MSR_IA32_TSX_CTRL 0x122 #define MSR_IA32_TSCDEADLINE 0x6e0 @@ -525,6 +527,7 @@ typedef enum FeatureWord { FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ FEAT_ARCH_CAPABILITIES, FEAT_CORE_CAPABILITY, + FEAT_PERF_CAPABILITIES, FEAT_VMX_PROCBASED_CTLS, FEAT_VMX_SECONDARY_CTLS, FEAT_VMX_PINBASED_CTLS, @@ -768,6 +771,14 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Multiply Accumulation Single Precision */ #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) +/* Fast Short Rep Mov */ +#define CPUID_7_0_EDX_FSRM (1U << 4) +/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ +#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) +/* SERIALIZE instruction */ +#define CPUID_7_0_EDX_SERIALIZE (1U << 14) +/* TSX Suspend Load Address Tracking instruction */ +#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) /* Speculation Control */ #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Single Thread Indirect Branch Predictors */ @@ -1571,6 +1582,7 @@ typedef struct CPUX86State { bool tsc_valid; int64_t tsc_khz; int64_t user_tsc_khz; /* for sanity check only */ + uint64_t apic_bus_freq; uint64_t mcg_cap; uint64_t mcg_ctl; @@ -2043,6 +2055,11 @@ static inline bool cpu_has_vmx(CPUX86State *env) return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; } +static inline bool cpu_has_svm(CPUX86State *env) +{ + return env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM; +} + /* * In order for a vCPU to enter VMX operation it must have CR4.VMXE set. 
* Since it was set, CR4.VMXE must remain set as long as vCPU is in @@ -2068,6 +2085,7 @@ static inline bool cpu_vmx_maybe_enabled(CPUX86State *env) /* fpu_helper.c */ void update_fp_status(CPUX86State *env); void update_mxcsr_status(CPUX86State *env); +void update_mxcsr_from_sse_status(CPUX86State *env); static inline void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr) { diff --git a/qemu/target/i386/excp_helper.c b/qemu/target/i386/excp_helper.c index cca25d322e..800c75dffd 100644 --- a/qemu/target/i386/excp_helper.c +++ b/qemu/target/i386/excp_helper.c @@ -261,8 +261,8 @@ static hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type, } ptep = pde | PG_NX_MASK; - /* if PSE bit is set, then we use a 4MB page */ - if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { + /* if host cr4 PSE bit is set, then we use a 4MB page */ + if ((pde & PG_PSE_MASK) && (env->nested_pg_mode & SVM_NPT_PSE)) { page_size = 4096 * 1024; pte_addr = pde_addr; diff --git a/qemu/target/i386/fpu_helper.c b/qemu/target/i386/fpu_helper.c index b3f537000f..ea121fbfe5 100644 --- a/qemu/target/i386/fpu_helper.c +++ b/qemu/target/i386/fpu_helper.c @@ -25,6 +25,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "fpu/softfloat.h" +#include "fpu/softfloat-macros.h" #define FPU_RC_MASK 0xc00 #define FPU_RC_NEAR 0x000 @@ -55,8 +56,13 @@ #define FPUC_EM 0x3f #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) +#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) +#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) +#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) +#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) +#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) static void cpu_clear_ignne(CPUX86State *env) { @@ -141,12 +147,32 @@ static void fpu_set_exception(CPUX86State *env, int mask) } } +static inline uint8_t save_exception_flags(CPUX86State *env) +{ + uint8_t old_flags = get_float_exception_flags(&env->fp_status); + set_float_exception_flags(0, &env->fp_status); + return old_flags; +} + +static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) +{ + uint8_t new_flags = get_float_exception_flags(&env->fp_status); + float_raise(old_flags, &env->fp_status); + fpu_set_exception(env, + ((new_flags & float_flag_invalid ? FPUS_IE : 0) | + (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | + (new_flags & float_flag_overflow ? FPUS_OE : 0) | + (new_flags & float_flag_underflow ? FPUS_UE : 0) | + (new_flags & float_flag_inexact ? FPUS_PE : 0) | + (new_flags & float_flag_input_denormal ? 
FPUS_DE : 0))); +} + static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) { - if (floatx80_is_zero(b)) { - fpu_set_exception(env, FPUS_ZE); - } - return floatx80_div(a, b, &env->fp_status); + uint8_t old_flags = save_exception_flags(env); + floatx80 ret = floatx80_div(a, b, &env->fp_status); + merge_exception_flags(env, old_flags); + return ret; } static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) @@ -158,6 +184,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) void helper_flds_FT0(CPUX86State *env, uint32_t val) { + uint8_t old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -165,10 +192,12 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val) u.i = val; FT0 = float32_to_floatx80(u.f, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fldl_FT0(CPUX86State *env, uint64_t val) { + uint8_t old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -176,6 +205,7 @@ void helper_fldl_FT0(CPUX86State *env, uint64_t val) u.i = val; FT0 = float64_to_floatx80(u.f, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fildl_FT0(CPUX86State *env, int32_t val) @@ -185,6 +215,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val) void helper_flds_ST0(CPUX86State *env, uint32_t val) { + uint8_t old_flags = save_exception_flags(env); int new_fpstt; union { float32 f; @@ -196,10 +227,12 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val) env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ + merge_exception_flags(env, old_flags); } void helper_fldl_ST0(CPUX86State *env, uint64_t val) { + uint8_t old_flags = save_exception_flags(env); int new_fpstt; union { float64 f; @@ -211,6 +244,7 @@ void helper_fldl_ST0(CPUX86State *env, uint64_t val) env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ + merge_exception_flags(env, old_flags); } void helper_fildl_ST0(CPUX86State *env, int32_t val) @@ -235,90 +269,108 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val) uint32_t helper_fsts_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); union { float32 f; uint32_t i; } u; u.f = floatx80_to_float32(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); return u.i; } uint64_t helper_fstl_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); union { float64 f; uint64_t i; } u; u.f = floatx80_to_float64(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); return u.i; } int32_t helper_fist_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); if (val != (int16_t)val) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); val = -32768; } + merge_exception_flags(env, old_flags); return val; } int32_t helper_fistl_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int32_t val; - signed char old_exp_flags; - - old_exp_flags = get_float_exception_flags(&env->fp_status); - set_float_exception_flags(0, &env->fp_status); val = floatx80_to_int32(ST0, &env->fp_status); if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { val = 0x80000000; } - set_float_exception_flags(get_float_exception_flags(&env->fp_status) - | old_exp_flags, &env->fp_status); + merge_exception_flags(env, old_flags); return val; } 
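/*
 * [Editor's note: illustrative sketch only, not part of the patch itself.]
 * The fpu_helper.c changes in this hunk keep applying one pattern: save and
 * clear the softfloat exception flags, run the floatx80 operation, then merge
 * any newly raised flags back while folding them into the x87 status word
 * (save_exception_flags()/merge_exception_flags() above). The standalone
 * program below mimics that save/merge idea with standard C <fenv.h>; the
 * fenv calls and the helper names here are only an analogy for the QEMU
 * softfloat API, not the actual implementation.
 */
#include <fenv.h>
#include <stdio.h>

#pragma STDC FENV_ACCESS ON

/* Remember the caller's pending flags and start the operation with none set. */
static int save_flags(void)
{
    int old = fetestexcept(FE_ALL_EXCEPT);
    feclearexcept(FE_ALL_EXCEPT);
    return old;
}

/* Re-raise the caller's flags and report what the operation itself raised. */
static void merge_flags(int old)
{
    int new_flags = fetestexcept(FE_ALL_EXCEPT);
    feraiseexcept(old);
    if (new_flags & FE_INVALID) {
        puts("operation raised 'invalid'");  /* the patch maps this to FPUS_IE */
    }
    if (new_flags & FE_INEXACT) {
        puts("operation raised 'inexact'");  /* mapped to FPUS_PE in the patch */
    }
}

int main(void)
{
    int old = save_flags();
    volatile double zero = 0.0;
    volatile double r = zero / zero;         /* 0/0 raises FE_INVALID */
    (void)r;
    merge_flags(old);
    return 0;
}
/*
 * Design note (still the editor's gloss): clearing the flags first is what
 * lets each helper distinguish exceptions raised by this one operation from
 * flags already pending in the environment, which is why the patch replaces
 * the ad-hoc SET_QC()/old_exp_flags code with this uniform wrapper.
 */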
int64_t helper_fistll_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int64_t val; - signed char old_exp_flags; - - old_exp_flags = get_float_exception_flags(&env->fp_status); - set_float_exception_flags(0, &env->fp_status); val = floatx80_to_int64(ST0, &env->fp_status); if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { val = 0x8000000000000000ULL; } - set_float_exception_flags(get_float_exception_flags(&env->fp_status) - | old_exp_flags, &env->fp_status); + merge_exception_flags(env, old_flags); return val; } int32_t helper_fistt_ST0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); if (val != (int16_t)val) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); val = -32768; } + merge_exception_flags(env, old_flags); return val; } int32_t helper_fisttl_ST0(CPUX86State *env) { - return floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + uint8_t old_flags = save_exception_flags(env); + int32_t val; + + val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x80000000; + } + merge_exception_flags(env, old_flags); + return val; } int64_t helper_fisttll_ST0(CPUX86State *env) { - return floatx80_to_int64_round_to_zero(ST0, &env->fp_status); + uint8_t old_flags = save_exception_flags(env); + int64_t val; + + val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x8000000000000000ULL; + } + merge_exception_flags(env, old_flags); + return val; } void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) @@ -400,62 +452,78 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { - int ret; + uint8_t old_flags = save_exception_flags(env); + FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; + merge_exception_flags(env, old_flags); } void helper_fucom_ST0_FT0(CPUX86State *env) { - int ret; + uint8_t old_flags = save_exception_flags(env); + FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; + merge_exception_flags(env, old_flags); } static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_fcomi_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int eflags; - int ret; + FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env, CC_OP); eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; CC_SRC = eflags; + merge_exception_flags(env, old_flags); } void helper_fucomi_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); int eflags; - int ret; + FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env, CC_OP); eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; CC_SRC = eflags; + merge_exception_flags(env, old_flags); } void helper_fadd_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_add(ST0, FT0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fmul_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_mul(ST0, FT0, &env->fp_status); + merge_exception_flags(env, 
old_flags); } void helper_fsub_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_sub(ST0, FT0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsubr_ST0_FT0(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_sub(FT0, ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fdiv_ST0_FT0(CPUX86State *env) @@ -472,22 +540,30 @@ void helper_fdivr_ST0_FT0(CPUX86State *env) void helper_fadd_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fmul_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsub_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) { + uint8_t old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) @@ -519,58 +595,81 @@ void helper_fabs_ST0(CPUX86State *env) void helper_fld1_ST0(CPUX86State *env) { - //ST0 = floatx80_one; - floatx80 one = { 0x8000000000000000LL, 0x3fff }; - ST0 = one; + ST0 = floatx80_one; } void helper_fldl2t_ST0(CPUX86State *env) { - //ST0 = floatx80_l2t; - floatx80 l2t = { 0xd49a784bcd1b8afeLL, 0x4000 }; - ST0 = l2t; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_UP: + ST0 = floatx80_l2t_u; + break; + default: + ST0 = floatx80_l2t; + break; + } } void helper_fldl2e_ST0(CPUX86State *env) { - //ST0 = floatx80_l2e; - floatx80 l2e = { 0xb8aa3b295c17f0bcLL, 0x3fff }; - ST0 = l2e; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_l2e_d; + break; + default: + ST0 = floatx80_l2e; + break; + } } void helper_fldpi_ST0(CPUX86State *env) { - //ST0 = floatx80_pi; - floatx80 pi = { 0xc90fdaa22168c235LL, 0x4000 }; - ST0 = pi; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_pi_d; + break; + default: + ST0 = floatx80_pi; + break; + } } void helper_fldlg2_ST0(CPUX86State *env) { - //ST0 = floatx80_lg2; - floatx80 lg2 = { 0x9a209a84fbcff799LL, 0x3ffd }; - ST0 = lg2; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_lg2_d; + break; + default: + ST0 = floatx80_lg2; + break; + } } void helper_fldln2_ST0(CPUX86State *env) { - //ST0 = floatx80_ln2; - floatx80 ln2 = { 0xb17217f7d1cf79acLL, 0x3ffe }; - ST0 = ln2; + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_ln2_d; + break; + default: + ST0 = floatx80_ln2; + break; + } } void helper_fldz_ST0(CPUX86State *env) { - //ST0 = floatx80_zero; - floatx80 zero = { 0x0000000000000000LL, 0x0000 }; - ST0 = zero; + ST0 = floatx80_zero; } void helper_fldz_FT0(CPUX86State *env) { - //FT0 = floatx80_zero; - floatx80 zero = { 0x0000000000000000LL, 0x0000 }; - FT0 = zero; + FT0 = floatx80_zero; } uint32_t helper_fnstsw(CPUX86State *env) @@ -684,18 +783,31 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) { + uint8_t 
old_flags = save_exception_flags(env); int v; target_ulong mem_ref, mem_end; int64_t val; + CPU_LDoubleU temp; + + temp.d = ST0; val = floatx80_to_int64(ST0, &env->fp_status); mem_ref = ptr; + if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); + while (mem_ref < ptr + 7) { + cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + } + cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); + cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + merge_exception_flags(env, old_flags); + return; + } mem_end = mem_ref + 9; - if (val < 0) { + if (SIGND(temp)) { cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); - if (val != 0x8000000000000000LL) { - val = -val; - } + val = -val; } else { cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); } @@ -705,35 +817,399 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) } v = val % 100; val = val / 100; - v = (int)((unsigned int)(v / 10) << 4) | (v % 10); + v = ((v / 10) << 4) | (v % 10); cpu_stb_data_ra(env, mem_ref++, v, GETPC()); } while (mem_ref < mem_end) { cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); } + merge_exception_flags(env, old_flags); } -void helper_f2xm1(CPUX86State *env) -{ - double val = floatx80_to_double(env, ST0); +/* 128-bit significand of log(2). */ +#define ln2_sig_high 0xb17217f7d1cf79abULL +#define ln2_sig_low 0xc9e3b39803f2f6afULL - val = pow(2.0, val) - 1.0; - ST0 = double_to_floatx80(env, val); -} +/* + * Polynomial coefficients for an approximation to (2^x - 1) / x, on + * the interval [-1/64, 1/64]. + */ +#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) +#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) +#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) +#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) +#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) +#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) +#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) +#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) +#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) + +struct f2xm1_data { + /* + * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 + * are very close to exact floatx80 values. + */ + floatx80 t; + /* The value of 2^t. */ + floatx80 exp2; + /* The value of 2^t - 1. 
*/ + floatx80 exp2m1; +}; + +static const struct f2xm1_data f2xm1_table[65] = { + { make_floatx80_init(0xbfff, 0x8000000000000000ULL), + make_floatx80_init(0x3ffe, 0x8000000000000000ULL), + make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, + { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), + make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), + make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, + { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), + make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), + make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, + { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), + make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), + make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, + { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), + make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), + make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, + { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), + make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), + make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, + { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), + make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), + make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, + { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), + make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), + make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, + { make_floatx80_init(0xbffe, 0xc000000000006530ULL), + make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), + make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, + { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), + make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), + make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, + { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), + make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), + make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, + { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), + make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), + make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, + { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), + make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), + make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, + { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), + make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), + make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, + { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), + make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), + make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, + { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), + make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), + make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, + { make_floatx80_init(0xbffe, 0x800000000000227dULL), + make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), + make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, + { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), + make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), + make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, + { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), + make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), + make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, + { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), + make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), + make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, + { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), + make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), + make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, + { make_floatx80_init(0xbffd, 
0xb000000000000fe6ULL), + make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), + make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, + { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), + make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), + make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, + { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), + make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), + make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, + { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), + make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), + make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, + { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), + make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), + make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, + { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), + make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), + make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, + { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), + make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), + make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, + { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), + make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), + make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, + { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), + make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), + make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, + { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), + make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), + make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, + { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), + make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), + make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, + { floatx80_zero_init, + make_floatx80_init(0x3fff, 0x8000000000000000ULL), + floatx80_zero_init }, + { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), + make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), + make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, + { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), + make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), + make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, + { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), + make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), + make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, + { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), + make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), + make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, + { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), + make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), + make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, + { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), + make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), + make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, + { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), + make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), + make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, + { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), + make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), + make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, + { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), + make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), + make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, + { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), + make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), + make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, + { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), + 
make_floatx80_init(0x3fff, 0xa27043030c49370aULL), + make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, + { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), + make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), + make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, + { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), + make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), + make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, + { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), + make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), + make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, + { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), + make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), + make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, + { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), + make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), + make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, + { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), + make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), + make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, + { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), + make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), + make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, + { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), + make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), + make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, + { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), + make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), + make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, + { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), + make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), + make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, + { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), + make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), + make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, + { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), + make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), + make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, + { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), + make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), + make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, + { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), + make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), + make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, + { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), + make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), + make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, + { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), + make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), + make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, + { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), + make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), + make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, + { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), + make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), + make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, + { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), + make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), + make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, + { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), + make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), + make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, + { make_floatx80_init(0x3fff, 0x8000000000000000ULL), + make_floatx80_init(0x4000, 0x8000000000000000ULL), + make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, +}; -void 
helper_fyl2x(CPUX86State *env) +void helper_f2xm1(CPUX86State *env) { - double fptemp = floatx80_to_double(env, ST0); - - if (fptemp > 0.0) { - fptemp = log(fptemp) / log(2.0); /* log2(ST) */ - fptemp *= floatx80_to_double(env, ST1); - ST1 = double_to_floatx80(env, fptemp); - fpop(env); + uint8_t old_flags = save_exception_flags(env); + uint64_t sig = extractFloatx80Frac(ST0); + int32_t exp = extractFloatx80Exp(ST0); + bool sign = extractFloatx80Sign(ST0); + + if (floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + } else if (exp > 0x3fff || + (exp == 0x3fff && sig != (0x8000000000000000ULL))) { + /* Out of range for the instruction, treat as invalid. */ + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (exp == 0x3fff) { + /* Argument 1 or -1, exact result 1 or -0.5. */ + if (sign) { + ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); + } + } else if (exp < 0x3fb0) { + if (!floatx80_is_zero(ST0)) { + /* + * Multiplying the argument by an extra-precision version + * of log(2) is sufficiently precise. Zero arguments are + * returned unchanged. + */ + uint64_t sig0, sig1, sig2; + if (exp == 0) { + normalizeFloatx80Subnormal(sig, &exp, &sig); + } + mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, + &sig2); + /* This result is inexact. */ + sig1 |= 1; + ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1, + &env->fp_status); + } } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; + floatx80 tmp, y, accum; + bool asign, bsign; + int32_t n, aexp, bexp; + uint64_t asig0, asig1, asig2, bsig0, bsig1; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + /* Find the nearest multiple of 1/32 to the argument. */ + tmp = floatx80_scalbn(ST0, 5, &env->fp_status); + n = 32 + floatx80_to_int32(tmp, &env->fp_status); + y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); + + if (floatx80_is_zero(y)) { + /* + * Use the value of 2^t - 1 from the table, to avoid + * needing to special-case zero as a result of + * multiplication below. + */ + ST0 = f2xm1_table[n].t; + set_float_exception_flags(float_flag_inexact, &env->fp_status); + env->fp_status.float_rounding_mode = save_mode; + } else { + /* + * Compute the lower parts of a polynomial expansion for + * (2^y - 1) / y. 
+ */ + accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); + + /* + * The full polynomial expansion is f2xm1_coeff_0 + accum + * (where accum has much lower magnitude, and so, in + * particular, carry out of the addition is not possible). + * (This expansion is only accurate to about 70 bits, not + * 128 bits.) + */ + aexp = extractFloatx80Exp(f2xm1_coeff_0); + asign = extractFloatx80Sign(f2xm1_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + bsig0 = extractFloatx80Frac(f2xm1_coeff_0); + bsig1 = 0; + if (asign == extractFloatx80Sign(accum)) { + add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } + /* And thus compute an approximation to 2^y - 1. */ + mul128By64To192(asig0, asig1, extractFloatx80Frac(y), + &asig0, &asig1, &asig2); + aexp += extractFloatx80Exp(y) - 0x3ffe; + asign ^= extractFloatx80Sign(y); + if (n != 32) { + /* + * Multiply this by the precomputed value of 2^t and + * add that of 2^t - 1. + */ + mul128By64To192(asig0, asig1, + extractFloatx80Frac(f2xm1_table[n].exp2), + &asig0, &asig1, &asig2); + aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; + bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); + bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); + bsig1 = 0; + if (bexp < aexp) { + shift128RightJamming(bsig0, bsig1, aexp - bexp, + &bsig0, &bsig1); + } else if (aexp < bexp) { + shift128RightJamming(asig0, asig1, bexp - aexp, + &asig0, &asig1); + aexp = bexp; + } + /* The sign of 2^t - 1 is always that of the result. */ + bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); + if (asign == bsign) { + /* Avoid possible carry out of the addition. */ + shift128RightJamming(asig0, asig1, 1, + &asig0, &asig1); + shift128RightJamming(bsig0, bsig1, 1, + &bsig0, &bsig1); + ++aexp; + add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + asign = bsign; + } + } + env->fp_status.float_rounding_mode = save_mode; + /* This result is inexact. */ + asig1 |= 1; + ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1, + &env->fp_status); + } + + env->fp_status.floatx80_rounding_precision = save_prec; } + merge_exception_flags(env, old_flags); } void helper_fptan(CPUX86State *env) @@ -743,194 +1219,996 @@ void helper_fptan(CPUX86State *env) if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { env->fpus |= 0x400; } else { - floatx80 one = { 0x8000000000000000LL, 0x3fff }; fptemp = tan(fptemp); ST0 = double_to_floatx80(env, fptemp); fpush(env); - ST0 = one; + ST0 = floatx80_one; env->fpus &= ~0x400; /* C2 <-- 0 */ /* the above code is for |arg| < 2**52 only */ } } +/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
*/ +#define pi_4_exp 0x3ffe +#define pi_4_sig_high 0xc90fdaa22168c234ULL +#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL +#define pi_2_exp 0x3fff +#define pi_2_sig_high 0xc90fdaa22168c234ULL +#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL +#define pi_34_exp 0x4000 +#define pi_34_sig_high 0x96cbe3f9990e91a7ULL +#define pi_34_sig_low 0x9394c9e8a0a5159dULL +#define pi_exp 0x4000 +#define pi_sig_high 0xc90fdaa22168c234ULL +#define pi_sig_low 0xc4c6628b80dc1cd1ULL + +/* + * Polynomial coefficients for an approximation to atan(x), with only + * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike + * for some other approximations, no low part is needed for the first + * coefficient here to achieve a sufficiently accurate result, because + * the coefficient in this minimax approximation is very close to + * exactly 1.) + */ +#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) +#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) +#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) +#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) +#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) +#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) +#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) + +struct fpatan_data { + /* High and low parts of atan(x). */ + floatx80 atan_high, atan_low; +}; + +static const struct fpatan_data fpatan_table[9] = { + { floatx80_zero_init, + floatx80_zero_init }, + { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL), + make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) }, + { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL), + make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) }, + { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL), + make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) }, + { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL), + make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) }, + { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL), + make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) }, + { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL), + make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) }, + { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL), + make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) }, + { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL), + make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) }, +}; + void helper_fpatan(CPUX86State *env) { - double fptemp, fpsrcop; + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (floatx80_is_zero(ST1) && !arg0_sign) { + /* Pass this zero through. 
*/ + } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) || + arg0_exp - arg1_exp >= 80) && + !arg0_sign) { + /* + * Dividing ST1 by ST0 gives the correct result up to + * rounding, and avoids spurious underflow exceptions that + * might result from passing some small values through the + * polynomial approximation, but if a finite nonzero result of + * division is exact, the result of fpatan is still inexact + * (and underflowing where appropriate). + */ + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.floatx80_rounding_precision = 80; + ST1 = floatx80_div(ST1, ST0, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save_prec; + if (!floatx80_is_zero(ST1) && + !(get_float_exception_flags(&env->fp_status) & + float_flag_inexact)) { + /* + * The mathematical result is very slightly closer to zero + * than this exact result. Round a value with the + * significand adjusted accordingly to get the correct + * exceptions, and possibly an adjusted result depending + * on the rounding mode. + */ + uint64_t sig = extractFloatx80Frac(ST1); + int32_t exp = extractFloatx80Exp(ST1); + bool sign = extractFloatx80Sign(ST1); + if (exp == 0) { + normalizeFloatx80Subnormal(sig, &exp, &sig); + } + ST1 = normalizeRoundAndPackFloatx80(80, sign, exp, sig - 1, + -1, &env->fp_status); + } + } else { + /* The result is inexact. */ + bool rsign = arg1_sign; + int32_t rexp; + uint64_t rsig0, rsig1; + if (floatx80_is_zero(ST1)) { + /* + * ST0 is negative. The result is pi with the sign of + * ST1. + */ + rexp = pi_exp; + rsig0 = pi_sig_high; + rsig1 = pi_sig_low; + } else if (floatx80_is_infinity(ST1)) { + if (floatx80_is_infinity(ST0)) { + if (arg0_sign) { + rexp = pi_34_exp; + rsig0 = pi_34_sig_high; + rsig1 = pi_34_sig_low; + } else { + rexp = pi_4_exp; + rsig0 = pi_4_sig_high; + rsig1 = pi_4_sig_low; + } + } else { + rexp = pi_2_exp; + rsig0 = pi_2_sig_high; + rsig1 = pi_2_sig_low; + } + } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) { + rexp = pi_2_exp; + rsig0 = pi_2_sig_high; + rsig1 = pi_2_sig_low; + } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) { + /* ST0 is negative. */ + rexp = pi_exp; + rsig0 = pi_sig_high; + rsig1 = pi_sig_low; + } else { + /* + * ST0 and ST1 are finite, nonzero and with exponents not + * too far apart. + */ + int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp; + int32_t azexp, axexp; + bool adj_sub, ysign, zsign; + uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1; + uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2; + uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1; + uint64_t azsig0, azsig1; + uint64_t azsig2, azsig3, axsig0, axsig1; + floatx80 x8; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + if (arg0_exp > arg1_exp || + (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) { + /* Work with abs(ST1) / abs(ST0). */ + num_exp = arg1_exp; + num_sig = arg1_sig; + den_exp = arg0_exp; + den_sig = arg0_sig; + if (arg0_sign) { + /* The result is subtracted from pi. 
*/ + adj_exp = pi_exp; + adj_sig0 = pi_sig_high; + adj_sig1 = pi_sig_low; + adj_sub = true; + } else { + /* The result is used as-is. */ + adj_exp = 0; + adj_sig0 = 0; + adj_sig1 = 0; + adj_sub = false; + } + } else { + /* Work with abs(ST0) / abs(ST1). */ + num_exp = arg0_exp; + num_sig = arg0_sig; + den_exp = arg1_exp; + den_sig = arg1_sig; + /* The result is added to or subtracted from pi/2. */ + adj_exp = pi_2_exp; + adj_sig0 = pi_2_sig_high; + adj_sig1 = pi_2_sig_low; + adj_sub = !arg0_sign; + } + + /* + * Compute x = num/den, where 0 < x <= 1 and x is not too + * small. + */ + xexp = num_exp - den_exp + 0x3ffe; + remsig0 = num_sig; + remsig1 = 0; + if (den_sig <= remsig0) { + shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); + ++xexp; + } + xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig); + mul64To128(den_sig, xsig0, &msig0, &msig1); + sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1); + while ((int64_t) remsig0 < 0) { + --xsig0; + add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1); + } + xsig1 = estimateDiv128To64(remsig1, 0, den_sig); + /* + * No need to correct any estimation error in xsig1; even + * with such error, it is accurate enough. + */ + + /* + * Split x as x = t + y, where t = n/8 is the nearest + * multiple of 1/8 to x. + */ + x8 = normalizeRoundAndPackFloatx80(80, false, xexp + 3, xsig0, + xsig1, &env->fp_status); + n = floatx80_to_int32(x8, &env->fp_status); + if (n == 0) { + ysign = false; + yexp = xexp; + ysig0 = xsig0; + ysig1 = xsig1; + texp = 0; + tsig = 0; + } else { + int shift = clz32(n) + 32; + texp = 0x403b - shift; + tsig = n; + tsig <<= shift; + if (texp == xexp) { + sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1); + if ((int64_t) ysig0 >= 0) { + ysign = false; + if (ysig0 == 0) { + if (ysig1 == 0) { + yexp = 0; + } else { + shift = clz64(ysig1) + 64; + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, + &ysig0, &ysig1); + } + } else { + shift = clz64(ysig0); + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } else { + ysign = true; + sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1); + if (ysig0 == 0) { + shift = clz64(ysig1) + 64; + } else { + shift = clz64(ysig0); + } + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } else { + /* + * t's exponent must be greater than x's because t + * is positive and the nearest multiple of 1/8 to + * x, and if x has a greater exponent, the power + * of 2 with that exponent is also a multiple of + * 1/8. + */ + uint64_t usig0, usig1; + shift128RightJamming(xsig0, xsig1, texp - xexp, + &usig0, &usig1); + ysign = true; + sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1); + if (ysig0 == 0) { + shift = clz64(ysig1) + 64; + } else { + shift = clz64(ysig0); + } + yexp = texp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } + + /* + * Compute z = y/(1+tx), so arctan(x) = arctan(t) + + * arctan(z). + */ + zsign = ysign; + if (texp == 0 || yexp == 0) { + zexp = yexp; + zsig0 = ysig0; + zsig1 = ysig1; + } else { + /* + * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1. + */ + int32_t dexp = texp + xexp - 0x3ffe; + uint64_t dsig0, dsig1, dsig2; + mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2); + /* + * dexp <= 0x3fff (and if equal, dsig0 has a leading 0 + * bit). Add 1 to produce the denominator 1+tx. 
+ */ + shift128RightJamming(dsig0, dsig1, 0x3fff - dexp, + &dsig0, &dsig1); + dsig0 |= 0x8000000000000000ULL; + zexp = yexp - 1; + remsig0 = ysig0; + remsig1 = ysig1; + remsig2 = 0; + if (dsig0 <= remsig0) { + shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); + ++zexp; + } + zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0); + mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2); + sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2, + &remsig0, &remsig1, &remsig2); + while ((int64_t) remsig0 < 0) { + --zsig0; + add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1, + &remsig0, &remsig1, &remsig2); + } + zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0); + /* No need to correct any estimation error in zsig1. */ + } + + if (zexp == 0) { + azexp = 0; + azsig0 = 0; + azsig1 = 0; + } else { + floatx80 z2, accum; + uint64_t z2sig0, z2sig1, z2sig2, z2sig3; + /* Compute z^2. */ + mul128To256(zsig0, zsig1, zsig0, zsig1, + &z2sig0, &z2sig1, &z2sig2, &z2sig3); + z2 = normalizeRoundAndPackFloatx80(80, false, + zexp + zexp - 0x3ffe, + z2sig0, z2sig1, + &env->fp_status); + + /* Compute the lower parts of the polynomial expansion. */ + accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + + /* + * The full polynomial expansion is z*(fpatan_coeff_0 + accum). + * fpatan_coeff_0 is 1, and accum is negative and much smaller. + */ + aexp = extractFloatx80Exp(fpatan_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1, + &asig0, &asig1); + /* Multiply by z to compute arctan(z). */ + azexp = aexp + zexp - 0x3ffe; + mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1, + &azsig2, &azsig3); + } + + /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */ + if (texp == 0) { + /* z is positive. 
*/ + axexp = azexp; + axsig0 = azsig0; + axsig1 = azsig1; + } else { + bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low); + int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low); + uint64_t low_sig0 = + extractFloatx80Frac(fpatan_table[n].atan_low); + uint64_t low_sig1 = 0; + axexp = extractFloatx80Exp(fpatan_table[n].atan_high); + axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high); + axsig1 = 0; + shift128RightJamming(low_sig0, low_sig1, axexp - low_exp, + &low_sig0, &low_sig1); + if (low_sign) { + sub128(axsig0, axsig1, low_sig0, low_sig1, + &axsig0, &axsig1); + } else { + add128(axsig0, axsig1, low_sig0, low_sig1, + &axsig0, &axsig1); + } + if (azexp >= axexp) { + shift128RightJamming(axsig0, axsig1, azexp - axexp + 1, + &axsig0, &axsig1); + axexp = azexp + 1; + shift128RightJamming(azsig0, azsig1, 1, + &azsig0, &azsig1); + } else { + shift128RightJamming(axsig0, axsig1, 1, + &axsig0, &axsig1); + shift128RightJamming(azsig0, azsig1, axexp - azexp + 1, + &azsig0, &azsig1); + ++axexp; + } + if (zsign) { + sub128(axsig0, axsig1, azsig0, azsig1, + &axsig0, &axsig1); + } else { + add128(axsig0, axsig1, azsig0, azsig1, + &axsig0, &axsig1); + } + } + + if (adj_exp == 0) { + rexp = axexp; + rsig0 = axsig0; + rsig1 = axsig1; + } else { + /* + * Add or subtract arctan(x) (exponent axexp, + * significand axsig0 and axsig1, positive, not + * necessarily normalized) to the number given by + * adj_exp, adj_sig0 and adj_sig1, according to + * adj_sub. + */ + if (adj_exp >= axexp) { + shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, + &axsig0, &axsig1); + rexp = adj_exp + 1; + shift128RightJamming(adj_sig0, adj_sig1, 1, + &adj_sig0, &adj_sig1); + } else { + shift128RightJamming(axsig0, axsig1, 1, + &axsig0, &axsig1); + shift128RightJamming(adj_sig0, adj_sig1, + axexp - adj_exp + 1, + &adj_sig0, &adj_sig1); + rexp = axexp + 1; + } + if (adj_sub) { + sub128(adj_sig0, adj_sig1, axsig0, axsig1, + &rsig0, &rsig1); + } else { + add128(adj_sig0, adj_sig1, axsig0, axsig1, + &rsig0, &rsig1); + } + } + + env->fp_status.float_rounding_mode = save_mode; + env->fp_status.floatx80_rounding_precision = save_prec; + } + /* This result is inexact. 
*/ + rsig1 |= 1; + ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp, + rsig0, rsig1, &env->fp_status); + } - fpsrcop = floatx80_to_double(env, ST1); - fptemp = floatx80_to_double(env, ST0); - ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp)); fpop(env); + merge_exception_flags(env, old_flags); } void helper_fxtract(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); CPU_LDoubleU temp; temp.d = ST0; if (floatx80_is_zero(ST0)) { /* Easy way to generate -inf and raising division by 0 exception */ - floatx80 zero = { 0x0000000000000000LL, 0x0000 }; - floatx80 one = { 0x8000000000000000LL, 0x3fff }; - ST0 = floatx80_div(floatx80_chs(one), zero, + ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, &env->fp_status); fpush(env); ST0 = temp.d; + } else if (floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + fpush(env); + ST0 = ST1; + } else if (floatx80_is_any_nan(ST0)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + fpush(env); + ST0 = ST1; + } else if (floatx80_is_infinity(ST0)) { + fpush(env); + ST0 = ST1; + ST1 = floatx80_infinity; } else { int expdif; - expdif = EXPD(temp) - EXPBIAS; + if (EXPD(temp) == 0) { + int shift = clz64(temp.l.lower); + temp.l.lower <<= shift; + expdif = 1 - EXPBIAS - shift; + float_raise(float_flag_input_denormal, &env->fp_status); + } else { + expdif = EXPD(temp) - EXPBIAS; + } /* DP exponent bias */ ST0 = int32_to_floatx80(expdif, &env->fp_status); fpush(env); BIASEXPONENT(temp); ST0 = temp.d; } + merge_exception_flags(env, old_flags); } -void helper_fprem1(CPUX86State *env) +static void helper_fprem_common(CPUX86State *env, bool mod) { - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(env, ST0); - st1 = floatx80_to_double(env, ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(env, NAN); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } - - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + uint8_t old_flags = save_exception_flags(env); + uint64_t quotient; + CPU_LDoubleU temp0, temp1; + int exp0, exp1, expdiff; - if (expdif < 0) { - /* optimisation? 
taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } + temp0.d = ST0; + temp1.d = ST1; + exp0 = EXPD(temp0); + exp1 = EXPD(temp1); - if (expdif < 53) { - dblq = fpsrcop / fptemp; - /* round dblq towards nearest integer */ - dblq = rint(dblq); - st0 = fpsrcop - fptemp * dblq; - - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || + exp0 == 0x7fff || exp1 == 0x7fff || + floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { + ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status); + } else { + if (exp0 == 0) { + exp0 = 1 - clz64(temp0.l.lower); + } + if (exp1 == 0) { + exp1 = 1 - clz64(temp1.l.lower); + } + expdiff = exp0 - exp1; + if (expdiff < 64) { + ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status); + env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ + env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ + env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ } else { - q = (signed long long int)dblq; + /* + * Partial remainder. This choice of how many bits to + * process at once is specified in AMD instruction set + * manuals, and empirically is followed by Intel + * processors as well; it ensures that the final remainder + * operation in a loop does produce the correct low three + * bits of the quotient. AMD manuals specify that the + * flags other than C2 are cleared, and empirically Intel + * processors clear them as well. + */ + int n = 32 + (expdiff % 32); + temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); + ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); + env->fpus |= 0x400; /* C2 <-- 1 */ } - - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ - } else { - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, expdif - 50); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); } - ST0 = double_to_floatx80(env, st0); + merge_exception_flags(env, old_flags); } -void helper_fprem(CPUX86State *env) +void helper_fprem1(CPUX86State *env) { - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(env, ST0); - st1 = floatx80_to_double(env, ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(env, NAN); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } + helper_fprem_common(env, false); +} - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); +void helper_fprem(CPUX86State *env) +{ + helper_fprem_common(env, true); +} - if (expdif < 0) { - /* optimisation? taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } +/* 128-bit significand of log2(e). */ +#define log2_e_sig_high 0xb8aa3b295c17f0bbULL +#define log2_e_sig_low 0xbe87fed0691d3e89ULL - if (expdif < 53) { - dblq = fpsrcop / fptemp; /* ST0 / ST1 */ - /* round dblq towards zero */ - dblq = (dblq < 0.0) ? 
ceil(dblq) : floor(dblq); - st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */ +/* + * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), + * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, + * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the + * interval [sqrt(2)/2, sqrt(2)]. + */ +#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) +#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) +#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) +#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) +#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) +#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) +#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) +#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) +#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) +#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) +#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); - } else { - q = (signed long long int)dblq; - } +/* + * Compute an approximation of log2(1+arg), where 1+arg is in the + * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this + * function is called, rounding precision is set to 80 and the + * round-to-nearest mode is in effect. arg must not be exactly zero, + * and must not be so close to zero that underflow might occur. + */ +static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, + uint64_t *sig0, uint64_t *sig1) +{ + uint64_t arg0_sig = extractFloatx80Frac(arg); + int32_t arg0_exp = extractFloatx80Exp(arg); + bool arg0_sign = extractFloatx80Sign(arg); + bool asign; + int32_t dexp, texp, aexp; + uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; + uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; + uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; + floatx80 t2, accum; - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ + /* + * Compute an approximation of arg/(2+arg), with extra precision, + * as the argument to a polynomial approximation. The extra + * precision is only needed for the first term of the + * approximation, with subsequent terms being significantly + * smaller; the approximation only uses odd exponents, and the + * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 
+ */ + if (arg0_sign) { + dexp = 0x3fff; + shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); + sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); } else { - int N = 32 + (expdif % 32); /* as per AMD docs */ + dexp = 0x4000; + shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); + dsig0 |= 0x8000000000000000ULL; + } + texp = arg0_exp - dexp + 0x3ffe; + rsig0 = arg0_sig; + rsig1 = 0; + rsig2 = 0; + if (dsig0 <= rsig0) { + shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); + ++texp; + } + tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); + mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); + sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, + &rsig0, &rsig1, &rsig2); + while ((int64_t) rsig0 < 0) { + --tsig0; + add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, + &rsig0, &rsig1, &rsig2); + } + tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); + /* + * No need to correct any estimation error in tsig1; even with + * such error, it is accurate enough. Now compute the square of + * that approximation. + */ + mul128To256(tsig0, tsig1, tsig0, tsig1, + &t2sig0, &t2sig1, &t2sig2, &t2sig3); + t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe, + t2sig0, t2sig1, &env->fp_status); + + /* Compute the lower parts of the polynomial expansion. */ + accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, (double)(expdif - N)); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); + /* + * The full polynomial expansion is fyl2x_coeff_0 + accum (where + * accum has much lower magnitude, and so, in particular, carry + * out of the addition is not possible), multiplied by t. (This + * expansion is only accurate to about 70 bits, not 128 bits.) + */ + aexp = extractFloatx80Exp(fyl2x_coeff_0); + asign = extractFloatx80Sign(fyl2x_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + bsig0 = extractFloatx80Frac(fyl2x_coeff_0); + bsig1 = 0; + if (asign == extractFloatx80Sign(accum)) { + add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); } - ST0 = double_to_floatx80(env, st0); + /* Multiply by t to compute the required result. 
*/ + mul128To256(asig0, asig1, tsig0, tsig1, + &asig0, &asig1, &asig2, &asig3); + aexp += texp - 0x3ffe; + *exp = aexp; + *sig0 = asig0; + *sig1 = asig1; } void helper_fyl2xp1(CPUX86State *env) { - double fptemp = floatx80_to_double(env, ST0); + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (arg0_exp > 0x3ffd || + (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? + 0x95f619980c4336f7ULL : + 0xd413cccfe7799211ULL))) { + /* + * Out of range for the instruction (ST0 must have absolute + * value less than 1 - sqrt(2)/2 = 0.292..., according to + * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 + * to sqrt(2) - 1, which we allow here), treat as invalid. + */ + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || + arg1_exp == 0x7fff) { + /* + * One argument is zero, or multiplying by infinity; correct + * result is exact and can be obtained by multiplying the + * arguments. + */ + ST1 = floatx80_mul(ST0, ST1, &env->fp_status); + } else if (arg0_exp < 0x3fb0) { + /* + * Multiplying both arguments and an extra-precision version + * of log2(e) is sufficiently precise. + */ + uint64_t sig0, sig1, sig2; + int32_t exp; + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, + &sig0, &sig1, &sig2); + exp = arg0_exp + 1; + mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); + exp += arg1_exp - 0x3ffe; + /* This result is inexact. */ + sig1 |= 1; + ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp, + sig0, sig1, &env->fp_status); + } else { + int32_t aexp; + uint64_t asig0, asig1, asig2; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); + /* + * Multiply by the second argument to compute the required + * result. + */ + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); + aexp += arg1_exp - 0x3ffe; + /* This result is inexact. 
*/ + asig1 |= 1; + env->fp_status.float_rounding_mode = save_mode; + ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp, + asig0, asig1, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save_prec; + } + fpop(env); + merge_exception_flags(env, old_flags); +} - if ((fptemp + 1.0) > 0.0) { - fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */ - fptemp *= floatx80_to_double(env, ST1); - ST1 = double_to_floatx80(env, fptemp); - fpop(env); +void helper_fyl2x(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (arg0_sign && !floatx80_is_zero(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_infinity(ST1)) { + FloatRelation cmp = floatx80_compare(ST0, floatx80_one, + &env->fp_status); + switch (cmp) { + case float_relation_less: + ST1 = floatx80_chs(ST1); + break; + case float_relation_greater: + /* Result is infinity of the same sign as ST1. */ + break; + default: + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + break; + } + } else if (floatx80_is_infinity(ST0)) { + if (floatx80_is_zero(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (arg1_sign) { + ST1 = floatx80_chs(ST0); + } else { + ST1 = ST0; + } + } else if (floatx80_is_zero(ST0)) { + if (floatx80_is_zero(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else { + /* Result is infinity with opposite sign to ST1. */ + float_raise(float_flag_divbyzero, &env->fp_status); + ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, + 0x8000000000000000ULL); + } + } else if (floatx80_is_zero(ST1)) { + if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { + ST1 = floatx80_chs(ST1); + } + /* Otherwise, ST1 is already the correct result. 
*/ + } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { + if (arg1_sign) { + ST1 = floatx80_chs(floatx80_zero); + } else { + ST1 = floatx80_zero; + } } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; + int32_t int_exp; + floatx80 arg0_m1; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + signed char save_prec = env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = 80; + + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + int_exp = arg0_exp - 0x3fff; + if (arg0_sig > 0xb504f333f9de6484ULL) { + ++int_exp; + } + arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, + &env->fp_status), + floatx80_one, &env->fp_status); + if (floatx80_is_zero(arg0_m1)) { + /* Exact power of 2; multiply by ST1. */ + env->fp_status.float_rounding_mode = save_mode; + ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), + ST1, &env->fp_status); + } else { + bool asign = extractFloatx80Sign(arg0_m1); + int32_t aexp; + uint64_t asig0, asig1, asig2; + helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); + if (int_exp != 0) { + bool isign = (int_exp < 0); + int32_t iexp; + uint64_t isig; + int shift; + int_exp = isign ? -int_exp : int_exp; + shift = clz32(int_exp) + 32; + isig = int_exp; + isig <<= shift; + iexp = 0x403e - shift; + shift128RightJamming(asig0, asig1, iexp - aexp, + &asig0, &asig1); + if (asign == isign) { + add128(isig, 0, asig0, asig1, &asig0, &asig1); + } else { + sub128(isig, 0, asig0, asig1, &asig0, &asig1); + } + aexp = iexp; + asign = isign; + } + /* + * Multiply by the second argument to compute the required + * result. + */ + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); + aexp += arg1_exp - 0x3ffe; + /* This result is inexact. 
*/ + asig1 |= 1; + env->fp_status.float_rounding_mode = save_mode; + ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp, + asig0, asig1, &env->fp_status); + } + + env->fp_status.floatx80_rounding_precision = save_prec; } + fpop(env); + merge_exception_flags(env, old_flags); } void helper_fsqrt(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); if (floatx80_is_neg(ST0)) { env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ env->fpus |= 0x400; } ST0 = floatx80_sqrt(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fsincos(CPUX86State *env) @@ -950,17 +2228,60 @@ void helper_fsincos(CPUX86State *env) void helper_frndint(CPUX86State *env) { + uint8_t old_flags = save_exception_flags(env); ST0 = floatx80_round_to_int(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); } void helper_fscale(CPUX86State *env) { - if (floatx80_is_any_nan(ST1)) { + uint8_t old_flags = save_exception_flags(env); + if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST1)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + } ST0 = ST1; + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + } else if (floatx80_is_infinity(ST1) && + !floatx80_invalid_encoding(ST0) && + !floatx80_is_any_nan(ST0)) { + if (floatx80_is_neg(ST1)) { + if (floatx80_is_infinity(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else { + ST0 = (floatx80_is_neg(ST0) ? + floatx80_chs(floatx80_zero) : + floatx80_zero); + } + } else { + if (floatx80_is_zero(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else { + ST0 = (floatx80_is_neg(ST0) ? + floatx80_chs(floatx80_infinity) : + floatx80_infinity); + } + } } else { - int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); + int n; + signed char save = env->fp_status.floatx80_rounding_precision; + uint8_t save_flags = get_float_exception_flags(&env->fp_status); + set_float_exception_flags(0, &env->fp_status); + n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); + set_float_exception_flags(save_flags, &env->fp_status); + env->fp_status.floatx80_rounding_precision = 80; ST0 = floatx80_scalbn(ST0, n, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save; } + merge_exception_flags(env, old_flags); } void helper_fsin(CPUX86State *env) @@ -1010,7 +2331,7 @@ void helper_fxam_ST0(CPUX86State *env) if (expdif == MAXEXPD) { if (MANTD(temp) == 0x8000000000000000ULL) { env->fpus |= 0x500; /* Infinity */ - } else { + } else if (MANTD(temp) & 0x8000000000000000ULL) { env->fpus |= 0x100; /* NaN */ } } else if (expdif == 0) { @@ -1019,7 +2340,7 @@ void helper_fxam_ST0(CPUX86State *env) } else { env->fpus |= 0x4400; /* Denormal */ } - } else { + } else if (MANTD(temp) & 0x8000000000000000ULL) { env->fpus |= 0x400; } } @@ -1171,7 +2492,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) In 64-bit mode this is rip, rdp. But in either case we don't write actual data, just zeros. 
*/ cpu_stq_data_ra(env, ptr + XO(legacy.fpip), env->fpip, ra); /* eip+sel; rip */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ + cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), env->fpdp, ra); /* edp+sel; rdp */ addr = ptr + XO(legacy.fpregs); for (i = 0; i < 8; i++) { @@ -1183,6 +2504,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) { + update_mxcsr_from_sse_status(env); cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); } @@ -1600,11 +2922,43 @@ void update_mxcsr_status(CPUX86State *env) } set_float_rounding_mode(rnd_type, &env->sse_status); + /* Set exception flags. */ + set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | + (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | + (mxcsr & FPUS_OE ? float_flag_overflow : 0) | + (mxcsr & FPUS_UE ? float_flag_underflow : 0) | + (mxcsr & FPUS_PE ? float_flag_inexact : 0), + &env->sse_status); + /* set denormals are zero */ set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); /* set flush to zero */ - set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status); + set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); +} + +void update_mxcsr_from_sse_status(CPUX86State *env) +{ + uint8_t flags = get_float_exception_flags(&env->sse_status); + /* + * The MXCSR denormal flag has opposite semantics to + * float_flag_input_denormal (the softfloat code sets that flag + * only when flushing input denormals to zero, but SSE sets it + * only when not flushing them to zero), so is not converted + * here. + */ + env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | + (flags & float_flag_divbyzero ? FPUS_ZE : 0) | + (flags & float_flag_overflow ? FPUS_OE : 0) | + (flags & float_flag_underflow ? FPUS_UE : 0) | + (flags & float_flag_inexact ? FPUS_PE : 0) | + (flags & float_flag_output_denormal ? 
FPUS_UE | FPUS_PE : + 0)); +} + +void helper_update_mxcsr(CPUX86State *env) +{ + update_mxcsr_from_sse_status(env); } void helper_ldmxcsr(CPUX86State *env, uint32_t val) diff --git a/qemu/target/i386/helper.h b/qemu/target/i386/helper.h index 399cc0df99..ca55ded21a 100644 --- a/qemu/target/i386/helper.h +++ b/qemu/target/i386/helper.h @@ -210,6 +210,7 @@ DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) /* MMX/SSE */ DEF_HELPER_2(ldmxcsr, void, env, i32) +DEF_HELPER_1(update_mxcsr, void, env) DEF_HELPER_1(enter_mmx, void, env) DEF_HELPER_1(emms, void, env) DEF_HELPER_3(movq, void, env, ptr, ptr) diff --git a/qemu/target/i386/ops_sse.h b/qemu/target/i386/ops_sse.h index ec1ec745d0..027ff59300 100644 --- a/qemu/target/i386/ops_sse.h +++ b/qemu/target/i386/ops_sse.h @@ -843,6 +843,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s) void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); @@ -855,26 +856,33 @@ void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s) d->ZMM_S(3) = float32_div(float32_one, float32_sqrt(s->ZMM_S(3), &env->sse_status), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } void helper_rcpps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); d->ZMM_S(1) = float32_div(float32_one, s->ZMM_S(1), &env->sse_status); d->ZMM_S(2) = float32_div(float32_one, s->ZMM_S(2), &env->sse_status); d->ZMM_S(3) = float32_div(float32_one, s->ZMM_S(3), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); } static inline uint64_t helper_extrq(uint64_t src, int shift, int len) @@ -1031,7 +1039,7 @@ static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float32 s0, s1; s0 = d->ZMM_S(0); @@ -1042,7 +1050,7 @@ void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) void helper_comiss(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float32 s0, s1; s0 = d->ZMM_S(0); @@ -1053,7 +1061,7 @@ void helper_comiss(CPUX86State *env, Reg *d, Reg *s) void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float64 d0, d1; d0 = d->ZMM_D(0); @@ -1435,34 +1443,46 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); - d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); - XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); - XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); - d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); - d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); - 
XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); - XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); + Reg r; + + r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); + r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); + XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); + XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); + r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); + r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); + XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); + XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); + + *d = r; } void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); - XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); - d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); - XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); + Reg r; + + r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); + XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); + r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); + XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); + + *d = r; } void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); - d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); - XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); - XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); - d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); - d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); - XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); - XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); + Reg r; + + r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); + r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); + XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); + XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); + r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); + r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); + XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); + XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); + + *d = r; } void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -1751,6 +1771,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1776,19 +1797,18 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_S(2) = float32_round_to_int(s->ZMM_S(2), &env->sse_status); d->ZMM_S(3) = float32_round_to_int(s->ZMM_S(3), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1812,19 +1832,18 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_D(0) = 
float64_round_to_int(s->ZMM_D(0), &env->sse_status); d->ZMM_D(1) = float64_round_to_int(s->ZMM_D(1), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1847,19 +1866,18 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode; @@ -1882,13 +1900,11 @@ void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } @@ -2076,10 +2092,10 @@ static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s, res = (2 << upper) - 1; break; } - for (j = valids - validd; j >= 0; j--) { + for (j = valids == upper ? 
valids : valids - validd; j >= 0; j--) { res <<= 1; v = 1; - for (i = validd; i >= 0; i--) { + for (i = MIN(valids - j, validd); i >= 0; i--) { v &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i)); } res |= v; diff --git a/qemu/target/i386/svm.h b/qemu/target/i386/svm.h index 30649ee9cb..35d0b8a0a8 100644 --- a/qemu/target/i386/svm.h +++ b/qemu/target/i386/svm.h @@ -137,6 +137,7 @@ #define SVM_NPT_PAE (1 << 0) #define SVM_NPT_LMA (1 << 1) #define SVM_NPT_NXE (1 << 2) +#define SVM_NPT_PSE (1 << 3) #define SVM_NPTEXIT_P (1ULL << 0) #define SVM_NPTEXIT_RW (1ULL << 1) diff --git a/qemu/target/i386/svm_helper.c b/qemu/target/i386/svm_helper.c index ade26593a3..4553dc6810 100644 --- a/qemu/target/i386/svm_helper.c +++ b/qemu/target/i386/svm_helper.c @@ -152,16 +152,21 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.nested_ctl)); + + env->nested_pg_mode = 0; + if (nested_ctl & SVM_NPT_ENABLED) { env->nested_cr3 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.nested_cr3)); env->hflags2 |= HF2_NPT_MASK; - env->nested_pg_mode = 0; if (env->cr[4] & CR4_PAE_MASK) { env->nested_pg_mode |= SVM_NPT_PAE; } + if (env->cr[4] & CR4_PSE_MASK) { + env->nested_pg_mode |= SVM_NPT_PSE; + } if (env->hflags & HF_LMA_MASK) { env->nested_pg_mode |= SVM_NPT_LMA; } diff --git a/qemu/target/i386/translate.c b/qemu/target/i386/translate.c index 741102bed4..b92f3f373f 100644 --- a/qemu/target/i386/translate.c +++ b/qemu/target/i386/translate.c @@ -1325,9 +1325,6 @@ static inline void gen_ins(DisasContext *s, MemOp ot) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(tcg_ctx); - } gen_string_movl_A0_EDI(s); /* Note: we must do this dummy write first to be restartable in case of page fault. 
*/ @@ -1340,18 +1337,12 @@ static inline void gen_ins(DisasContext *s, MemOp ot) gen_op_movl_T0_Dshift(s, ot); gen_op_add_reg_T0(s, s->aflag, R_EDI); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); - } } static inline void gen_outs(DisasContext *s, MemOp ot) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(tcg_ctx); - } gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); @@ -1362,9 +1353,6 @@ static inline void gen_outs(DisasContext *s, MemOp ot) gen_op_movl_T0_Dshift(s, ot); gen_op_add_reg_T0(s, s->aflag, R_ESI); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); - } } /* same method as Valgrind : we generate jumps to current or next @@ -7017,6 +7005,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + /* jump generated by gen_repz_ins */ } else { gen_ins(s, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { @@ -7030,8 +7019,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_ext16u_tl(tcg_ctx, s->T0, tcg_ctx->cpu_regs[R_EDX]); gen_check_io(s, ot, pc_start - s->cs_base, svm_is_rep(prefixes) | 4); + if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { + gen_io_start(tcg_ctx); + } if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + /* jump generated by gen_repz_outs */ } else { gen_outs(s, ot); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { @@ -7881,6 +7874,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) l1 = gen_new_label(tcg_ctx); l2 = gen_new_label(tcg_ctx); l3 = gen_new_label(tcg_ctx); + gen_update_cc_op(s); b &= 3; switch(b) { case 0: /* loopnz */ @@ -8314,12 +8308,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) CASE_MODRM_OP(4): /* smsw */ gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0); tcg_gen_ld_tl(tcg_ctx, s->T0, tcg_ctx->cpu_env, offsetof(CPUX86State, cr[0])); - if (CODE64(s)) { - mod = (modrm >> 6) & 3; - ot = (mod != 3 ? MO_16 : s->dflag); - } else { - ot = MO_16; - } + /* + * In 32-bit mode, the higher 16 bits of the destination + * register are undefined. In practice CR0[31:0] is stored + * just like in 64-bit mode. + */ + mod = (modrm >> 6) & 3; + ot = (mod != 3 ? 
MO_16 : s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); break; case 0xee: /* rdpkru */ @@ -8774,7 +8769,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_helper_read_crN(tcg_ctx, s->T0, tcg_ctx->cpu_env, tcg_const_i32(tcg_ctx, reg)); gen_op_mov_reg_v(s, ot, rm, s->T0); if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(tcg_ctx); + gen_jmp(s, s->pc - s->cs_base); } } break; @@ -8892,6 +8887,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); break; } + gen_helper_update_mxcsr(tcg_ctx, tcg_ctx->cpu_env); gen_lea_modrm(env, s, modrm); tcg_gen_ld32u_tl(tcg_ctx, s->T0, tcg_ctx->cpu_env, offsetof(CPUX86State, mxcsr)); gen_op_st_v(s, MO_32, s->T0, s->A0); diff --git a/qemu/target/m68k/cpu.c b/qemu/target/m68k/cpu.c index 6b636b80eb..5802dbdf24 100644 --- a/qemu/target/m68k/cpu.c +++ b/qemu/target/m68k/cpu.c @@ -236,12 +236,6 @@ static void m68k_cpu_class_init(CPUClass *c) cc->tcg_initialize = m68k_tcg_init; } -#define DEFINE_M68K_CPU_TYPE(cpu_model, initfn) \ - { \ - .name = cpu_model, \ - .initfn = initfn, \ - } - struct M68kCPUInfo { const char *name; void (*initfn)(CPUState *obj); diff --git a/qemu/target/m68k/fpu_helper.c b/qemu/target/m68k/fpu_helper.c index 3f544a0572..7f67fa10db 100644 --- a/qemu/target/m68k/fpu_helper.c +++ b/qemu/target/m68k/fpu_helper.c @@ -149,7 +149,7 @@ void cpu_m68k_set_fpcr(CPUM68KState *env, uint32_t val) void HELPER(fitrunc)(CPUM68KState *env, FPReg *res, FPReg *val) { - int rounding_mode = get_float_rounding_mode(&env->fp_status); + FloatRoundMode rounding_mode = get_float_rounding_mode(&env->fp_status); set_float_rounding_mode(float_round_to_zero, &env->fp_status); res->d = floatx80_round_to_int(val->d, &env->fp_status); set_float_rounding_mode(rounding_mode, &env->fp_status); @@ -300,7 +300,7 @@ void HELPER(fdmul)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) void HELPER(fsglmul)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) { - int rounding_mode = get_float_rounding_mode(&env->fp_status); + FloatRoundMode rounding_mode = get_float_rounding_mode(&env->fp_status); floatx80 a, b; PREC_BEGIN(32); @@ -333,7 +333,7 @@ void HELPER(fddiv)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) void HELPER(fsgldiv)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) { - int rounding_mode = get_float_rounding_mode(&env->fp_status); + FloatRoundMode rounding_mode = get_float_rounding_mode(&env->fp_status); floatx80 a, b; PREC_BEGIN(32); @@ -642,6 +642,11 @@ void HELPER(fatanh)(CPUM68KState *env, FPReg *res, FPReg *val) res->d = floatx80_atanh(val->d, &env->fp_status); } +void HELPER(fetoxm1)(CPUM68KState *env, FPReg *res, FPReg *val) +{ + res->d = floatx80_etoxm1(val->d, &env->fp_status); +} + void HELPER(ftanh)(CPUM68KState *env, FPReg *res, FPReg *val) { res->d = floatx80_tanh(val->d, &env->fp_status); diff --git a/qemu/target/m68k/helper.c b/qemu/target/m68k/helper.c index b0f2e298e7..fb441ee935 100644 --- a/qemu/target/m68k/helper.c +++ b/qemu/target/m68k/helper.c @@ -284,7 +284,7 @@ static int get_physical_address(CPUM68KState *env, hwaddr *physical, /* Transparent Translation Register bit */ env->mmu.mmusr = M68K_MMU_T_040 | M68K_MMU_R_040; } - *physical = address & TARGET_PAGE_MASK; + *physical = address; *page_size = TARGET_PAGE_SIZE; return 0; } @@ -412,7 +412,7 @@ static int get_physical_address(CPUM68KState *env, hwaddr *physical, } *page_size = 1 << page_bits; page_mask = ~(*page_size - 1); - *physical = next & page_mask; + 
*physical = (next & page_mask) + (address & (*page_size - 1)); if (access_type & ACCESS_PTEST) { env->mmu.mmusr |= next & M68K_MMU_SR_MASK_040; @@ -461,6 +461,7 @@ hwaddr m68k_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) if (env->sr & SR_S) { access_type |= ACCESS_SUPER; } + if (get_physical_address(env, &phys_addr, &prot, addr, access_type, &page_size) != 0) { return -1; @@ -525,10 +526,8 @@ bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int size, ret = get_physical_address(&cpu->env, &physical, &prot, address, access_type, &page_size); if (likely(ret == 0)) { - address &= TARGET_PAGE_MASK; - physical += address & (page_size - 1); - tlb_set_page(cs, address, physical, - prot, mmu_idx, TARGET_PAGE_SIZE); + tlb_set_page(cs, address & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot, mmu_idx, page_size); return true; } @@ -1015,9 +1014,8 @@ void HELPER(ptest)(CPUM68KState *env, uint32_t addr, uint32_t is_read) ret = get_physical_address(env, &physical, &prot, addr, access_type, &page_size); if (ret == 0) { - addr &= TARGET_PAGE_MASK; - physical += addr & (page_size - 1); - tlb_set_page(env_cpu(env), addr, physical, + tlb_set_page(env_cpu(env), addr & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot, access_type & ACCESS_SUPER ? MMU_KERNEL_IDX : MMU_USER_IDX, page_size); } diff --git a/qemu/target/m68k/helper.h b/qemu/target/m68k/helper.h index 413f88dc65..f9978b0e20 100644 --- a/qemu/target/m68k/helper.h +++ b/qemu/target/m68k/helper.h @@ -86,6 +86,7 @@ DEF_HELPER_3(fatan, void, env, fp, fp) DEF_HELPER_3(fasin, void, env, fp, fp) DEF_HELPER_3(facos, void, env, fp, fp) DEF_HELPER_3(fatanh, void, env, fp, fp) +DEF_HELPER_3(fetoxm1, void, env, fp, fp) DEF_HELPER_3(ftanh, void, env, fp, fp) DEF_HELPER_3(fsinh, void, env, fp, fp) DEF_HELPER_3(fcosh, void, env, fp, fp) diff --git a/qemu/target/m68k/softfloat.c b/qemu/target/m68k/softfloat.c index 24c313ed69..b6d0ed7acf 100644 --- a/qemu/target/m68k/softfloat.c +++ b/qemu/target/m68k/softfloat.c @@ -42,89 +42,6 @@ static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) return a; } -/* - * Returns the modulo remainder of the extended double-precision floating-point - * value `a' with respect to the corresponding value `b'. - */ - -floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig; - uint64_t qTemp, term0, term1; - - aSig0 = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - bSig = extractFloatx80Frac(b); - bExp = extractFloatx80Exp(b); - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig0 << 1) - || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - if (bExp == 0x7FFF) { - if ((uint64_t) (bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - if (bExp == 0) { - if (bSig == 0) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - normalizeFloatx80Subnormal(bSig, &bExp, &bSig); - } - if (aExp == 0) { - if ((uint64_t) (aSig0 << 1) == 0) { - return a; - } - normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); - } - bSig |= UINT64_C(0x8000000000000000); - zSign = aSign; - expDiff = aExp - bExp; - aSig1 = 0; - if (expDiff < 0) { - return a; - } - qTemp = (bSig <= aSig0); - if (qTemp) { - aSig0 -= bSig; - } - expDiff -= 64; - while (0 < expDiff) { - qTemp = estimateDiv128To64(aSig0, aSig1, bSig); - qTemp = (2 < qTemp) ? 
qTemp - 2 : 0; - mul64To128(bSig, qTemp, &term0, &term1); - sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); - shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1); - expDiff -= 62; - } - expDiff += 64; - if (0 < expDiff) { - qTemp = estimateDiv128To64(aSig0, aSig1, bSig); - qTemp = (2 < qTemp) ? qTemp - 2 : 0; - qTemp >>= 64 - expDiff; - mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1); - sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); - shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1); - while (le128(term0, term1, aSig0, aSig1)) { - ++qTemp; - sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); - } - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status); -} - /* * Returns the mantissa of the extended double-precision floating-point * value `a'. @@ -132,7 +49,7 @@ floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_getman(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -166,7 +83,7 @@ floatx80 floatx80_getman(floatx80 a, float_status *status) floatx80 floatx80_getexp(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -202,7 +119,7 @@ floatx80 floatx80_getexp(floatx80 a, float_status *status) floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + bool aSign, bSign; int32_t aExp, bExp, shiftCount; uint64_t aSig, bSig; @@ -258,7 +175,7 @@ floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) floatx80 floatx80_move(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -306,7 +223,7 @@ static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig) floatx80 floatx80_lognp1(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig, fSig; @@ -505,7 +422,7 @@ floatx80 floatx80_lognp1(floatx80 a, float_status *status) floatx80 floatx80_logn(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig, fSig; @@ -673,7 +590,7 @@ floatx80 floatx80_logn(floatx80 a, float_status *status) floatx80 floatx80_log10(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -730,7 +647,7 @@ floatx80 floatx80_log10(floatx80 a, float_status *status) floatx80 floatx80_log2(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -797,7 +714,7 @@ floatx80 floatx80_log2(floatx80 a, float_status *status) floatx80 floatx80_etox(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -805,7 +722,7 @@ floatx80 floatx80_etox(floatx80 a, float_status *status) int32_t compact, n, j, k, m, m1; floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale; - flag adjflag; + bool adjflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -981,7 +898,7 @@ floatx80 floatx80_etox(floatx80 a, float_status *status) floatx80 floatx80_twotox(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -1131,7 +1048,7 @@ floatx80 floatx80_twotox(floatx80 a, float_status *status) floatx80 floatx80_tentox(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -1286,7 +1203,7 @@ floatx80 floatx80_tentox(floatx80 a, float_status *status) floatx80 floatx80_tan(floatx80 a, float_status *status) { - flag aSign, xSign; + bool aSign, xSign; int32_t aExp, xExp; uint64_t aSig, xSig; @@ -1295,7 +1212,7 
@@ floatx80 floatx80_tan(floatx80 a, float_status *status) int32_t compact, l, n, j; floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2; float32 twoto63; - flag endflag; + bool endflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -1344,10 +1261,10 @@ floatx80 floatx80_tan(floatx80 a, float_status *status) xExp -= 0x3FFF; if (xExp <= 28) { l = 0; - endflag = 1; + endflag = true; } else { l = xExp - 27; - endflag = 0; + endflag = false; } invtwopi = packFloatx80(0, 0x3FFE - l, UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ @@ -1372,7 +1289,7 @@ floatx80 floatx80_tan(floatx80 a, float_status *status) fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ - if (endflag > 0) { + if (endflag) { n = floatx80_to_int32(fp2, status); goto tancont; } @@ -1496,7 +1413,7 @@ floatx80 floatx80_tan(floatx80 a, float_status *status) floatx80 floatx80_sin(floatx80 a, float_status *status) { - flag aSign, xSign; + bool aSign, xSign; int32_t aExp, xExp; uint64_t aSig, xSig; @@ -1505,7 +1422,7 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) int32_t compact, l, n, j; floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; float32 posneg1, twoto63; - flag endflag; + bool endflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -1554,10 +1471,10 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) xExp -= 0x3FFF; if (xExp <= 28) { l = 0; - endflag = 1; + endflag = true; } else { l = xExp - 27; - endflag = 0; + endflag = false; } invtwopi = packFloatx80(0, 0x3FFE - l, UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ @@ -1582,7 +1499,7 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ - if (endflag > 0) { + if (endflag) { n = floatx80_to_int32(fp2, status); goto sincont; } @@ -1735,7 +1652,7 @@ floatx80 floatx80_sin(floatx80 a, float_status *status) floatx80 floatx80_cos(floatx80 a, float_status *status) { - flag aSign, xSign; + bool aSign, xSign; int32_t aExp, xExp; uint64_t aSig, xSig; @@ -1744,7 +1661,7 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) int32_t compact, l, n, j; floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; float32 posneg1, twoto63; - flag endflag; + bool endflag; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); @@ -1793,10 +1710,10 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) xExp -= 0x3FFF; if (xExp <= 28) { l = 0; - endflag = 1; + endflag = true; } else { l = xExp - 27; - endflag = 0; + endflag = false; } invtwopi = packFloatx80(0, 0x3FFE - l, UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ @@ -1821,7 +1738,7 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ - if (endflag > 0) { + if (endflag) { n = floatx80_to_int32(fp2, status); goto sincont; } @@ -1972,7 +1889,7 @@ floatx80 floatx80_cos(floatx80 a, float_status *status) floatx80 floatx80_atan(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2169,7 +2086,7 @@ floatx80 floatx80_atan(floatx80 a, float_status *status) floatx80 floatx80_asin(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2234,7 +2151,7 @@ floatx80 floatx80_asin(floatx80 a, float_status *status) floatx80 floatx80_acos(floatx80 a, 
float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2303,7 +2220,7 @@ floatx80 floatx80_acos(floatx80 a, float_status *status) floatx80 floatx80_atanh(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2368,7 +2285,7 @@ floatx80 floatx80_atanh(floatx80 a, float_status *status) floatx80 floatx80_etoxm1(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; @@ -2620,7 +2537,7 @@ floatx80 floatx80_etoxm1(floatx80 a, float_status *status) floatx80 floatx80_tanh(floatx80 a, float_status *status) { - flag aSign, vSign; + bool aSign, vSign; int32_t aExp, vExp; uint64_t aSig, vSig; @@ -2735,7 +2652,7 @@ floatx80 floatx80_tanh(floatx80 a, float_status *status) floatx80 floatx80_sinh(floatx80 a, float_status *status) { - flag aSign; + bool aSign; int32_t aExp; uint64_t aSig; diff --git a/qemu/target/m68k/softfloat.h b/qemu/target/m68k/softfloat.h index 365ef6ac7a..4bb9567134 100644 --- a/qemu/target/m68k/softfloat.h +++ b/qemu/target/m68k/softfloat.h @@ -23,7 +23,6 @@ #define TARGET_M68K_SOFTFLOAT_H #include "fpu/softfloat.h" -floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status); floatx80 floatx80_getman(floatx80 a, float_status *status); floatx80 floatx80_getexp(floatx80 a, float_status *status); floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status); diff --git a/qemu/target/m68k/translate.c b/qemu/target/m68k/translate.c index 5d0fa7497a..fdb4ccb977 100644 --- a/qemu/target/m68k/translate.c +++ b/qemu/target/m68k/translate.c @@ -5077,6 +5077,20 @@ static void gen_op_fmove_fcr(CPUM68KState *env, DisasContext *s, gen_store_fcr(s, AREG(insn, 0), mask); } return; + case 7: /* Immediate */ + if (REG(insn, 0) == 4) { + if (is_write || + (mask != M68K_FPIAR && mask != M68K_FPSR && + mask != M68K_FPCR)) { + gen_exception(s, s->base.pc_next, EXCP_ILLEGAL); + return; + } + tmp = tcg_const_i32(tcg_ctx, read_im32(env, s)); + gen_store_fcr(s, tmp, mask); + tcg_temp_free(tcg_ctx, tmp); + return; + } + break; default: break; } @@ -5289,6 +5303,9 @@ DISAS_INSN(fpu) case 0x06: /* flognp1 */ gen_helper_flognp1(tcg_ctx, tcg_ctx->cpu_env, cpu_dest, cpu_src); break; + case 0x08: /* fetoxm1 */ + gen_helper_fetoxm1(tcg_ctx, tcg_ctx->cpu_env, cpu_dest, cpu_src); + break; case 0x09: /* ftanh */ gen_helper_ftanh(tcg_ctx, tcg_ctx->cpu_env, cpu_dest, cpu_src); break; diff --git a/qemu/target/mips/cp0_helper.c b/qemu/target/mips/cp0_helper.c index e3600c26d7..9665c0beb5 100644 --- a/qemu/target/mips/cp0_helper.c +++ b/qemu/target/mips/cp0_helper.c @@ -378,16 +378,9 @@ target_ulong helper_mftc0_entryhi(CPUMIPSState *env) target_ulong helper_mftc0_cause(CPUMIPSState *env) { int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC); - int32_t tccause; CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc); - if (other_tc == other->current_tc) { - tccause = other->CP0_Cause; - } else { - tccause = other->CP0_Cause; - } - - return tccause; + return other->CP0_Cause; } target_ulong helper_mftc0_status(CPUMIPSState *env) @@ -877,6 +870,7 @@ void helper_mtc0_memorymapid(CPUMIPSState *env, target_ulong arg1) void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask) { + struct uc_struct *uc = env->uc; uint64_t mask = arg1 >> (TARGET_PAGE_BITS + 1); if (!(env->insn_flags & ISA_MIPS32R6) || (arg1 == ~0) || (mask == 0x0000 || mask == 0x0003 || mask == 0x000F || @@ -1113,6 +1107,7 @@ void helper_mthc0_saar(CPUMIPSState *env, target_ulong arg1) void 
helper_mtc0_entryhi(CPUMIPSState *env, target_ulong arg1) { + struct uc_struct *uc = env->uc; target_ulong old, val, mask; mask = (TARGET_PAGE_MASK << 1) | env->CP0_EntryHi_ASID_mask; if (((env->CP0_Config4 >> CP0C4_IE) & 0x3) >= 2) { diff --git a/qemu/target/mips/cpu-param.h b/qemu/target/mips/cpu-param.h index f073f379fc..57caf5d588 100644 --- a/qemu/target/mips/cpu-param.h +++ b/qemu/target/mips/cpu-param.h @@ -19,7 +19,8 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 40 #define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif -#define TARGET_PAGE_BITS 12 +#define TARGET_PAGE_BITS_VARY +#define TARGET_PAGE_BITS_MIN 12 #define NB_MMU_MODES 4 #endif diff --git a/qemu/target/mips/cpu.h b/qemu/target/mips/cpu.h index 95f6bf5077..4ddd4321d3 100644 --- a/qemu/target/mips/cpu.h +++ b/qemu/target/mips/cpu.h @@ -940,7 +940,35 @@ struct CPUMIPSState { #define CP0C5_UFR 2 #define CP0C5_NFExists 0 int32_t CP0_Config6; + int32_t CP0_Config6_rw_bitmask; +#define CP0C6_BPPASS 31 +#define CP0C6_KPOS 24 +#define CP0C6_KE 23 +#define CP0C6_VTLBONLY 22 +#define CP0C6_LASX 21 +#define CP0C6_SSEN 20 +#define CP0C6_DISDRTIME 19 +#define CP0C6_PIXNUEN 18 +#define CP0C6_SCRAND 17 +#define CP0C6_LLEXCEN 16 +#define CP0C6_DISVC 15 +#define CP0C6_VCLRU 14 +#define CP0C6_DCLRU 13 +#define CP0C6_PIXUEN 12 +#define CP0C6_DISBLKLYEN 11 +#define CP0C6_UMEMUALEN 10 +#define CP0C6_SFBEN 8 +#define CP0C6_FLTINT 7 +#define CP0C6_VLTINT 6 +#define CP0C6_DISBTB 5 +#define CP0C6_STPREFCTL 2 +#define CP0C6_INSTPREF 1 +#define CP0C6_DATAPREF 0 int32_t CP0_Config7; + int64_t CP0_Config7_rw_bitmask; +#define CP0C7_NAPCGEN 2 +#define CP0C7_UNIMUEN 1 +#define CP0C7_VFPUCGEN 0 uint64_t CP0_LLAddr; uint64_t CP0_MAAR[MIPS_MAAR_MAX]; int32_t CP0_MAARI; diff --git a/qemu/target/mips/fpu_helper.c b/qemu/target/mips/fpu_helper.c index 34431468af..027d8c0fa9 100644 --- a/qemu/target/mips/fpu_helper.c +++ b/qemu/target/mips/fpu_helper.c @@ -28,7 +28,6 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/memop.h" -//#include "sysemu/kvm.h" #include "fpu/softfloat.h" @@ -188,43 +187,48 @@ void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t fs, uint32_t rt) } } -int ieee_ex_to_mips(int xcpt) +static inline int ieee_to_mips_xcpt(int ieee_xcpt) { - int ret = 0; - if (xcpt) { - if (xcpt & float_flag_invalid) { - ret |= FP_INVALID; - } - if (xcpt & float_flag_overflow) { - ret |= FP_OVERFLOW; - } - if (xcpt & float_flag_underflow) { - ret |= FP_UNDERFLOW; - } - if (xcpt & float_flag_divbyzero) { - ret |= FP_DIV0; - } - if (xcpt & float_flag_inexact) { - ret |= FP_INEXACT; - } + int mips_xcpt = 0; + + if (ieee_xcpt & float_flag_invalid) { + mips_xcpt |= FP_INVALID; + } + if (ieee_xcpt & float_flag_overflow) { + mips_xcpt |= FP_OVERFLOW; + } + if (ieee_xcpt & float_flag_underflow) { + mips_xcpt |= FP_UNDERFLOW; } - return ret; + if (ieee_xcpt & float_flag_divbyzero) { + mips_xcpt |= FP_DIV0; + } + if (ieee_xcpt & float_flag_inexact) { + mips_xcpt |= FP_INEXACT; + } + + return mips_xcpt; } static inline void update_fcr31(CPUMIPSState *env, uintptr_t pc) { - int tmp = ieee_ex_to_mips(get_float_exception_flags( - &env->active_fpu.fp_status)); + int ieee_exception_flags = get_float_exception_flags( + &env->active_fpu.fp_status); + int mips_exception_flags = 0; - SET_FP_CAUSE(env->active_fpu.fcr31, tmp); + if (ieee_exception_flags) { + mips_exception_flags = ieee_to_mips_xcpt(ieee_exception_flags); + } - if (tmp) { + SET_FP_CAUSE(env->active_fpu.fcr31, mips_exception_flags); + + if (mips_exception_flags) { set_float_exception_flags(0, 
&env->active_fpu.fp_status); - if (GET_FP_ENABLE(env->active_fpu.fcr31) & tmp) { + if (GET_FP_ENABLE(env->active_fpu.fcr31) & mips_exception_flags) { do_raise_exception(env, EXCP_FPE, pc); } else { - UPDATE_FP_FLAGS(env->active_fpu.fcr31, tmp); + UPDATE_FP_FLAGS(env->active_fpu.fcr31, mips_exception_flags); } } } @@ -1058,14 +1062,14 @@ uint32_t helper_float_recip1_s(CPUMIPSState *env, uint32_t fst0) uint64_t helper_float_recip1_ps(CPUMIPSState *env, uint64_t fdt0) { - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_div(float32_one, fdt0 & 0XFFFFFFFF, - &env->active_fpu.fp_status); + fstl2 = float32_div(float32_one, fdt0 & 0XFFFFFFFF, + &env->active_fpu.fp_status); fsth2 = float32_div(float32_one, fdt0 >> 32, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_rsqrt1_d(CPUMIPSState *env, uint64_t fdt0) @@ -1090,31 +1094,34 @@ uint32_t helper_float_rsqrt1_s(CPUMIPSState *env, uint32_t fst0) uint64_t helper_float_rsqrt1_ps(CPUMIPSState *env, uint64_t fdt0) { - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_sqrt(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status); + fstl2 = float32_sqrt(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status); fsth2 = float32_sqrt(fdt0 >> 32, &env->active_fpu.fp_status); - fst2 = float32_div(float32_one, fst2, &env->active_fpu.fp_status); + fstl2 = float32_div(float32_one, fstl2, &env->active_fpu.fp_status); fsth2 = float32_div(float32_one, fsth2, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } -#define FLOAT_RINT(name, bits) \ -uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \ - uint ## bits ## _t fs) \ -{ \ - uint ## bits ## _t fdret; \ - \ - fdret = float ## bits ## _round_to_int(fs, &env->active_fpu.fp_status); \ - update_fcr31(env, GETPC()); \ - return fdret; \ +uint64_t helper_float_rint_d(CPUMIPSState *env, uint64_t fs) +{ + uint64_t fdret; + + fdret = float64_round_to_int(fs, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return fdret; } -FLOAT_RINT(rint_s, 32) -FLOAT_RINT(rint_d, 64) -#undef FLOAT_RINT +uint32_t helper_float_rint_s(CPUMIPSState *env, uint32_t fs) +{ + uint32_t fdret; + + fdret = float32_round_to_int(fs, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return fdret; +} #define FLOAT_CLASS_SIGNALING_NAN 0x001 #define FLOAT_CLASS_QUIET_NAN 0x002 @@ -1127,91 +1134,220 @@ FLOAT_RINT(rint_d, 64) #define FLOAT_CLASS_POSITIVE_SUBNORMAL 0x100 #define FLOAT_CLASS_POSITIVE_ZERO 0x200 -#define FLOAT_CLASS(name, bits) \ -uint ## bits ## _t float_ ## name(uint ## bits ## _t arg, \ - float_status *status) \ -{ \ - if (float ## bits ## _is_signaling_nan(arg, status)) { \ - return FLOAT_CLASS_SIGNALING_NAN; \ - } else if (float ## bits ## _is_quiet_nan(arg, status)) { \ - return FLOAT_CLASS_QUIET_NAN; \ - } else if (float ## bits ## _is_neg(arg)) { \ - if (float ## bits ## _is_infinity(arg)) { \ - return FLOAT_CLASS_NEGATIVE_INFINITY; \ - } else if (float ## bits ## _is_zero(arg)) { \ - return FLOAT_CLASS_NEGATIVE_ZERO; \ - } else if (float ## bits ## _is_zero_or_denormal(arg)) { \ - return FLOAT_CLASS_NEGATIVE_SUBNORMAL; \ - } else { \ - return FLOAT_CLASS_NEGATIVE_NORMAL; \ - } \ - } else { \ - if (float ## bits ## _is_infinity(arg)) { \ - return FLOAT_CLASS_POSITIVE_INFINITY; \ - } else if (float ## bits ## _is_zero(arg)) { \ - return FLOAT_CLASS_POSITIVE_ZERO; \ - } else if (float ## 
bits ## _is_zero_or_denormal(arg)) { \ - return FLOAT_CLASS_POSITIVE_SUBNORMAL; \ - } else { \ - return FLOAT_CLASS_POSITIVE_NORMAL; \ - } \ - } \ -} \ - \ -uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \ - uint ## bits ## _t arg) \ -{ \ - return float_ ## name(arg, &env->active_fpu.fp_status); \ -} - -FLOAT_CLASS(class_s, 32) -FLOAT_CLASS(class_d, 64) -#undef FLOAT_CLASS +uint64_t float_class_d(uint64_t arg, float_status *status) +{ + if (float64_is_signaling_nan(arg, status)) { + return FLOAT_CLASS_SIGNALING_NAN; + } else if (float64_is_quiet_nan(arg, status)) { + return FLOAT_CLASS_QUIET_NAN; + } else if (float64_is_neg(arg)) { + if (float64_is_infinity(arg)) { + return FLOAT_CLASS_NEGATIVE_INFINITY; + } else if (float64_is_zero(arg)) { + return FLOAT_CLASS_NEGATIVE_ZERO; + } else if (float64_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_NEGATIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_NEGATIVE_NORMAL; + } + } else { + if (float64_is_infinity(arg)) { + return FLOAT_CLASS_POSITIVE_INFINITY; + } else if (float64_is_zero(arg)) { + return FLOAT_CLASS_POSITIVE_ZERO; + } else if (float64_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_POSITIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_POSITIVE_NORMAL; + } + } +} + +uint64_t helper_float_class_d(CPUMIPSState *env, uint64_t arg) +{ + return float_class_d(arg, &env->active_fpu.fp_status); +} + +uint32_t float_class_s(uint32_t arg, float_status *status) +{ + if (float32_is_signaling_nan(arg, status)) { + return FLOAT_CLASS_SIGNALING_NAN; + } else if (float32_is_quiet_nan(arg, status)) { + return FLOAT_CLASS_QUIET_NAN; + } else if (float32_is_neg(arg)) { + if (float32_is_infinity(arg)) { + return FLOAT_CLASS_NEGATIVE_INFINITY; + } else if (float32_is_zero(arg)) { + return FLOAT_CLASS_NEGATIVE_ZERO; + } else if (float32_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_NEGATIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_NEGATIVE_NORMAL; + } + } else { + if (float32_is_infinity(arg)) { + return FLOAT_CLASS_POSITIVE_INFINITY; + } else if (float32_is_zero(arg)) { + return FLOAT_CLASS_POSITIVE_ZERO; + } else if (float32_is_zero_or_denormal(arg)) { + return FLOAT_CLASS_POSITIVE_SUBNORMAL; + } else { + return FLOAT_CLASS_POSITIVE_NORMAL; + } + } +} + +uint32_t helper_float_class_s(CPUMIPSState *env, uint32_t arg) +{ + return float_class_s(arg, &env->active_fpu.fp_status); +} /* binary operations */ -#define FLOAT_BINOP(name) \ -uint64_t helper_float_ ## name ## _d(CPUMIPSState *env, \ - uint64_t fdt0, uint64_t fdt1) \ -{ \ - uint64_t dt2; \ - \ - dt2 = float64_ ## name(fdt0, fdt1, &env->active_fpu.fp_status);\ - update_fcr31(env, GETPC()); \ - return dt2; \ -} \ - \ -uint32_t helper_float_ ## name ## _s(CPUMIPSState *env, \ - uint32_t fst0, uint32_t fst1) \ -{ \ - uint32_t wt2; \ - \ - wt2 = float32_ ## name(fst0, fst1, &env->active_fpu.fp_status);\ - update_fcr31(env, GETPC()); \ - return wt2; \ -} \ - \ -uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env, \ - uint64_t fdt0, \ - uint64_t fdt1) \ -{ \ - uint32_t fst0 = fdt0 & 0XFFFFFFFF; \ - uint32_t fsth0 = fdt0 >> 32; \ - uint32_t fst1 = fdt1 & 0XFFFFFFFF; \ - uint32_t fsth1 = fdt1 >> 32; \ - uint32_t wt2; \ - uint32_t wth2; \ - \ - wt2 = float32_ ## name(fst0, fst1, &env->active_fpu.fp_status); \ - wth2 = float32_ ## name(fsth0, fsth1, &env->active_fpu.fp_status); \ - update_fcr31(env, GETPC()); \ - return ((uint64_t)wth2 << 32) | wt2; \ -} - -FLOAT_BINOP(add) -FLOAT_BINOP(sub) -FLOAT_BINOP(mul) -FLOAT_BINOP(div) -#undef FLOAT_BINOP + +uint64_t helper_float_add_d(CPUMIPSState 
*env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_add(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_add_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_add(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_add_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_add(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_add(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + +uint64_t helper_float_sub_d(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_sub(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_sub_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_sub(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_sub_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_sub(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_sub(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + +uint64_t helper_float_mul_d(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_mul(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_mul_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_mul_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + +uint64_t helper_float_div_d(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint64_t dt2; + + dt2 = float64_div(fdt0, fdt1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return dt2; +} + +uint32_t helper_float_div_s(CPUMIPSState *env, + uint32_t fst0, uint32_t fst1) +{ + uint32_t wt2; + + wt2 = float32_div(fst0, fst1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return wt2; +} + +uint64_t helper_float_div_ps(CPUMIPSState *env, + uint64_t fdt0, uint64_t fdt1) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t wtl2; + uint32_t wth2; + + wtl2 = float32_div(fstl0, fstl1, &env->active_fpu.fp_status); + wth2 = float32_div(fsth0, fsth1, &env->active_fpu.fp_status); + update_fcr31(env, GETPC()); + return ((uint64_t)wth2 << 32) | wtl2; +} + /* MIPS specific binary operations */ uint64_t 
helper_float_recip2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) @@ -1234,19 +1370,19 @@ uint32_t helper_float_recip2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2) uint64_t helper_float_recip2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst2 = fdt2 & 0XFFFFFFFF; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; uint32_t fsth2 = fdt2 >> 32; - fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status); + fstl2 = float32_mul(fstl0, fstl2, &env->active_fpu.fp_status); fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status); - fst2 = float32_chs(float32_sub(fst2, float32_one, + fstl2 = float32_chs(float32_sub(fstl2, float32_one, &env->active_fpu.fp_status)); fsth2 = float32_chs(float32_sub(fsth2, float32_one, &env->active_fpu.fp_status)); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_rsqrt2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) @@ -1271,51 +1407,51 @@ uint32_t helper_float_rsqrt2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2) uint64_t helper_float_rsqrt2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst2 = fdt2 & 0XFFFFFFFF; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; uint32_t fsth2 = fdt2 >> 32; - fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status); + fstl2 = float32_mul(fstl0, fstl2, &env->active_fpu.fp_status); fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status); - fst2 = float32_sub(fst2, float32_one, &env->active_fpu.fp_status); + fstl2 = float32_sub(fstl2, float32_one, &env->active_fpu.fp_status); fsth2 = float32_sub(fsth2, float32_one, &env->active_fpu.fp_status); - fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, + fstl2 = float32_chs(float32_div(fstl2, FLOAT_TWO32, &env->active_fpu.fp_status)); fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, &env->active_fpu.fp_status)); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_addr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst1 = fdt1 & 0XFFFFFFFF; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; uint32_t fsth1 = fdt1 >> 32; - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_add(fst0, fsth0, &env->active_fpu.fp_status); - fsth2 = float32_add(fst1, fsth1, &env->active_fpu.fp_status); + fstl2 = float32_add(fstl0, fsth0, &env->active_fpu.fp_status); + fsth2 = float32_add(fstl1, fsth1, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 << 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } uint64_t helper_float_mulr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1) { - uint32_t fst0 = fdt0 & 0XFFFFFFFF; + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; uint32_t fsth0 = fdt0 >> 32; - uint32_t fst1 = fdt1 & 0XFFFFFFFF; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; uint32_t fsth1 = fdt1 >> 32; - uint32_t fst2; + uint32_t fstl2; uint32_t fsth2; - fst2 = float32_mul(fst0, fsth0, &env->active_fpu.fp_status); - fsth2 = float32_mul(fst1, fsth1, &env->active_fpu.fp_status); + fstl2 = float32_mul(fstl0, fsth0, &env->active_fpu.fp_status); + fsth2 = float32_mul(fstl1, fsth1, &env->active_fpu.fp_status); update_fcr31(env, GETPC()); - return ((uint64_t)fsth2 
<< 32) | fst2; + return ((uint64_t)fsth2 << 32) | fstl2; } #define FLOAT_MINMAX(name, bits, minmaxfunc) \ @@ -1343,60 +1479,171 @@ FLOAT_MINMAX(mina_d, 64, minnummag) #undef FLOAT_MINMAX /* ternary operations */ -#define UNFUSED_FMA(prefix, a, b, c, flags) \ -{ \ - a = prefix##_mul(a, b, &env->active_fpu.fp_status); \ - if ((flags) & float_muladd_negate_c) { \ - a = prefix##_sub(a, c, &env->active_fpu.fp_status); \ - } else { \ - a = prefix##_add(a, c, &env->active_fpu.fp_status); \ - } \ - if ((flags) & float_muladd_negate_result) { \ - a = prefix##_chs(a); \ - } \ -} - -/* FMA based operations */ -#define FLOAT_FMA(name, type) \ -uint64_t helper_float_ ## name ## _d(CPUMIPSState *env, \ - uint64_t fdt0, uint64_t fdt1, \ - uint64_t fdt2) \ -{ \ - UNFUSED_FMA(float64, fdt0, fdt1, fdt2, type); \ - update_fcr31(env, GETPC()); \ - return fdt0; \ -} \ - \ -uint32_t helper_float_ ## name ## _s(CPUMIPSState *env, \ - uint32_t fst0, uint32_t fst1, \ - uint32_t fst2) \ -{ \ - UNFUSED_FMA(float32, fst0, fst1, fst2, type); \ - update_fcr31(env, GETPC()); \ - return fst0; \ -} \ - \ -uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env, \ - uint64_t fdt0, uint64_t fdt1, \ - uint64_t fdt2) \ -{ \ - uint32_t fst0 = fdt0 & 0XFFFFFFFF; \ - uint32_t fsth0 = fdt0 >> 32; \ - uint32_t fst1 = fdt1 & 0XFFFFFFFF; \ - uint32_t fsth1 = fdt1 >> 32; \ - uint32_t fst2 = fdt2 & 0XFFFFFFFF; \ - uint32_t fsth2 = fdt2 >> 32; \ - \ - UNFUSED_FMA(float32, fst0, fst1, fst2, type); \ - UNFUSED_FMA(float32, fsth0, fsth1, fsth2, type); \ - update_fcr31(env, GETPC()); \ - return ((uint64_t)fsth0 << 32) | fst0; \ -} -FLOAT_FMA(madd, 0) -FLOAT_FMA(msub, float_muladd_negate_c) -FLOAT_FMA(nmadd, float_muladd_negate_result) -FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c) -#undef FLOAT_FMA + +uint64_t helper_float_madd_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_add(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_madd_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_add(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint64_t helper_float_madd_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_add(fstl0, fstl2, &env->active_fpu.fp_status); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_add(fsth0, fsth2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + +uint64_t helper_float_msub_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_sub(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_msub_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_sub(fst0, fst2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return fst0; +} + 
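/*
 * Editor's note (not part of the patch): the *_ps helpers above and below
 * operate on MIPS paired-single data. A 64-bit operand packs two IEEE-754
 * single-precision values: the low single in bits 31:0 (the "fstl*" names
 * introduced by this patch) and the high single in bits 63:32 ("fsth*").
 * Each half is processed independently and the result is repacked as
 * ((uint64_t)fsth << 32) | fstl; the rename from fst* to fstl* simply makes
 * the low/high pairing explicit.
 */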
+uint64_t helper_float_msub_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_sub(fstl0, fstl2, &env->active_fpu.fp_status); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_sub(fsth0, fsth2, &env->active_fpu.fp_status); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + +uint64_t helper_float_nmadd_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_add(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float64_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_nmadd_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_add(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float32_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint64_t helper_float_nmadd_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_add(fstl0, fstl2, &env->active_fpu.fp_status); + fstl0 = float32_chs(fstl0); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_add(fsth0, fsth2, &env->active_fpu.fp_status); + fsth0 = float32_chs(fsth0); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + +uint64_t helper_float_nmsub_d(CPUMIPSState *env, uint64_t fst0, + uint64_t fst1, uint64_t fst2) +{ + fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float64_sub(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float64_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint32_t helper_float_nmsub_s(CPUMIPSState *env, uint32_t fst0, + uint32_t fst1, uint32_t fst2) +{ + fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status); + fst0 = float32_sub(fst0, fst2, &env->active_fpu.fp_status); + fst0 = float32_chs(fst0); + + update_fcr31(env, GETPC()); + return fst0; +} + +uint64_t helper_float_nmsub_ps(CPUMIPSState *env, uint64_t fdt0, + uint64_t fdt1, uint64_t fdt2) +{ + uint32_t fstl0 = fdt0 & 0XFFFFFFFF; + uint32_t fsth0 = fdt0 >> 32; + uint32_t fstl1 = fdt1 & 0XFFFFFFFF; + uint32_t fsth1 = fdt1 >> 32; + uint32_t fstl2 = fdt2 & 0XFFFFFFFF; + uint32_t fsth2 = fdt2 >> 32; + + fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status); + fstl0 = float32_sub(fstl0, fstl2, &env->active_fpu.fp_status); + fstl0 = float32_chs(fstl0); + fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status); + fsth0 = float32_sub(fsth0, fsth2, &env->active_fpu.fp_status); + fsth0 = float32_chs(fsth0); + + update_fcr31(env, GETPC()); + return ((uint64_t)fsth0 << 32) | fstl0; +} + #define FLOAT_FMADDSUB(name, bits, muladd_arg) \ uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \ diff --git a/qemu/target/mips/helper.c b/qemu/target/mips/helper.c index 3c2ba8cec0..f407873180 100644 --- a/qemu/target/mips/helper.c +++ 
b/qemu/target/mips/helper.c @@ -68,6 +68,7 @@ int fixed_mmu_map_address(CPUMIPSState *env, hwaddr *physical, int *prot, int r4k_map_address(CPUMIPSState *env, hwaddr *physical, int *prot, target_ulong address, int rw, int access_type) { + struct uc_struct *uc = env->uc; uint16_t ASID = env->CP0_EntryHi & env->CP0_EntryHi_ASID_mask; uint32_t MMID = env->CP0_MemoryMapID; bool mi = !!((env->CP0_Config5 >> CP0C5_MI) & 1); @@ -461,6 +462,7 @@ void cpu_mips_store_cause(CPUMIPSState *env, target_ulong val) static void raise_mmu_exception(CPUMIPSState *env, target_ulong address, int rw, int tlb_error) { + struct uc_struct *uc = env->uc; CPUState *cs = env_cpu(env); int exception = 0, error_code = 0; @@ -903,6 +905,7 @@ bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int size, { MIPSCPU *cpu = MIPS_CPU(cs); CPUMIPSState *env = &cpu->env; + struct uc_struct *uc = env->uc; hwaddr physical; int prot; int mips_access_type; @@ -1424,6 +1427,7 @@ bool mips_cpu_exec_interrupt(CPUState *cs, int interrupt_request) void r4k_invalidate_tlb(CPUMIPSState *env, int idx, int use_extra) { + struct uc_struct *uc = env->uc; CPUState *cs = env_cpu(env); r4k_tlb_t *tlb; target_ulong addr; diff --git a/qemu/target/mips/helper.h b/qemu/target/mips/helper.h index 221e78257b..012f867e59 100644 --- a/qemu/target/mips/helper.h +++ b/qemu/target/mips/helper.h @@ -945,6 +945,21 @@ DEF_HELPER_4(msa_mod_s_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_mod_s_w, void, env, i32, i32, i32) DEF_HELPER_4(msa_mod_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_maddv_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_msubv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_msubv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_msubv_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_msubv_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_mulv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_mulv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_mulv_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_mulv_d, void, env, i32, i32, i32) + DEF_HELPER_4(msa_asub_s_b, void, env, i32, i32, i32) DEF_HELPER_4(msa_asub_s_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_asub_s_w, void, env, i32, i32, i32) @@ -963,6 +978,31 @@ DEF_HELPER_4(msa_hsub_u_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_hsub_u_w, void, env, i32, i32, i32) DEF_HELPER_4(msa_hsub_u_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_s_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subs_u_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subs_u_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subsus_u_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsus_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsus_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsus_u_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subsuu_s_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsuu_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsuu_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subsuu_s_d, void, env, i32, i32, i32) + +DEF_HELPER_4(msa_subv_b, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subv_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_subv_w, void, env, i32, i32, i32) 
+DEF_HELPER_4(msa_subv_d, void, env, i32, i32, i32) + DEF_HELPER_4(msa_ilvev_b, void, env, i32, i32, i32) DEF_HELPER_4(msa_ilvev_h, void, env, i32, i32, i32) DEF_HELPER_4(msa_ilvev_w, void, env, i32, i32, i32) @@ -1058,20 +1098,25 @@ DEF_HELPER_5(msa_srlri_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_binsl_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_binsr_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subs_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subs_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subsus_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_subsuu_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_mulv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_maddv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_msubv_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dotp_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dotp_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpadd_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpadd_u_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpsub_s_df, void, env, i32, i32, i32, i32) -DEF_HELPER_5(msa_dpsub_u_df, void, env, i32, i32, i32, i32) + +DEF_HELPER_4(msa_dotp_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dotp_u_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpadd_u_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_s_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_s_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_s_d, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_u_h, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_u_w, void, env, i32, i32, i32) +DEF_HELPER_4(msa_dpsub_u_d, void, env, i32, i32, i32) DEF_HELPER_5(msa_sld_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_splat_df, void, env, i32, i32, i32, i32) DEF_HELPER_5(msa_vshf_df, void, env, i32, i32, i32, i32) diff --git a/qemu/target/mips/internal.h b/qemu/target/mips/internal.h index 6978801d9e..2d6032ceff 100644 --- a/qemu/target/mips/internal.h +++ b/qemu/target/mips/internal.h @@ -39,7 +39,9 @@ struct mips_def_t { int32_t CP0_Config5; int32_t CP0_Config5_rw_bitmask; int32_t CP0_Config6; + int32_t CP0_Config6_rw_bitmask; int32_t CP0_Config7; + int32_t CP0_Config7_rw_bitmask; target_ulong CP0_LLAddr_rw_bitmask; int CP0_LLAddr_shift; int32_t SYNCI_Step; @@ -217,7 +219,6 @@ uint32_t float_class_s(uint32_t arg, float_status *fst); uint64_t float_class_d(uint64_t arg, float_status *fst); extern unsigned int ieee_rm[]; -int ieee_ex_to_mips(int xcpt); void update_pagemask(CPUMIPSState *env, target_ulong arg1, int32_t *pagemask); static inline void restore_rounding_mode(CPUMIPSState *env) diff --git a/qemu/target/mips/mips-defs.h b/qemu/target/mips/mips-defs.h index a831bb4384..ed6a7a9e54 100644 --- a/qemu/target/mips/mips-defs.h +++ b/qemu/target/mips/mips-defs.h @@ -15,7 +15,7 @@ * ------------------------------------------------ */ /* - * bits 0-31: MIPS base instruction sets + * bits 0-23: MIPS base instruction sets */ #define 
ISA_MIPS1 0x0000000000000001ULL #define ISA_MIPS2 0x0000000000000002ULL @@ -34,30 +34,37 @@ #define ISA_MIPS64R6 0x0000000000004000ULL #define ISA_NANOMIPS32 0x0000000000008000ULL /* - * bits 32-47: MIPS ASEs + * bits 24-39: MIPS ASEs */ -#define ASE_MIPS16 0x0000000100000000ULL -#define ASE_MIPS3D 0x0000000200000000ULL -#define ASE_MDMX 0x0000000400000000ULL -#define ASE_DSP 0x0000000800000000ULL -#define ASE_DSP_R2 0x0000001000000000ULL -#define ASE_DSP_R3 0x0000002000000000ULL -#define ASE_MT 0x0000004000000000ULL -#define ASE_SMARTMIPS 0x0000008000000000ULL -#define ASE_MICROMIPS 0x0000010000000000ULL -#define ASE_MSA 0x0000020000000000ULL +#define ASE_MIPS16 0x0000000001000000ULL +#define ASE_MIPS3D 0x0000000002000000ULL +#define ASE_MDMX 0x0000000004000000ULL +#define ASE_DSP 0x0000000008000000ULL +#define ASE_DSP_R2 0x0000000010000000ULL +#define ASE_DSP_R3 0x0000000020000000ULL +#define ASE_MT 0x0000000040000000ULL +#define ASE_SMARTMIPS 0x0000000080000000ULL +#define ASE_MICROMIPS 0x0000000100000000ULL +#define ASE_MSA 0x0000000200000000ULL /* - * bits 48-55: vendor-specific base instruction sets + * bits 40-51: vendor-specific base instruction sets */ -#define INSN_LOONGSON2E 0x0001000000000000ULL -#define INSN_LOONGSON2F 0x0002000000000000ULL -#define INSN_VR54XX 0x0004000000000000ULL -#define INSN_R5900 0x0008000000000000ULL +#define INSN_VR54XX 0x0000010000000000ULL +#define INSN_R5900 0x0000020000000000ULL +#define INSN_LOONGSON2E 0x0000040000000000ULL +#define INSN_LOONGSON2F 0x0000080000000000ULL +#define INSN_LOONGSON3A 0x0000100000000000ULL /* - * bits 56-63: vendor-specific ASEs + * bits 52-63: vendor-specific ASEs */ -#define ASE_MMI 0x0100000000000000ULL -#define ASE_MXU 0x0200000000000000ULL +/* MultiMedia Instructions defined by R5900 */ +#define ASE_MMI 0x0010000000000000ULL +/* MIPS eXtension/enhanced Unit defined by Ingenic */ +#define ASE_MXU 0x0020000000000000ULL +/* Loongson MultiMedia Instructions */ +#define ASE_LMMI 0x0040000000000000ULL +/* Loongson EXTensions */ +#define ASE_LEXT 0x0080000000000000ULL /* MIPS CPU defines. */ #define CPU_MIPS1 (ISA_MIPS1) @@ -67,7 +74,7 @@ #define CPU_VR54XX (CPU_MIPS4 | INSN_VR54XX) #define CPU_R5900 (CPU_MIPS3 | INSN_R5900) #define CPU_LOONGSON2E (CPU_MIPS3 | INSN_LOONGSON2E) -#define CPU_LOONGSON2F (CPU_MIPS3 | INSN_LOONGSON2F) +#define CPU_LOONGSON2F (CPU_MIPS3 | INSN_LOONGSON2F | ASE_LMMI) #define CPU_MIPS5 (CPU_MIPS4 | ISA_MIPS5) @@ -94,6 +101,8 @@ /* Wave Computing: "nanoMIPS" */ #define CPU_NANOMIPS32 (CPU_MIPS32R6 | ISA_NANOMIPS32) +#define CPU_LOONGSON3A (CPU_MIPS64R2 | INSN_LOONGSON3A | ASE_LMMI | ASE_LEXT) + /* * Strictly follow the architecture standard: * - Disallow "special" instruction handling for PMON/SPIM. 
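The mips-defs.h hunk above repacks the 64-bit insn_flags layout (base ISAs in bits 0-23, ASEs in bits 24-39, vendor ISAs in bits 40-51, vendor ASEs in bits 52-63) and introduces the Loongson-3A masks. A minimal sketch of how such feature masks are typically tested follows; cpu_has_feature() is a hypothetical helper for illustration only, not something this patch adds (the real translator checks env->insn_flags inline):

#include "cpu.h"   /* CPUMIPSState, ASE_LMMI, CPU_LOONGSON3A, ... */

/* Hypothetical helper, for illustration; assumes insn_flags is the 64-bit
 * feature word populated from the CPU definition. */
static inline bool cpu_has_feature(const CPUMIPSState *env, uint64_t mask)
{
    /* All ISA and ASE bits now fit one 64-bit word, so a single AND suffices.
     * For example, cpu_has_feature(env, ASE_LMMI) holds for Loongson-2F and
     * Loongson-3A, because CPU_LOONGSON3A is defined as
     * CPU_MIPS64R2 | INSN_LOONGSON3A | ASE_LMMI | ASE_LEXT. */
    return (env->insn_flags & mask) != 0;
}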
diff --git a/qemu/target/mips/msa_helper.c b/qemu/target/mips/msa_helper.c index c3501927ce..e83c899a93 100644 --- a/qemu/target/mips/msa_helper.c +++ b/qemu/target/mips/msa_helper.c @@ -2232,7 +2232,339 @@ void helper_msa_div_u_d(CPUMIPSState *env, * +---------------+----------------------------------------------------------+ */ -/* TODO: insert Int Dot Product group helpers here */ +#define SIGNED_EXTRACT(e, o, a, df) \ + do { \ + e = SIGNED_EVEN(a, df); \ + o = SIGNED_ODD(a, df); \ + } while (0) + +#define UNSIGNED_EXTRACT(e, o, a, df) \ + do { \ + e = UNSIGNED_EVEN(a, df); \ + o = UNSIGNED_ODD(a, df); \ + } while (0) + + +static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dotp_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dotp_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dotp_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dotp_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dotp_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dotp_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dotp_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dotp_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dotp_s_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_dotp_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dotp_s_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dotp_s_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dotp_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dotp_s_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_dotp_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dotp_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dotp_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dotp_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dotp_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dotp_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dotp_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dotp_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dotp_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dotp_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dotp_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dotp_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void 
helper_msa_dotp_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dotp_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dotp_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dotp_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dotp_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_dotp_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dotp_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dotp_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dpadd_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpadd_s_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dpadd_s_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpadd_s_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpadd_s_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpadd_s_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpadd_s_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpadd_s_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpadd_s_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpadd_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpadd_s_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpadd_s_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpadd_s_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpadd_s_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpadd_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); +} + +void helper_msa_dpadd_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpadd_u_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = 
msa_dpadd_u_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpadd_u_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpadd_u_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpadd_u_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpadd_u_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpadd_u_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpadd_u_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpadd_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpadd_u_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpadd_u_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpadd_u_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpadd_u_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpadd_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); +} + +void helper_msa_dpsub_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpsub_s_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dpsub_s_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpsub_s_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpsub_s_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpsub_s_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpsub_s_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpsub_s_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpsub_s_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpsub_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpsub_s_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpsub_s_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpsub_s_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpsub_s_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpsub_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ 
+ int64_t even_arg1; + int64_t even_arg2; + int64_t odd_arg1; + int64_t odd_arg2; + UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); + UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); + return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); +} + +void helper_msa_dpsub_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_dpsub_u_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]); + pwd->h[1] = msa_dpsub_u_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]); + pwd->h[2] = msa_dpsub_u_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]); + pwd->h[3] = msa_dpsub_u_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]); + pwd->h[4] = msa_dpsub_u_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]); + pwd->h[5] = msa_dpsub_u_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]); + pwd->h[6] = msa_dpsub_u_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]); + pwd->h[7] = msa_dpsub_u_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]); +} + +void helper_msa_dpsub_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_dpsub_u_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]); + pwd->w[1] = msa_dpsub_u_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]); + pwd->w[2] = msa_dpsub_u_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]); + pwd->w[3] = msa_dpsub_u_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]); +} + +void helper_msa_dpsub_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]); + pwd->d[1] = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]); +} /* @@ -2891,36 +3223,250 @@ void helper_msa_mod_u_d(CPUMIPSState *env, * +---------------+----------------------------------------------------------+ */ -/* TODO: insert Int Multiply group helpers here */ +static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1, + int64_t arg2) +{ + return dest + arg1 * arg2; +} +void helper_msa_maddv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); -/* - * Int Subtract - * ------------ - * - * +---------------+----------------------------------------------------------+ - * | ASUB_S.B | Vector Absolute Values of Signed Subtract (byte) | - * | ASUB_S.H | Vector Absolute Values of Signed Subtract (halfword) | - * | ASUB_S.W | Vector Absolute Values of Signed Subtract (word) | - * | ASUB_S.D | Vector Absolute Values of Signed Subtract (doubleword) | - * | ASUB_U.B | Vector Absolute Values of Unsigned Subtract (byte) | - * | ASUB_U.H | Vector Absolute Values of Unsigned Subtract (halfword) | - * | ASUB_U.W | Vector Absolute Values of Unsigned Subtract (word) | - * | ASUB_U.D | Vector Absolute Values of Unsigned Subtract (doubleword) | - * | HSUB_S.H | Vector Signed Horizontal Subtract (halfword) | - * | HSUB_S.W | Vector Signed Horizontal Subtract (word) | - * | HSUB_S.D | Vector Signed Horizontal Subtract (doubleword) | - * | HSUB_U.H | Vector Unigned Horizontal Subtract (halfword) | - * | HSUB_U.W | Vector Unigned Horizontal Subtract (word) | - * | HSUB_U.D | Vector 
Unigned Horizontal Subtract (doubleword) |
- * | SUBS_S.B | Vector Signed Saturated Subtract (of Signed) (byte) |
- * | SUBS_S.H | Vector Signed Saturated Subtract (of Signed) (halfword) |
- * | SUBS_S.W | Vector Signed Saturated Subtract (of Signed) (word) |
- * | SUBS_S.D | Vector Signed Saturated Subtract (of Signed) (doubleword)|
- * | SUBS_U.B | Vector Unsigned Saturated Subtract (of Uns.) (byte) |
- * | SUBS_U.H | Vector Unsigned Saturated Subtract (of Uns.) (halfword) |
- * | SUBS_U.W | Vector Unsigned Saturated Subtract (of Uns.) (word) |
- * | SUBS_U.D | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
+    pwd->b[0] = msa_maddv_df(DF_BYTE, pwd->b[0], pws->b[0], pwt->b[0]);
+    pwd->b[1] = msa_maddv_df(DF_BYTE, pwd->b[1], pws->b[1], pwt->b[1]);
+    pwd->b[2] = msa_maddv_df(DF_BYTE, pwd->b[2], pws->b[2], pwt->b[2]);
+    pwd->b[3] = msa_maddv_df(DF_BYTE, pwd->b[3], pws->b[3], pwt->b[3]);
+    pwd->b[4] = msa_maddv_df(DF_BYTE, pwd->b[4], pws->b[4], pwt->b[4]);
+    pwd->b[5] = msa_maddv_df(DF_BYTE, pwd->b[5], pws->b[5], pwt->b[5]);
+    pwd->b[6] = msa_maddv_df(DF_BYTE, pwd->b[6], pws->b[6], pwt->b[6]);
+    pwd->b[7] = msa_maddv_df(DF_BYTE, pwd->b[7], pws->b[7], pwt->b[7]);
+    pwd->b[8] = msa_maddv_df(DF_BYTE, pwd->b[8], pws->b[8], pwt->b[8]);
+    pwd->b[9] = msa_maddv_df(DF_BYTE, pwd->b[9], pws->b[9], pwt->b[9]);
+    pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
+    pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
+    pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
+    pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
+    pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
+    pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
+}
+
+void helper_msa_maddv_h(CPUMIPSState *env,
+                        uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->h[0] = msa_maddv_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]);
+    pwd->h[1] = msa_maddv_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]);
+    pwd->h[2] = msa_maddv_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]);
+    pwd->h[3] = msa_maddv_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]);
+    pwd->h[4] = msa_maddv_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]);
+    pwd->h[5] = msa_maddv_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]);
+    pwd->h[6] = msa_maddv_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]);
+    pwd->h[7] = msa_maddv_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]);
+}
+
+void helper_msa_maddv_w(CPUMIPSState *env,
+                        uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->w[0] = msa_maddv_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]);
+    pwd->w[1] = msa_maddv_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]);
+    pwd->w[2] = msa_maddv_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]);
+    pwd->w[3] = msa_maddv_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]);
+}
+
+void helper_msa_maddv_d(CPUMIPSState *env,
+                        uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = msa_maddv_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]);
+    pwd->d[1] = msa_maddv_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]);
+}
+
+static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
+                                   int64_t arg2)
+{
+    return dest - arg1 * arg2;
+}
+
+void helper_msa_msubv_b(CPUMIPSState *env,
+                        uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->b[0] = msa_msubv_df(DF_BYTE, pwd->b[0], pws->b[0], pwt->b[0]);
+    pwd->b[1] = msa_msubv_df(DF_BYTE, pwd->b[1], pws->b[1], pwt->b[1]);
+    pwd->b[2] = msa_msubv_df(DF_BYTE, pwd->b[2], pws->b[2], pwt->b[2]);
+    pwd->b[3] = msa_msubv_df(DF_BYTE, pwd->b[3], pws->b[3], pwt->b[3]);
+    pwd->b[4] = msa_msubv_df(DF_BYTE, pwd->b[4], pws->b[4], pwt->b[4]);
+    pwd->b[5] = msa_msubv_df(DF_BYTE, pwd->b[5], pws->b[5], pwt->b[5]);
+    pwd->b[6] = msa_msubv_df(DF_BYTE, pwd->b[6], pws->b[6], pwt->b[6]);
+    pwd->b[7] = msa_msubv_df(DF_BYTE, pwd->b[7], pws->b[7], pwt->b[7]);
+    pwd->b[8] = msa_msubv_df(DF_BYTE, pwd->b[8], pws->b[8], pwt->b[8]);
+    pwd->b[9] = msa_msubv_df(DF_BYTE, pwd->b[9], pws->b[9], pwt->b[9]);
+    pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
+    pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
+    pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
+    pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
+    pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
+    pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
+}
+
+void helper_msa_msubv_h(CPUMIPSState *env,
+                        uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->h[0] = msa_msubv_df(DF_HALF, pwd->h[0], pws->h[0], pwt->h[0]);
+    pwd->h[1] = msa_msubv_df(DF_HALF, pwd->h[1], pws->h[1], pwt->h[1]);
+    pwd->h[2] = msa_msubv_df(DF_HALF, pwd->h[2], pws->h[2], pwt->h[2]);
+    pwd->h[3] = msa_msubv_df(DF_HALF, pwd->h[3], pws->h[3], pwt->h[3]);
+    pwd->h[4] = msa_msubv_df(DF_HALF, pwd->h[4], pws->h[4], pwt->h[4]);
+    pwd->h[5] = msa_msubv_df(DF_HALF, pwd->h[5], pws->h[5], pwt->h[5]);
+    pwd->h[6] = msa_msubv_df(DF_HALF, pwd->h[6], pws->h[6], pwt->h[6]);
+    pwd->h[7] = msa_msubv_df(DF_HALF, pwd->h[7], pws->h[7], pwt->h[7]);
+}
+
+void helper_msa_msubv_w(CPUMIPSState *env,
+                        uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->w[0] = msa_msubv_df(DF_WORD, pwd->w[0], pws->w[0], pwt->w[0]);
+    pwd->w[1] = msa_msubv_df(DF_WORD, pwd->w[1], pws->w[1], pwt->w[1]);
+    pwd->w[2] = msa_msubv_df(DF_WORD, pwd->w[2], pws->w[2], pwt->w[2]);
+    pwd->w[3] = msa_msubv_df(DF_WORD, pwd->w[3], pws->w[3], pwt->w[3]);
+}
+
+void helper_msa_msubv_d(CPUMIPSState *env,
+                        uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = msa_msubv_df(DF_DOUBLE, pwd->d[0], pws->d[0], pwt->d[0]);
+    pwd->d[1] = msa_msubv_df(DF_DOUBLE, pwd->d[1], pws->d[1], pwt->d[1]);
+}
+
+
+static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
+{
+    return arg1 * arg2;
+}
+
+void helper_msa_mulv_b(CPUMIPSState *env,
+                       uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->b[0] = msa_mulv_df(DF_BYTE, pws->b[0], pwt->b[0]);
+    pwd->b[1] = msa_mulv_df(DF_BYTE, pws->b[1], pwt->b[1]);
+    pwd->b[2] = 
msa_mulv_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_mulv_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_mulv_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_mulv_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_mulv_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_mulv_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_mulv_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_mulv_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_mulv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_mulv_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_mulv_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_mulv_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_mulv_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_mulv_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_mulv_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_mulv_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_mulv_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_mulv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_mulv_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_mulv_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_mulv_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_mulv_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_mulv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_mulv_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_mulv_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +/* + * Int Subtract + * ------------ + * + * +---------------+----------------------------------------------------------+ + * | ASUB_S.B | Vector Absolute Values of Signed Subtract (byte) | + * | ASUB_S.H | Vector Absolute Values of Signed Subtract (halfword) | + * | ASUB_S.W | Vector Absolute Values of Signed Subtract (word) | + * | ASUB_S.D | Vector Absolute Values of Signed Subtract (doubleword) | + * | ASUB_U.B | Vector Absolute Values of Unsigned Subtract (byte) | + * | ASUB_U.H | Vector Absolute Values of Unsigned Subtract (halfword) | + * | ASUB_U.W | Vector Absolute Values of Unsigned Subtract (word) | + * | ASUB_U.D | Vector Absolute Values of Unsigned Subtract (doubleword) | + * | HSUB_S.H | Vector Signed Horizontal Subtract (halfword) | + * | HSUB_S.W | Vector Signed Horizontal Subtract (word) | + * | HSUB_S.D | Vector Signed Horizontal Subtract (doubleword) | + * | HSUB_U.H | Vector Unigned Horizontal Subtract (halfword) | + * | HSUB_U.W | Vector Unigned Horizontal Subtract (word) | + * | HSUB_U.D | Vector Unigned Horizontal Subtract (doubleword) | + * | SUBS_S.B | Vector Signed Saturated Subtract (of Signed) (byte) | + * | SUBS_S.H | Vector Signed Saturated Subtract (of Signed) (halfword) | + * 
| SUBS_S.W | Vector Signed Saturated Subtract (of Signed) (word) | + * | SUBS_S.D | Vector Signed Saturated Subtract (of Signed) (doubleword)| + * | SUBS_U.B | Vector Unsigned Saturated Subtract (of Uns.) (byte) | + * | SUBS_U.H | Vector Unsigned Saturated Subtract (of Uns.) (halfword) | + * | SUBS_U.W | Vector Unsigned Saturated Subtract (of Uns.) (word) | + * | SUBS_U.D | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)| * | SUBSUS_U.B | Vector Uns. Sat. Subtract (of S. from Uns.) (byte) | * | SUBSUS_U.H | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword) | * | SUBSUS_U.W | Vector Uns. Sat. Subtract (of S. from Uns.) (word) | @@ -3045,142 +3591,531 @@ void helper_msa_asub_u_b(CPUMIPSState *env, pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]); } -void helper_msa_asub_u_h(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_asub_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_asub_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_asub_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_asub_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_asub_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_asub_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_asub_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_asub_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_asub_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_asub_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_asub_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_asub_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_asub_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_asub_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_asub_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_asub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_asub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df); +} + +void helper_msa_hsub_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_hsub_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_hsub_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_hsub_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_hsub_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_hsub_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_hsub_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_hsub_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_hsub_s_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_hsub_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_hsub_s_df(DF_WORD, pws->w[0], pwt->w[0]); + 
pwd->w[1] = msa_hsub_s_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_hsub_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_hsub_s_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_hsub_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_hsub_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_hsub_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df); +} + +void helper_msa_hsub_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_hsub_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_hsub_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_hsub_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_hsub_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_hsub_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_hsub_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_hsub_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_hsub_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_hsub_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_hsub_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_hsub_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_hsub_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_hsub_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_hsub_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_hsub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_hsub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + int64_t max_int = DF_MAX_INT(df); + int64_t min_int = DF_MIN_INT(df); + if (arg2 > 0) { + return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int; + } else { + return (arg1 < max_int + arg2) ? 
arg1 - arg2 : max_int; + } +} + +void helper_msa_subs_s_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_subs_s_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subs_s_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subs_s_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subs_s_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subs_s_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subs_s_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subs_s_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subs_s_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subs_s_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subs_s_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_subs_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_subs_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subs_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subs_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subs_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subs_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subs_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subs_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subs_s_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_subs_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_subs_s_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subs_s_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subs_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subs_s_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_subs_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_subs_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subs_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + return (u_arg1 > u_arg2) ? 
u_arg1 - u_arg2 : 0; +} + +void helper_msa_subs_u_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_subs_u_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subs_u_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subs_u_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subs_u_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subs_u_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subs_u_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subs_u_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subs_u_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subs_u_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subs_u_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_subs_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_subs_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subs_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subs_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subs_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subs_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subs_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subs_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subs_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_subs_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_subs_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subs_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subs_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subs_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_subs_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_subs_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subs_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t max_uint = DF_MAX_UINT(df); + if (arg2 >= 0) { + uint64_t u_arg2 = (uint64_t)arg2; + return (u_arg1 > u_arg2) ? + (int64_t)(u_arg1 - u_arg2) : + 0; + } else { + uint64_t u_arg2 = (uint64_t)(-arg2); + return (u_arg1 < max_uint - u_arg2) ? 
+ (int64_t)(u_arg1 + u_arg2) : + (int64_t)max_uint; + } +} + +void helper_msa_subsus_u_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->b[0] = msa_subsus_u_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subsus_u_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subsus_u_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subsus_u_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subsus_u_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subsus_u_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subsus_u_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subsus_u_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subsus_u_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subsus_u_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]); +} + +void helper_msa_subsus_u_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->h[0] = msa_subsus_u_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subsus_u_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subsus_u_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subsus_u_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subsus_u_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subsus_u_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subsus_u_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subsus_u_df(DF_HALF, pws->h[7], pwt->h[7]); +} + +void helper_msa_subsus_u_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->w[0] = msa_subsus_u_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subsus_u_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subsus_u_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subsus_u_df(DF_WORD, pws->w[3], pwt->w[3]); +} + +void helper_msa_subsus_u_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) +{ + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); + wr_t *pws = &(env->active_fpu.fpr[ws].wr); + wr_t *pwt = &(env->active_fpu.fpr[wt].wr); + + pwd->d[0] = msa_subsus_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subsus_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); +} + + +static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + uint64_t u_arg1 = UNSIGNED(arg1, df); + uint64_t u_arg2 = UNSIGNED(arg2, df); + int64_t max_int = DF_MAX_INT(df); + int64_t min_int = DF_MIN_INT(df); + if (u_arg1 > u_arg2) { + return u_arg1 - u_arg2 < (uint64_t)max_int ? + (int64_t)(u_arg1 - u_arg2) : + max_int; + } else { + return u_arg2 - u_arg1 < (uint64_t)(-min_int) ? 
+ (int64_t)(u_arg1 - u_arg2) : + min_int; + } +} + +void helper_msa_subsuu_s_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->h[0] = msa_asub_u_df(DF_HALF, pws->h[0], pwt->h[0]); - pwd->h[1] = msa_asub_u_df(DF_HALF, pws->h[1], pwt->h[1]); - pwd->h[2] = msa_asub_u_df(DF_HALF, pws->h[2], pwt->h[2]); - pwd->h[3] = msa_asub_u_df(DF_HALF, pws->h[3], pwt->h[3]); - pwd->h[4] = msa_asub_u_df(DF_HALF, pws->h[4], pwt->h[4]); - pwd->h[5] = msa_asub_u_df(DF_HALF, pws->h[5], pwt->h[5]); - pwd->h[6] = msa_asub_u_df(DF_HALF, pws->h[6], pwt->h[6]); - pwd->h[7] = msa_asub_u_df(DF_HALF, pws->h[7], pwt->h[7]); + pwd->b[0] = msa_subsuu_s_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subsuu_s_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subsuu_s_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subsuu_s_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subsuu_s_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subsuu_s_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subsuu_s_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subsuu_s_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subsuu_s_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subsuu_s_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]); } -void helper_msa_asub_u_w(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subsuu_s_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->w[0] = msa_asub_u_df(DF_WORD, pws->w[0], pwt->w[0]); - pwd->w[1] = msa_asub_u_df(DF_WORD, pws->w[1], pwt->w[1]); - pwd->w[2] = msa_asub_u_df(DF_WORD, pws->w[2], pwt->w[2]); - pwd->w[3] = msa_asub_u_df(DF_WORD, pws->w[3], pwt->w[3]); + pwd->h[0] = msa_subsuu_s_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subsuu_s_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subsuu_s_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subsuu_s_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subsuu_s_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subsuu_s_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subsuu_s_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subsuu_s_df(DF_HALF, pws->h[7], pwt->h[7]); } -void helper_msa_asub_u_d(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subsuu_s_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->d[0] = msa_asub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); - pwd->d[1] = msa_asub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); -} - - -/* TODO: insert the rest of Int Subtract group helpers here */ - - -static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df); + pwd->w[0] = msa_subsuu_s_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subsuu_s_df(DF_WORD, 
pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subsuu_s_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subsuu_s_df(DF_WORD, pws->w[3], pwt->w[3]); } -void helper_msa_hsub_s_h(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subsuu_s_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->h[0] = msa_hsub_s_df(DF_HALF, pws->h[0], pwt->h[0]); - pwd->h[1] = msa_hsub_s_df(DF_HALF, pws->h[1], pwt->h[1]); - pwd->h[2] = msa_hsub_s_df(DF_HALF, pws->h[2], pwt->h[2]); - pwd->h[3] = msa_hsub_s_df(DF_HALF, pws->h[3], pwt->h[3]); - pwd->h[4] = msa_hsub_s_df(DF_HALF, pws->h[4], pwt->h[4]); - pwd->h[5] = msa_hsub_s_df(DF_HALF, pws->h[5], pwt->h[5]); - pwd->h[6] = msa_hsub_s_df(DF_HALF, pws->h[6], pwt->h[6]); - pwd->h[7] = msa_hsub_s_df(DF_HALF, pws->h[7], pwt->h[7]); + pwd->d[0] = msa_subsuu_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subsuu_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); } -void helper_msa_hsub_s_w(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) -{ - wr_t *pwd = &(env->active_fpu.fpr[wd].wr); - wr_t *pws = &(env->active_fpu.fpr[ws].wr); - wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->w[0] = msa_hsub_s_df(DF_WORD, pws->w[0], pwt->w[0]); - pwd->w[1] = msa_hsub_s_df(DF_WORD, pws->w[1], pwt->w[1]); - pwd->w[2] = msa_hsub_s_df(DF_WORD, pws->w[2], pwt->w[2]); - pwd->w[3] = msa_hsub_s_df(DF_WORD, pws->w[3], pwt->w[3]); +static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2) +{ + return arg1 - arg2; } -void helper_msa_hsub_s_d(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_b(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->d[0] = msa_hsub_s_df(DF_DOUBLE, pws->d[0], pwt->d[0]); - pwd->d[1] = msa_hsub_s_df(DF_DOUBLE, pws->d[1], pwt->d[1]); -} - - -static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df); + pwd->b[0] = msa_subv_df(DF_BYTE, pws->b[0], pwt->b[0]); + pwd->b[1] = msa_subv_df(DF_BYTE, pws->b[1], pwt->b[1]); + pwd->b[2] = msa_subv_df(DF_BYTE, pws->b[2], pwt->b[2]); + pwd->b[3] = msa_subv_df(DF_BYTE, pws->b[3], pwt->b[3]); + pwd->b[4] = msa_subv_df(DF_BYTE, pws->b[4], pwt->b[4]); + pwd->b[5] = msa_subv_df(DF_BYTE, pws->b[5], pwt->b[5]); + pwd->b[6] = msa_subv_df(DF_BYTE, pws->b[6], pwt->b[6]); + pwd->b[7] = msa_subv_df(DF_BYTE, pws->b[7], pwt->b[7]); + pwd->b[8] = msa_subv_df(DF_BYTE, pws->b[8], pwt->b[8]); + pwd->b[9] = msa_subv_df(DF_BYTE, pws->b[9], pwt->b[9]); + pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]); + pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]); + pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]); + pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]); + pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]); + pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]); } -void helper_msa_hsub_u_h(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_h(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->h[0] = msa_hsub_u_df(DF_HALF, pws->h[0], pwt->h[0]); - pwd->h[1] = msa_hsub_u_df(DF_HALF, 
pws->h[1], pwt->h[1]); - pwd->h[2] = msa_hsub_u_df(DF_HALF, pws->h[2], pwt->h[2]); - pwd->h[3] = msa_hsub_u_df(DF_HALF, pws->h[3], pwt->h[3]); - pwd->h[4] = msa_hsub_u_df(DF_HALF, pws->h[4], pwt->h[4]); - pwd->h[5] = msa_hsub_u_df(DF_HALF, pws->h[5], pwt->h[5]); - pwd->h[6] = msa_hsub_u_df(DF_HALF, pws->h[6], pwt->h[6]); - pwd->h[7] = msa_hsub_u_df(DF_HALF, pws->h[7], pwt->h[7]); + pwd->h[0] = msa_subv_df(DF_HALF, pws->h[0], pwt->h[0]); + pwd->h[1] = msa_subv_df(DF_HALF, pws->h[1], pwt->h[1]); + pwd->h[2] = msa_subv_df(DF_HALF, pws->h[2], pwt->h[2]); + pwd->h[3] = msa_subv_df(DF_HALF, pws->h[3], pwt->h[3]); + pwd->h[4] = msa_subv_df(DF_HALF, pws->h[4], pwt->h[4]); + pwd->h[5] = msa_subv_df(DF_HALF, pws->h[5], pwt->h[5]); + pwd->h[6] = msa_subv_df(DF_HALF, pws->h[6], pwt->h[6]); + pwd->h[7] = msa_subv_df(DF_HALF, pws->h[7], pwt->h[7]); } -void helper_msa_hsub_u_w(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_w(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->w[0] = msa_hsub_u_df(DF_WORD, pws->w[0], pwt->w[0]); - pwd->w[1] = msa_hsub_u_df(DF_WORD, pws->w[1], pwt->w[1]); - pwd->w[2] = msa_hsub_u_df(DF_WORD, pws->w[2], pwt->w[2]); - pwd->w[3] = msa_hsub_u_df(DF_WORD, pws->w[3], pwt->w[3]); + pwd->w[0] = msa_subv_df(DF_WORD, pws->w[0], pwt->w[0]); + pwd->w[1] = msa_subv_df(DF_WORD, pws->w[1], pwt->w[1]); + pwd->w[2] = msa_subv_df(DF_WORD, pws->w[2], pwt->w[2]); + pwd->w[3] = msa_subv_df(DF_WORD, pws->w[3], pwt->w[3]); } -void helper_msa_hsub_u_d(CPUMIPSState *env, - uint32_t wd, uint32_t ws, uint32_t wt) +void helper_msa_subv_d(CPUMIPSState *env, + uint32_t wd, uint32_t ws, uint32_t wt) { wr_t *pwd = &(env->active_fpu.fpr[wd].wr); wr_t *pws = &(env->active_fpu.fpr[ws].wr); wr_t *pwt = &(env->active_fpu.fpr[wt].wr); - pwd->d[0] = msa_hsub_u_df(DF_DOUBLE, pws->d[0], pwt->d[0]); - pwd->d[1] = msa_hsub_u_df(DF_DOUBLE, pws->d[1], pwt->d[1]); + pwd->d[0] = msa_subv_df(DF_DOUBLE, pws->d[0], pwt->d[0]); + pwd->d[1] = msa_subv_df(DF_DOUBLE, pws->d[1], pwt->d[1]); } @@ -4408,11 +5343,6 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd, msa_move_v(pwd, pwx); } -static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return arg1 - arg2; -} - #define MSA_BINOP_IMM_DF(helper, func) \ void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \ uint32_t wd, uint32_t ws, int32_t u5) \ @@ -4594,97 +5524,6 @@ MSA_TEROP_IMMU_DF(binsli, binsl) MSA_TEROP_IMMU_DF(binsri, binsr) #undef MSA_TEROP_IMMU_DF -static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - int64_t max_int = DF_MAX_INT(df); - int64_t min_int = DF_MIN_INT(df); - if (arg2 > 0) { - return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int; - } else { - return (arg1 < max_int + arg2) ? arg1 - arg2 : max_int; - } -} - -static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - uint64_t u_arg1 = UNSIGNED(arg1, df); - uint64_t u_arg2 = UNSIGNED(arg2, df); - return (u_arg1 > u_arg2) ? u_arg1 - u_arg2 : 0; -} - -static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - uint64_t u_arg1 = UNSIGNED(arg1, df); - uint64_t max_uint = DF_MAX_UINT(df); - if (arg2 >= 0) { - uint64_t u_arg2 = (uint64_t)arg2; - return (u_arg1 > u_arg2) ? - (int64_t)(u_arg1 - u_arg2) : - 0; - } else { - uint64_t u_arg2 = (uint64_t)(-arg2); - return (u_arg1 < max_uint - u_arg2) ? 
- (int64_t)(u_arg1 + u_arg2) : - (int64_t)max_uint; - } -} - -static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - uint64_t u_arg1 = UNSIGNED(arg1, df); - uint64_t u_arg2 = UNSIGNED(arg2, df); - int64_t max_int = DF_MAX_INT(df); - int64_t min_int = DF_MIN_INT(df); - if (u_arg1 > u_arg2) { - return u_arg1 - u_arg2 < (uint64_t)max_int ? - (int64_t)(u_arg1 - u_arg2) : - max_int; - } else { - return u_arg2 - u_arg1 < (uint64_t)(-min_int) ? - (int64_t)(u_arg1 - u_arg2) : - min_int; - } -} - -static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - return arg1 * arg2; -} - -#define SIGNED_EXTRACT(e, o, a, df) \ - do { \ - e = SIGNED_EVEN(a, df); \ - o = SIGNED_ODD(a, df); \ - } while (0) - -#define UNSIGNED_EXTRACT(e, o, a, df) \ - do { \ - e = UNSIGNED_EVEN(a, df); \ - o = UNSIGNED_ODD(a, df); \ - } while (0) - -static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - -static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - #define CONCATENATE_AND_SLIDE(s, k) \ do { \ for (i = 0; i < s; i++) { \ @@ -4802,15 +5641,6 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, \ } \ } -MSA_BINOP_DF(subv) -MSA_BINOP_DF(subs_s) -MSA_BINOP_DF(subs_u) -MSA_BINOP_DF(subsus_u) -MSA_BINOP_DF(subsuu_s) -MSA_BINOP_DF(mulv) -MSA_BINOP_DF(dotp_s) -MSA_BINOP_DF(dotp_u) - MSA_BINOP_DF(mul_q) MSA_BINOP_DF(mulr_q) #undef MSA_BINOP_DF @@ -4824,66 +5654,6 @@ void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd, msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]); } -static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - return dest + arg1 * arg2; -} - -static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - return dest - arg1 * arg2; -} - -static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - -static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2); -} - -static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t odd_arg2; - SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); -} - -static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1, - int64_t arg2) -{ - int64_t even_arg1; - int64_t even_arg2; - int64_t odd_arg1; - int64_t 
odd_arg2; - UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df); - UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df); - return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2)); -} - static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1, int64_t arg2) { @@ -5010,12 +5780,6 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \ } \ } -MSA_TEROP_DF(maddv) -MSA_TEROP_DF(msubv) -MSA_TEROP_DF(dpadd_s) -MSA_TEROP_DF(dpadd_u) -MSA_TEROP_DF(dpsub_s) -MSA_TEROP_DF(dpsub_u) MSA_TEROP_DF(binsl) MSA_TEROP_DF(binsr) MSA_TEROP_DF(madd_q) @@ -5427,54 +6191,80 @@ static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr) #define CLEAR_IS_INEXACT 2 #define RECIPROCAL_INEXACT 4 -static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) +static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt) { - int ieee_ex; + int mips_xcpt = 0; + + if (ieee_xcpt & float_flag_invalid) { + mips_xcpt |= FP_INVALID; + } + if (ieee_xcpt & float_flag_overflow) { + mips_xcpt |= FP_OVERFLOW; + } + if (ieee_xcpt & float_flag_underflow) { + mips_xcpt |= FP_UNDERFLOW; + } + if (ieee_xcpt & float_flag_divbyzero) { + mips_xcpt |= FP_DIV0; + } + if (ieee_xcpt & float_flag_inexact) { + mips_xcpt |= FP_INEXACT; + } + + return mips_xcpt; +} - int c; +static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) +{ + int ieee_exception_flags; + int mips_exception_flags = 0; int cause; int enable; - ieee_ex = get_float_exception_flags(&env->active_tc.msa_fp_status); + ieee_exception_flags = get_float_exception_flags( + &env->active_tc.msa_fp_status); /* QEMU softfloat does not signal all underflow cases */ if (denormal) { - ieee_ex |= float_flag_underflow; + ieee_exception_flags |= float_flag_underflow; + } + if (ieee_exception_flags) { + mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags); } - - c = ieee_ex_to_mips(ieee_ex); enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED; /* Set Inexact (I) when flushing inputs to zero */ - if ((ieee_ex & float_flag_input_denormal) && + if ((ieee_exception_flags & float_flag_input_denormal) && (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) { if (action & CLEAR_IS_INEXACT) { - c &= ~FP_INEXACT; + mips_exception_flags &= ~FP_INEXACT; } else { - c |= FP_INEXACT; + mips_exception_flags |= FP_INEXACT; } } /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */ - if ((ieee_ex & float_flag_output_denormal) && + if ((ieee_exception_flags & float_flag_output_denormal) && (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) { - c |= FP_INEXACT; + mips_exception_flags |= FP_INEXACT; if (action & CLEAR_FS_UNDERFLOW) { - c &= ~FP_UNDERFLOW; + mips_exception_flags &= ~FP_UNDERFLOW; } else { - c |= FP_UNDERFLOW; + mips_exception_flags |= FP_UNDERFLOW; } } /* Set Inexact (I) when Overflow (O) is not enabled */ - if ((c & FP_OVERFLOW) != 0 && (enable & FP_OVERFLOW) == 0) { - c |= FP_INEXACT; + if ((mips_exception_flags & FP_OVERFLOW) != 0 && + (enable & FP_OVERFLOW) == 0) { + mips_exception_flags |= FP_INEXACT; } /* Clear Exact Underflow when Underflow (U) is not enabled */ - if ((c & FP_UNDERFLOW) != 0 && (enable & FP_UNDERFLOW) == 0 && - (c & FP_INEXACT) == 0) { - c &= ~FP_UNDERFLOW; + if ((mips_exception_flags & FP_UNDERFLOW) != 0 && + (enable & FP_UNDERFLOW) == 0 && + (mips_exception_flags & FP_INEXACT) == 0) { + mips_exception_flags &= ~FP_UNDERFLOW; } /* @@ -5482,11 +6272,11 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) * divide by zero */ if 
((action & RECIPROCAL_INEXACT) && - (c & (FP_INVALID | FP_DIV0)) == 0) { - c = FP_INEXACT; + (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) { + mips_exception_flags = FP_INEXACT; } - cause = c & enable; /* all current enabled exceptions */ + cause = mips_exception_flags & enable; /* all current enabled exceptions */ if (cause == 0) { /* @@ -5494,7 +6284,7 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) * with all current exceptions */ SET_FP_CAUSE(env->active_tc.msacsr, - (GET_FP_CAUSE(env->active_tc.msacsr) | c)); + (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags)); } else { /* Current exceptions are enabled */ if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) { @@ -5503,11 +6293,11 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal) * with all enabled exceptions */ SET_FP_CAUSE(env->active_tc.msacsr, - (GET_FP_CAUSE(env->active_tc.msacsr) | c)); + (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags)); } } - return c; + return mips_exception_flags; } static inline int get_enabled_exceptions(const CPUMIPSState *env, int c) @@ -5516,7 +6306,7 @@ static inline int get_enabled_exceptions(const CPUMIPSState *env, int c) return c & enable; } -static inline float16 float16_from_float32(int32_t a, flag ieee, +static inline float16 float16_from_float32(int32_t a, bool ieee, float_status *status) { float16 f_val; @@ -5535,7 +6325,7 @@ static inline float32 float32_from_float64(int64_t a, float_status *status) return a < 0 ? (f_val | (1 << 31)) : f_val; } -static inline float32 float32_from_float16(int16_t a, flag ieee, +static inline float32 float32_from_float16(int16_t a, bool ieee, float_status *status) { float32 f_val; @@ -6572,7 +7362,7 @@ void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd, * IEEE and "ARM" format. The latter gains extra exponent * range by omitting the NaN/Inf encodings. */ - flag ieee = 1; + bool ieee = true; MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16); MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16); @@ -7186,7 +7976,7 @@ void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd, * IEEE and "ARM" format. The latter gains extra exponent * range by omitting the NaN/Inf encodings. */ - flag ieee = 1; + bool ieee = true; MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32); } @@ -7222,7 +8012,7 @@ void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd, * IEEE and "ARM" format. The latter gains extra exponent * range by omitting the NaN/Inf encodings. 
*/ - flag ieee = 1; + bool ieee = true; MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32); } diff --git a/qemu/target/mips/op_helper.c b/qemu/target/mips/op_helper.c index 9802b9cebd..f8119b999e 100644 --- a/qemu/target/mips/op_helper.c +++ b/qemu/target/mips/op_helper.c @@ -618,6 +618,7 @@ static inline uint64_t get_tlb_pfn_from_entrylo(uint64_t entrylo) static void r4k_fill_tlb(CPUMIPSState *env, int idx) { + struct uc_struct *uc = env->uc; r4k_tlb_t *tlb; uint64_t mask = env->CP0_PageMask >> (TARGET_PAGE_BITS + 1); @@ -682,6 +683,7 @@ void r4k_helper_tlbinvf(CPUMIPSState *env) void r4k_helper_tlbwi(CPUMIPSState *env) { + struct uc_struct *uc = env->uc; bool mi = !!((env->CP0_Config5 >> CP0C5_MI) & 1); target_ulong VPN; uint16_t ASID = env->CP0_EntryHi & env->CP0_EntryHi_ASID_mask; @@ -738,6 +740,7 @@ void r4k_helper_tlbwr(CPUMIPSState *env) void r4k_helper_tlbp(CPUMIPSState *env) { + struct uc_struct *uc = env->uc; bool mi = !!((env->CP0_Config5 >> CP0C5_MI) & 1); r4k_tlb_t *tlb; target_ulong mask; @@ -1241,6 +1244,7 @@ static inline void ensure_writable_pages(CPUMIPSState *env, int mmu_idx, uintptr_t retaddr) { + struct uc_struct *uc = env->uc; /* FIXME: Probe the actual accesses (pass and use a size) */ if (unlikely(MSA_PAGESPAN(addr))) { /* first page */ diff --git a/qemu/target/mips/translate.c b/qemu/target/mips/translate.c index 3fab57b251..b8c82c82d8 100644 --- a/qemu/target/mips/translate.c +++ b/qemu/target/mips/translate.c @@ -1040,7 +1040,7 @@ enum { OPC_BC2NEZ = (0x0D << 21) | OPC_CP2, }; -#define MASK_LMI(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F)) +#define MASK_LMMI(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F)) enum { OPC_PADDSH = (24 << 21) | (0x00) | OPC_CP2, @@ -3384,7 +3384,8 @@ static void gen_ld(DisasContext *ctx, uint32_t opc, TCGv t0, t1, t2; int mem_idx = ctx->mem_idx; - if (rt == 0 && ctx->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F)) { + if (rt == 0 && ctx->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F | + INSN_LOONGSON3A)) { /* * Loongson CPU uses a load to zero register for prefetch. * We emulate it as a NOP. 
On other CPU we must perform the @@ -5520,7 +5521,7 @@ static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt) TCGv_i64 t0, t1; TCGCond cond; - opc = MASK_LMI(ctx->opcode); + opc = MASK_LMMI(ctx->opcode); switch (opc) { case OPC_ADD_CP2: case OPC_SUB_CP2: @@ -5995,6 +5996,7 @@ static void gen_trap(DisasContext *ctx, uint32_t opc, static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) { + struct uc_struct *uc = ctx->uc; if (unlikely(ctx->base.singlestep_enabled)) { return false; } @@ -27207,7 +27209,7 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_MULTU_G_2F: case OPC_MOD_G_2F: case OPC_MODU_G_2F: - check_insn(ctx, INSN_LOONGSON2F); + check_insn(ctx, INSN_LOONGSON2F | ASE_LEXT); gen_loongson_integer(ctx, op1, rd, rs, rt); break; case OPC_CLO: @@ -27240,7 +27242,7 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx) case OPC_DDIVU_G_2F: case OPC_DMOD_G_2F: case OPC_DMODU_G_2F: - check_insn(ctx, INSN_LOONGSON2F); + check_insn(ctx, INSN_LOONGSON2F | ASE_LEXT); gen_loongson_integer(ctx, op1, rd, rs, rt); break; #endif @@ -29097,6 +29099,38 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) break; } break; + case OPC_MADDV_df: + switch (df) { + case DF_BYTE: + gen_helper_msa_maddv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_maddv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_maddv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_maddv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } + break; + case OPC_MSUBV_df: + switch (df) { + case DF_BYTE: + gen_helper_msa_msubv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_msubv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_msubv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_msubv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } + break; case OPC_ASUB_S_df: switch (df) { case DF_BYTE: @@ -29306,10 +29340,36 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_SUBS_S_df: - gen_helper_msa_subs_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subs_s_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subs_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subs_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subs_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_MULV_df: - gen_helper_msa_mulv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_mulv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_mulv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_mulv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_mulv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SLD_df: gen_helper_msa_sld_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); @@ -29318,25 +29378,71 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) gen_helper_msa_vshf_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); break; case OPC_SUBV_df: - gen_helper_msa_subv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, 
twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subv_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subv_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subv_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subv_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SUBS_U_df: - gen_helper_msa_subs_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); - break; - case OPC_MADDV_df: - gen_helper_msa_maddv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subs_u_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subs_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subs_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subs_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SPLAT_df: gen_helper_msa_splat_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); break; case OPC_SUBSUS_U_df: - gen_helper_msa_subsus_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); - break; - case OPC_MSUBV_df: - gen_helper_msa_msubv_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subsus_u_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subsus_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subsus_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subsus_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_SUBSUU_S_df: - gen_helper_msa_subsuu_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_BYTE: + gen_helper_msa_subsuu_s_b(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_HALF: + gen_helper_msa_subsuu_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_subsuu_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_subsuu_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DOTP_S_df: @@ -29407,22 +29513,82 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_DOTP_S_df: - gen_helper_msa_dotp_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dotp_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dotp_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dotp_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DOTP_U_df: - gen_helper_msa_dotp_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dotp_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dotp_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dotp_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DPADD_S_df: - gen_helper_msa_dpadd_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpadd_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpadd_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpadd_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + 
break; + } break; case OPC_DPADD_U_df: - gen_helper_msa_dpadd_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpadd_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpadd_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpadd_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DPSUB_S_df: - gen_helper_msa_dpsub_s_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpsub_s_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpsub_s_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpsub_s_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; case OPC_DPSUB_U_df: - gen_helper_msa_dpsub_u_df(tcg_ctx, tcg_ctx->cpu_env, tdf, twd, tws, twt); + switch (df) { + case DF_HALF: + gen_helper_msa_dpsub_u_h(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_WORD: + gen_helper_msa_dpsub_u_w(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + case DF_DOUBLE: + gen_helper_msa_dpsub_u_d(tcg_ctx, tcg_ctx->cpu_env, twd, tws, twt); + break; + } break; } break; @@ -30683,7 +30849,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext *ctx) } break; case OPC_CP2: - check_insn(ctx, INSN_LOONGSON2F); + check_insn(ctx, ASE_LMMI); /* Note that these instructions use different fields. */ gen_loongson_multimedia(ctx, sa, rd, rt); break; @@ -30849,7 +31015,8 @@ static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) CPUMIPSState *env = cs->env_ptr; // unicorn setup - ctx->uc = cs->uc; + struct uc_struct *uc = cs->uc; + ctx->uc = uc; ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK; ctx->saved_pc = -1; @@ -31238,7 +31405,9 @@ void cpu_state_reset(CPUMIPSState *env) env->CP0_Config5 = env->cpu_model->CP0_Config5; env->CP0_Config5_rw_bitmask = env->cpu_model->CP0_Config5_rw_bitmask; env->CP0_Config6 = env->cpu_model->CP0_Config6; + env->CP0_Config6_rw_bitmask = env->cpu_model->CP0_Config6_rw_bitmask; env->CP0_Config7 = env->cpu_model->CP0_Config7; + env->CP0_Config7_rw_bitmask = env->cpu_model->CP0_Config7_rw_bitmask; env->CP0_LLAddr_rw_bitmask = env->cpu_model->CP0_LLAddr_rw_bitmask << env->cpu_model->CP0_LLAddr_shift; env->CP0_LLAddr_shift = env->cpu_model->CP0_LLAddr_shift; diff --git a/qemu/target/mips/translate_init.inc.c b/qemu/target/mips/translate_init.inc.c index 3e395c7e6a..02885b5c65 100644 --- a/qemu/target/mips/translate_init.inc.c +++ b/qemu/target/mips/translate_init.inc.c @@ -366,7 +366,7 @@ const mips_def_t mips_defs[] = }, { /* FIXME: - * Config3: CMGCR, PW, VZ, CTXTC, CDMM, TL + * Config3: VZ, CTXTC, CDMM, TL * Config4: MMUExtDef * Config5: MRP * FIR(FCR0): Has2008 @@ -380,10 +380,11 @@ const mips_def_t mips_defs[] = (2 << CP0C1_DS) | (4 << CP0C1_DL) | (3 << CP0C1_DA) | (1 << CP0C1_PC) | (1 << CP0C1_FP), .CP0_Config2 = MIPS_CONFIG2, - .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | (1 << CP0C3_MSAP) | + .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | + (1 << CP0C3_CMGCR) | (1 << CP0C3_MSAP) | (1 << CP0C3_BP) | (1 << CP0C3_BI) | (1 << CP0C3_SC) | - (1 << CP0C3_ULRI) | (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | - (1 << CP0C3_VInt), + (1 << CP0C3_PW) | (1 << CP0C3_ULRI) | (1 << CP0C3_RXI) | + (1 << CP0C3_LPA) | (1 << CP0C3_VInt), .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (2 << CP0C4_IE) | (0x1c << CP0C4_KScrExist), .CP0_Config4_rw_bitmask = 0, @@ -801,6 +802,92 @@ const 
mips_def_t mips_defs[] = .insn_flags = CPU_LOONGSON2F, .mmu_type = MMU_TYPE_R4000, }, + { + .name = "Loongson-3A1000", + .CP0_PRid = 0x6305, + /* 64KB I-cache and d-cache. 4 way with 32 bit cache line size. */ + .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) | + (MMU_TYPE_R4000 << CP0C0_MT), + .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) | + (3 << CP0C1_IS) | (4 << CP0C1_IL) | (3 << CP0C1_IA) | + (3 << CP0C1_DS) | (4 << CP0C1_DL) | (3 << CP0C1_DA) | + (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP), + .CP0_Config2 = MIPS_CONFIG2 | (7 << CP0C2_SS) | (4 << CP0C2_SL) | + (3 << CP0C2_SA), + .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_LPA), + .CP0_LLAddr_rw_bitmask = 0, + .SYNCI_Step = 32, + .CCRes = 2, + .CP0_Status_rw_bitmask = 0x74D8FFFF, + .CP0_PageGrain = (1 << CP0PG_ELPA), + .CP0_PageGrain_rw_bitmask = (1 << CP0PG_ELPA), + .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV) | (0x1 << FCR0_F64) | + (0x1 << FCR0_PS) | (0x1 << FCR0_L) | (0x1 << FCR0_W) | + (0x1 << FCR0_D) | (0x1 << FCR0_S), + .CP1_fcr31 = 0, + .CP1_fcr31_rw_bitmask = 0xFF83FFFF, + .SEGBITS = 42, + .PABITS = 48, + .insn_flags = CPU_LOONGSON3A, + .mmu_type = MMU_TYPE_R4000, + }, + { + .name = "Loongson-3A4000", + .CP0_PRid = 0x14C000, + /* 64KB I-cache and d-cache. 4 way with 32 bit cache line size. */ + .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) | + (MMU_TYPE_R4000 << CP0C0_MT), + .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) | + (2 << CP0C1_IS) | (5 << CP0C1_IL) | (3 << CP0C1_IA) | + (2 << CP0C1_DS) | (5 << CP0C1_DL) | (3 << CP0C1_DA) | + (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP), + .CP0_Config2 = MIPS_CONFIG2 | (5 << CP0C2_SS) | (5 << CP0C2_SL) | + (15 << CP0C2_SA), + .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | (1 << CP0C3_MSAP) | + (1 << CP0C3_BP) | (1 << CP0C3_BI) | (1 << CP0C3_ULRI) | + (1 << CP0C3_RXI) | (1 << CP0C3_LPA) | (1 << CP0C3_VInt), + .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M) | (2 << CP0C4_IE) | + (1 << CP0C4_AE) | (0x1c << CP0C4_KScrExist), + .CP0_Config4_rw_bitmask = 0, + .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_CRCP) | (1 << CP0C5_NFExists), + .CP0_Config5_rw_bitmask = (1 << CP0C5_K) | (1 << CP0C5_CV) | + (1 << CP0C5_MSAEn) | (1 << CP0C5_UFE) | + (1 << CP0C5_FRE) | (1 << CP0C5_SBRI), + .CP0_Config6 = (1 << CP0C6_VCLRU) | (1 << CP0C6_DCLRU) | + (1 << CP0C6_SFBEN) | (1 << CP0C6_VLTINT) | + (1 << CP0C6_INSTPREF) | (1 << CP0C6_DATAPREF), + .CP0_Config6_rw_bitmask = (1 << CP0C6_BPPASS) | (0x3f << CP0C6_KPOS) | + (1 << CP0C6_KE) | (1 << CP0C6_VTLBONLY) | + (1 << CP0C6_LASX) | (1 << CP0C6_SSEN) | + (1 << CP0C6_DISDRTIME) | (1 << CP0C6_PIXNUEN) | + (1 << CP0C6_SCRAND) | (1 << CP0C6_LLEXCEN) | + (1 << CP0C6_DISVC) | (1 << CP0C6_VCLRU) | + (1 << CP0C6_DCLRU) | (1 << CP0C6_PIXUEN) | + (1 << CP0C6_DISBLKLYEN) | (1 << CP0C6_UMEMUALEN) | + (1 << CP0C6_SFBEN) | (1 << CP0C6_FLTINT) | + (1 << CP0C6_VLTINT) | (1 << CP0C6_DISBTB) | + (3 << CP0C6_STPREFCTL) | (1 << CP0C6_INSTPREF) | + (1 << CP0C6_DATAPREF), + .CP0_Config7 = 0, + .CP0_Config7_rw_bitmask = (1 << CP0C7_NAPCGEN) | (1 << CP0C7_UNIMUEN) | + (1 << CP0C7_VFPUCGEN), + .CP0_LLAddr_rw_bitmask = 1, + .SYNCI_Step = 16, + .CCRes = 2, + .CP0_Status_rw_bitmask = 0x7DDBFFFF, + .CP0_PageGrain = (1 << CP0PG_ELPA), + .CP0_PageGrain_rw_bitmask = (1U << CP0PG_RIE) | (1 << CP0PG_XIE) | + (1 << CP0PG_ELPA) | (1 << CP0PG_IEC), + .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV) | (0x1 << FCR0_F64) | + (0x1 << FCR0_PS) | (0x1 << FCR0_L) | (0x1 << FCR0_W) | + (0x1 << FCR0_D) | (0x1 
<< FCR0_S), + .CP1_fcr31 = 0, + .CP1_fcr31_rw_bitmask = 0xFF83FFFF, + .SEGBITS = 48, + .PABITS = 48, + .insn_flags = CPU_LOONGSON3A, + .mmu_type = MMU_TYPE_R4000, + }, { /* A generic CPU providing MIPS64 DSP R2 ASE features. FIXME: Eventually this should be replaced by a real CPU model. */ diff --git a/qemu/target/ppc/cpu.h b/qemu/target/ppc/cpu.h index 26ed16808c..f7b127c9a3 100644 --- a/qemu/target/ppc/cpu.h +++ b/qemu/target/ppc/cpu.h @@ -129,8 +129,9 @@ enum { POWERPC_EXCP_SDOOR_HV = 100, /* ISA 3.00 additions */ POWERPC_EXCP_HVIRT = 101, + POWERPC_EXCP_SYSCALL_VECTORED = 102, /* scv exception */ /* EOL */ - POWERPC_EXCP_NB = 102, + POWERPC_EXCP_NB = 103, /* QEMU exceptions: used internally during code translation */ POWERPC_EXCP_STOP = 0x200, /* stop translation */ POWERPC_EXCP_BRANCH = 0x201, /* branch instruction */ @@ -460,6 +461,9 @@ typedef struct ppc_v3_pate_t { #define DSISR_AMR 0x00200000 /* Unsupported Radix Tree Configuration */ #define DSISR_R_BADCONFIG 0x00080000 +#define DSISR_ATOMIC_RC 0x00040000 +/* Unable to translate address of (guest) pde or process/page table entry */ +#define DSISR_PRTABLE_FAULT 0x00020000 /* SRR1 error code fields */ @@ -469,9 +473,31 @@ typedef struct ppc_v3_pate_t { #define SRR1_PROTFAULT DSISR_PROTFAULT #define SRR1_IAMR DSISR_AMR +/* SRR1[42:45] wakeup fields for System Reset Interrupt */ + +#define SRR1_WAKEMASK 0x003c0000 /* reason for wakeup */ + +#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ +#define SRR1_WAKEHVI 0x00240000 /* Hypervisor Virt. Interrupt (P9) */ +#define SRR1_WAKEEE 0x00200000 /* External interrupt */ +#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ +#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell */ +#define SRR1_WAKERESET 0x00100000 /* System reset */ +#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell */ +#define SRR1_WAKESCOM 0x00080000 /* SCOM not in power-saving mode */ + +/* SRR1[46:47] power-saving exit mode */ + +#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask */ + +#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */ +#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */ +#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */ + /* Facility Status and Control (FSCR) bits */ #define FSCR_EBB (63 - 56) /* Event-Based Branch Facility */ #define FSCR_TAR (63 - 55) /* Target Address Register */ +#define FSCR_SCV (63 - 51) /* System call vectored */ /* Interrupt cause mask and position in FSCR. 
HFSCR has the same format */ #define FSCR_IC_MASK (0xFFULL) #define FSCR_IC_POS (63 - 7) @@ -481,6 +507,7 @@ typedef struct ppc_v3_pate_t { #define FSCR_IC_TM 5 #define FSCR_IC_EBB 7 #define FSCR_IC_TAR 8 +#define FSCR_IC_SCV 12 /* Exception state register bits definition */ #define ESR_PIL PPC_BIT(36) /* Illegal Instruction */ @@ -548,6 +575,8 @@ enum { POWERPC_FLAG_VSX = 0x00080000, /* Has Transaction Memory (ISA 2.07) */ POWERPC_FLAG_TM = 0x00100000, + /* Has SCV (ISA 3.00) */ + POWERPC_FLAG_SCV = 0x00200000, }; /*****************************************************************************/ @@ -1206,7 +1235,7 @@ void ppc_cpu_do_interrupt(CPUState *cpu); bool ppc_cpu_exec_interrupt(CPUState *cpu, int int_req); hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); -void ppc_cpu_do_system_reset(CPUState *cs, target_ulong vector); +void ppc_cpu_do_system_reset(CPUState *cs); void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong vector); #if 0 extern const VMStateDescription vmstate_ppc_cpu; diff --git a/qemu/target/ppc/dfp_helper.c b/qemu/target/ppc/dfp_helper.c index a025ed362e..a6a398cf5c 100644 --- a/qemu/target/ppc/dfp_helper.c +++ b/qemu/target/ppc/dfp_helper.c @@ -113,7 +113,7 @@ static void dfp_set_round_mode_from_immediate(uint8_t r, uint8_t rmc, case 3: /* use FPSCR rounding mode */ return; default: - assert(0); /* cannot get here */ + g_assert_not_reached(); /* cannot get here */ } } else { /* r == 1 */ switch (rmc & 3) { @@ -130,7 +130,7 @@ rnd = DEC_ROUND_HALF_DOWN; break; default: - assert(0); /* cannot get here */ + g_assert_not_reached(); /* cannot get here */ } } decContextSetRounding(&dfp->context, rnd); diff --git a/qemu/target/ppc/excp_helper.c b/qemu/target/ppc/excp_helper.c index 298b7730a1..3cfdae9ab2 100644 --- a/qemu/target/ppc/excp_helper.c +++ b/qemu/target/ppc/excp_helper.c @@ -38,12 +38,27 @@ /* Exception processing */ static inline void dump_syscall(CPUPPCState *env) { - qemu_log_mask(CPU_LOG_INT, "syscall r0=%016" PRIx64 " r3=%016" PRIx64 - " r4=%016" PRIx64 " r5=%016" PRIx64 " r6=%016" PRIx64 + qemu_log_mask(CPU_LOG_INT, "syscall r0=%016" PRIx64 + " r3=%016" PRIx64 " r4=%016" PRIx64 " r5=%016" PRIx64 + " r6=%016" PRIx64 " r7=%016" PRIx64 " r8=%016" PRIx64 " nip=" TARGET_FMT_lx "\n", ppc_dump_gpr(env, 0), ppc_dump_gpr(env, 3), ppc_dump_gpr(env, 4), ppc_dump_gpr(env, 5), - ppc_dump_gpr(env, 6), env->nip); + ppc_dump_gpr(env, 6), ppc_dump_gpr(env, 7), + ppc_dump_gpr(env, 8), env->nip); +} + +static inline void dump_syscall_vectored(CPUPPCState *env) +{ + qemu_log_mask(CPU_LOG_INT, "syscall r0=%016" PRIx64 + " r3=%016" PRIx64 " r4=%016" PRIx64 " r5=%016" PRIx64 + " r6=%016" PRIx64 " r7=%016" PRIx64 " r8=%016" PRIx64 + " nip=" TARGET_FMT_lx "\n", + ppc_dump_gpr(env, 0), ppc_dump_gpr(env, 3), + ppc_dump_gpr(env, 4), ppc_dump_gpr(env, 5), + ppc_dump_gpr(env, 6), ppc_dump_gpr(env, 7), + ppc_dump_gpr(env, 8), env->nip); } static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, @@ -53,7 +68,7 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp, env->resume_as_sreset = false; /* Pretend to be returning from doze always as we don't lose state */ - *msr |= (0x1ull << (63 - 47)); + *msr |= SRR1_WS_NOLOSS; /* Machine checks are sent normally */ if (excp == POWERPC_EXCP_MCHECK) { @@ -61,25 +76,25 @@ } switch (excp) { case POWERPC_EXCP_RESET: - *msr |= 0x4ull << (63 - 45); +
*msr |= SRR1_WAKERESET; break; case POWERPC_EXCP_EXTERNAL: - *msr |= 0x8ull << (63 - 45); + *msr |= SRR1_WAKEEE; break; case POWERPC_EXCP_DECR: - *msr |= 0x6ull << (63 - 45); + *msr |= SRR1_WAKEDEC; break; case POWERPC_EXCP_SDOOR: - *msr |= 0x5ull << (63 - 45); + *msr |= SRR1_WAKEDBELL; break; case POWERPC_EXCP_SDOOR_HV: - *msr |= 0x3ull << (63 - 45); + *msr |= SRR1_WAKEHDBELL; break; case POWERPC_EXCP_HV_MAINT: - *msr |= 0xaull << (63 - 45); + *msr |= SRR1_WAKEHMI; break; case POWERPC_EXCP_HVIRT: - *msr |= 0x9ull << (63 - 45); + *msr |= SRR1_WAKEHVI; break; default: cpu_abort(cs, "Unsupported exception %d in Power Save mode\n", @@ -149,7 +164,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; target_ulong msr, new_msr, vector; - int srr0, srr1, asrr0, asrr1, lev, ail; + int srr0, srr1, asrr0, asrr1, lev = -1, ail; bool lpes0; qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx @@ -388,6 +403,13 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) new_msr |= (target_ulong)MSR_HVB; } break; + case POWERPC_EXCP_SYSCALL_VECTORED: /* scv exception */ + lev = env->error_code; + dump_syscall_vectored(env); + env->nip += 4; + new_msr |= env->msr & ((target_ulong)1 << MSR_EE); + new_msr |= env->msr & ((target_ulong)1 << MSR_RI); + break; case POWERPC_EXCP_FPU: /* Floating-point unavailable exception */ case POWERPC_EXCP_APU: /* Auxiliary processor unavailable */ case POWERPC_EXCP_DECR: /* Decrementer exception */ @@ -476,6 +498,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) case POWERPC_EXCP_HDECR: /* Hypervisor decrementer exception */ case POWERPC_EXCP_HDSI: /* Hypervisor data storage exception */ case POWERPC_EXCP_HISI: /* Hypervisor instruction storage exception */ + msr |= env->error_code; case POWERPC_EXCP_HDSEG: /* Hypervisor data segment exception */ case POWERPC_EXCP_HISEG: /* Hypervisor instruction segment exception */ case POWERPC_EXCP_SDOOR_HV: /* Hypervisor Doorbell interrupt */ @@ -690,12 +713,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) break; } - /* Save PC */ - env->spr[srr0] = env->nip; - - /* Save MSR */ - env->spr[srr1] = msr; - /* Sanity check */ if (!(env->msr_mask & MSR_HVB)) { if (new_msr & MSR_HVB) { @@ -708,14 +725,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } } - /* If any alternate SRR register are defined, duplicate saved values */ - if (asrr0 != -1) { - env->spr[asrr0] = env->spr[srr0]; - } - if (asrr1 != -1) { - env->spr[asrr1] = env->spr[srr1]; - } - /* * Sort out endianness of interrupt, this differs depending on the * CPU, the HV mode, etc... 
@@ -750,18 +759,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } #endif - /* Jump to handler */ - vector = env->excp_vectors[excp]; -#ifdef _MSC_VER - if (vector == (target_ulong)(0ULL - 1ULL)) { -#else - if (vector == (target_ulong)-1ULL) { -#endif - cpu_abort(cs, "Raised an exception without defined vector %d\n", - excp); - } - vector |= env->excp_prefix; - /* * AIL only works if there is no HV transition and we are running * with translations enabled @@ -770,10 +767,21 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) ((new_msr & MSR_HVB) && !(msr & MSR_HVB))) { ail = 0; } - /* Handle AIL */ - if (ail) { - new_msr |= (1 << MSR_IR) | (1 << MSR_DR); - vector |= ppc_excp_vector_offset(cs, ail); + + vector = env->excp_vectors[excp]; + if (vector == (target_ulong)-1ULL) { + cpu_abort(cs, "Raised an exception without defined vector %d\n", + excp); + } + + vector |= env->excp_prefix; + + /* If any alternate SRR register are defined, duplicate saved values */ + if (asrr0 != -1) { + env->spr[asrr0] = env->nip; + } + if (asrr1 != -1) { + env->spr[asrr1] = msr; } #if defined(TARGET_PPC64) @@ -793,6 +801,37 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } #endif + if (excp != POWERPC_EXCP_SYSCALL_VECTORED) { + /* Save PC */ + env->spr[srr0] = env->nip; + + /* Save MSR */ + env->spr[srr1] = msr; + + /* Handle AIL */ + if (ail) { + new_msr |= (1 << MSR_IR) | (1 << MSR_DR); + vector |= ppc_excp_vector_offset(cs, ail); + } + +#if defined(TARGET_PPC64) + } else { + /* scv AIL is a little different */ + if (ail) { + new_msr |= (1 << MSR_IR) | (1 << MSR_DR); + } + if (ail == AIL_C000_0000_0000_4000) { + vector |= 0xc000000000003000ull; + } else { + vector |= 0x0000000000017000ull; + } + vector += lev * 0x20; + + env->lr = env->nip; + env->ctr = msr; +#endif + } + powerpc_set_excp_state(cpu, vector, new_msr); } @@ -954,15 +993,12 @@ static void ppc_hw_interrupt(CPUPPCState *env) } } -void ppc_cpu_do_system_reset(CPUState *cs, target_ulong vector) +void ppc_cpu_do_system_reset(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_RESET); - if (vector != -1) { - env->nip = vector; - } } void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong vector) @@ -1135,6 +1171,11 @@ void helper_rfid(CPUPPCState *env) do_rfi(env, env->spr[SPR_SRR0], env->spr[SPR_SRR1]); } +void helper_rfscv(CPUPPCState *env) +{ + do_rfi(env, env->lr, env->ctr); +} + void helper_hrfid(CPUPPCState *env) { do_rfi(env, env->spr[SPR_HSRR0], env->spr[SPR_HSRR1]); diff --git a/qemu/target/ppc/helper.h b/qemu/target/ppc/helper.h index b1c4343908..77892dc80f 100644 --- a/qemu/target/ppc/helper.h +++ b/qemu/target/ppc/helper.h @@ -18,6 +18,7 @@ DEF_HELPER_1(rfmci, void, env) #if defined(TARGET_PPC64) DEF_HELPER_2(pminsn, void, env, i32) DEF_HELPER_1(rfid, void, env) +DEF_HELPER_1(rfscv, void, env) DEF_HELPER_1(hrfid, void, env) DEF_HELPER_2(store_lpcr, void, env, tl) DEF_HELPER_2(store_pcr, void, env, tl) @@ -215,10 +216,6 @@ DEF_HELPER_3(vsubuqm, void, avr, avr, avr) DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr) DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr) DEF_HELPER_3(vsubcuq, void, avr, avr, avr) -DEF_HELPER_3(vrlb, void, avr, avr, avr) -DEF_HELPER_3(vrlh, void, avr, avr, avr) -DEF_HELPER_3(vrlw, void, avr, avr, avr) -DEF_HELPER_3(vrld, void, avr, avr, avr) DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32) DEF_HELPER_3(vextractub, void, avr, avr, i32) 
DEF_HELPER_3(vextractuh, void, avr, avr, i32) diff --git a/qemu/target/ppc/int_helper.c b/qemu/target/ppc/int_helper.c index c6ead3e149..57ede62f78 100644 --- a/qemu/target/ppc/int_helper.c +++ b/qemu/target/ppc/int_helper.c @@ -763,7 +763,7 @@ VCMPNE(w, u32, uint32_t, 0) \ for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ uint32_t result; \ - int rel = float32_compare_quiet(a->f32[i], b->f32[i], \ + FloatRelation rel = float32_compare_quiet(a->f32[i], b->f32[i], \ &env->vec_status); \ if (rel == float_relation_unordered) { \ result = 0; \ @@ -796,14 +796,14 @@ static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, int all_in = 0; for (i = 0; i < ARRAY_SIZE(r->f32); i++) { - int le_rel = float32_compare_quiet(a->f32[i], b->f32[i], + FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], &env->vec_status); if (le_rel == float_relation_unordered) { r->u32[i] = 0xc0000000; all_in = 1; } else { float32 bneg = float32_chs(b->f32[i]); - int ge_rel = float32_compare_quiet(a->f32[i], bneg, + FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, &env->vec_status); int le = le_rel != float_relation_greater; int ge = ge_rel != float_relation_less; @@ -1340,23 +1340,6 @@ VRFI(p, float_round_up) VRFI(z, float_round_to_zero) #undef VRFI -#define VROTATE(suffix, element, mask) \ - void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int shift = b->element[i] & mask; \ - r->element[i] = (a->element[i] << shift) | \ - (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ - } \ - } -VROTATE(b, u8, 0x7) -VROTATE(h, u16, 0xF) -VROTATE(w, u32, 0x1F) -VROTATE(d, u64, 0x3F) -#undef VROTATE - void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) { int i; diff --git a/qemu/target/ppc/translate.c b/qemu/target/ppc/translate.c index 15c9fde4f3..8e100300c2 100644 --- a/qemu/target/ppc/translate.c +++ b/qemu/target/ppc/translate.c @@ -170,6 +170,7 @@ struct DisasContext { bool vsx_enabled; bool spe_enabled; bool tm_enabled; + bool scv_enabled; bool gtse; ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */ int singlestep_enabled; @@ -1946,6 +1947,7 @@ static void gen_rlwimi(DisasContext *ctx) tcg_gen_deposit_tl(tcg_ctx, t_ra, t_ra, t_rs, sh, me - mb + 1); } else { target_ulong mask; + bool mask_in_32b = true; TCGv t1; #if defined(TARGET_PPC64) @@ -1954,8 +1956,13 @@ static void gen_rlwimi(DisasContext *ctx) #endif mask = MASK(mb, me); +#if defined(TARGET_PPC64) + if (mask > 0xffffffffu) { + mask_in_32b = false; + } +#endif t1 = tcg_temp_new(tcg_ctx); - if (mask <= 0xffffffffu) { + if (mask_in_32b) { TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); tcg_gen_trunc_tl_i32(tcg_ctx, t0, t_rs); tcg_gen_rotli_i32(tcg_ctx, t0, t0, sh); @@ -1998,12 +2005,18 @@ static void gen_rlwinm(DisasContext *ctx) tcg_gen_extract_tl(tcg_ctx, t_ra, t_rs, rsh, len); } else { target_ulong mask; + bool mask_in_32b = true; #if defined(TARGET_PPC64) mb += 32; me += 32; #endif mask = MASK(mb, me); - if (mask <= 0xffffffffu) { +#if defined(TARGET_PPC64) + if (mask > 0xffffffffu) { + mask_in_32b = false; + } +#endif + if (mask_in_32b) { if (sh == 0) { tcg_gen_andi_tl(tcg_ctx, t_ra, t_rs, mask); } else { @@ -2039,6 +2052,7 @@ static void gen_rlwnm(DisasContext *ctx) uint32_t mb = MB(ctx->opcode); uint32_t me = ME(ctx->opcode); target_ulong mask; + bool mask_in_32b = true; #if defined(TARGET_PPC64) mb += 32; @@ -2046,7 +2060,12 @@ static void gen_rlwnm(DisasContext *ctx) #endif mask = MASK(mb, me); - if 
(mask <= 0xffffffffu) { +#if defined(TARGET_PPC64) + if (mask > 0xffffffffu) { + mask_in_32b = false; + } +#endif + if (mask_in_32b) { TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); TCGv_i32 t1 = tcg_temp_new_i32(tcg_ctx); tcg_gen_trunc_tl_i32(tcg_ctx, t0, t_rb); @@ -4112,6 +4131,18 @@ static void gen_rfid(DisasContext *ctx) gen_sync_exception(ctx); } +static void gen_rfscv(DisasContext *ctx) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + CHK_SV; + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(tcg_ctx); + } + gen_update_cfar(ctx, ctx->base.pc_next - 4); + gen_helper_rfscv(tcg_ctx, tcg_ctx->cpu_env); + gen_sync_exception(ctx); +} + static void gen_hrfid(DisasContext *ctx) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; @@ -4124,6 +4155,7 @@ static void gen_hrfid(DisasContext *ctx) /* sc */ #define POWERPC_SYSCALL POWERPC_EXCP_SYSCALL +#define POWERPC_SYSCALL_VECTORED POWERPC_EXCP_SYSCALL_VECTORED static void gen_sc(DisasContext *ctx) { uint32_t lev; @@ -4132,6 +4164,21 @@ static void gen_sc(DisasContext *ctx) gen_exception_err(ctx, POWERPC_SYSCALL, lev); } +#if defined(TARGET_PPC64) +static void gen_scv(DisasContext *ctx) +{ + uint32_t lev; + + if (unlikely(!ctx->scv_enabled)) { + gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_SCV); + return; + } + + lev = (ctx->opcode >> 5) & 0x7F; + gen_exception_err(ctx, POWERPC_SYSCALL_VECTORED, lev); +} +#endif + /*** Trap ***/ /* Check for unconditional traps (always or never) */ @@ -6988,6 +7035,10 @@ GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER), GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW), #if defined(TARGET_PPC64) GEN_HANDLER(rfid, 0x13, 0x12, 0x00, 0x03FF8001, PPC_64B), +/* Top bit of opc2 corresponds with low bit of LEV, so use two handlers */ +GEN_HANDLER_E(scv, 0x11, 0x10, 0xFF, 0x03FFF01E, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(scv, 0x11, 0x00, 0xFF, 0x03FFF01E, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(rfscv, 0x13, 0x12, 0x02, 0x03FF8001, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(stop, 0x13, 0x12, 0x0b, 0x03FFF801, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(doze, 0x13, 0x12, 0x0c, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), GEN_HANDLER_E(nap, 0x13, 0x12, 0x0d, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), @@ -6995,7 +7046,9 @@ GEN_HANDLER_E(sleep, 0x13, 0x12, 0x0e, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), GEN_HANDLER_E(rvwinkle, 0x13, 0x12, 0x0f, 0x03FFF801, PPC_NONE, PPC2_PM_ISA206), GEN_HANDLER(hrfid, 0x13, 0x12, 0x08, 0x03FF8001, PPC_64H), #endif -GEN_HANDLER(sc, 0x11, 0xFF, 0xFF, 0x03FFF01D, PPC_FLOW), +/* Top bit of opc2 corresponds with low bit of LEV, so use two handlers */ +GEN_HANDLER(sc, 0x11, 0x11, 0xFF, 0x03FFF01D, PPC_FLOW), +GEN_HANDLER(sc, 0x11, 0x01, 0xFF, 0x03FFF01D, PPC_FLOW), GEN_HANDLER(tw, 0x1F, 0x04, 0x00, 0x00000001, PPC_FLOW), GEN_HANDLER(twi, 0x03, 0xFF, 0xFF, 0x00000000, PPC_FLOW), #if defined(TARGET_PPC64) @@ -7541,6 +7594,12 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) } else { ctx->vsx_enabled = false; } + if ((env->flags & POWERPC_FLAG_SCV) + && (env->spr[SPR_FSCR] & (1ull << FSCR_SCV))) { + ctx->scv_enabled = true; + } else { + ctx->scv_enabled = false; + } #if defined(TARGET_PPC64) if ((env->flags & POWERPC_FLAG_TM) && msr_tm) { ctx->tm_enabled = !!msr_tm; diff --git a/qemu/target/ppc/translate/fp-impl.inc.c b/qemu/target/ppc/translate/fp-impl.inc.c index 58155f21eb..00a9d42dd8 100644 --- a/qemu/target/ppc/translate/fp-impl.inc.c +++ b/qemu/target/ppc/translate/fp-impl.inc.c @@ -33,170 +33,170 @@ static void gen_set_cr1_from_fpscr(DisasContext *ctx) #endif /*** 
Floating-Point arithmetic ***/ -#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - TCGv_i64 t3; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - t2 = tcg_temp_new_i64(tcg_ctx); \ - t3 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ - get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ - get_fpr(tcg_ctx, t2, rB(ctx->opcode)); \ - gen_helper_f##op(tcg_ctx, t3, tcg_ctx->cpu_env, t0, t1, t2); \ - if (isfloat) { \ - gen_helper_frsp(tcg_ctx, t3, tcg_ctx->cpu_env, t3); \ - } \ - set_fpr(tcg_ctx, rD(ctx->opcode), t3); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t3); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ - tcg_temp_free_i64(tcg_ctx, t2); \ - tcg_temp_free_i64(tcg_ctx, t3); \ -} +#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + TCGv_i64 t2; \ + TCGv_i64 t3; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + t2 = tcg_temp_new_i64(tcg_ctx); \ + t3 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ + get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ + get_fpr(tcg_ctx, t2, rB(ctx->opcode)); \ + gen_helper_f##op(tcg_ctx, t3, tcg_ctx->cpu_env, t0, t1, t2); \ + if (isfloat) { \ + gen_helper_frsp(tcg_ctx, t3, tcg_ctx->cpu_env, t3); \ + } \ + set_fpr(tcg_ctx, rD(ctx->opcode), t3); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t3); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + tcg_temp_free_i64(tcg_ctx, t2); \ + tcg_temp_free_i64(tcg_ctx, t3); \ + } -#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ -_GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type); \ -_GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type); - -#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - t2 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ - get_fpr(tcg_ctx, t1, rB(ctx->opcode)); \ - gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ - if (isfloat) { \ - gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ - } \ - set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t2); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ - tcg_temp_free_i64(tcg_ctx, t2); \ -} -#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AB(name, name, 0x3F, op2, 
inval, 0, set_fprf, type); \ -_GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); - -#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - t2 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ - get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ - gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ - if (isfloat) { \ - gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ - } \ - set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t2); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ - tcg_temp_free_i64(tcg_ctx, t2); \ -} -#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ -_GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); - -#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ - gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t1); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ -} +#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ + _GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type); \ + _GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type); + +#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + TCGv_i64 t2; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + t2 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ + get_fpr(tcg_ctx, t1, rB(ctx->opcode)); \ + gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ + if (isfloat) { \ + gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ + } \ + set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t2); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + tcg_temp_free_i64(tcg_ctx, t2); \ + } +#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ + _GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ + _GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); + +#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + 
TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + TCGv_i64 t2; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + t2 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rA(ctx->opcode)); \ + get_fpr(tcg_ctx, t1, rC(ctx->opcode)); \ + gen_helper_f##op(tcg_ctx, t2, tcg_ctx->cpu_env, t0, t1); \ + if (isfloat) { \ + gen_helper_frsp(tcg_ctx, t2, tcg_ctx->cpu_env, t2); \ + } \ + set_fpr(tcg_ctx, rD(ctx->opcode), t2); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t2); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + tcg_temp_free_i64(tcg_ctx, t2); \ + } +#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ + _GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ + _GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); + +#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ + gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t1); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + } -#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - t1 = tcg_temp_new_i64(tcg_ctx); \ - gen_reset_fpstatus(tcg_ctx); \ - get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ - gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ - if (set_fprf) { \ - gen_compute_fprf_float64(tcg_ctx, t1); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_i64(tcg_ctx, t0); \ - tcg_temp_free_i64(tcg_ctx, t1); \ -} +#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ + static void gen_f##name(DisasContext *ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv_i64 t0; \ + TCGv_i64 t1; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + t1 = tcg_temp_new_i64(tcg_ctx); \ + gen_reset_fpstatus(tcg_ctx); \ + get_fpr(tcg_ctx, t0, rB(ctx->opcode)); \ + gen_helper_f##name(tcg_ctx, t1, tcg_ctx->cpu_env, t0); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t1); \ + if (set_fprf) { \ + gen_compute_fprf_float64(tcg_ctx, t1); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_i64(tcg_ctx, t0); \ + tcg_temp_free_i64(tcg_ctx, t1); \ + } /* fadd - fadds */ GEN_FLOAT_AB(add, 0x15, 0x000007C0, 1, PPC_FLOAT); @@ -217,7 +217,7 @@ GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE); /* frsqrtes */ static void gen_frsqrtes(DisasContext *ctx) { - 
TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -248,7 +248,7 @@ GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT); /* fsqrt */ static void gen_fsqrt(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -271,7 +271,7 @@ static void gen_fsqrt(DisasContext *ctx) static void gen_fsqrts(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -342,7 +342,7 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); static void gen_ftdiv(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -360,7 +360,7 @@ static void gen_ftdiv(DisasContext *ctx) static void gen_ftsqrt(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; if (unlikely(!ctx->fpu_enabled)) { gen_exception(ctx, POWERPC_EXCP_FPU); @@ -372,14 +372,12 @@ static void gen_ftsqrt(DisasContext *ctx) tcg_temp_free_i64(tcg_ctx, t0); } - - /*** Floating-Point compare ***/ /* fcmpo */ static void gen_fcmpo(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i32 crf; TCGv_i64 t0; TCGv_i64 t1; @@ -403,7 +401,7 @@ static void gen_fcmpo(DisasContext *ctx) /* fcmpu */ static void gen_fcmpu(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i32 crf; TCGv_i64 t0; TCGv_i64 t1; @@ -429,7 +427,7 @@ static void gen_fcmpu(DisasContext *ctx) /* XXX: beware that fabs never checks for NaNs nor update FPSCR */ static void gen_fabs(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -452,7 +450,7 @@ static void gen_fabs(DisasContext *ctx) /* XXX: beware that fmr never checks for NaNs nor update FPSCR */ static void gen_fmr(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; if (unlikely(!ctx->fpu_enabled)) { gen_exception(ctx, POWERPC_EXCP_FPU); @@ -471,7 +469,7 @@ static void gen_fmr(DisasContext *ctx) /* XXX: beware that fnabs never checks for NaNs nor update FPSCR */ static void gen_fnabs(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -494,7 +492,7 @@ static void gen_fnabs(DisasContext *ctx) /* XXX: beware that fneg never checks for NaNs nor update FPSCR */ static void gen_fneg(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; if (unlikely(!ctx->fpu_enabled)) { @@ -517,7 +515,7 @@ static void gen_fneg(DisasContext *ctx) /* XXX: beware that fcpsgn never checks for NaNs nor update FPSCR */ static void gen_fcpsgn(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; TCGv_i64 t2; @@ -542,7 +540,7 @@ static void gen_fcpsgn(DisasContext *ctx) static void gen_fmrgew(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 
b0; TCGv_i64 t0; TCGv_i64 t1; @@ -565,7 +563,7 @@ static void gen_fmrgew(DisasContext *ctx) static void gen_fmrgow(DisasContext *ctx) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv_i64 t0; TCGv_i64 t1; TCGv_i64 t2; @@ -607,8 +605,8 @@ static void gen_mcrfs(DisasContext *ctx) shift = 4 * nibble; tcg_gen_shri_tl(tcg_ctx, tmp, cpu_fpscr, shift); tcg_gen_trunc_tl_i32(tcg_ctx, cpu_crf[crfD(ctx->opcode)], tmp); - tcg_gen_andi_i32(tcg_ctx, cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], - 0xf); + tcg_gen_andi_i32(tcg_ctx, cpu_crf[crfD(ctx->opcode)], + cpu_crf[crfD(ctx->opcode)], 0xf); tcg_temp_free(tcg_ctx, tmp); tcg_gen_extu_tl_i64(tcg_ctx, tnew_fpscr, cpu_fpscr); /* Only the exception bits (including FX) should be cleared if read */ @@ -836,7 +834,8 @@ static void gen_mtfsf(DisasContext *ctx) } gen_reset_fpstatus(tcg_ctx); if (l) { - t0 = tcg_const_i32(tcg_ctx, (ctx->insns_flags2 & PPC2_ISA205) ? 0xffff : 0xff); + t0 = tcg_const_i32(tcg_ctx, + (ctx->insns_flags2 & PPC2_ISA205) ? 0xffff : 0xff); } else { t0 = tcg_const_i32(tcg_ctx, flm << (w * 8)); } @@ -887,101 +886,101 @@ static void gen_mtfsfi(DisasContext *ctx) } /*** Floating-point load ***/ -#define GEN_LDF(name, ldop, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDF(name, ldop, opc, type) \ + static void glue(gen_, name)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDUF(name, ldop, opc, type) \ -static void glue(gen_, name##u)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDUF(name, ldop, opc, type) \ + static void glue(gen_, name##u)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = 
tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDUXF(name, ldop, opc, type) \ -static void glue(gen_, name##ux)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDUXF(name, ldop, opc, type) \ + static void glue(gen_, name##ux)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDXF(name, ldop, opc2, opc3, type) \ -static void glue(gen_, name##x)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##ldop(ctx, t0, EA); \ - set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_LDXF(name, ldop, opc2, opc3, type) \ + static void glue(gen_, name##x)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##ldop(ctx, t0, EA); \ + set_fpr(tcg_ctx, rD(ctx->opcode), t0); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_LDFS(name, ldop, op, type) \ -GEN_LDF(name, ldop, op | 0x20, type); \ -GEN_LDUF(name, ldop, op | 0x21, type); \ -GEN_LDUXF(name, ldop, op | 0x01, type); \ -GEN_LDXF(name, ldop, 0x17, op | 0x00, type) +#define GEN_LDFS(name, ldop, op, type) \ + GEN_LDF(name, ldop, op | 0x20, type); \ + GEN_LDUF(name, ldop, op | 0x21, type); \ + GEN_LDUXF(name, ldop, op | 0x01, type); \ + GEN_LDXF(name, ldop, 0x17, op | 0x00, type) static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr) { @@ -992,9 +991,9 @@ static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr) 
tcg_temp_free_i32(tcg_ctx, tmp); } - /* lfd lfdu lfdux lfdx */ +/* lfd lfdu lfdux lfdx */ GEN_LDFS(lfd, ld64_i64, 0x12, PPC_FLOAT); - /* lfs lfsu lfsux lfsx */ +/* lfs lfsu lfsux lfsx */ GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT); /* lfdepx (external PID lfdx) */ @@ -1132,101 +1131,101 @@ static void gen_lfiwzx(DisasContext *ctx) tcg_temp_free_i64(tcg_ctx, t0); } /*** Floating-point store ***/ -#define GEN_STF(name, stop, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STF(name, stop, opc, type) \ + static void glue(gen_, name)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STUF(name, stop, opc, type) \ -static void glue(gen_, name##u)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_imm_index(ctx, EA, 0); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STUF(name, stop, opc, type) \ + static void glue(gen_, name##u)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_imm_index(ctx, EA, 0); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STUXF(name, stop, opc, type) \ -static void glue(gen_, name##ux)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - 
gen_addr_reg_index(ctx, EA); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STUXF(name, stop, opc, type) \ + static void glue(gen_, name##ux)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_gen_mov_tl(tcg_ctx, cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STXF(name, stop, opc2, opc3, type) \ -static void glue(gen_, name##x)(DisasContext *ctx) \ -{ \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - TCGv EA; \ - TCGv_i64 t0; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(tcg_ctx); \ - t0 = tcg_temp_new_i64(tcg_ctx); \ - gen_addr_reg_index(ctx, EA); \ - get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ - gen_qemu_##stop(ctx, t0, EA); \ - tcg_temp_free(tcg_ctx, EA); \ - tcg_temp_free_i64(tcg_ctx, t0); \ -} +#define GEN_STXF(name, stop, opc2, opc3, type) \ + static void glue(gen_, name##x)(DisasContext * ctx) \ + { \ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ + TCGv EA; \ + TCGv_i64 t0; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(tcg_ctx); \ + t0 = tcg_temp_new_i64(tcg_ctx); \ + gen_addr_reg_index(ctx, EA); \ + get_fpr(tcg_ctx, t0, rS(ctx->opcode)); \ + gen_qemu_##stop(ctx, t0, EA); \ + tcg_temp_free(tcg_ctx, EA); \ + tcg_temp_free_i64(tcg_ctx, t0); \ + } -#define GEN_STFS(name, stop, op, type) \ -GEN_STF(name, stop, op | 0x20, type); \ -GEN_STUF(name, stop, op | 0x21, type); \ -GEN_STUXF(name, stop, op | 0x01, type); \ -GEN_STXF(name, stop, 0x17, op | 0x00, type) +#define GEN_STFS(name, stop, op, type) \ + GEN_STF(name, stop, op | 0x20, type); \ + GEN_STUF(name, stop, op | 0x21, type); \ + GEN_STUXF(name, stop, op | 0x01, type); \ + GEN_STXF(name, stop, 0x17, op | 0x00, type) static void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 src, TCGv addr) { @@ -1338,8 +1337,7 @@ static inline void gen_qemu_st32fiw(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv t0 = tcg_temp_new(tcg_ctx); - tcg_gen_trunc_i64_tl(tcg_ctx, t0, arg1), - gen_qemu_st32(ctx, t0, arg2); + tcg_gen_trunc_i64_tl(tcg_ctx, t0, arg1), gen_qemu_st32(ctx, t0, arg2); tcg_temp_free(tcg_ctx, t0); } /* stfiwx */ diff --git a/qemu/target/ppc/translate/vmx-impl.inc.c b/qemu/target/ppc/translate/vmx-impl.inc.c index 9d4211dd6e..16df4ae63b 100644 --- a/qemu/target/ppc/translate/vmx-impl.inc.c +++ b/qemu/target/ppc/translate/vmx-impl.inc.c @@ -922,13 +922,13 @@ GEN_VXFORM3(vsubeuqm, 31, 0); GEN_VXFORM3(vsubecuq, 31, 0); GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ vsubecuq, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM(vrlb, 2, 0); -GEN_VXFORM(vrlh, 2, 1); -GEN_VXFORM(vrlw, 2, 2); +GEN_VXFORM_V(vrlb, MO_8, tcg_gen_gvec_rotlv, 2, 0); 
+GEN_VXFORM_V(vrlh, MO_16, tcg_gen_gvec_rotlv, 2, 1); +GEN_VXFORM_V(vrlw, MO_32, tcg_gen_gvec_rotlv, 2, 2); GEN_VXFORM(vrlwmi, 2, 2); GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \ vrlwmi, PPC_NONE, PPC2_ISA300) -GEN_VXFORM(vrld, 2, 3); +GEN_VXFORM_V(vrld, MO_64, tcg_gen_gvec_rotlv, 2, 3); GEN_VXFORM(vrldmi, 2, 3); GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \ vrldmi, PPC_NONE, PPC2_ISA300) @@ -1058,22 +1058,25 @@ GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \ GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \ vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207) -#define GEN_VXFORM_DUPI(name, tcg_op, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; \ - int simm; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - simm = SIMM5(ctx->opcode); \ - tcg_op(tcg_ctx, avr_full_offset(rD(ctx->opcode)), 16, 16, simm);\ +static void gen_vsplti(DisasContext *ctx, int vece) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + int simm; + + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; } + simm = SIMM5(ctx->opcode); + tcg_gen_gvec_dup_imm(tcg_ctx, vece, avr_full_offset(rD(ctx->opcode)), 16, 16, simm); +} + +#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); } -GEN_VXFORM_DUPI(vspltisb, tcg_gen_gvec_dup8i, 6, 12); -GEN_VXFORM_DUPI(vspltish, tcg_gen_gvec_dup16i, 6, 13); -GEN_VXFORM_DUPI(vspltisw, tcg_gen_gvec_dup32i, 6, 14); +GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12); +GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13); +GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14); #define GEN_VXFORM_NOA(name, opc2, opc3) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -1598,7 +1601,7 @@ GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE, #undef GEN_VXRFORM_DUAL #undef GEN_VXRFORM1 #undef GEN_VXRFORM -#undef GEN_VXFORM_DUPI +#undef GEN_VXFORM_VSPLTI #undef GEN_VXFORM_NOA #undef GEN_VXFORM_UIMM #undef GEN_VAFORM_PAIRED diff --git a/qemu/target/ppc/translate/vsx-impl.inc.c b/qemu/target/ppc/translate/vsx-impl.inc.c index 679da14902..a0c3832842 100644 --- a/qemu/target/ppc/translate/vsx-impl.inc.c +++ b/qemu/target/ppc/translate/vsx-impl.inc.c @@ -1629,7 +1629,7 @@ static void gen_xxspltib(DisasContext *ctx) return; } } - tcg_gen_gvec_dup8i(tcg_ctx, vsr_full_offset(rt), 16, 16, uim8); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_8, vsr_full_offset(rt), 16, 16, uim8); } static void gen_xxsldwi(DisasContext *ctx) diff --git a/qemu/target/ppc/translate_init.inc.c b/qemu/target/ppc/translate_init.inc.c index d2fb1974ad..6ded89c817 100644 --- a/qemu/target/ppc/translate_init.inc.c +++ b/qemu/target/ppc/translate_init.inc.c @@ -3377,6 +3377,7 @@ static void init_excp_POWER9(CPUPPCState *env) init_excp_POWER8(env); env->excp_vectors[POWERPC_EXCP_HVIRT] = 0x00000EA0; + env->excp_vectors[POWERPC_EXCP_SYSCALL_VECTORED] = 0x00000000; } static void init_excp_POWER10(CPUPPCState *env) @@ -5145,7 +5146,7 @@ POWERPC_FAMILY(e5500)(CPUClass *oc, void *data) PPC_FLOAT_STFIWX | PPC_WAIT | PPC_MEM_TLBSYNC | PPC_TLBIVAX | PPC_MEM_SYNC | PPC_64B | PPC_POPCNTB | PPC_POPCNTWD; - pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | \ + pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | PPC2_FP_CVT_S64; pcc->msr_mask = (1ull << MSR_CM) | (1ull << MSR_GS) | @@ -5191,7 +5192,7 @@ POWERPC_FAMILY(e6500)(CPUClass *oc, void *data) PPC_FLOAT_STFIWX | PPC_WAIT | PPC_MEM_TLBSYNC | PPC_TLBIVAX | PPC_MEM_SYNC | PPC_64B | 
PPC_POPCNTB | PPC_POPCNTWD | PPC_ALTIVEC; - pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | \ + pcc->insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | PPC2_FP_CVT_S64 | PPC2_ATOMIC_ISA206; pcc->msr_mask = (1ull << MSR_CM) | (1ull << MSR_GS) | @@ -8845,7 +8846,7 @@ POWERPC_FAMILY(POWER9)(CPUClass *oc, void *data) pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE | POWERPC_FLAG_PMM | POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR | - POWERPC_FLAG_VSX | POWERPC_FLAG_TM; + POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV; pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; @@ -8898,11 +8899,6 @@ static void init_proc_POWER10(CPUPPCState *env) gen_spr_power8_rpr(env); gen_spr_power9_mmu(env); - /* POWER9 Specific registers */ - spr_register_kvm(env, SPR_TIDR, "TIDR", NULL, NULL, - spr_read_generic, spr_write_generic, - KVM_REG_PPC_TIDR, 0); - /* FIXME: Filter fields properly based on privilege level */ spr_register_kvm_hv(env, SPR_PSSCR, "PSSCR", NULL, NULL, NULL, NULL, spr_read_generic, spr_write_generic, @@ -9680,7 +9676,7 @@ static int gdb_get_float_reg(CPUPPCState *env, GByteArray *buf, int n) { uint8_t *mem_buf; if (n < 32) { - gdb_get_reg64(buf, *cpu_fpr_ptr(env, n)); + gdb_get_float64(buf, *cpu_fpr_ptr(env, n)); mem_buf = gdb_get_reg_ptr(buf, 8); ppc_maybe_bswap_register(env, mem_buf, 8); return 8; diff --git a/qemu/target/riscv/cpu.c b/qemu/target/riscv/cpu.c index 2313cfc6cc..9979df4979 100644 --- a/qemu/target/riscv/cpu.c +++ b/qemu/target/riscv/cpu.c @@ -30,22 +30,20 @@ // static const char riscv_exts[26] = "IEMAFDQCLBJTPVNSUHKORWXYZG"; -const char * const riscv_int_regnames[] = { - "x0/zero", "x1/ra", "x2/sp", "x3/gp", "x4/tp", "x5/t0", "x6/t1", - "x7/t2", "x8/s0", "x9/s1", "x10/a0", "x11/a1", "x12/a2", "x13/a3", - "x14/a4", "x15/a5", "x16/a6", "x17/a7", "x18/s2", "x19/s3", "x20/s4", - "x21/s5", "x22/s6", "x23/s7", "x24/s8", "x25/s9", "x26/s10", "x27/s11", - "x28/t3", "x29/t4", "x30/t5", "x31/t6" -}; - -const char * const riscv_fpr_regnames[] = { - "f0/ft0", "f1/ft1", "f2/ft2", "f3/ft3", "f4/ft4", "f5/ft5", - "f6/ft6", "f7/ft7", "f8/fs0", "f9/fs1", "f10/fa0", "f11/fa1", - "f12/fa2", "f13/fa3", "f14/fa4", "f15/fa5", "f16/fa6", "f17/fa7", - "f18/fs2", "f19/fs3", "f20/fs4", "f21/fs5", "f22/fs6", "f23/fs7", - "f24/fs8", "f25/fs9", "f26/fs10", "f27/fs11", "f28/ft8", "f29/ft9", - "f30/ft10", "f31/ft11" -}; +const char *const riscv_int_regnames[] = { + "x0/zero", "x1/ra", "x2/sp", "x3/gp", "x4/tp", "x5/t0", "x6/t1", + "x7/t2", "x8/s0", "x9/s1", "x10/a0", "x11/a1", "x12/a2", "x13/a3", + "x14/a4", "x15/a5", "x16/a6", "x17/a7", "x18/s2", "x19/s3", "x20/s4", + "x21/s5", "x22/s6", "x23/s7", "x24/s8", "x25/s9", "x26/s10", "x27/s11", + "x28/t3", "x29/t4", "x30/t5", "x31/t6"}; + +const char *const riscv_fpr_regnames[] = { + "f0/ft0", "f1/ft1", "f2/ft2", "f3/ft3", "f4/ft4", "f5/ft5", + "f6/ft6", "f7/ft7", "f8/fs0", "f9/fs1", "f10/fa0", "f11/fa1", + "f12/fa2", "f13/fa3", "f14/fa4", "f15/fa5", "f16/fa6", "f17/fa7", + "f18/fs2", "f19/fs3", "f20/fs4", "f21/fs5", "f22/fs6", "f23/fs7", + "f24/fs8", "f25/fs9", "f26/fs10", "f27/fs11", "f28/ft8", "f29/ft9", + "f30/ft10", "f31/ft11"}; static void set_misa(CPURISCVState *env, target_ulong misa) { @@ -57,6 +55,11 @@ static void set_priv_version(CPURISCVState *env, int priv_ver) env->priv_ver = priv_ver; } +static void set_vext_version(CPURISCVState *env, int vext_ver) +{ + env->vext_ver = vext_ver; +} + static void 
set_feature(CPURISCVState *env, int feature) { env->features |= (1ULL << feature); @@ -75,65 +78,48 @@ static void riscv_any_cpu_init(CPUState *obj) set_resetvec(env, DEFAULT_RSTVEC); } -#if defined(TARGET_RISCV32) -// rv32 -static void riscv_base32_cpu_init(CPUState *obj) +static void riscv_base_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ set_misa(env, 0); + set_resetvec(env, DEFAULT_RSTVEC); } -// sifive-u34 -static void rv32gcsu_priv1_10_0_cpu_init(CPUState *obj) +static void rvxx_sifive_u_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV32 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); + set_misa(env, RVXLEN | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_MMU); - set_feature(env, RISCV_FEATURE_PMP); + set_resetvec(env, 0x1004); } -// sifive-e31 -static void rv32imacu_nommu_cpu_init(CPUState *obj) +static void rvxx_sifive_e_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV32 | RVI | RVM | RVA | RVC | RVU); + set_misa(env, RVXLEN | RVI | RVM | RVA | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_PMP); + set_resetvec(env, 0x1004); } -#elif defined(TARGET_RISCV64) -// rv64 -static void riscv_base64_cpu_init(CPUState *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; - /* We set this in the realise function */ - set_misa(env, 0); -} +#if defined(TARGET_RISCV32) -// sifive-u54 -static void rv64gcsu_priv1_10_0_cpu_init(CPUState *obj) +static void rv32_ibex_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV64 | RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); + set_misa(env, RV32 | RVI | RVM | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); - set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_MMU); - set_feature(env, RISCV_FEATURE_PMP); + set_resetvec(env, 0x8090); } -// sifive-e51 -static void rv64imacu_nommu_cpu_init(CPUState *obj) +static void rv32_imafcu_nommu_cpu_init(CPUState *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; - set_misa(env, RV64 | RVI | RVM | RVA | RVC | RVU); + set_misa(env, RV32 | RVI | RVM | RVA | RVF | RVC | RVU); set_priv_version(env, PRIV_VERSION_1_10_0); set_resetvec(env, DEFAULT_RSTVEC); - set_feature(env, RISCV_FEATURE_PMP); } + #endif static void riscv_cpu_set_pc(CPUState *cs, vaddr value) @@ -192,6 +178,7 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) RISCVCPU *cpu = RISCV_CPU(dev); CPURISCVState *env = &cpu->env; int priv_version = PRIV_VERSION_1_11_0; + int vext_version = VEXT_VERSION_0_07_1; target_ulong target_misa = 0; cpu_exec_realizefn(cs); @@ -201,16 +188,15 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) priv_version = PRIV_VERSION_1_11_0; } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.10.0")) { priv_version = PRIV_VERSION_1_10_0; - } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.9.1")) { - priv_version = PRIV_VERSION_1_09_1; } else { - // error_setg(errp, "Unsupported privilege spec version '%s'", cpu->cfg.priv_spec); + // error_setg(errp, "Unsupported privilege spec version '%s'", + // cpu->cfg.priv_spec); return; } } set_priv_version(env, priv_version); - set_resetvec(env, DEFAULT_RSTVEC); + set_vext_version(env, vext_version); if (cpu->cfg.mmu) { set_feature(env, RISCV_FEATURE_MMU); @@ -224,7 +210,7 @@ 
static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) if (!env->misa) { /* Do some ISA extension error checking */ if (cpu->cfg.ext_i && cpu->cfg.ext_e) { - //error_setg(errp, "I and E extensions are incompatible"); + // error_setg(errp, "I and E extensions are incompatible"); return; } @@ -233,8 +219,9 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) return; } - if (cpu->cfg.ext_g && !(cpu->cfg.ext_i & cpu->cfg.ext_m & - cpu->cfg.ext_a & cpu->cfg.ext_f & cpu->cfg.ext_d)) { + if (cpu->cfg.ext_g && + !(cpu->cfg.ext_i & cpu->cfg.ext_m & cpu->cfg.ext_a & + cpu->cfg.ext_f & cpu->cfg.ext_d)) { // warn_report("Setting G will also set IMAFD"); cpu->cfg.ext_i = true; cpu->cfg.ext_m = true; @@ -274,6 +261,45 @@ static void riscv_cpu_realize(struct uc_struct *uc, CPUState *dev) if (cpu->cfg.ext_h) { target_misa |= RVH; } + if (cpu->cfg.ext_v) { + target_misa |= RVV; + if (!is_power_of_2(cpu->cfg.vlen)) { + // error_setg(errp, + // "Vector extension VLEN must be power of 2"); + return; + } + if (cpu->cfg.vlen > RV_VLEN_MAX || cpu->cfg.vlen < 128) { + // error_setg(errp, + // "Vector extension implementation only supports VLEN " + // "in the range [128, %d]", RV_VLEN_MAX); + return; + } + if (!is_power_of_2(cpu->cfg.elen)) { + // error_setg(errp, + // "Vector extension ELEN must be power of 2"); + return; + } + if (cpu->cfg.elen > 64 || cpu->cfg.vlen < 8) { + // error_setg(errp, + // "Vector extension implementation only supports ELEN " + // "in the range [8, 64]"); + return; + } + if (cpu->cfg.vext_spec) { + if (!g_strcmp0(cpu->cfg.vext_spec, "v0.7.1")) { + vext_version = VEXT_VERSION_0_07_1; + } else { + // error_setg(errp, + // "Unsupported vector spec version '%s'", + // cpu->cfg.vext_spec); + return; + } + } else { + // qemu_log("vector verison is not specified, " + // "use the default value v0.7.1\n"); + } + set_vext_version(env, vext_version); + } set_misa(env, RVXLEN | target_misa); } @@ -316,16 +342,17 @@ typedef struct CPUModelInfo { } CPUModelInfo; static const CPUModelInfo cpu_models[] = { - {TYPE_RISCV_CPU_ANY, riscv_any_cpu_init}, -#ifdef TARGET_RISCV32 - {TYPE_RISCV_CPU_BASE32, riscv_base32_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_E31, rv32imacu_nommu_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_U34, rv32gcsu_priv1_10_0_cpu_init}, -#endif -#ifdef TARGET_RISCV64 - {TYPE_RISCV_CPU_BASE64, riscv_base64_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_E51, rv64imacu_nommu_cpu_init}, - {TYPE_RISCV_CPU_SIFIVE_U54, rv64gcsu_priv1_10_0_cpu_init}, + {TYPE_RISCV_CPU_ANY, riscv_any_cpu_init}, +#if defined(TARGET_RISCV32) + {TYPE_RISCV_CPU_BASE32, riscv_base_cpu_init}, + {TYPE_RISCV_CPU_IBEX, rv32_ibex_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_E31, rvxx_sifive_e_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_E34, rv32_imafcu_nommu_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_U34, rvxx_sifive_u_cpu_init}, +#elif defined(TARGET_RISCV64) + {TYPE_RISCV_CPU_BASE64, riscv_base_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_E51, rvxx_sifive_e_cpu_init}, + {TYPE_RISCV_CPU_SIFIVE_U54, rvxx_sifive_u_cpu_init}, #endif }; @@ -339,7 +366,7 @@ RISCVCPU *cpu_riscv_init(struct uc_struct *uc) if (cpu == NULL) { return NULL; } - memset((void*)cpu, 0, sizeof(*cpu)); + memset((void *)cpu, 0, sizeof(*cpu)); #ifdef TARGET_RISCV32 if (uc->cpu_model == INT_MAX) { diff --git a/qemu/target/riscv/cpu.h b/qemu/target/riscv/cpu.h index b94516eb7c..50c5d5fbf7 100644 --- a/qemu/target/riscv/cpu.h +++ b/qemu/target/riscv/cpu.h @@ -21,6 +21,7 @@ #define RISCV_CPU_H #include "hw/core/cpu.h" +#include "hw/registerfields.h" #include "exec/cpu-defs.h" #include 
"fpu/softfloat-types.h" @@ -35,7 +36,9 @@ typedef struct TCGContext TCGContext; #define TYPE_RISCV_CPU_ANY RISCV_CPU_TYPE_NAME("any") #define TYPE_RISCV_CPU_BASE32 RISCV_CPU_TYPE_NAME("rv32") #define TYPE_RISCV_CPU_BASE64 RISCV_CPU_TYPE_NAME("rv64") +#define TYPE_RISCV_CPU_IBEX RISCV_CPU_TYPE_NAME("lowrisc-ibex") #define TYPE_RISCV_CPU_SIFIVE_E31 RISCV_CPU_TYPE_NAME("sifive-e31") +#define TYPE_RISCV_CPU_SIFIVE_E34 RISCV_CPU_TYPE_NAME("sifive-e34") #define TYPE_RISCV_CPU_SIFIVE_E51 RISCV_CPU_TYPE_NAME("sifive-e51") #define TYPE_RISCV_CPU_SIFIVE_U34 RISCV_CPU_TYPE_NAME("sifive-u34") #define TYPE_RISCV_CPU_SIFIVE_U54 RISCV_CPU_TYPE_NAME("sifive-u54") @@ -57,6 +60,7 @@ typedef struct TCGContext TCGContext; #define RVA RV('A') #define RVF RV('F') #define RVD RV('D') +#define RVV RV('V') #define RVC RV('C') #define RVS RV('S') #define RVU RV('U') @@ -72,10 +76,11 @@ enum { RISCV_FEATURE_MISA }; -#define PRIV_VERSION_1_09_1 0x00010901 #define PRIV_VERSION_1_10_0 0x00011000 #define PRIV_VERSION_1_11_0 0x00011100 +#define VEXT_VERSION_0_07_1 0x00000701 + #define TRANSLATE_PMP_FAIL 2 #define TRANSLATE_FAIL 1 #define TRANSLATE_SUCCESS 0 @@ -87,9 +92,26 @@ typedef struct CPURISCVState CPURISCVState; #include "pmp.h" +#define RV_VLEN_MAX 256 + +FIELD(VTYPE, VLMUL, 0, 2) +FIELD(VTYPE, VSEW, 2, 3) +FIELD(VTYPE, VEDIV, 5, 2) +FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 9) +FIELD(VTYPE, VILL, sizeof(target_ulong) * 8 - 1, 1) + struct CPURISCVState { target_ulong gpr[32]; uint64_t fpr[32]; /* assume both F and D extensions */ + + /* vector coprocessor state. */ + uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16); + target_ulong vxrm; + target_ulong vxsat; + target_ulong vl; + target_ulong vstart; + target_ulong vtype; + target_ulong pc; target_ulong load_res; target_ulong load_val; @@ -100,6 +122,7 @@ struct CPURISCVState { target_ulong guest_phys_fault_addr; target_ulong priv_ver; + target_ulong vext_ver; target_ulong misa; target_ulong misa_mask; @@ -245,12 +268,16 @@ typedef struct RISCVCPU { bool ext_s; bool ext_u; bool ext_h; + bool ext_v; bool ext_counters; bool ext_ifencei; bool ext_icsr; char *priv_spec; char *user_spec; + char *vext_spec; + uint16_t vlen; + uint16_t elen; bool mmu; bool pmp; } cfg; @@ -323,15 +350,56 @@ void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong); #define TB_FLAGS_MMU_MASK 3 #define TB_FLAGS_MSTATUS_FS MSTATUS_FS +typedef CPURISCVState CPUArchState; +typedef RISCVCPU ArchCPU; +#include "exec/cpu-all.h" + +FIELD(TB_FLAGS, VL_EQ_VLMAX, 2, 1) +FIELD(TB_FLAGS, LMUL, 3, 2) +FIELD(TB_FLAGS, SEW, 5, 3) +FIELD(TB_FLAGS, VILL, 8, 1) + +/* + * A simplification for VLMAX + * = (1 << LMUL) * VLEN / (8 * (1 << SEW)) + * = (VLEN << LMUL) / (8 << SEW) + * = (VLEN << LMUL) >> (SEW + 3) + * = VLEN >> (SEW + 3 - LMUL) + */ +static inline uint32_t vext_get_vlmax(RISCVCPU *cpu, target_ulong vtype) +{ + uint8_t sew, lmul; + + sew = FIELD_EX64(vtype, VTYPE, VSEW); + lmul = FIELD_EX64(vtype, VTYPE, VLMUL); + return cpu->cfg.vlen >> (sew + 3 - lmul); +} + static inline void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) + target_ulong *cs_base, uint32_t *pflags) { + uint32_t flags = 0; + *pc = env->pc; *cs_base = 0; - *flags = cpu_mmu_index(env, 0); + + if (riscv_has_ext(env, RVV)) { + uint32_t vlmax = vext_get_vlmax(env_archcpu(env), env->vtype); + bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl); + FIELD_DP32(flags, TB_FLAGS, VILL, FIELD_EX64(env->vtype, VTYPE, VILL), flags); + FIELD_DP32(flags, TB_FLAGS, SEW, 
FIELD_EX64(env->vtype, VTYPE, VSEW), flags); + FIELD_DP32(flags, TB_FLAGS, LMUL, FIELD_EX64(env->vtype, VTYPE, VLMUL), flags); + FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax, flags); + } else { + FIELD_DP32(flags, TB_FLAGS, VILL, 1, flags); + } + + flags |= cpu_mmu_index(env, 0); if (riscv_cpu_fp_enabled(env)) { - *flags |= env->mstatus & MSTATUS_FS; + flags |= env->mstatus & MSTATUS_FS; } + + *pflags = flags; } int riscv_csrrw(CPURISCVState *env, int csrno, target_ulong *ret_value, @@ -372,9 +440,4 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops); void riscv_cpu_register_gdb_regs_for_features(CPUState *cs); -typedef CPURISCVState CPUArchState; -typedef RISCVCPU ArchCPU; - -#include "exec/cpu-all.h" - #endif /* RISCV_CPU_H */ diff --git a/qemu/target/riscv/cpu_bits.h b/qemu/target/riscv/cpu_bits.h index ffa73864a9..48625ac2fd 100644 --- a/qemu/target/riscv/cpu_bits.h +++ b/qemu/target/riscv/cpu_bits.h @@ -29,6 +29,14 @@ #define FSR_NXA (FPEXC_NX << FSR_AEXC_SHIFT) #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA) +/* Vector Fixed-Point round model */ +#define FSR_VXRM_SHIFT 9 +#define FSR_VXRM (0x3 << FSR_VXRM_SHIFT) + +/* Vector Fixed-Point saturation flag */ +#define FSR_VXSAT_SHIFT 8 +#define FSR_VXSAT (0x1 << FSR_VXSAT_SHIFT) + /* Control and Status Registers */ /* User Trap Setup */ @@ -48,6 +56,13 @@ #define CSR_FRM 0x002 #define CSR_FCSR 0x003 +/* User Vector CSRs */ +#define CSR_VSTART 0x008 +#define CSR_VXSAT 0x009 +#define CSR_VXRM 0x00a +#define CSR_VL 0xc20 +#define CSR_VTYPE 0xc21 + /* User Timers and Counters */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 diff --git a/qemu/target/riscv/cpu_helper.c b/qemu/target/riscv/cpu_helper.c index bb2c3d869f..bad05e3049 100644 --- a/qemu/target/riscv/cpu_helper.c +++ b/qemu/target/riscv/cpu_helper.c @@ -300,9 +300,6 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; int mode = mmu_idx; bool use_background = false; - hwaddr base; - int levels = 0, ptidxbits = 0, ptesize = 0, vm, sum, mxr, widened; - /* * Check if we should use the background registers for the two @@ -344,63 +341,45 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, *prot = 0; + hwaddr base; + int levels, ptidxbits, ptesize, vm, sum, mxr, widened; + if (first_stage == true) { mxr = get_field(env->mstatus, MSTATUS_MXR); } else { mxr = get_field(env->vsstatus, MSTATUS_MXR); } - if (env->priv_ver >= PRIV_VERSION_1_10_0) { - if (first_stage == true) { - if (use_background) { - base = (hwaddr)get_field(env->vsatp, SATP_PPN) << PGSHIFT; - vm = get_field(env->vsatp, SATP_MODE); - } else { - base = (hwaddr)get_field(env->satp, SATP_PPN) << PGSHIFT; - vm = get_field(env->satp, SATP_MODE); - } - widened = 0; + if (first_stage == true) { + if (use_background) { + base = (hwaddr)get_field(env->vsatp, SATP_PPN) << PGSHIFT; + vm = get_field(env->vsatp, SATP_MODE); } else { - base = (hwaddr)get_field(env->hgatp, HGATP_PPN) << PGSHIFT; - vm = get_field(env->hgatp, HGATP_MODE); - widened = 2; - } - sum = get_field(env->mstatus, MSTATUS_SUM); - switch (vm) { - case VM_1_10_SV32: - levels = 2; ptidxbits = 10; ptesize = 4; break; - case VM_1_10_SV39: - levels = 3; ptidxbits = 9; ptesize = 8; break; - case VM_1_10_SV48: - levels = 4; ptidxbits = 9; ptesize = 8; break; - case VM_1_10_SV57: - levels = 5; ptidxbits = 9; ptesize = 8; break; - case VM_1_10_MBARE: - *physical = addr; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - return TRANSLATE_SUCCESS; - default: - 
g_assert_not_reached(); + base = (hwaddr)get_field(env->satp, SATP_PPN) << PGSHIFT; + vm = get_field(env->satp, SATP_MODE); } - } else { widened = 0; - base = (hwaddr)(env->sptbr) << PGSHIFT; - sum = !get_field(env->mstatus, MSTATUS_PUM); - vm = get_field(env->mstatus, MSTATUS_VM); - switch (vm) { - case VM_1_09_SV32: - levels = 2; ptidxbits = 10; ptesize = 4; break; - case VM_1_09_SV39: - levels = 3; ptidxbits = 9; ptesize = 8; break; - case VM_1_09_SV48: - levels = 4; ptidxbits = 9; ptesize = 8; break; - case VM_1_09_MBARE: - *physical = addr; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - return TRANSLATE_SUCCESS; - default: - g_assert_not_reached(); - } + } else { + base = (hwaddr)get_field(env->hgatp, HGATP_PPN) << PGSHIFT; + vm = get_field(env->hgatp, HGATP_MODE); + widened = 2; + } + sum = get_field(env->mstatus, MSTATUS_SUM); + switch (vm) { + case VM_1_10_SV32: + levels = 2; ptidxbits = 10; ptesize = 4; break; + case VM_1_10_SV39: + levels = 3; ptidxbits = 9; ptesize = 8; break; + case VM_1_10_SV48: + levels = 4; ptidxbits = 9; ptesize = 8; break; + case VM_1_10_SV57: + levels = 5; ptidxbits = 9; ptesize = 8; break; + case VM_1_10_MBARE: + *physical = addr; + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return TRANSLATE_SUCCESS; + default: + g_assert_not_reached(); } CPUState *cs = env_cpu(env); @@ -438,11 +417,17 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, hwaddr pte_addr; if (two_stage && first_stage) { + int vbase_prot; hwaddr vbase; /* Do the second stage translation on the base PTE address. */ - get_physical_address(env, &vbase, prot, base, access_type, - mmu_idx, false, true); + int vbase_ret = get_physical_address(env, &vbase, &vbase_prot, + base, MMU_DATA_LOAD, + mmu_idx, false, true); + + if (vbase_ret != TRANSLATE_SUCCESS) { + return vbase_ret; + } pte_addr = vbase + idx * ptesize; } else { @@ -456,17 +441,9 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, } #if defined(TARGET_RISCV32) -#ifdef UNICORN_ARCH_POSTFIX target_ulong pte = glue(address_space_ldl, UNICORN_ARCH_POSTFIX)(cs->as->uc, cs->as, pte_addr, attrs, &res); -#else - target_ulong pte = address_space_ldl(cs->as->uc, cs->as, pte_addr, attrs, &res); -#endif #elif defined(TARGET_RISCV64) -#ifdef UNICORN_ARCH_POSTFIX target_ulong pte = glue(address_space_ldq, UNICORN_ARCH_POSTFIX)(cs->as->uc, cs->as, pte_addr, attrs, &res); -#else - target_ulong pte = address_space_ldq(cs->as->uc, cs->as, pte_addr, attrs, &res); -#endif #endif if (res != MEMTX_OK) { return TRANSLATE_FAIL; @@ -528,18 +505,14 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, &addr1, &l, false, MEMTXATTRS_UNSPECIFIED); if (memory_region_is_ram(mr)) { target_ulong *pte_pa = - qemu_map_ram_ptr(mr->uc, mr->ram_block, addr1); + qemu_map_ram_ptr(cs->as->uc, mr->ram_block, addr1); #if TCG_OVERSIZED_GUEST /* MTTCG is not enabled on oversized TCG guests so * page table updates do not need to be atomic */ *pte_pa = pte = updated_pte; #else target_ulong old_pte = -#ifdef _MSC_VER - atomic_cmpxchg((long *)pte_pa, pte, updated_pte); -#else atomic_cmpxchg(pte_pa, pte, updated_pte); -#endif if (old_pte != pte) { goto restart; } else { @@ -556,12 +529,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, /* for superpage mappings, make a fake leaf PTE for the TLB's benefit. 
*/ target_ulong vpn = addr >> PGSHIFT; - if (i == 0) { - *physical = (ppn | (vpn & ((1L << (ptshift + widened)) - 1))) << - PGSHIFT; - } else { - *physical = (ppn | (vpn & ((1L << ptshift) - 1))) << PGSHIFT; - } + *physical = (ppn | (vpn & ((1L << ptshift) - 1))) << PGSHIFT; /* set permissions on the TLB entry */ if ((pte & PTE_R) || ((pte & PTE_X) && mxr)) { @@ -590,7 +558,6 @@ static void raise_mmu_exception(CPURISCVState *env, target_ulong address, int page_fault_exceptions; if (first_stage) { page_fault_exceptions = - (env->priv_ver >= PRIV_VERSION_1_10_0) && get_field(env->satp, SATP_MODE) != VM_1_10_MBARE && !pmp_violation; } else { @@ -702,7 +669,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, CPURISCVState *env = &cpu->env; vaddr im_address; hwaddr pa = 0; - int prot; + int prot, prot2; bool pmp_violation = false; bool m_mode_two_stage = false; bool hs_mode_two_stage = false; @@ -752,13 +719,13 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, /* Second stage lookup */ im_address = pa; - ret = get_physical_address(env, &pa, &prot, im_address, + ret = get_physical_address(env, &pa, &prot2, im_address, access_type, mmu_idx, false, true); qemu_log_mask(CPU_LOG_MMU, "%s 2nd-stage address=%" VADDR_PRIx " ret %d physical " TARGET_FMT_plx " prot %d\n", - __func__, im_address, ret, pa, prot); + __func__, im_address, ret, pa, prot2); if (riscv_feature(env, RISCV_FEATURE_PMP) && (ret == TRANSLATE_SUCCESS) && @@ -916,8 +883,7 @@ void riscv_cpu_do_interrupt(CPUState *cs) } s = env->mstatus; - s = set_field(s, MSTATUS_SPIE, env->priv_ver >= PRIV_VERSION_1_10_0 ? - get_field(s, MSTATUS_SIE) : get_field(s, MSTATUS_UIE << env->priv)); + s = set_field(s, MSTATUS_SPIE, get_field(s, MSTATUS_SIE)); s = set_field(s, MSTATUS_SPP, env->priv); s = set_field(s, MSTATUS_SIE, 0); env->mstatus = s; @@ -954,8 +920,7 @@ void riscv_cpu_do_interrupt(CPUState *cs) } s = env->mstatus; - s = set_field(s, MSTATUS_MPIE, env->priv_ver >= PRIV_VERSION_1_10_0 ? 
- get_field(s, MSTATUS_MIE) : get_field(s, MSTATUS_UIE << env->priv)); + s = set_field(s, MSTATUS_MPIE, get_field(s, MSTATUS_MIE)); s = set_field(s, MSTATUS_MPP, env->priv); s = set_field(s, MSTATUS_MIE, 0); env->mstatus = s; diff --git a/qemu/target/riscv/csr.c b/qemu/target/riscv/csr.c index 785ef26dc4..bd746455d1 100644 --- a/qemu/target/riscv/csr.c +++ b/qemu/target/riscv/csr.c @@ -23,12 +23,21 @@ #include "exec/exec-all.h" static int fs(CPURISCVState *env, int csrno); +static int vs(CPURISCVState *env, int csrno); static int read_fflags(CPURISCVState *env, int csrno, target_ulong *val); static int write_fflags(CPURISCVState *env, int csrno, target_ulong val); static int read_frm(CPURISCVState *env, int csrno, target_ulong *val); static int write_frm(CPURISCVState *env, int csrno, target_ulong val); static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val); static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val); +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val); +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val); +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val); +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val); +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val); +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val); +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val); +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val); static int ctr(CPURISCVState *env, int csrno); static int read_instret(CPURISCVState *env, int csrno, target_ulong *val); static int read_time(CPURISCVState *env, int csrno, target_ulong *val); @@ -49,8 +58,6 @@ static int read_mtvec(CPURISCVState *env, int csrno, target_ulong *val); static int write_mtvec(CPURISCVState *env, int csrno, target_ulong val); static int read_mcounteren(CPURISCVState *env, int csrno, target_ulong *val); static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val); -static int read_mucounteren(CPURISCVState *env, int csrno, target_ulong *val); -static int write_mucounteren(CPURISCVState *env, int csrno, target_ulong val); static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val); static int write_mscounteren(CPURISCVState *env, int csrno, target_ulong val); static int read_mscratch(CPURISCVState *env, int csrno, target_ulong *val); @@ -154,6 +161,12 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_FRM] = { fs, read_frm, write_frm }, [CSR_FCSR] = { fs, read_fcsr, write_fcsr }, + /* Vector CSRs */ + [CSR_VSTART] = { vs, read_vstart, write_vstart }, + [CSR_VXSAT] = { vs, read_vxsat, write_vxsat }, + [CSR_VXRM] = { vs, read_vxrm, write_vxrm }, + [CSR_VL] = { vs, read_vl }, + [CSR_VTYPE] = { vs, read_vtype }, /* User Timers and Counters */ [CSR_CYCLE] = { ctr, read_instret }, [CSR_INSTRET] = { ctr, read_instret }, @@ -196,8 +209,6 @@ static riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_MSTATUSH] = { any, read_mstatush, write_mstatush }, #endif - /* Legacy Counter Setup (priv v1.9.1) */ - [CSR_MUCOUNTEREN] = { any, read_mucounteren, write_mucounteren }, [CSR_MSCOUNTEREN] = { any, read_mscounteren, write_mscounteren }, /* Machine Trap Handling */ @@ -441,41 +452,34 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops) /* Predicates */ static int fs(CPURISCVState *env, int csrno) { + /* loose check condition for fcsr in vector extension */ + if ((csrno == CSR_FCSR) && (env->misa & 
RVV)) { + return 0; + } if (!env->debugger && !riscv_cpu_fp_enabled(env)) { return -1; } return 0; } +static int vs(CPURISCVState *env, int csrno) +{ + if (env->misa & RVV) { + return 0; + } + return -1; +} + static int ctr(CPURISCVState *env, int csrno) { CPUState *cs = env_cpu(env); RISCVCPU *cpu = RISCV_CPU(cs); - uint32_t ctr_en = ~0u; if (!cpu->cfg.ext_counters) { /* The Counters extensions is not enabled */ return -1; } - /* - * The counters are always enabled at run time on newer priv specs, as the - * CSR has changed from controlling that the counters can be read to - * controlling that the counters increment. - */ - if (env->priv_ver > PRIV_VERSION_1_09_1) { - return 0; - } - - if (env->priv < PRV_M) { - ctr_en &= env->mcounteren; - } - if (env->priv < PRV_S) { - ctr_en &= env->scounteren; - } - if (!(ctr_en & (1u << (csrno & 31)))) { - return -1; - } return 0; } @@ -554,6 +558,10 @@ static int read_fcsr(CPURISCVState *env, int csrno, target_ulong *val) } *val = (riscv_cpu_get_fflags(env) << FSR_AEXC_SHIFT) | (env->frm << FSR_RD_SHIFT); + if (vs(env, csrno) >= 0) { + *val |= (env->vxrm << FSR_VXRM_SHIFT) + | (env->vxsat << FSR_VXSAT_SHIFT); + } return 0; } @@ -564,10 +572,62 @@ static int write_fcsr(CPURISCVState *env, int csrno, target_ulong val) } env->mstatus |= MSTATUS_FS; env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; + if (vs(env, csrno) >= 0) { + env->vxrm = (val & FSR_VXRM) >> FSR_VXRM_SHIFT; + env->vxsat = (val & FSR_VXSAT) >> FSR_VXSAT_SHIFT; + } riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); return 0; } +static int read_vtype(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vtype; + return 0; +} + +static int read_vl(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vl; + return 0; +} + +static int read_vxrm(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vxrm; + return 0; +} + +static int write_vxrm(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vxrm = val; + return 0; +} + +static int read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vxsat; + return 0; +} + +static int write_vxsat(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vxsat = val; + return 0; +} + +static int read_vstart(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->vstart; + return 0; +} + +static int write_vstart(CPURISCVState *env, int csrno, target_ulong val) +{ + env->vstart = val; + return 0; +} + /* User Timers and Counters */ static int read_instret(CPURISCVState *env, int csrno, target_ulong *val) { @@ -640,9 +700,6 @@ static const target_ulong delegable_excps = (1ULL << (RISCV_EXCP_INST_GUEST_PAGE_FAULT)) | (1ULL << (RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT)) | (1ULL << (RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT)); -static const target_ulong sstatus_v1_9_mask = SSTATUS_SIE | SSTATUS_SPIE | - SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS | - SSTATUS_SUM | SSTATUS_SD; static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE | SSTATUS_UIE | SSTATUS_UPIE | SSTATUS_SPP | SSTATUS_FS | SSTATUS_XS | SSTATUS_SUM | SSTATUS_MXR | SSTATUS_SD; @@ -651,20 +708,11 @@ static const target_ulong hip_writable_mask = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP; static const target_ulong vsip_writable_mask = MIP_VSSIP; #if defined(TARGET_RISCV32) -static const char valid_vm_1_09[16] = { - [VM_1_09_MBARE] = 1, - [VM_1_09_SV32] = 1, -}; static const char valid_vm_1_10[16] = { [VM_1_10_MBARE] = 1, [VM_1_10_SV32] = 1 }; #elif defined(TARGET_RISCV64) -static const char 
valid_vm_1_09[16] = { - [VM_1_09_MBARE] = 1, - [VM_1_09_SV39] = 1, - [VM_1_09_SV48] = 1, -}; static const char valid_vm_1_10[16] = { [VM_1_10_MBARE] = 1, [VM_1_10_SV39] = 1, @@ -694,8 +742,7 @@ static int read_mstatus(CPURISCVState *env, int csrno, target_ulong *val) static int validate_vm(CPURISCVState *env, target_ulong vm) { - return (env->priv_ver >= PRIV_VERSION_1_10_0) ? - valid_vm_1_10[vm & 0xf] : valid_vm_1_09[vm & 0xf]; + return valid_vm_1_10[vm & 0xf]; } static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val) @@ -705,34 +752,21 @@ static int write_mstatus(CPURISCVState *env, int csrno, target_ulong val) int dirty; /* flush tlb on mstatus fields that affect VM */ - if (env->priv_ver <= PRIV_VERSION_1_09_1) { - if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | - MSTATUS_MPRV | MSTATUS_SUM | MSTATUS_VM)) { - tlb_flush(env_cpu(env)); - } - mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | - MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | - MSTATUS_MPP | MSTATUS_MXR | - (validate_vm(env, get_field(val, MSTATUS_VM)) ? - MSTATUS_VM : 0); + if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV | + MSTATUS_MPRV | MSTATUS_SUM)) { + tlb_flush(env_cpu(env)); } - if (env->priv_ver >= PRIV_VERSION_1_10_0) { - if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV | - MSTATUS_MPRV | MSTATUS_SUM)) { - tlb_flush(env_cpu(env)); - } - mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | - MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | - MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR | - MSTATUS_TW; + mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | + MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | + MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR | + MSTATUS_TW; #if defined(TARGET_RISCV64) - /* - * RV32: MPV and MTL are not in mstatus. The current plan is to - * add them to mstatush. For now, we just don't support it. - */ - mask |= MSTATUS_MTL | MSTATUS_MPV; + /* + * RV32: MPV and MTL are not in mstatus. The current plan is to + * add them to mstatush. For now, we just don't support it. 
+ */ + mask |= MSTATUS_MTL | MSTATUS_MPV; #endif - } mstatus = (mstatus & ~mask) | (val & mask); @@ -881,18 +915,12 @@ static int write_mtvec(CPURISCVState *env, int csrno, target_ulong val) static int read_mcounteren(CPURISCVState *env, int csrno, target_ulong *val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } *val = env->mcounteren; return 0; } static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } env->mcounteren = val; return 0; } @@ -900,8 +928,7 @@ static int write_mcounteren(CPURISCVState *env, int csrno, target_ulong val) /* This regiser is replaced with CSR_MCOUNTINHIBIT in 1.11.0 */ static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val) { - if (env->priv_ver > PRIV_VERSION_1_09_1 - && env->priv_ver < PRIV_VERSION_1_11_0) { + if (env->priv_ver < PRIV_VERSION_1_11_0) { return -1; } *val = env->mcounteren; @@ -911,32 +938,13 @@ static int read_mscounteren(CPURISCVState *env, int csrno, target_ulong *val) /* This regiser is replaced with CSR_MCOUNTINHIBIT in 1.11.0 */ static int write_mscounteren(CPURISCVState *env, int csrno, target_ulong val) { - if (env->priv_ver > PRIV_VERSION_1_09_1 - && env->priv_ver < PRIV_VERSION_1_11_0) { + if (env->priv_ver < PRIV_VERSION_1_11_0) { return -1; } env->mcounteren = val; return 0; } -static int read_mucounteren(CPURISCVState *env, int csrno, target_ulong *val) -{ - if (env->priv_ver > PRIV_VERSION_1_09_1) { - return -1; - } - *val = env->scounteren; - return 0; -} - -static int write_mucounteren(CPURISCVState *env, int csrno, target_ulong val) -{ - if (env->priv_ver > PRIV_VERSION_1_09_1) { - return -1; - } - env->scounteren = val; - return 0; -} - /* Machine Trap Handling */ static int read_mscratch(CPURISCVState *env, int csrno, target_ulong *val) { @@ -1010,16 +1018,14 @@ static int rmw_mip(CPURISCVState *env, int csrno, target_ulong *ret_value, /* Supervisor Trap Setup */ static int read_sstatus(CPURISCVState *env, int csrno, target_ulong *val) { - target_ulong mask = ((env->priv_ver >= PRIV_VERSION_1_10_0) ? - sstatus_v1_10_mask : sstatus_v1_9_mask); + target_ulong mask = (sstatus_v1_10_mask); *val = env->mstatus & mask; return 0; } static int write_sstatus(CPURISCVState *env, int csrno, target_ulong val) { - target_ulong mask = ((env->priv_ver >= PRIV_VERSION_1_10_0) ? 
- sstatus_v1_10_mask : sstatus_v1_9_mask); + target_ulong mask = (sstatus_v1_10_mask); target_ulong newval = (env->mstatus & ~mask) | (val & mask); return write_mstatus(env, CSR_MSTATUS, newval); } @@ -1069,18 +1075,12 @@ static int write_stvec(CPURISCVState *env, int csrno, target_ulong val) static int read_scounteren(CPURISCVState *env, int csrno, target_ulong *val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } *val = env->scounteren; return 0; } static int write_scounteren(CPURISCVState *env, int csrno, target_ulong val) { - if (env->priv_ver < PRIV_VERSION_1_10_0) { - return -1; - } env->scounteren = val; return 0; } @@ -1159,15 +1159,15 @@ static int read_satp(CPURISCVState *env, int csrno, target_ulong *val) { if (!riscv_feature(env, RISCV_FEATURE_MMU)) { *val = 0; - } else if (env->priv_ver >= PRIV_VERSION_1_10_0) { - if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { - return -1; - } else { - *val = env->satp; - } + return 0; + } + + if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { + return -1; } else { - *val = env->sptbr; + *val = env->satp; } + return 0; } @@ -1176,13 +1176,7 @@ static int write_satp(CPURISCVState *env, int csrno, target_ulong val) if (!riscv_feature(env, RISCV_FEATURE_MMU)) { return 0; } - if (env->priv_ver <= PRIV_VERSION_1_09_1 && (val ^ env->sptbr)) { - tlb_flush(env_cpu(env)); - env->sptbr = val & (((target_ulong) - 1 << (TARGET_PHYS_ADDR_SPACE_BITS - PGSHIFT)) - 1); - } - if (env->priv_ver >= PRIV_VERSION_1_10_0 && - validate_vm(env, get_field(val, SATP_MODE)) && + if (validate_vm(env, get_field(val, SATP_MODE)) && ((val ^ env->satp) & (SATP_MODE | SATP_ASID | SATP_PPN))) { if (env->priv == PRV_S && get_field(env->mstatus, MSTATUS_TVM)) { diff --git a/qemu/target/riscv/fpu_helper.c b/qemu/target/riscv/fpu_helper.c index 3fb6684b16..4379756dc4 100644 --- a/qemu/target/riscv/fpu_helper.c +++ b/qemu/target/riscv/fpu_helper.c @@ -22,6 +22,7 @@ #include "exec/exec-all.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" +#include "internals.h" target_ulong riscv_cpu_get_fflags(CPURISCVState *env) { @@ -230,21 +231,7 @@ uint64_t helper_fcvt_s_lu(CPURISCVState *env, uint64_t rs1) target_ulong helper_fclass_s(uint64_t frs1) { - float32 f = frs1; - bool sign = float32_is_neg(f); - - if (float32_is_infinity(f)) { - return sign ? 1 << 0 : 1 << 7; - } else if (float32_is_zero(f)) { - return sign ? 1 << 3 : 1 << 4; - } else if (float32_is_zero_or_denormal(f)) { - return sign ? 1 << 2 : 1 << 5; - } else if (float32_is_any_nan(f)) { - float_status s = { 0 }; /* for snan_bit_is_one */ - return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; - } else { - return sign ? 1 << 1 : 1 << 6; - } + return fclass_s(frs1); } uint64_t helper_fadd_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) @@ -353,19 +340,5 @@ uint64_t helper_fcvt_d_lu(CPURISCVState *env, uint64_t rs1) target_ulong helper_fclass_d(uint64_t frs1) { - float64 f = frs1; - bool sign = float64_is_neg(f); - - if (float64_is_infinity(f)) { - return sign ? 1 << 0 : 1 << 7; - } else if (float64_is_zero(f)) { - return sign ? 1 << 3 : 1 << 4; - } else if (float64_is_zero_or_denormal(f)) { - return sign ? 1 << 2 : 1 << 5; - } else if (float64_is_any_nan(f)) { - float_status s = { 0 }; /* for snan_bit_is_one */ - return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; - } else { - return sign ? 
1 << 1 : 1 << 6; - } + return fclass_d(frs1); } diff --git a/qemu/target/riscv/helper.h b/qemu/target/riscv/helper.h index 32e483860f..11b0f57c14 100644 --- a/qemu/target/riscv/helper.h +++ b/qemu/target/riscv/helper.h @@ -78,3 +78,1077 @@ DEF_HELPER_2(sret, tl, env, tl) DEF_HELPER_2(mret, tl, env, tl) DEF_HELPER_1(wfi, void, env) DEF_HELPER_1(tlb_flush, void, env) + +/* Hypervisor functions */ +#ifndef CONFIG_USER_ONLY +DEF_HELPER_1(hyp_tlb_flush, void, env) +#endif + +/* Vector functions */ +DEF_HELPER_3(vsetvl, tl, env, tl, tl) +DEF_HELPER_5(vlb_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlb_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlh_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlw_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vle_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwu_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsb_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_h_mask, void, ptr, ptr, tl, env, i32) 
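Editorial aside (not part of the patch): the vl*/vs* declarations around this point are the unit-stride vector load/store helpers of the v0.7.1 draft spec, one variant per element width plus a _mask form. Conceptually every one of them runs the same element loop over vstart..vl with an optional mask test. The sketch below is only an illustration of that loop; elem_mask(), set_elem() and guest_load() are placeholder names, not the real internals of vector_helper.c.

static void unit_stride_load_sketch(void *vd, void *v0, target_ulong base,
                                    CPURISCVState *env,
                                    uint32_t vl, uint32_t esz, bool vm)
{
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !elem_mask(v0, i)) {
            /* masked-off element: leave vd[i] unchanged and move on */
            continue;
        }
        /* one guest access per active element, unit stride of esz bytes */
        set_elem(vd, i, esz, guest_load(env, base + i * esz));
    }
    env->vstart = 0; /* loop ran to completion, clear the restart point */
}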
+DEF_HELPER_5(vsh_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsh_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vsw_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_b_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_h_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_w_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vse_v_d_mask, void, ptr, ptr, tl, env, i32) +DEF_HELPER_6(vlsb_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsb_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsh_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsw_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsw_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlse_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlsbu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlshu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlswu_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlswu_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssb_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssh_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssw_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vssw_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_b, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_h, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_w, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vsse_v_d, void, ptr, ptr, tl, tl, env, i32) +DEF_HELPER_6(vlxb_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxb_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxh_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_h, void, ptr, 
ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxe_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxbu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxhu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxwu_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vlxwu_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxb_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxh_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsxe_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_5(vlbff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vleff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_b, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlbuff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_h, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlhuff_v_d, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwuff_v_w, void, ptr, ptr, tl, env, i32) +DEF_HELPER_5(vlwuff_v_d, void, ptr, ptr, tl, env, i32) +#ifdef TARGET_RISCV64 +DEF_HELPER_6(vamoswapw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoswapd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxorw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxord_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandd_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoorw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoord_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomind_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxd_v_d, void, ptr, ptr, 
tl, ptr, env, i32) +DEF_HELPER_6(vamominuw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominud_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxuw_v_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxud_v_d, void, ptr, ptr, tl, ptr, env, i32) +#endif +DEF_HELPER_6(vamoswapw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoaddw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoxorw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoandw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamoorw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_FLAGS_4(vec_rsubs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) +DEF_HELPER_FLAGS_4(vec_rsubs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) + +DEF_HELPER_6(vwaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) 
+DEF_HELPER_6(vwadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsubu_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwaddu_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsubu_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwadd_wx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsub_wx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsbc_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) 
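Editorial aside (not part of the patch): vmadc/vmsbc differ from the vadc/vsbc helpers declared just above in that they write a carry/borrow mask, one bit per element, rather than a data vector. A minimal per-element sketch of the carry-out computation for the byte case, assuming <stdint.h>; this is an illustration, not the real generator macro:

static inline uint8_t vmadc_byte_sketch(uint8_t a, uint8_t b, uint8_t carry_in)
{
    /* widen to 16 bits so the carry out of the 8-bit add is visible */
    uint16_t sum = (uint16_t)a + (uint16_t)b + (carry_in & 1);
    return sum > UINT8_MAX; /* this bit becomes mask bit i of the result */
}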
+DEF_HELPER_6(vmsbc_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsbc_vxm_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vand_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vxor_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vand_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vand_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vor_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vxor_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsll_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsll_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsra_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vnsrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_b, void, 
ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnsra_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmseq_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsltu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmslt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsleu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmsle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmseq_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsne_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsltu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmslt_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsleu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsle_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgtu_vx_d, void, ptr, ptr, tl, ptr, env, i32) 
+DEF_HELPER_6(vmsgt_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmsgt_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vminu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmaxu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vminu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vminu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmin_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmaxu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, 
env, i32) +DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vdivu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vremu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrem_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdivu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vdiv_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vremu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrem_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_b, 
void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmulsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmacc_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsac_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnmsub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmerge_vvm_b, void, ptr, ptr, ptr, ptr, env, i32) 
+DEF_HELPER_6(vmerge_vvm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vvm_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmerge_vxm_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_b, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_h, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_w, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_v_d, void, ptr, ptr, env, i32) +DEF_HELPER_4(vmv_v_x_b, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_h, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_w, void, ptr, i64, env, i32) +DEF_HELPER_4(vmv_v_x_d, void, ptr, i64, env, i32) + +DEF_HELPER_6(vsaddu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssubu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsaddu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsadd_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssubu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vaadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vasub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vaadd_vx_d, void, 
ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vasub_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vsmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vsmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vwsmaccu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmacc_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsmaccus_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vssrl_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssra_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssrl_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vssra_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vnclip_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclip_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclip_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vnclipu_vx_b, void, ptr, ptr, tl, ptr, env, 
i32) +DEF_HELPER_6(vnclipu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclipu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vnclip_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vfadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_wv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwsub_wv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_wf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwadd_wf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_wf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwsub_wf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfdiv_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmul_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfrdiv_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmul_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmul_vf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vv_w, void, ptr, 
ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmacc_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsac_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmadd_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) 
+DEF_HELPER_6(vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfsqrt_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfsqrt_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfsqrt_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfmin_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmax_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmin_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmax_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vfsgnj_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnj_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjn_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfsgnjx_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(vmfeq_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfne_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmflt_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfle_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfeq_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_w, void, ptr, ptr, 
i64, ptr, env, i32) +DEF_HELPER_6(vmfne_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmflt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfle_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfgt_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmfge_vf_d, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmford_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_5(vfcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_xu_f_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_x_f_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_xu_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfcvt_f_x_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vfwcvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfwcvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vfncvt_xu_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_xu_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_x_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_x_f_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_xu_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_xu_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32) + 
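Each DEF_HELPER_N line above declares one out-of-line helper that the generated TCG code calls at run time; QEMU's helper machinery turns, for example, DEF_HELPER_6(vand_vv_b, void, ptr, ptr, ptr, ptr, env, i32) into a prototype of the form void helper_vand_vv_b(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t desc). The suffix encodes the operand form: _vv takes two vector sources (ptr), _vx takes a scalar GPR (tl), _vf takes a scalar FP value (i64), and the trailing _b/_h/_w/_d selects the element width. The snippet below is only a rough, self-contained sketch of the per-element loop such a helper performs in vector_helper.c; the function name and the unmasked, fixed-length loop are illustrative simplifications, not the actual macro-generated code.

    #include <stddef.h>
    #include <stdint.h>

    /* Schematic body of a byte-element vand.vv helper: AND each element of
     * vs1 and vs2 into vd.  Masking, tail handling and the desc decoding done
     * by the real helpers are omitted here. */
    static void vand_vv_b_sketch(uint8_t *vd, const uint8_t *vs1,
                                 const uint8_t *vs2, size_t vl)
    {
        for (size_t i = 0; i < vl; i++) {
            vd[i] = vs1[i] & vs2[i];
        }
    }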
+DEF_HELPER_6(vredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmaxu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredminu_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredand_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vwredsumu_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsumu_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsumu_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vmand_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmnand_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmandnot_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmxor_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmor_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmnor_mm, void, ptr, ptr, ptr, ptr, env, i32) 
+DEF_HELPER_6(vmornot_mm, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmxnor_mm, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_4(vmpopc_m, tl, ptr, ptr, env, i32) + +DEF_HELPER_4(vmfirst_m, tl, ptr, ptr, env, i32) + +DEF_HELPER_5(vmsbf_m, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vmsif_m, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vmsof_m, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(viota_m_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(viota_m_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_4(vid_v_b, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_h, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_w, void, ptr, ptr, env, i32) +DEF_HELPER_4(vid_v_d, void, ptr, ptr, env, i32) + +DEF_HELPER_6(vslideup_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslideup_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslidedown_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1up_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vslide1down_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vrgather_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrgather_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32) diff --git a/qemu/target/riscv/insn_trans/trans_privileged.inc.c b/qemu/target/riscv/insn_trans/trans_privileged.inc.c index 05662b21e6..7bfb889d35 100644 --- a/qemu/target/riscv/insn_trans/trans_privileged.inc.c +++ b/qemu/target/riscv/insn_trans/trans_privileged.inc.c @@ -77,57 +77,11 @@ static bool trans_wfi(DisasContext *ctx, arg_wfi *a) static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver >= PRIV_VERSION_1_10_0) { - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - } - return false; + gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); + return true; } static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a) { - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver <= PRIV_VERSION_1_09_1) { - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - } - return false; -} - -static 
bool trans_hfence_gvma(DisasContext *ctx, arg_sfence_vma *a) -{ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver >= PRIV_VERSION_1_10_0 && - has_ext(ctx, RVH)) { - /* Hpervisor extensions exist */ - /* - * if (env->priv == PRV_M || - * (env->priv == PRV_S && - * !riscv_cpu_virt_enabled(env) && - * get_field(ctx->mstatus_fs, MSTATUS_TVM))) { - */ - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - /* } */ - } - return false; -} - -static bool trans_hfence_bvma(DisasContext *ctx, arg_sfence_vma *a) -{ - TCGContext *tcg_ctx = ctx->uc->tcg_ctx; - if (ctx->priv_ver >= PRIV_VERSION_1_10_0 && - has_ext(ctx, RVH)) { - /* Hpervisor extensions exist */ - /* - * if (env->priv == PRV_M || - * (env->priv == PRV_S && - * !riscv_cpu_virt_enabled(env) && - * get_field(ctx->mstatus_fs, MSTATUS_TVM))) { - */ - gen_helper_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); - return true; - /* } */ - } return false; } diff --git a/qemu/target/riscv/insn_trans/trans_rvd.inc.c b/qemu/target/riscv/insn_trans/trans_rvd.inc.c index 2e643d5168..e461146e23 100644 --- a/qemu/target/riscv/insn_trans/trans_rvd.inc.c +++ b/qemu/target/riscv/insn_trans/trans_rvd.inc.c @@ -314,7 +314,7 @@ static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a) TCGContext *tcg_ctx = ctx->uc->tcg_ctx; TCGv t0 = tcg_temp_new(tcg_ctx); - gen_helper_fclass_d(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); + glue(gen_helper_fclass_d, UNICORN_ARCH_POSTFIX)(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); gen_set_gpr(tcg_ctx, a->rd, t0); tcg_temp_free(tcg_ctx, t0); return true; diff --git a/qemu/target/riscv/insn_trans/trans_rvf.inc.c b/qemu/target/riscv/insn_trans/trans_rvf.inc.c index de044bfeb9..b4fd677b23 100644 --- a/qemu/target/riscv/insn_trans/trans_rvf.inc.c +++ b/qemu/target/riscv/insn_trans/trans_rvf.inc.c @@ -23,6 +23,21 @@ return false; \ } while (0) +/* + * RISC-V requires NaN-boxing of narrower width floating + * point values. This applies when a 32-bit value is + * assigned to a 64-bit FP register. Thus this does not + * apply when the RVD extension is not present. + */ +static void gen_nanbox_fpr(DisasContext *ctx, int regno) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + if (has_ext(ctx, RVD)) { + tcg_gen_ori_i64(tcg_ctx, tcg_ctx->cpu_fpr[regno], tcg_ctx->cpu_fpr[regno], + MAKE_64BIT_MASK(32, 32)); + } +} + static bool trans_flw(DisasContext *ctx, arg_flw *a) { TCGContext *tcg_ctx = ctx->uc->tcg_ctx; @@ -33,8 +48,7 @@ static bool trans_flw(DisasContext *ctx, arg_flw *a) tcg_gen_addi_tl(tcg_ctx, t0, t0, a->imm); tcg_gen_qemu_ld_i64(tcg_ctx, tcg_ctx->cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEUL); - /* RISC-V requires NaN-boxing of narrower width floating point values */ - tcg_gen_ori_i64(tcg_ctx, tcg_ctx->cpu_fpr[a->rd], tcg_ctx->cpu_fpr[a->rd], 0xffffffff00000000ULL); + gen_nanbox_fpr(ctx, a->rd); tcg_temp_free(tcg_ctx, t0); mark_fs_dirty(ctx); @@ -343,7 +357,7 @@ static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) TCGv t0 = tcg_temp_new(tcg_ctx); - gen_helper_fclass_s(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); + glue(gen_helper_fclass_s, UNICORN_ARCH_POSTFIX)(tcg_ctx, t0, tcg_ctx->cpu_fpr[a->rs1]); gen_set_gpr(tcg_ctx, a->rd, t0); tcg_temp_free(tcg_ctx, t0); diff --git a/qemu/target/riscv/insn_trans/trans_rvh.inc.c b/qemu/target/riscv/insn_trans/trans_rvh.inc.c new file mode 100644 index 0000000000..c238510e4f --- /dev/null +++ b/qemu/target/riscv/insn_trans/trans_rvh.inc.c @@ -0,0 +1,33 @@ +/* + * RISC-V translation routines for the RVXI Base Integer Instruction Set. 
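For reference on the NaN-boxing rule that gen_nanbox_fpr above implements: when only a 32-bit value is written into a 64-bit FP register, the upper 32 bits are set to all ones, which is exactly MAKE_64BIT_MASK(32, 32) = 0xffffffff00000000. The standalone program below only illustrates the resulting bit pattern and uses no QEMU code:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        float f = 1.0f;                  /* single-precision value loaded by FLW */
        uint32_t bits;
        memcpy(&bits, &f, sizeof(bits));

        /* NaN-box: low half holds the FP32 bits, high half is all ones. */
        uint64_t boxed = (uint64_t)bits | 0xffffffff00000000ULL;
        printf("0x%016" PRIx64 "\n", boxed);  /* prints 0xffffffff3f800000 */
        return 0;
    }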
+ * + * Copyright (c) 2020 Western Digital + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +static bool trans_hfence_gvma(DisasContext *ctx, arg_sfence_vma *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + REQUIRE_EXT(ctx, RVH); + gen_helper_hyp_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); + return true; +} + +static bool trans_hfence_vvma(DisasContext *ctx, arg_sfence_vma *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + REQUIRE_EXT(ctx, RVH); + gen_helper_hyp_tlb_flush(tcg_ctx, tcg_ctx->cpu_env); + return true; +} diff --git a/qemu/target/riscv/insn_trans/trans_rvv.inc.c b/qemu/target/riscv/insn_trans/trans_rvv.inc.c new file mode 100644 index 0000000000..40b74f11ce --- /dev/null +++ b/qemu/target/riscv/insn_trans/trans_rvv.inc.c @@ -0,0 +1,2954 @@ +/* + * RISC-V translation routines for the RVV Standard Extension. + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include "tcg/tcg-op-gvec.h" +#include "tcg/tcg-gvec-desc.h" +#include "internals.h" + +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv s1, s2, dst; + + if (!has_ext(ctx, RVV)) { + return false; + } + + s2 = tcg_temp_new(tcg_ctx); + dst = tcg_temp_new(tcg_ctx); + + /* Using x0 as the rs1 register specifier encodes an infinite AVL */ + if (a->rs1 == 0) { + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ + s1 = tcg_const_tl(tcg_ctx, RV_VLEN_MAX); + } else { + s1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, s1, a->rs1); + } + gen_get_gpr(tcg_ctx, s2, a->rs2); + gen_helper_vsetvl(tcg_ctx, dst, tcg_ctx->cpu_env, s1, s2); + gen_set_gpr(tcg_ctx, a->rd, dst); + tcg_gen_movi_tl(tcg_ctx, tcg_ctx->cpu_pc, ctx->pc_succ_insn); + lookup_and_goto_ptr(ctx); + ctx->base.is_jmp = DISAS_NORETURN; + + tcg_temp_free(tcg_ctx, s1); + tcg_temp_free(tcg_ctx, s2); + tcg_temp_free(tcg_ctx, dst); + return true; +} + +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a) +{ + TCGContext *tcg_ctx = ctx->uc->tcg_ctx; + TCGv s1, s2, dst; + + if (!has_ext(ctx, RVV)) { + return false; + } + + s2 = tcg_const_tl(tcg_ctx, a->zimm); + dst = tcg_temp_new(tcg_ctx); + + /* Using x0 as the rs1 register specifier encodes an infinite AVL */ + if (a->rs1 == 0) { + /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ + s1 = tcg_const_tl(tcg_ctx, RV_VLEN_MAX); + } else { + s1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, s1, a->rs1); + } + gen_helper_vsetvl(tcg_ctx, dst, tcg_ctx->cpu_env, s1, s2); + gen_set_gpr(tcg_ctx, a->rd, dst); + gen_goto_tb(ctx, 0, ctx->pc_succ_insn); + ctx->base.is_jmp = DISAS_NORETURN; + + tcg_temp_free(tcg_ctx, s1); + tcg_temp_free(tcg_ctx, s2); + tcg_temp_free(tcg_ctx, dst); + return true; +} + +/* vector register offset from env */ +static uint32_t vreg_ofs(DisasContext *s, int reg) +{ + return offsetof(CPURISCVState, vreg) + reg * s->vlen / 8; +} + +/* check functions */ + +/* + * In cpu_get_tb_cpu_state(), set VILL if RVV was not present. + * So RVV is also checked in this function. + */ +static bool vext_check_isa_ill(DisasContext *s) +{ + return !s->vill; +} + +/* + * There are two rules checked here. + * + * 1. Vector register numbers are multiples of LMUL. (Section 3.2) + * + * 2. For all widening instructions, the destination LMUL value must also be + * a supported LMUL value. (Section 11.2) + */ +static bool vext_check_reg(DisasContext *s, uint32_t reg, bool widen) +{ + /* + * The destination vector register group results are arranged as if both + * SEW and LMUL were at twice their current settings. (Section 11.2). + */ + int legal = widen ? 2 << s->lmul : 1 << s->lmul; + + return !((s->lmul == 0x3 && widen) || (reg % legal)); +} + +/* + * There are two rules checked here. + * + * 1. The destination vector register group for a masked vector instruction can + * only overlap the source mask register (v0) when LMUL=1. (Section 5.3) + * + * 2. In widening instructions and some other instructions, like vslideup.vx, + * there is no need to check whether LMUL=1. + */ +static bool vext_check_overlap_mask(DisasContext *s, uint32_t vd, bool vm, + bool force) +{ + return (vm != 0 || vd != 0) || (!force && (s->lmul == 0)); +} + +/* The LMUL setting must be such that LMUL * NFIELDS <= 8. 
(Section 7.8) */ +static bool vext_check_nf(DisasContext *s, uint32_t nf) +{ + return (1 << s->lmul) * nf <= 8; +} + +/* + * The destination vector register group cannot overlap a source vector register + * group of a different element width. (Section 11.2) + */ +static inline bool vext_check_overlap_group(int rd, int dlen, int rs, int slen) +{ + return ((rd >= rs + slen) || (rs >= rd + dlen)); +} +/* common translation macro */ +#define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\ +{ \ + if (CHECK(s, a)) { \ + return OP(s, a, SEQ); \ + } \ + return false; \ +} + +/* + *** unit stride load and store + */ +typedef void gen_helper_ldst_us(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, + TCGv_env, TCGv_i32); + +static bool ldst_us_trans(TCGContext *tcg_ctx, uint32_t vd, uint32_t rs1, uint32_t data, + gen_helper_ldst_us *fn, DisasContext *s) +{ + TCGv_ptr dest, mask; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + + /* + * As simd_desc supports at most 256 bytes, and in this implementation, + * the max vector group length is 2048 bytes. So split it into two parts. + * + * The first part is vlen in bytes, encoded in maxsz of simd_desc. + * The second part is lmul, encoded in data of simd_desc. + */ + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[2][7][4] = { + /* masked unit stride load */ + { { gen_helper_vlb_v_b_mask, gen_helper_vlb_v_h_mask, + gen_helper_vlb_v_w_mask, gen_helper_vlb_v_d_mask }, + { NULL, gen_helper_vlh_v_h_mask, + gen_helper_vlh_v_w_mask, gen_helper_vlh_v_d_mask }, + { NULL, NULL, + gen_helper_vlw_v_w_mask, gen_helper_vlw_v_d_mask }, + { gen_helper_vle_v_b_mask, gen_helper_vle_v_h_mask, + gen_helper_vle_v_w_mask, gen_helper_vle_v_d_mask }, + { gen_helper_vlbu_v_b_mask, gen_helper_vlbu_v_h_mask, + gen_helper_vlbu_v_w_mask, gen_helper_vlbu_v_d_mask }, + { NULL, gen_helper_vlhu_v_h_mask, + gen_helper_vlhu_v_w_mask, gen_helper_vlhu_v_d_mask }, + { NULL, NULL, + gen_helper_vlwu_v_w_mask, gen_helper_vlwu_v_d_mask } }, + /* unmasked unit stride load */ + { { gen_helper_vlb_v_b, gen_helper_vlb_v_h, + gen_helper_vlb_v_w, gen_helper_vlb_v_d }, + { NULL, gen_helper_vlh_v_h, + gen_helper_vlh_v_w, gen_helper_vlh_v_d }, + { NULL, NULL, + gen_helper_vlw_v_w, gen_helper_vlw_v_d }, + { gen_helper_vle_v_b, gen_helper_vle_v_h, + gen_helper_vle_v_w, gen_helper_vle_v_d }, + { gen_helper_vlbu_v_b, gen_helper_vlbu_v_h, + gen_helper_vlbu_v_w, gen_helper_vlbu_v_d }, + { NULL, gen_helper_vlhu_v_h, + gen_helper_vlhu_v_w, gen_helper_vlhu_v_d }, + { NULL, NULL, + gen_helper_vlwu_v_w, gen_helper_vlwu_v_d } } + }; + + fn = fns[a->vm][seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, 
MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_us_trans(tcg_ctx, a->rd, a->rs1, data, fn, s); +} + +static bool ld_us_check(DisasContext *s, arg_r2nfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vlb_v, 0, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlh_v, 1, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlw_v, 2, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vle_v, 3, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlbu_v, 4, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlhu_v, 5, r2nfvm, ld_us_op, ld_us_check) +GEN_VEXT_TRANS(vlwu_v, 6, r2nfvm, ld_us_op, ld_us_check) + +static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[2][4][4] = { + /* masked unit stride load and store */ + { { gen_helper_vsb_v_b_mask, gen_helper_vsb_v_h_mask, + gen_helper_vsb_v_w_mask, gen_helper_vsb_v_d_mask }, + { NULL, gen_helper_vsh_v_h_mask, + gen_helper_vsh_v_w_mask, gen_helper_vsh_v_d_mask }, + { NULL, NULL, + gen_helper_vsw_v_w_mask, gen_helper_vsw_v_d_mask }, + { gen_helper_vse_v_b_mask, gen_helper_vse_v_h_mask, + gen_helper_vse_v_w_mask, gen_helper_vse_v_d_mask } }, + /* unmasked unit stride store */ + { { gen_helper_vsb_v_b, gen_helper_vsb_v_h, + gen_helper_vsb_v_w, gen_helper_vsb_v_d }, + { NULL, gen_helper_vsh_v_h, + gen_helper_vsh_v_w, gen_helper_vsh_v_d }, + { NULL, NULL, + gen_helper_vsw_v_w, gen_helper_vsw_v_d }, + { gen_helper_vse_v_b, gen_helper_vse_v_h, + gen_helper_vse_v_w, gen_helper_vse_v_d } } + }; + + fn = fns[a->vm][seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_us_trans(tcg_ctx, a->rd, a->rs1, data, fn, s); +} + +static bool st_us_check(DisasContext *s, arg_r2nfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vsb_v, 0, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vsh_v, 1, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vsw_v, 2, r2nfvm, st_us_op, st_us_check) +GEN_VEXT_TRANS(vse_v, 3, r2nfvm, st_us_op, st_us_check) + +/* + *** stride load and store + */ +typedef void gen_helper_ldst_stride(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, + TCGv, TCGv_env, TCGv_i32); + +static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, + uint32_t data, gen_helper_ldst_stride *fn, + DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask; + TCGv base, stride; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + stride = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + gen_get_gpr(tcg_ctx, stride, rs2); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, stride, tcg_ctx->cpu_env, desc); + + 
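/*
 * How the desc word built above is laid out (the accessor names are the
 * standard tcg-gvec-desc.h ones; the VDATA field layout is the one this
 * patch defines): simd_desc(0, s->vlen / 8, data) places VLEN in bytes in
 * the maxsz field and the VDATA bitfield -- MLEN, VM, LMUL and NF packed by
 * the FIELD_DP32() calls -- in the data field.  On the helper side they are
 * recovered with simd_maxsz(desc) and simd_data(desc) before the individual
 * VDATA fields are extracted again.
 */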
tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free(tcg_ctx, stride); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * const fns[7][4] = { + { gen_helper_vlsb_v_b, gen_helper_vlsb_v_h, + gen_helper_vlsb_v_w, gen_helper_vlsb_v_d }, + { NULL, gen_helper_vlsh_v_h, + gen_helper_vlsh_v_w, gen_helper_vlsh_v_d }, + { NULL, NULL, + gen_helper_vlsw_v_w, gen_helper_vlsw_v_d }, + { gen_helper_vlse_v_b, gen_helper_vlse_v_h, + gen_helper_vlse_v_w, gen_helper_vlse_v_d }, + { gen_helper_vlsbu_v_b, gen_helper_vlsbu_v_h, + gen_helper_vlsbu_v_w, gen_helper_vlsbu_v_d }, + { NULL, gen_helper_vlshu_v_h, + gen_helper_vlshu_v_w, gen_helper_vlshu_v_d }, + { NULL, NULL, + gen_helper_vlswu_v_w, gen_helper_vlswu_v_d }, + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool ld_stride_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vlsb_v, 0, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsh_v, 1, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsw_v, 2, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlse_v, 3, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlsbu_v, 4, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlshu_v, 5, rnfvm, ld_stride_op, ld_stride_check) +GEN_VEXT_TRANS(vlswu_v, 6, rnfvm, ld_stride_op, ld_stride_check) + +static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride * const fns[4][4] = { + /* masked stride store */ + { gen_helper_vssb_v_b, gen_helper_vssb_v_h, + gen_helper_vssb_v_w, gen_helper_vssb_v_d }, + { NULL, gen_helper_vssh_v_h, + gen_helper_vssh_v_w, gen_helper_vssh_v_d }, + { NULL, NULL, + gen_helper_vssw_v_w, gen_helper_vssw_v_d }, + { gen_helper_vsse_v_b, gen_helper_vsse_v_h, + gen_helper_vsse_v_w, gen_helper_vsse_v_d } + }; + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool st_stride_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check) +GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check) + +/* + *** index load and store + */ +typedef void gen_helper_ldst_index(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, + TCGv_ptr, TCGv_env, TCGv_i32); + +static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_ldst_index *fn, + DisasContext *s) +{ + TCGContext 
*tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask, index; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + index = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, index, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, index, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, index); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_index *fn; + static gen_helper_ldst_index * const fns[7][4] = { + { gen_helper_vlxb_v_b, gen_helper_vlxb_v_h, + gen_helper_vlxb_v_w, gen_helper_vlxb_v_d }, + { NULL, gen_helper_vlxh_v_h, + gen_helper_vlxh_v_w, gen_helper_vlxh_v_d }, + { NULL, NULL, + gen_helper_vlxw_v_w, gen_helper_vlxw_v_d }, + { gen_helper_vlxe_v_b, gen_helper_vlxe_v_h, + gen_helper_vlxe_v_w, gen_helper_vlxe_v_d }, + { gen_helper_vlxbu_v_b, gen_helper_vlxbu_v_h, + gen_helper_vlxbu_v_w, gen_helper_vlxbu_v_d }, + { NULL, gen_helper_vlxhu_v_h, + gen_helper_vlxhu_v_w, gen_helper_vlxhu_v_d }, + { NULL, NULL, + gen_helper_vlxwu_v_w, gen_helper_vlxwu_v_d }, + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +/* + * For vector indexed segment loads, the destination vector register + * groups cannot overlap the source vector register group (specified by + * `vs2`), else an illegal instruction exception is raised. 
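+ * For example (illustrative), with NF = 2 and LMUL = 2 the destination group
+ * spans four registers and must not overlap the two registers of the vs2
+ * group; the check below is skipped entirely for NF = 1.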
+ */ +static bool ld_index_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_nf(s, a->nf) && + ((a->nf == 1) || + vext_check_overlap_group(a->rd, a->nf << s->lmul, + a->rs2, 1 << s->lmul))); +} + +GEN_VEXT_TRANS(vlxb_v, 0, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxh_v, 1, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxw_v, 2, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxe_v, 3, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxbu_v, 4, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxhu_v, 5, rnfvm, ld_index_op, ld_index_check) +GEN_VEXT_TRANS(vlxwu_v, 6, rnfvm, ld_index_op, ld_index_check) + +static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_index *fn; + static gen_helper_ldst_index * const fns[4][4] = { + { gen_helper_vsxb_v_b, gen_helper_vsxb_v_h, + gen_helper_vsxb_v_w, gen_helper_vsxb_v_d }, + { NULL, gen_helper_vsxh_v_h, + gen_helper_vsxh_v_w, gen_helper_vsxh_v_d }, + { NULL, NULL, + gen_helper_vsxw_v_w, gen_helper_vsxw_v_d }, + { gen_helper_vsxe_v_b, gen_helper_vsxe_v_h, + gen_helper_vsxe_v_w, gen_helper_vsxe_v_d } + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} + +static bool st_index_check(DisasContext *s, arg_rnfvm* a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_nf(s, a->nf)); +} + +GEN_VEXT_TRANS(vsxb_v, 0, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxh_v, 1, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxw_v, 2, rnfvm, st_index_op, st_index_check) +GEN_VEXT_TRANS(vsxe_v, 3, rnfvm, st_index_op, st_index_check) + +/* + *** unit stride fault-only-first load + */ +static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, + gen_helper_ldst_us *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq) +{ + uint32_t data = 0; + gen_helper_ldst_us *fn; + static gen_helper_ldst_us * const fns[7][4] = { + { gen_helper_vlbff_v_b, gen_helper_vlbff_v_h, + gen_helper_vlbff_v_w, gen_helper_vlbff_v_d }, + { NULL, gen_helper_vlhff_v_h, + gen_helper_vlhff_v_w, gen_helper_vlhff_v_d }, + { NULL, NULL, + gen_helper_vlwff_v_w, gen_helper_vlwff_v_d }, + { gen_helper_vleff_v_b, gen_helper_vleff_v_h, + gen_helper_vleff_v_w, 
gen_helper_vleff_v_d }, + { gen_helper_vlbuff_v_b, gen_helper_vlbuff_v_h, + gen_helper_vlbuff_v_w, gen_helper_vlbuff_v_d }, + { NULL, gen_helper_vlhuff_v_h, + gen_helper_vlhuff_v_w, gen_helper_vlhuff_v_d }, + { NULL, NULL, + gen_helper_vlwuff_v_w, gen_helper_vlwuff_v_d } + }; + + fn = fns[seq][s->sew]; + if (fn == NULL) { + return false; + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, NF, a->nf, data); + return ldff_trans(a->rd, a->rs1, data, fn, s); +} + +GEN_VEXT_TRANS(vlbff_v, 0, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlhff_v, 1, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlwff_v, 2, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vleff_v, 3, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlbuff_v, 4, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlhuff_v, 5, r2nfvm, ldff_op, ld_us_check) +GEN_VEXT_TRANS(vlwuff_v, 6, r2nfvm, ldff_op, ld_us_check) + +/* + *** vector atomic operation + */ +typedef void gen_helper_amo(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool amo_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_amo *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, mask, index; + TCGv base; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + index = tcg_temp_new_ptr(tcg_ctx); + base = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + gen_get_gpr(tcg_ctx, base, rs1); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, index, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, base, index, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, index); + tcg_temp_free(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool amo_op(DisasContext *s, arg_rwdvm *a, uint8_t seq) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + uint32_t data = 0; + gen_helper_amo *fn; + static gen_helper_amo *const fnsw[9] = { + /* no atomic operation */ + gen_helper_vamoswapw_v_w, + gen_helper_vamoaddw_v_w, + gen_helper_vamoxorw_v_w, + gen_helper_vamoandw_v_w, + gen_helper_vamoorw_v_w, + gen_helper_vamominw_v_w, + gen_helper_vamomaxw_v_w, + gen_helper_vamominuw_v_w, + gen_helper_vamomaxuw_v_w + }; +#ifdef TARGET_RISCV64 + static gen_helper_amo *const fnsd[18] = { + gen_helper_vamoswapw_v_d, + gen_helper_vamoaddw_v_d, + gen_helper_vamoxorw_v_d, + gen_helper_vamoandw_v_d, + gen_helper_vamoorw_v_d, + gen_helper_vamominw_v_d, + gen_helper_vamomaxw_v_d, + gen_helper_vamominuw_v_d, + gen_helper_vamomaxuw_v_d, + gen_helper_vamoswapd_v_d, + gen_helper_vamoaddd_v_d, + gen_helper_vamoxord_v_d, + gen_helper_vamoandd_v_d, + gen_helper_vamoord_v_d, + gen_helper_vamomind_v_d, + gen_helper_vamomaxd_v_d, + gen_helper_vamominud_v_d, + gen_helper_vamomaxud_v_d + }; +#endif + + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_exit_atomic(tcg_ctx, tcg_ctx->cpu_env); + s->base.is_jmp = DISAS_NORETURN; + return true; + } else { + if (s->sew == 3) { +#ifdef TARGET_RISCV64 + fn = fnsd[seq]; +#else + /* Check done in amo_check(). 
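+ amo_check() only accepts SEW values no wider than XLEN, so on a 32-bit
+ target sew == 3 can never reach this point.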
*/ + g_assert_not_reached(); +#endif + } else { + assert(seq < ARRAY_SIZE(fnsw)); + fn = fnsw[seq]; + } + } + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + FIELD_DP32(data, VDATA, WD, a->wd, data); + return amo_trans(a->rd, a->rs1, a->rs2, data, fn, s); +} +/* + * There are two rules check here. + * + * 1. SEW must be at least as wide as the AMO memory element size. + * + * 2. If SEW is greater than XLEN, an illegal instruction exception is raised. + */ +static bool amo_check(DisasContext *s, arg_rwdvm* a) +{ + return (!s->vill && has_ext(s, RVA) && + (!a->wd || vext_check_overlap_mask(s, a->rd, a->vm, false)) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + ((1 << s->sew) <= sizeof(target_ulong)) && + ((1 << s->sew) >= 4)); +} + +GEN_VEXT_TRANS(vamoswapw_v, 0, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoaddw_v, 1, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoxorw_v, 2, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoandw_v, 3, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoorw_v, 4, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominw_v, 5, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxw_v, 6, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominuw_v, 7, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxuw_v, 8, rwdvm, amo_op, amo_check) +#ifdef TARGET_RISCV64 +GEN_VEXT_TRANS(vamoswapd_v, 9, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoaddd_v, 10, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoxord_v, 11, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoandd_v, 12, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamoord_v, 13, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomind_v, 14, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check) +GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check) +#endif + +/* + *** Vector Integer Arithmetic Instructions + */ +#define MAXSZ(s) (s->vlen >> (3 - s->lmul)) + +static bool opivv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false)); +} + +typedef void GVecGen3Fn(TCGContext *, unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t); + +static inline bool +do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, + gen_helper_gvec_4_ptr *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGLabel *over = gen_new_label(tcg_ctx); + if (!opivv_check(s, a)) { + return false; + } + + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + if (a->vm && s->vl_eq_vlmax) { + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1), + MAXSZ(s), MAXSZ(s)); + } else { + uint32_t data = 0; + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fn); + } + gen_set_label(tcg_ctx, over); + return true; +} + +/* OPIVV with GVEC IR */ +#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ 
+ return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVV_GVEC_TRANS(vadd_vv, add) +GEN_OPIVV_GVEC_TRANS(vsub_vv, sub) + +typedef void gen_helper_opivx(TCGContext *, TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + src1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, src1, rs1); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, src1, src2, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, src1); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool opivx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false)); +} + +typedef void GVecGen2sFn(TCGContext *, unsigned, uint32_t, uint32_t, TCGv_i64, + uint32_t, uint32_t); + +static inline bool +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, + gen_helper_opivx *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + TCGv_i64 src1 = tcg_temp_new_i64(tcg_ctx); + TCGv tmp = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, tmp, a->rs1); + tcg_gen_ext_tl_i64(tcg_ctx, src1, tmp); + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + src1, MAXSZ(s), MAXSZ(s)); + + tcg_temp_free_i64(tcg_ctx, src1); + tcg_temp_free(tcg_ctx, tmp); + return true; + } + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); +} + +/* OPIVX with GVEC IR */ +#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVX_GVEC_TRANS(vadd_vx, adds) +GEN_OPIVX_GVEC_TRANS(vsub_vx, subs) + +static void gen_vec_rsub8_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_vec_sub8_i64(tcg_ctx, d, b, a); +} + +static void gen_vec_rsub16_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_vec_sub16_i64(tcg_ctx, d, b, a); +} + +static void gen_rsub_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_sub_i32(tcg_ctx, ret, arg2, arg1); +} + +static void gen_rsub_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_sub_i64(tcg_ctx, ret, arg2, arg1); +} + +static void gen_rsub_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, 
TCGv_vec a, TCGv_vec b) +{ + tcg_gen_sub_vec(tcg_ctx, vece, r, b, a); +} + +static void tcg_gen_gvec_rsubs(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; + static const GVecGen2s rsub_op[4] = { + { .fni8 = gen_vec_rsub8_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs8, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_vec_rsub16_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs16, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_rsub_i32, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs32, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_rsub_i64, + .fniv = gen_rsub_vec, + .fno = gen_helper_vec_rsubs64, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_2s(tcg_ctx, dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]); +} + +GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs) + +static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, + gen_helper_opivx *fn, DisasContext *s, int zx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, src2, mask; + TCGv src1; + TCGv_i32 desc; + uint32_t data = 0; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + if (zx) { + src1 = tcg_const_tl(tcg_ctx, imm); + } else { + src1 = tcg_const_tl(tcg_ctx, sextract64(imm, 0, 5)); + } + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, src1, src2, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, src1); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +typedef void GVecGen2iFn(TCGContext *, unsigned, uint32_t, uint32_t, int64_t, + uint32_t, uint32_t); + +static inline bool +do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, + gen_helper_opivx *fn, int zx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + if (zx) { + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } else { + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s)); + } + } else { + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx); + } + return true; +} + +/* OPIVI with GVEC IR */ +#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ + fns[s->sew], ZX); \ +} + +GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi) + +static void tcg_gen_gvec_rsubi(TCGContext *tcg_ctx, unsigned 
vece, uint32_t dofs, uint32_t aofs, + int64_t c, uint32_t oprsz, uint32_t maxsz) +{ + TCGv_i64 tmp = tcg_const_i64(tcg_ctx, c); + tcg_gen_gvec_rsubs(tcg_ctx, vece, dofs, aofs, tmp, oprsz, maxsz); + tcg_temp_free_i64(tcg_ctx, tmp); +} + +GEN_OPIVI_GVEC_TRANS(vrsub_vi, 0, vrsub_vx, rsubi) + +/* Vector Widening Integer Add/Subtract */ + +/* OPIVV with WIDEN */ +static bool opivv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, + gen_helper_gvec_4_ptr *fn, + bool (*checkfn)(DisasContext *, arg_rmrr *)) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (checkfn(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, + data, fn); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opivv_widen(s, a, fns[s->sew], CHECK); \ +} + +GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check) + +/* OPIVX with WIDEN */ +static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opivx_widen(DisasContext *s, arg_rmrr *a, + gen_helper_opivx *fn) +{ + if (opivx_widen_check(s, a)) { + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); + } + return false; +} + +#define GEN_OPIVX_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opivx_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIVX_WIDEN_TRANS(vwaddu_vx) +GEN_OPIVX_WIDEN_TRANS(vwadd_vx) +GEN_OPIVX_WIDEN_TRANS(vwsubu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsub_vx) + +/* WIDEN OPIVV with WIDEN */ +static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, + gen_helper_gvec_4_ptr *fn) +{ + TCGContext 
*tcg_ctx = s->uc->tcg_ctx; + if (opiwv_widen_check(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fn); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +#define GEN_OPIWV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opiwv_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIWV_WIDEN_TRANS(vwaddu_wv) +GEN_OPIWV_WIDEN_TRANS(vwadd_wv) +GEN_OPIWV_WIDEN_TRANS(vwsubu_wv) +GEN_OPIWV_WIDEN_TRANS(vwsub_wv) + +/* WIDEN OPIVX with WIDEN */ +static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a, + gen_helper_opivx *fn) +{ + if (opiwx_widen_check(s, a)) { + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); + } + return false; +} + +#define GEN_OPIWX_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return do_opiwx_widen(s, a, fns[s->sew]); \ +} + +GEN_OPIWX_WIDEN_TRANS(vwaddu_wx) +GEN_OPIWX_WIDEN_TRANS(vwadd_wx) +GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) +GEN_OPIWX_WIDEN_TRANS(vwsub_wx) + +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ +/* OPIVV without GVEC IR */ +#define GEN_OPIVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +/* + * For vadc and vsbc, an illegal instruction exception is raised if the + * destination vector register is v0 and LMUL > 1. (Section 12.3) + */ +static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + ((a->rd != 0) || (s->lmul == 0))); +} + +GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check) +GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check) + +/* + * For vmadc and vmsbc, an illegal instruction exception is raised if the + * destination vector register overlaps a source vector register group. 
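+ * The carry-out result is a mask and always fits in a single register, which
+ * is why the checks below pass a destination length of 1 to
+ * vext_check_overlap_group().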
+ */ +static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); +} + +GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check) +GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check) + +static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + ((a->rd != 0) || (s->lmul == 0))); +} + +/* OPIVX without GVEC IR */ +#define GEN_OPIVX_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ +} + +GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check) +GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check) + +static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)); +} + +GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check) +GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check) + +/* OPIVI without GVEC IR */ +#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} + +GEN_OPIVI_TRANS(vadc_vim, 0, vadc_vxm, opivx_vadc_check) +GEN_OPIVI_TRANS(vmadc_vim, 0, vmadc_vxm, opivx_vmadc_check) + +/* Vector Bitwise Logical Instructions */ +GEN_OPIVV_GVEC_TRANS(vand_vv, and) +GEN_OPIVV_GVEC_TRANS(vor_vv, or) +GEN_OPIVV_GVEC_TRANS(vxor_vv, xor) +GEN_OPIVX_GVEC_TRANS(vand_vx, ands) +GEN_OPIVX_GVEC_TRANS(vor_vx, ors) +GEN_OPIVX_GVEC_TRANS(vxor_vx, xors) +GEN_OPIVI_GVEC_TRANS(vand_vi, 0, vand_vx, andi) +GEN_OPIVI_GVEC_TRANS(vor_vi, 0, vor_vx, ori) +GEN_OPIVI_GVEC_TRANS(vxor_vi, 0, vxor_vx, xori) + +/* Vector Single-Width Bit Shift Instructions */ +GEN_OPIVV_GVEC_TRANS(vsll_vv, shlv) +GEN_OPIVV_GVEC_TRANS(vsrl_vv, shrv) +GEN_OPIVV_GVEC_TRANS(vsra_vv, sarv) + +typedef void GVecGen2sFn32(TCGContext *, unsigned, uint32_t, uint32_t, TCGv_i32, + uint32_t, uint32_t); + +static inline bool +do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, + gen_helper_opivx *fn) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!opivx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + TCGv_i32 src1 = tcg_temp_new_i32(tcg_ctx); + TCGv tmp = tcg_temp_new(tcg_ctx); + + gen_get_gpr(tcg_ctx, tmp, a->rs1); + tcg_gen_trunc_tl_i32(tcg_ctx, src1, tmp); + tcg_gen_extract_i32(tcg_ctx, src1, src1, 0, s->sew + 3); + gvec_fn(tcg_ctx, s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), + src1, MAXSZ(s), MAXSZ(s)); + + tcg_temp_free_i32(tcg_ctx, src1); + tcg_temp_free(tcg_ctx, tmp); + return true; + } + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); +} + +#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + static gen_helper_opivx * const fns[4] = { \ + gen_helper_##NAME##_b, 
gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + \ + return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ +} + +GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx, shls) +GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx, shrs) +GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars) + +GEN_OPIVI_GVEC_TRANS(vsll_vi, 1, vsll_vx, shli) +GEN_OPIVI_GVEC_TRANS(vsrl_vi, 1, vsrl_vx, shri) +GEN_OPIVI_GVEC_TRANS(vsra_vi, 1, vsra_vx, sari) + +/* Vector Narrowing Integer Right Shift Instructions */ +static bool opivv_narrow_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +/* OPIVV with NARROW */ +#define GEN_OPIVV_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opivv_narrow_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} +GEN_OPIVV_NARROW_TRANS(vnsra_vv) +GEN_OPIVV_NARROW_TRANS(vnsrl_vv) + +static bool opivx_narrow_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3)); +} + +/* OPIVX with NARROW */ +#define GEN_OPIVX_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opivx_narrow_check(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ +} + +GEN_OPIVX_NARROW_TRANS(vnsra_vx) +GEN_OPIVX_NARROW_TRANS(vnsrl_vx) + +/* OPIVI with NARROW */ +#define GEN_OPIVI_NARROW_TRANS(NAME, ZX, OPIVX) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opivx_narrow_check(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##OPIVX##_b, \ + gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ + fns[s->sew], s, ZX); \ + } \ + return false; \ +} + +GEN_OPIVI_NARROW_TRANS(vnsra_vi, 1, vnsra_vx) +GEN_OPIVI_NARROW_TRANS(vnsrl_vi, 1, vnsrl_vx) + +/* Vector Integer Comparison Instructions */ +/* + * For all comparison instructions, an illegal instruction exception is raised + * if the destination vector register overlaps a source vector register group + * and LMUL > 1. 
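+ * The comparison result is a mask held in a single register, so with
+ * LMUL = 1 overlap is allowed; the checks below therefore accept either no
+ * overlap or lmul == 0.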
+ */ +static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || + (s->lmul == 0))); +} +GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check) +GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check) + +static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || + (s->lmul == 0))); +} + +GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check) +GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check) + +GEN_OPIVI_TRANS(vmseq_vi, 0, vmseq_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsne_vi, 0, vmsne_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsleu_vi, 1, vmsleu_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsle_vi, 0, vmsle_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsgtu_vi, 1, vmsgtu_vx, opivx_cmp_check) +GEN_OPIVI_TRANS(vmsgt_vi, 0, vmsgt_vx, opivx_cmp_check) + +/* Vector Integer Min/Max Instructions */ +GEN_OPIVV_GVEC_TRANS(vminu_vv, umin) +GEN_OPIVV_GVEC_TRANS(vmin_vv, smin) +GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax) +GEN_OPIVV_GVEC_TRANS(vmax_vv, smax) +GEN_OPIVX_TRANS(vminu_vx, opivx_check) +GEN_OPIVX_TRANS(vmin_vx, opivx_check) +GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) +GEN_OPIVX_TRANS(vmax_vx, opivx_check) + +/* Vector Single-Width Integer Multiply Instructions */ +GEN_OPIVV_GVEC_TRANS(vmul_vv, mul) +GEN_OPIVV_TRANS(vmulh_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhu_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check) +GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) +GEN_OPIVX_TRANS(vmulh_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhu_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check) + +/* Vector Integer Divide Instructions */ +GEN_OPIVV_TRANS(vdivu_vv, opivv_check) +GEN_OPIVV_TRANS(vdiv_vv, opivv_check) +GEN_OPIVV_TRANS(vremu_vv, opivv_check) +GEN_OPIVV_TRANS(vrem_vv, opivv_check) +GEN_OPIVX_TRANS(vdivu_vx, opivx_check) +GEN_OPIVX_TRANS(vdiv_vx, opivx_check) +GEN_OPIVX_TRANS(vremu_vx, opivx_check) +GEN_OPIVX_TRANS(vrem_vx, opivx_check) + +/* Vector Widening Integer Multiply Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmul_vx) +GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) + +/* Vector Single-Width Integer Multiply-Add Instructions */ +GEN_OPIVV_TRANS(vmacc_vv, opivv_check) +GEN_OPIVV_TRANS(vnmsac_vv, opivv_check) +GEN_OPIVV_TRANS(vmadd_vv, opivv_check) +GEN_OPIVV_TRANS(vnmsub_vv, opivv_check) +GEN_OPIVX_TRANS(vmacc_vx, opivx_check) +GEN_OPIVX_TRANS(vnmsac_vx, opivx_check) +GEN_OPIVX_TRANS(vmadd_vx, opivx_check) +GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) + +/* Vector Widening Integer Multiply-Add Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) 
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) + +/* Vector Integer Merge and Move Instructions */ +static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs1, false)) { + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_mov(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + vreg_ofs(s, a->rs1), + MAXSZ(s), MAXSZ(s)); + } else { + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_gvec_2_ptr * const fns[4] = { + gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, + gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + tcg_gen_gvec_2_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + } + return true; + } + return false; +} + +typedef void gen_helper_vmv_vx(TCGContext *, TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32); +static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false)) { + + TCGv s1; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + s1 = tcg_temp_new(tcg_ctx); + gen_get_gpr(tcg_ctx, s1, a->rs1); + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_tl(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), s1); + } else { + TCGv_i32 desc ; + TCGv_i64 s1_i64 = tcg_temp_new_i64(tcg_ctx); + TCGv_ptr dest = tcg_temp_new_ptr(tcg_ctx); + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_vmv_vx * const fns[4] = { + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, + }; + + tcg_gen_ext_tl_i64(tcg_ctx, s1_i64, s1); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew](tcg_ctx, dest, s1_i64, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_i32(tcg_ctx, desc); + tcg_temp_free_i64(tcg_ctx, s1_i64); + } + + tcg_temp_free(tcg_ctx, s1); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false)) { + + int64_t simm = sextract64(a->rs1, 0, 5); + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_imm(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), simm); + } else { + TCGv_i32 desc; + TCGv_i64 s1; + TCGv_ptr dest; + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_vmv_vx * const fns[4] = { + gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + s1 = tcg_const_i64(tcg_ctx, simm); + dest = tcg_temp_new_ptr(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew](tcg_ctx, dest, s1, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_i32(tcg_ctx, desc); + tcg_temp_free_i64(tcg_ctx, s1); + gen_set_label(tcg_ctx, over); + } + 
return true; + } + return false; +} + +GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check) +GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check) +GEN_OPIVI_TRANS(vmerge_vim, 0, vmerge_vxm, opivx_vadc_check) + +/* + *** Vector Fixed-Point Arithmetic Instructions + */ + +/* Vector Single-Width Saturating Add and Subtract */ +GEN_OPIVV_TRANS(vsaddu_vv, opivv_check) +GEN_OPIVV_TRANS(vsadd_vv, opivv_check) +GEN_OPIVV_TRANS(vssubu_vv, opivv_check) +GEN_OPIVV_TRANS(vssub_vv, opivv_check) +GEN_OPIVX_TRANS(vsaddu_vx, opivx_check) +GEN_OPIVX_TRANS(vsadd_vx, opivx_check) +GEN_OPIVX_TRANS(vssubu_vx, opivx_check) +GEN_OPIVX_TRANS(vssub_vx, opivx_check) +GEN_OPIVI_TRANS(vsaddu_vi, 1, vsaddu_vx, opivx_check) +GEN_OPIVI_TRANS(vsadd_vi, 0, vsadd_vx, opivx_check) + +/* Vector Single-Width Averaging Add and Subtract */ +GEN_OPIVV_TRANS(vaadd_vv, opivv_check) +GEN_OPIVV_TRANS(vasub_vv, opivv_check) +GEN_OPIVX_TRANS(vaadd_vx, opivx_check) +GEN_OPIVX_TRANS(vasub_vx, opivx_check) +GEN_OPIVI_TRANS(vaadd_vi, 0, vaadd_vx, opivx_check) + +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ +GEN_OPIVV_TRANS(vsmul_vv, opivv_check) +GEN_OPIVX_TRANS(vsmul_vx, opivx_check) + +/* Vector Widening Saturating Scaled Multiply-Add */ +GEN_OPIVV_WIDEN_TRANS(vwsmaccu_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmacc_vv, opivv_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwsmaccsu_vv, opivv_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwsmaccu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmacc_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwsmaccus_vx) + +/* Vector Single-Width Scaling Shift Instructions */ +GEN_OPIVV_TRANS(vssrl_vv, opivv_check) +GEN_OPIVV_TRANS(vssra_vv, opivv_check) +GEN_OPIVX_TRANS(vssrl_vx, opivx_check) +GEN_OPIVX_TRANS(vssra_vx, opivx_check) +GEN_OPIVI_TRANS(vssrl_vi, 1, vssrl_vx, opivx_check) +GEN_OPIVI_TRANS(vssra_vi, 0, vssra_vx, opivx_check) + +/* Vector Narrowing Fixed-Point Clip Instructions */ +GEN_OPIVV_NARROW_TRANS(vnclipu_vv) +GEN_OPIVV_NARROW_TRANS(vnclip_vv) +GEN_OPIVX_NARROW_TRANS(vnclipu_vx) +GEN_OPIVX_NARROW_TRANS(vnclip_vx) +GEN_OPIVI_NARROW_TRANS(vnclipu_vi, 1, vnclipu_vx) +GEN_OPIVI_NARROW_TRANS(vnclip_vi, 1, vnclip_vx) + +/* + *** Vector Float Point Arithmetic Instructions + */ +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised. 
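+ * SEW = 8 has no IEEE type, hence the s->sew != 0 test below; SEW = 16, 32
+ * and 64 select the half-, single- and double-precision helpers via
+ * fns[s->sew - 1].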
+ */ +static bool opfvv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + (s->sew != 0)); +} + +/* OPFVV without GVEC IR */ +#define GEN_OPFVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} +GEN_OPFVV_TRANS(vfadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsub_vv, opfvv_check) + +typedef void gen_helper_opfvf(TCGContext* tcg_ctx, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, + TCGv_env, TCGv_i32); + +static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, + uint32_t data, gen_helper_opfvf *fn, DisasContext *s) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr dest, src2, mask; + TCGv_i32 desc; + + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, vs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + fn(tcg_ctx, dest, mask, tcg_ctx->cpu_fpr[rs1], src2, tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + return true; +} + +static bool opfvf_check(DisasContext *s, arg_rmrr *a) +{ +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0)); +} + +/* OPFVF without GVEC IR */ +#define GEN_OPFVF_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + gen_set_rm(s, 7); \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) +GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) + +/* Vector Widening Floating-Point Add/Subtract Instructions */ +static 
bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* OPFVV with WIDEN */ +#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check) + +static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* OPFVF with WIDEN */ +#define GEN_OPFVF_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfvf_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, 7); \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFVF_WIDEN_TRANS(vfwadd_vf) +GEN_OPFVF_WIDEN_TRANS(vfwsub_vf) + +static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + vext_check_reg(s, a->rs1, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs1, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* WIDEN OPFVV with WIDEN */ +#define GEN_OPFWV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opfwv_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_4_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, 
a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFWV_WIDEN_TRANS(vfwadd_wv) +GEN_OPFWV_WIDEN_TRANS(vfwsub_wv) + +static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, true) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +/* WIDEN OPFVF with WIDEN */ +#define GEN_OPFWF_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (opfwf_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_opfvf *const fns[2] = { \ + gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ + }; \ + gen_set_rm(s, 7); \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ + fns[s->sew - 1], s); \ + } \ + return false; \ +} + +GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) +GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) + +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ +GEN_OPFVV_TRANS(vfmul_vv, opfvv_check) +GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) +GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) +GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) + +/* Vector Widening Floating-Point Multiply */ +GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) + +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ +GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check) +GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) +GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) + +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ +GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) +GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) +GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) +GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) + +/* Vector Floating-Point Square-Root Instruction */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0)); +} + +#define GEN_OPFV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[3] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = 
gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_TRANS(vfsqrt_v, opfv_check) + +/* Vector Floating-Point MIN/MAX Instructions */ +GEN_OPFVV_TRANS(vfmin_vv, opfvv_check) +GEN_OPFVV_TRANS(vfmax_vv, opfvv_check) +GEN_OPFVF_TRANS(vfmin_vf, opfvf_check) +GEN_OPFVF_TRANS(vfmax_vf, opfvf_check) + +/* Vector Floating-Point Sign-Injection Instructions */ +GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check) +GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check) +GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check) +GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check) + +/* Vector Floating-Point Compare Instructions */ +static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + vext_check_reg(s, a->rs1, false) && + (s->sew != 0) && + ((vext_check_overlap_group(a->rd, 1, a->rs1, 1 << s->lmul) && + vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul)) || + (s->lmul == 0))); +} + +GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check) +GEN_OPFVV_TRANS(vmford_vv, opfvv_cmp_check) + +static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rs2, false) && + (s->sew != 0) && + (vext_check_overlap_group(a->rd, 1, a->rs2, 1 << s->lmul) || + (s->lmul == 0))); +} + +GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check) +GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check) + +/* Vector Floating-Point Classify Instruction */ +GEN_OPFV_TRANS(vfclass_v, opfv_check) + +/* Vector Floating-Point Merge Instruction */ +GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check) + +static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + (s->sew != 0)) { + + if (s->vl_eq_vlmax) { + tcg_gen_gvec_dup_i64(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), tcg_ctx->cpu_fpr[a->rs1]); + } else { + TCGv_ptr dest; + TCGv_i32 desc; + uint32_t data; + FIELD_DP32(0, VDATA, LMUL, s->lmul, data); + static gen_helper_vmv_vx * const fns[3] = { + gen_helper_vmv_v_x_h, + gen_helper_vmv_v_x_w, + gen_helper_vmv_v_x_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + dest = tcg_temp_new_ptr(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + tcg_gen_addi_ptr(tcg_ctx, dest, tcg_ctx->cpu_env, vreg_ofs(s, a->rd)); + fns[s->sew - 1](tcg_ctx, dest, tcg_ctx->cpu_fpr[a->rs1], tcg_ctx->cpu_env, desc); + + tcg_temp_free_ptr(tcg_ctx, dest); + tcg_temp_free_i32(tcg_ctx, desc); + gen_set_label(tcg_ctx, over); + } + return true; + } + return false; +} + +/* Single-Width Floating-Point/Integer Type-Convert 
Instructions */ +GEN_OPFV_TRANS(vfcvt_xu_f_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_x_f_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_f_xu_v, opfv_check) +GEN_OPFV_TRANS(vfcvt_f_x_v, opfv_check) + +/* Widening Floating-Point/Integer Type-Convert Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_widen_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, true) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 2 << s->lmul, a->rs2, + 1 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +#define GEN_OPFV_WIDEN_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opfv_widen_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_xu_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_x_v) +GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v) + +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ + +/* + * If the current SEW does not correspond to a supported IEEE floating-point + * type, an illegal instruction exception is raised + */ +static bool opfv_narrow_check(DisasContext *s, arg_rmr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, false) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, true) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, + 2 << s->lmul) && + (s->lmul < 0x3) && (s->sew < 0x3) && (s->sew != 0)); +} + +#define GEN_OPFV_NARROW_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (opfv_narrow_check(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr * const fns[2] = { \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + }; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + gen_set_rm(s, 7); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fns[s->sew - 1]); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_OPFV_NARROW_TRANS(vfncvt_xu_f_v) +GEN_OPFV_NARROW_TRANS(vfncvt_x_f_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_x_v) +GEN_OPFV_NARROW_TRANS(vfncvt_f_f_v) + +/* + *** Vector Reduction Operations + */ +/* Vector Single-Width Integer Reduction Instructions */ +static bool reduction_check(DisasContext *s, arg_rmrr *a) +{ + return 
vext_check_isa_ill(s) && vext_check_reg(s, a->rs2, false); +} + +GEN_OPIVV_TRANS(vredsum_vs, reduction_check) +GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check) +GEN_OPIVV_TRANS(vredmax_vs, reduction_check) +GEN_OPIVV_TRANS(vredminu_vs, reduction_check) +GEN_OPIVV_TRANS(vredmin_vs, reduction_check) +GEN_OPIVV_TRANS(vredand_vs, reduction_check) +GEN_OPIVV_TRANS(vredor_vs, reduction_check) +GEN_OPIVV_TRANS(vredxor_vs, reduction_check) + +/* Vector Widening Integer Reduction Instructions */ +GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_check) +GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_check) + +/* Vector Single-Width Floating-Point Reduction Instructions */ +GEN_OPFVV_TRANS(vfredsum_vs, reduction_check) +GEN_OPFVV_TRANS(vfredmax_vs, reduction_check) +GEN_OPFVV_TRANS(vfredmin_vs, reduction_check) + +/* Vector Widening Floating-Point Reduction Instructions */ +GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check) + +/* + *** Vector Mask Operations + */ + +/* Vector Mask-Register Logical Instructions */ +#define GEN_MM_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_r *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (vext_check_isa_ill(s)) { \ + uint32_t data = 0; \ + gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, \ + s->vlen / 8, data, fn); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_MM_TRANS(vmand_mm) +GEN_MM_TRANS(vmnand_mm) +GEN_MM_TRANS(vmandnot_mm) +GEN_MM_TRANS(vmxor_mm) +GEN_MM_TRANS(vmor_mm) +GEN_MM_TRANS(vmnor_mm) +GEN_MM_TRANS(vmornot_mm) +GEN_MM_TRANS(vmxnor_mm) + +/* Vector mask population count vmpopc */ +static bool trans_vmpopc_m(DisasContext *s, arg_rmr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s)) { + TCGv_ptr src2, mask; + TCGv dst; + TCGv_i32 desc; + uint32_t data = 0; + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + dst = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, a->rs2)); + tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + gen_helper_vmpopc_m(tcg_ctx, dst, mask, src2, tcg_ctx->cpu_env, desc); + gen_set_gpr(tcg_ctx, a->rd, dst); + + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, dst); + tcg_temp_free_i32(tcg_ctx, desc); + return true; + } + return false; +} + +/* vmfirst find-first-set mask bit */ +static bool trans_vmfirst_m(DisasContext *s, arg_rmr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s)) { + TCGv_ptr src2, mask; + TCGv dst; + TCGv_i32 desc; + uint32_t data = 0; + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + + mask = tcg_temp_new_ptr(tcg_ctx); + src2 = tcg_temp_new_ptr(tcg_ctx); + dst = tcg_temp_new(tcg_ctx); + desc = tcg_const_i32(tcg_ctx, simd_desc(0, s->vlen / 8, data)); + + tcg_gen_addi_ptr(tcg_ctx, src2, tcg_ctx->cpu_env, vreg_ofs(s, a->rs2)); + 
tcg_gen_addi_ptr(tcg_ctx, mask, tcg_ctx->cpu_env, vreg_ofs(s, 0)); + + gen_helper_vmfirst_m(tcg_ctx, dst, mask, src2, tcg_ctx->cpu_env, desc); + gen_set_gpr(tcg_ctx, a->rd, dst); + + tcg_temp_free_ptr(tcg_ctx, mask); + tcg_temp_free_ptr(tcg_ctx, src2); + tcg_temp_free(tcg_ctx, dst); + tcg_temp_free_i32(tcg_ctx, desc); + return true; + } + return false; +} + +/* vmsbf.m set-before-first mask bit */ +/* vmsif.m set-includ-first mask bit */ +/* vmsof.m set-only-first mask bit */ +#define GEN_M_TRANS(NAME) \ +static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ +{ \ + TCGContext *tcg_ctx = s->uc->tcg_ctx; \ + if (vext_check_isa_ill(s)) { \ + uint32_t data = 0; \ + gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \ + TCGLabel *over = gen_new_label(tcg_ctx); \ + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); \ + \ + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); \ + FIELD_DP32(data, VDATA, VM, a->vm, data); \ + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); \ + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), \ + vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fn); \ + gen_set_label(tcg_ctx, over); \ + return true; \ + } \ + return false; \ +} + +GEN_M_TRANS(vmsbf_m) +GEN_M_TRANS(vmsif_m) +GEN_M_TRANS(vmsof_m) + +/* Vector Iota Instruction */ +static bool trans_viota_m(DisasContext *s, arg_viota_m *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs2, 1) && + (a->vm != 0 || a->rd != 0)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + static gen_helper_gvec_3_ptr * const fns[4] = { + gen_helper_viota_m_b, gen_helper_viota_m_h, + gen_helper_viota_m_w, gen_helper_viota_m_d, + }; + tcg_gen_gvec_3_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs2), tcg_ctx->cpu_env, 0, + s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* Vector Element Index Instruction */ +static bool trans_vid_v(DisasContext *s, arg_vid_v *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_overlap_mask(s, a->rd, a->vm, false)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, VM, a->vm, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + static gen_helper_gvec_2_ptr * const fns[4] = { + gen_helper_vid_v_b, gen_helper_vid_v_h, + gen_helper_vid_v_w, gen_helper_vid_v_d, + }; + tcg_gen_gvec_2_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* + *** Vector Permutation Instructions + */ + +/* Integer Extract Instruction */ + +static void load_element(TCGContext *tcg_ctx, TCGv_i64 dest, TCGv_ptr base, + int ofs, int sew) +{ + switch (sew) { + case MO_8: + tcg_gen_ld8u_i64(tcg_ctx, dest, base, ofs); + break; + case MO_16: + tcg_gen_ld16u_i64(tcg_ctx, dest, base, ofs); + break; + case MO_32: + tcg_gen_ld32u_i64(tcg_ctx, dest, base, ofs); + break; + case MO_64: + tcg_gen_ld_i64(tcg_ctx, dest, base, ofs); + 
break; + default: + g_assert_not_reached(); + break; + } +} + +/* offset of the idx element with base regsiter r */ +static uint32_t endian_ofs(DisasContext *s, int r, int idx) +{ +#ifdef HOST_WORDS_BIGENDIAN + return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew); +#else + return vreg_ofs(s, r) + (idx << s->sew); +#endif +} + +/* adjust the index according to the endian */ +static void endian_adjust(TCGv_i32 ofs, int sew) +{ +#ifdef HOST_WORDS_BIGENDIAN + tcg_gen_xori_i32(ofs, ofs, 7 >> sew); +#endif +} + +/* Load idx >= VLMAX ? 0 : vreg[idx] */ +static void vec_element_loadx(DisasContext *s, TCGv_i64 dest, + int vreg, TCGv idx, int vlmax) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i32 ofs = tcg_temp_new_i32(tcg_ctx); + TCGv_ptr base = tcg_temp_new_ptr(tcg_ctx); + TCGv_i64 t_idx = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 t_vlmax, t_zero; + + /* + * Mask the index to the length so that we do + * not produce an out-of-range load. + */ + tcg_gen_trunc_tl_i32(tcg_ctx, ofs, idx); + tcg_gen_andi_i32(tcg_ctx, ofs, ofs, vlmax - 1); + + /* Convert the index to an offset. */ + endian_adjust(ofs, s->sew); + tcg_gen_shli_i32(tcg_ctx, ofs, ofs, s->sew); + + /* Convert the index to a pointer. */ + tcg_gen_ext_i32_ptr(tcg_ctx, base, ofs); + tcg_gen_add_ptr(tcg_ctx, base, base, tcg_ctx->cpu_env); + + /* Perform the load. */ + load_element(tcg_ctx, dest, base, + vreg_ofs(s, vreg), s->sew); + tcg_temp_free_ptr(tcg_ctx, base); + tcg_temp_free_i32(tcg_ctx, ofs); + + /* Flush out-of-range indexing to zero. */ + t_vlmax = tcg_const_i64(tcg_ctx, vlmax); + t_zero = tcg_const_i64(tcg_ctx, 0); + tcg_gen_extu_tl_i64(tcg_ctx, t_idx, idx); + + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LTU, dest, t_idx, + t_vlmax, dest, t_zero); + + tcg_temp_free_i64(tcg_ctx, t_vlmax); + tcg_temp_free_i64(tcg_ctx, t_zero); + tcg_temp_free_i64(tcg_ctx, t_idx); +} + +static void vec_element_loadi(DisasContext *s, TCGv_i64 dest, + int vreg, int idx) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + load_element(tcg_ctx, dest, tcg_ctx->cpu_env, endian_ofs(s, vreg, idx), s->sew); +} + +static bool trans_vext_x_v(DisasContext *s, arg_r *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); + TCGv dest = tcg_temp_new(tcg_ctx); + + if (a->rs1 == 0) { + /* Special case vmv.x.s rd, vs2. */ + vec_element_loadi(s, tmp, a->rs2, 0); + } else { + /* This instruction ignores LMUL and vector register groups */ + int vlmax = s->vlen >> (3 + s->sew); + vec_element_loadx(s, tmp, a->rs2, tcg_ctx->cpu_gpr[a->rs1], vlmax); + } + tcg_gen_trunc_i64_tl(tcg_ctx, dest, tmp); + gen_set_gpr(tcg_ctx, a->rd, dest); + + tcg_temp_free(tcg_ctx, dest); + tcg_temp_free_i64(tcg_ctx, tmp); + return true; +} + +/* Integer Scalar Move Instruction */ + +static void store_element(TCGContext *tcg_ctx, TCGv_i64 val, TCGv_ptr base, + int ofs, int sew) +{ + switch (sew) { + case MO_8: + tcg_gen_st8_i64(tcg_ctx, val, base, ofs); + break; + case MO_16: + tcg_gen_st16_i64(tcg_ctx, val, base, ofs); + break; + case MO_32: + tcg_gen_st32_i64(tcg_ctx, val, base, ofs); + break; + case MO_64: + tcg_gen_st_i64(tcg_ctx, val, base, ofs); + break; + default: + g_assert_not_reached(); + break; + } +} + +/* + * Store vreg[idx] = val. + * The index must be in range of VLMAX. 
+ */ +static void vec_element_storei(DisasContext *s, int vreg, + int idx, TCGv_i64 val) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + store_element(tcg_ctx, val, tcg_ctx->cpu_env, endian_ofs(s, vreg, idx), s->sew); +} + +/* vmv.s.x vd, rs1 # vd[0] = rs1 */ +static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vext_check_isa_ill(s)) { + /* This instruction ignores LMUL and vector register groups */ + int maxsz = s->vlen >> 3; + TCGv_i64 t1; + TCGLabel *over = gen_new_label(tcg_ctx); + + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + tcg_gen_gvec_dup_imm(tcg_ctx, SEW64, vreg_ofs(s, a->rd), maxsz, maxsz, 0); + if (a->rs1 == 0) { + goto done; + } + + t1 = tcg_temp_new_i64(tcg_ctx); + tcg_gen_extu_tl_i64(tcg_ctx, t1, tcg_ctx->cpu_gpr[a->rs1]); + vec_element_storei(s, a->rd, 0, t1); + tcg_temp_free_i64(tcg_ctx, t1); + done: + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* Floating-Point Scalar Move Instructions */ +static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!s->vill && has_ext(s, RVF) && + (s->mstatus_fs != 0) && (s->sew != 0)) { + unsigned int len = 8 << s->sew; + + vec_element_loadi(s, tcg_ctx->cpu_fpr[a->rd], a->rs2, 0); + if (len < 64) { + tcg_gen_ori_i64(tcg_ctx, tcg_ctx->cpu_fpr[a->rd], tcg_ctx->cpu_fpr[a->rd], + MAKE_64BIT_MASK(len, 64 - len)); + } + + mark_fs_dirty(s); + return true; + } + return false; +} + +/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */ +static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!s->vill && has_ext(s, RVF) && (s->sew != 0)) { + TCGv_i64 t1; + /* The instructions ignore LMUL and vector register group. 
*/ + uint32_t vlmax = s->vlen >> 3; + + /* if vl == 0, skip vector register write back */ + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + /* zeroed all elements */ + tcg_gen_gvec_dup_imm(tcg_ctx, SEW64, vreg_ofs(s, a->rd), vlmax, vlmax, 0); + + /* NaN-box f[rs1] as necessary for SEW */ + t1 = tcg_temp_new_i64(tcg_ctx); + if (s->sew == MO_64 && !has_ext(s, RVD)) { + tcg_gen_ori_i64(tcg_ctx, t1, tcg_ctx->cpu_fpr[a->rs1], MAKE_64BIT_MASK(32, 32)); + } else { + tcg_gen_mov_i64(tcg_ctx, t1, tcg_ctx->cpu_fpr[a->rs1]); + } + vec_element_storei(s, a->rd, 0, t1); + tcg_temp_free_i64(tcg_ctx, t1); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} + +/* Vector Slide Instructions */ +static bool slideup_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2)); +} + +GEN_OPIVX_TRANS(vslideup_vx, slideup_check) +GEN_OPIVX_TRANS(vslide1up_vx, slideup_check) +GEN_OPIVI_TRANS(vslideup_vi, 1, vslideup_vx, slideup_check) + +GEN_OPIVX_TRANS(vslidedown_vx, opivx_check) +GEN_OPIVX_TRANS(vslide1down_vx, opivx_check) +GEN_OPIVI_TRANS(vslidedown_vi, 1, vslidedown_vx, opivx_check) + +/* Vector Register Gather Instruction */ +static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs1, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2) && (a->rd != a->rs1)); +} + +GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check) + +static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a) +{ + return (vext_check_isa_ill(s) && + vext_check_overlap_mask(s, a->rd, a->vm, true) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + (a->rd != a->rs2)); +} + +/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ +static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!vrgather_vx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + int vlmax = s->vlen / s->mlen; + TCGv_i64 dest = tcg_temp_new_i64(tcg_ctx); + + if (a->rs1 == 0) { + vec_element_loadi(s, dest, a->rs2, 0); + } else { + vec_element_loadx(s, dest, a->rs2, tcg_ctx->cpu_gpr[a->rs1], vlmax); + } + + tcg_gen_gvec_dup_i64(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), dest); + tcg_temp_free_i64(tcg_ctx, dest); + } else { + static gen_helper_opivx * const fns[4] = { + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d + }; + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); + } + return true; +} + +/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 
0 : vs2[imm] */ +static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (!vrgather_vx_check(s, a)) { + return false; + } + + if (a->vm && s->vl_eq_vlmax) { + if (a->rs1 >= s->vlen / s->mlen) { + tcg_gen_gvec_dup_imm(tcg_ctx, SEW64, vreg_ofs(s, a->rd), + MAXSZ(s), MAXSZ(s), 0); + } else { + tcg_gen_gvec_dup_mem(tcg_ctx, s->sew, vreg_ofs(s, a->rd), + endian_ofs(s, a->rs2, a->rs1), + MAXSZ(s), MAXSZ(s)); + } + } else { + static gen_helper_opivx * const fns[4] = { + gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h, + gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d + }; + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, 1); + } + return true; +} + +/* Vector Compress Instruction */ +static bool vcompress_vm_check(DisasContext *s, arg_r *a) +{ + return (vext_check_isa_ill(s) && + vext_check_reg(s, a->rd, false) && + vext_check_reg(s, a->rs2, false) && + vext_check_overlap_group(a->rd, 1 << s->lmul, a->rs1, 1) && + (a->rd != a->rs2)); +} + +static bool trans_vcompress_vm(DisasContext *s, arg_r *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + if (vcompress_vm_check(s, a)) { + uint32_t data = 0; + static gen_helper_gvec_4_ptr * const fns[4] = { + gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h, + gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d, + }; + TCGLabel *over = gen_new_label(tcg_ctx); + tcg_gen_brcondi_tl(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_vl, 0, over); + + FIELD_DP32(data, VDATA, MLEN, s->mlen, data); + FIELD_DP32(data, VDATA, LMUL, s->lmul, data); + tcg_gen_gvec_4_ptr(tcg_ctx, vreg_ofs(s, a->rd), vreg_ofs(s, 0), + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2), + tcg_ctx->cpu_env, 0, s->vlen / 8, data, fns[s->sew]); + gen_set_label(tcg_ctx, over); + return true; + } + return false; +} diff --git a/qemu/target/riscv/internals.h b/qemu/target/riscv/internals.h new file mode 100644 index 0000000000..37d33820ad --- /dev/null +++ b/qemu/target/riscv/internals.h @@ -0,0 +1,41 @@ +/* + * QEMU RISC-V CPU -- internal functions and types + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef RISCV_CPU_INTERNALS_H +#define RISCV_CPU_INTERNALS_H + +#include "hw/registerfields.h" + +/* share data between vector helpers and decode code */ +FIELD(VDATA, MLEN, 0, 8) +FIELD(VDATA, VM, 8, 1) +FIELD(VDATA, LMUL, 9, 2) +FIELD(VDATA, NF, 11, 4) +FIELD(VDATA, WD, 11, 1) + +/* float point classify helpers */ +target_ulong fclass_h(uint64_t frs1); +target_ulong fclass_s(uint64_t frs1); +target_ulong fclass_d(uint64_t frs1); + +#define SEW8 0 +#define SEW16 1 +#define SEW32 2 +#define SEW64 3 + +#endif diff --git a/qemu/target/riscv/op_helper.c b/qemu/target/riscv/op_helper.c index 5afb2ce881..c5de354a05 100644 --- a/qemu/target/riscv/op_helper.c +++ b/qemu/target/riscv/op_helper.c @@ -81,8 +81,7 @@ target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb) riscv_raise_exception(env, RISCV_EXCP_INST_ADDR_MIS, GETPC()); } - if (env->priv_ver >= PRIV_VERSION_1_10_0 && - get_field(env->mstatus, MSTATUS_TSR) && !(env->priv >= PRV_M)) { + if (get_field(env->mstatus, MSTATUS_TSR) && !(env->priv >= PRV_M)) { riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } @@ -116,10 +115,8 @@ target_ulong helper_sret(CPURISCVState *env, target_ulong cpu_pc_deb) } else { prev_priv = get_field(mstatus, MSTATUS_SPP); - mstatus = set_field(mstatus, - env->priv_ver >= PRIV_VERSION_1_10_0 ? - MSTATUS_SIE : MSTATUS_UIE << prev_priv, - get_field(mstatus, MSTATUS_SPIE)); + mstatus = set_field(mstatus, MSTATUS_SIE, + get_field(mstatus, MSTATUS_SPIE)); mstatus = set_field(mstatus, MSTATUS_SPIE, 1); mstatus = set_field(mstatus, MSTATUS_SPP, PRV_U); env->mstatus = mstatus; @@ -144,10 +141,8 @@ target_ulong helper_mret(CPURISCVState *env, target_ulong cpu_pc_deb) target_ulong mstatus = env->mstatus; target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP); target_ulong prev_virt = MSTATUS_MPV_ISSET(env); - mstatus = set_field(mstatus, - env->priv_ver >= PRIV_VERSION_1_10_0 ? 
- MSTATUS_MIE : MSTATUS_UIE << prev_priv, - get_field(mstatus, MSTATUS_MPIE)); + mstatus = set_field(mstatus, MSTATUS_MIE, + get_field(mstatus, MSTATUS_MPIE)); mstatus = set_field(mstatus, MSTATUS_MPIE, 1); mstatus = set_field(mstatus, MSTATUS_MPP, PRV_U); #ifdef TARGET_RISCV32 @@ -194,7 +189,6 @@ void helper_tlb_flush(CPURISCVState *env) CPUState *cs = env_cpu(env); if (!(env->priv >= PRV_S) || (env->priv == PRV_S && - env->priv_ver >= PRIV_VERSION_1_10_0 && get_field(env->mstatus, MSTATUS_TVM))) { riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } else { @@ -202,6 +196,19 @@ void helper_tlb_flush(CPURISCVState *env) } } +void helper_hyp_tlb_flush(CPURISCVState *env) +{ + CPUState *cs = env_cpu(env); + + if (env->priv == PRV_M || + (env->priv == PRV_S && !riscv_cpu_virt_enabled(env))) { + tlb_flush(cs); + return; + } + + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); +} + void helper_uc_riscv_exit(CPURISCVState *env) { CPUState *cs = env_cpu(env); @@ -209,4 +216,4 @@ void helper_uc_riscv_exit(CPURISCVState *env) cs->exception_index = EXCP_HLT; cs->halted = 1; cpu_loop_exit(cs); -} \ No newline at end of file +} diff --git a/qemu/target/riscv/pmp.c b/qemu/target/riscv/pmp.c index 888b99c8d9..9e1e614951 100644 --- a/qemu/target/riscv/pmp.c +++ b/qemu/target/riscv/pmp.c @@ -169,7 +169,7 @@ static void pmp_update_rule(CPURISCVState *env, uint32_t pmp_index) case PMP_AMATCH_NA4: sa = this_addr << 2; /* shift up from [xx:0] to [xx+2:2] */ - ea = (this_addr + 4u) - 1u; + ea = (sa + 4u) - 1u; break; case PMP_AMATCH_NAPOT: @@ -231,16 +231,20 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, return true; } - /* - * if size is unknown (0), assume that all bytes - * from addr to the end of the page will be accessed. - */ if (size == 0) { + if (riscv_feature(env, RISCV_FEATURE_MMU)) { + /* + * If size is unknown (0), assume that all bytes + * from addr to the end of the page will be accessed. + */ #ifdef _MSC_VER - pmp_size = 0 - (addr | TARGET_PAGE_MASK); + pmp_size = 0 - (addr | TARGET_PAGE_MASK); #else - pmp_size = -(addr | TARGET_PAGE_MASK); + pmp_size = -(addr | TARGET_PAGE_MASK); #endif + } else { + pmp_size = sizeof(target_ulong); + } } else { pmp_size = size; } diff --git a/qemu/target/riscv/riscv32/decode_insn16.inc.c b/qemu/target/riscv/riscv32/decode_insn16.inc.c index 66ebf61203..ba4cccaf76 100644 --- a/qemu/target/riscv/riscv32/decode_insn16.inc.c +++ b/qemu/target/riscv/riscv32/decode_insn16.inc.c @@ -1,11 +1,9 @@ /* This file is autogenerated by scripts/decodetree.py. */ -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wredundant-decls" -# ifdef __clang__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#ifdef __clang__ # pragma GCC diagnostic ignored "-Wtypedef-redefinition" -# endif #endif typedef arg_empty arg_illegal; @@ -55,9 +53,7 @@ static bool trans_flw(DisasContext *ctx, arg_flw *a); typedef arg_s arg_fsw; static bool trans_fsw(DisasContext *ctx, arg_fsw *a); -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic pop -#endif +#pragma GCC diagnostic pop static void decode_insn16_extract_c_addi16sp(DisasContext *ctx, arg_i *a, uint16_t insn) { @@ -231,55 +227,45 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 000..... 
......00 */ if ((insn & 0x00001fe0) == 0x00000000) { /* 00000000 000...00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:87 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); - ctx->invalid = true; if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:88 */ decode_insn16_extract_c_addi4spn(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000001: /* 000..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:96 */ decode_insn16_extract_ci(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000002: /* 000..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:115 */ decode_insn16_extract_c_shift2(ctx, &u.f_shift, insn); if (trans_slli(ctx, &u.f_shift)) return true; return false; case 0x00002000: /* 001..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:90 */ decode_insn16_extract_cl_d(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; case 0x00002001: /* 001..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:24 */ decode_insn16_extract_cj(ctx, &u.f_j, insn); u.f_j.rd = 1; if (trans_jal(ctx, &u.f_j)) return true; return false; case 0x00002002: /* 001..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:116 */ decode_insn16_extract_c_ldsp(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; case 0x00004000: /* 010..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:91 */ decode_insn16_extract_cl_w(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00004001: /* 010..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:97 */ decode_insn16_extract_c_li(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -287,17 +273,14 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 010..... ......10 */ if ((insn & 0x00000f80) == 0x00000000) { /* 010.0000 0.....10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:118 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:119 */ decode_insn16_extract_c_lwsp(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00006000: /* 011..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:20 */ decode_insn16_extract_cl_w(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; @@ -305,23 +288,19 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 011..... ......01 */ if ((insn & 0x0000107c) == 0x00000000) { /* 0110.... 
.0000001 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:99 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x00000f80) == 0x00000100) { /* 011.0001 0.....01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:100 */ decode_insn16_extract_c_addi16sp(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:101 */ decode_insn16_extract_c_lui(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; case 0x00006002: /* 011..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:27 */ decode_insn16_extract_c_lwsp(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; @@ -330,19 +309,16 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch ((insn >> 10) & 0x3) { case 0x0: /* 100.00.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:103 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 100.01.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:104 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srai(ctx, &u.f_shift)) return true; return false; case 0x2: /* 100.10.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:105 */ decode_insn16_extract_c_andi(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -352,22 +328,18 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch (insn & 0x00001060) { case 0x00000000: /* 100011.. .00...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:106 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x00000020: /* 100011.. .01...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:107 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00000040: /* 100011.. .10...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:108 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00000060: /* 100011.. .11...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:109 */ if (trans_and(ctx, &u.f_r)) return true; return false; } @@ -381,18 +353,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1000.... ......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10000000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:122 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1000.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:123 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 0; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:124 */ decode_insn16_extract_c_mv(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -400,18 +369,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1001.... 
......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10010000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:127 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_ebreak(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1001.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:128 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 1; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:129 */ decode_insn16_extract_cr(ctx, &u.f_r, insn); if (trans_add(ctx, &u.f_r)) return true; return false; @@ -419,56 +385,47 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) return false; case 0x0000a000: /* 101..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:92 */ decode_insn16_extract_cs_d(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000a001: /* 101..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:110 */ decode_insn16_extract_cj(ctx, &u.f_j, insn); u.f_j.rd = 0; if (trans_jal(ctx, &u.f_j)) return true; return false; case 0x0000a002: /* 101..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:131 */ decode_insn16_extract_c_sdsp(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000c000: /* 110..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:93 */ decode_insn16_extract_cs_w(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000c001: /* 110..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:111 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x0000c002: /* 110..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:132 */ decode_insn16_extract_c_swsp(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000e000: /* 111..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:21 */ decode_insn16_extract_cs_w(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; case 0x0000e001: /* 111..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:112 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x0000e002: /* 111..... 
......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-32.decode:28 */ decode_insn16_extract_c_swsp(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; diff --git a/qemu/target/riscv/riscv32/decode_insn32.inc.c b/qemu/target/riscv/riscv32/decode_insn32.inc.c index c4c25de13b..ce08737432 100644 --- a/qemu/target/riscv/riscv32/decode_insn32.inc.c +++ b/qemu/target/riscv/riscv32/decode_insn32.inc.c @@ -14,56 +14,70 @@ typedef struct { int rs2; } arg_b; +typedef struct { + int csr; + int rd; + int rs1; +} arg_decode_insn3214; + typedef struct { int rd; int rm; int rs1; int rs2; int rs3; -} arg_decode_insn3210; +} arg_decode_insn3215; typedef struct { int rd; int rm; int rs1; int rs2; -} arg_decode_insn3211; +} arg_decode_insn3216; typedef struct { int rd; int rm; int rs1; -} arg_decode_insn3212; +} arg_decode_insn3217; typedef struct { int rd; int rs1; -} arg_decode_insn3213; +} arg_decode_insn3218; typedef struct { - int rs1; + int rd; + int vm; +} arg_decode_insn3219; + +typedef struct { + int rd; int rs2; -} arg_decode_insn3214; +} arg_decode_insn3220; typedef struct { + int rd; int rs1; -} arg_decode_insn3215; + int zimm; +} arg_decode_insn3221; typedef struct { - int pred; - int succ; -} arg_decode_insn3216; + int rs1; + int rs2; +} arg_decode_insn3222; typedef struct { - int csr; - int rd; int rs1; -} arg_decode_insn329; +} arg_decode_insn3223; + +typedef struct { + int pred; + int succ; +} arg_decode_insn3224; typedef struct { -#ifdef _MSC_VER - int dummy; // MSVC does not allow empty struct -#endif + int : 0; } arg_empty; typedef struct { @@ -83,6 +97,42 @@ typedef struct { int rs2; } arg_r; +typedef struct { + int nf; + int rd; + int rs1; + int vm; +} arg_r2nfvm; + +typedef struct { + int rd; + int rs2; + int vm; +} arg_rmr; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; +} arg_rmrr; + +typedef struct { + int nf; + int rd; + int rs1; + int rs2; + int vm; +} arg_rnfvm; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; + int wd; +} arg_rwdvm; + typedef struct { int imm; int rs1; @@ -112,13 +162,9 @@ typedef arg_empty arg_mret; static bool trans_mret(DisasContext *ctx, arg_mret *a); typedef arg_empty arg_wfi; static bool trans_wfi(DisasContext *ctx, arg_wfi *a); -typedef arg_decode_insn3214 arg_hfence_gvma; -static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); -typedef arg_decode_insn3214 arg_hfence_bvma; -static bool trans_hfence_bvma(DisasContext *ctx, arg_hfence_bvma *a); -typedef arg_decode_insn3214 arg_sfence_vma; +typedef arg_decode_insn3222 arg_sfence_vma; static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a); -typedef arg_decode_insn3215 arg_sfence_vm; +typedef arg_decode_insn3223 arg_sfence_vm; static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a); typedef arg_u arg_lui; static bool trans_lui(DisasContext *ctx, arg_lui *a); @@ -194,21 +240,21 @@ typedef arg_r arg_or; static bool trans_or(DisasContext *ctx, arg_or *a); typedef arg_r arg_and; static bool trans_and(DisasContext *ctx, arg_and *a); -typedef arg_decode_insn3216 arg_fence; +typedef arg_decode_insn3224 arg_fence; static bool trans_fence(DisasContext *ctx, arg_fence *a); typedef arg_empty arg_fence_i; static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a); -typedef arg_decode_insn329 arg_csrrw; +typedef arg_decode_insn3214 arg_csrrw; static bool trans_csrrw(DisasContext *ctx, arg_csrrw *a); -typedef arg_decode_insn329 arg_csrrs; +typedef arg_decode_insn3214 arg_csrrs; static bool 
trans_csrrs(DisasContext *ctx, arg_csrrs *a); -typedef arg_decode_insn329 arg_csrrc; +typedef arg_decode_insn3214 arg_csrrc; static bool trans_csrrc(DisasContext *ctx, arg_csrrc *a); -typedef arg_decode_insn329 arg_csrrwi; +typedef arg_decode_insn3214 arg_csrrwi; static bool trans_csrrwi(DisasContext *ctx, arg_csrrwi *a); -typedef arg_decode_insn329 arg_csrrsi; +typedef arg_decode_insn3214 arg_csrrsi; static bool trans_csrrsi(DisasContext *ctx, arg_csrrsi *a); -typedef arg_decode_insn329 arg_csrrci; +typedef arg_decode_insn3214 arg_csrrci; static bool trans_csrrci(DisasContext *ctx, arg_csrrci *a); typedef arg_r arg_mul; static bool trans_mul(DisasContext *ctx, arg_mul *a); @@ -252,23 +298,23 @@ typedef arg_i arg_flw; static bool trans_flw(DisasContext *ctx, arg_flw *a); typedef arg_s arg_fsw; static bool trans_fsw(DisasContext *ctx, arg_fsw *a); -typedef arg_decode_insn3210 arg_fmadd_s; +typedef arg_decode_insn3215 arg_fmadd_s; static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a); -typedef arg_decode_insn3210 arg_fmsub_s; +typedef arg_decode_insn3215 arg_fmsub_s; static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a); -typedef arg_decode_insn3210 arg_fnmsub_s; +typedef arg_decode_insn3215 arg_fnmsub_s; static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a); -typedef arg_decode_insn3210 arg_fnmadd_s; +typedef arg_decode_insn3215 arg_fnmadd_s; static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a); -typedef arg_decode_insn3211 arg_fadd_s; +typedef arg_decode_insn3216 arg_fadd_s; static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a); -typedef arg_decode_insn3211 arg_fsub_s; +typedef arg_decode_insn3216 arg_fsub_s; static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a); -typedef arg_decode_insn3211 arg_fmul_s; +typedef arg_decode_insn3216 arg_fmul_s; static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a); -typedef arg_decode_insn3211 arg_fdiv_s; +typedef arg_decode_insn3216 arg_fdiv_s; static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a); -typedef arg_decode_insn3212 arg_fsqrt_s; +typedef arg_decode_insn3217 arg_fsqrt_s; static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a); typedef arg_r arg_fsgnj_s; static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a); @@ -280,11 +326,11 @@ typedef arg_r arg_fmin_s; static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a); typedef arg_r arg_fmax_s; static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a); -typedef arg_decode_insn3212 arg_fcvt_w_s; +typedef arg_decode_insn3217 arg_fcvt_w_s; static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a); -typedef arg_decode_insn3212 arg_fcvt_wu_s; +typedef arg_decode_insn3217 arg_fcvt_wu_s; static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a); -typedef arg_decode_insn3213 arg_fmv_x_w; +typedef arg_decode_insn3218 arg_fmv_x_w; static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a); typedef arg_r arg_feq_s; static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a); @@ -292,35 +338,35 @@ typedef arg_r arg_flt_s; static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a); typedef arg_r arg_fle_s; static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a); -typedef arg_decode_insn3213 arg_fclass_s; +typedef arg_decode_insn3218 arg_fclass_s; static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a); -typedef arg_decode_insn3212 arg_fcvt_s_w; +typedef arg_decode_insn3217 arg_fcvt_s_w; static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a); -typedef arg_decode_insn3212 arg_fcvt_s_wu; +typedef arg_decode_insn3217 
arg_fcvt_s_wu; static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a); -typedef arg_decode_insn3213 arg_fmv_w_x; +typedef arg_decode_insn3218 arg_fmv_w_x; static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a); typedef arg_i arg_fld; static bool trans_fld(DisasContext *ctx, arg_fld *a); typedef arg_s arg_fsd; static bool trans_fsd(DisasContext *ctx, arg_fsd *a); -typedef arg_decode_insn3210 arg_fmadd_d; +typedef arg_decode_insn3215 arg_fmadd_d; static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a); -typedef arg_decode_insn3210 arg_fmsub_d; +typedef arg_decode_insn3215 arg_fmsub_d; static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a); -typedef arg_decode_insn3210 arg_fnmsub_d; +typedef arg_decode_insn3215 arg_fnmsub_d; static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a); -typedef arg_decode_insn3210 arg_fnmadd_d; +typedef arg_decode_insn3215 arg_fnmadd_d; static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a); -typedef arg_decode_insn3211 arg_fadd_d; +typedef arg_decode_insn3216 arg_fadd_d; static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a); -typedef arg_decode_insn3211 arg_fsub_d; +typedef arg_decode_insn3216 arg_fsub_d; static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a); -typedef arg_decode_insn3211 arg_fmul_d; +typedef arg_decode_insn3216 arg_fmul_d; static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a); -typedef arg_decode_insn3211 arg_fdiv_d; +typedef arg_decode_insn3216 arg_fdiv_d; static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a); -typedef arg_decode_insn3212 arg_fsqrt_d; +typedef arg_decode_insn3217 arg_fsqrt_d; static bool trans_fsqrt_d(DisasContext *ctx, arg_fsqrt_d *a); typedef arg_r arg_fsgnj_d; static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a); @@ -332,9 +378,9 @@ typedef arg_r arg_fmin_d; static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a); typedef arg_r arg_fmax_d; static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a); -typedef arg_decode_insn3212 arg_fcvt_s_d; +typedef arg_decode_insn3217 arg_fcvt_s_d; static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_s; +typedef arg_decode_insn3217 arg_fcvt_d_s; static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a); typedef arg_r arg_feq_d; static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a); @@ -342,16 +388,704 @@ typedef arg_r arg_flt_d; static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a); typedef arg_r arg_fle_d; static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a); -typedef arg_decode_insn3213 arg_fclass_d; +typedef arg_decode_insn3218 arg_fclass_d; static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a); -typedef arg_decode_insn3212 arg_fcvt_w_d; +typedef arg_decode_insn3217 arg_fcvt_w_d; static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a); -typedef arg_decode_insn3212 arg_fcvt_wu_d; +typedef arg_decode_insn3217 arg_fcvt_wu_d; static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_w; +typedef arg_decode_insn3217 arg_fcvt_d_w; static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a); -typedef arg_decode_insn3212 arg_fcvt_d_wu; +typedef arg_decode_insn3217 arg_fcvt_d_wu; static bool trans_fcvt_d_wu(DisasContext *ctx, arg_fcvt_d_wu *a); +typedef arg_decode_insn3222 arg_hfence_gvma; +static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); +typedef arg_decode_insn3222 arg_hfence_vvma; +static bool trans_hfence_vvma(DisasContext *ctx, arg_hfence_vvma *a); +typedef arg_r2nfvm 
arg_vlb_v; +static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a); +typedef arg_r2nfvm arg_vlh_v; +static bool trans_vlh_v(DisasContext *ctx, arg_vlh_v *a); +typedef arg_r2nfvm arg_vlw_v; +static bool trans_vlw_v(DisasContext *ctx, arg_vlw_v *a); +typedef arg_r2nfvm arg_vle_v; +static bool trans_vle_v(DisasContext *ctx, arg_vle_v *a); +typedef arg_r2nfvm arg_vlbu_v; +static bool trans_vlbu_v(DisasContext *ctx, arg_vlbu_v *a); +typedef arg_r2nfvm arg_vlhu_v; +static bool trans_vlhu_v(DisasContext *ctx, arg_vlhu_v *a); +typedef arg_r2nfvm arg_vlwu_v; +static bool trans_vlwu_v(DisasContext *ctx, arg_vlwu_v *a); +typedef arg_r2nfvm arg_vlbff_v; +static bool trans_vlbff_v(DisasContext *ctx, arg_vlbff_v *a); +typedef arg_r2nfvm arg_vlhff_v; +static bool trans_vlhff_v(DisasContext *ctx, arg_vlhff_v *a); +typedef arg_r2nfvm arg_vlwff_v; +static bool trans_vlwff_v(DisasContext *ctx, arg_vlwff_v *a); +typedef arg_r2nfvm arg_vleff_v; +static bool trans_vleff_v(DisasContext *ctx, arg_vleff_v *a); +typedef arg_r2nfvm arg_vlbuff_v; +static bool trans_vlbuff_v(DisasContext *ctx, arg_vlbuff_v *a); +typedef arg_r2nfvm arg_vlhuff_v; +static bool trans_vlhuff_v(DisasContext *ctx, arg_vlhuff_v *a); +typedef arg_r2nfvm arg_vlwuff_v; +static bool trans_vlwuff_v(DisasContext *ctx, arg_vlwuff_v *a); +typedef arg_r2nfvm arg_vsb_v; +static bool trans_vsb_v(DisasContext *ctx, arg_vsb_v *a); +typedef arg_r2nfvm arg_vsh_v; +static bool trans_vsh_v(DisasContext *ctx, arg_vsh_v *a); +typedef arg_r2nfvm arg_vsw_v; +static bool trans_vsw_v(DisasContext *ctx, arg_vsw_v *a); +typedef arg_r2nfvm arg_vse_v; +static bool trans_vse_v(DisasContext *ctx, arg_vse_v *a); +typedef arg_rnfvm arg_vlsb_v; +static bool trans_vlsb_v(DisasContext *ctx, arg_vlsb_v *a); +typedef arg_rnfvm arg_vlsh_v; +static bool trans_vlsh_v(DisasContext *ctx, arg_vlsh_v *a); +typedef arg_rnfvm arg_vlsw_v; +static bool trans_vlsw_v(DisasContext *ctx, arg_vlsw_v *a); +typedef arg_rnfvm arg_vlse_v; +static bool trans_vlse_v(DisasContext *ctx, arg_vlse_v *a); +typedef arg_rnfvm arg_vlsbu_v; +static bool trans_vlsbu_v(DisasContext *ctx, arg_vlsbu_v *a); +typedef arg_rnfvm arg_vlshu_v; +static bool trans_vlshu_v(DisasContext *ctx, arg_vlshu_v *a); +typedef arg_rnfvm arg_vlswu_v; +static bool trans_vlswu_v(DisasContext *ctx, arg_vlswu_v *a); +typedef arg_rnfvm arg_vssb_v; +static bool trans_vssb_v(DisasContext *ctx, arg_vssb_v *a); +typedef arg_rnfvm arg_vssh_v; +static bool trans_vssh_v(DisasContext *ctx, arg_vssh_v *a); +typedef arg_rnfvm arg_vssw_v; +static bool trans_vssw_v(DisasContext *ctx, arg_vssw_v *a); +typedef arg_rnfvm arg_vsse_v; +static bool trans_vsse_v(DisasContext *ctx, arg_vsse_v *a); +typedef arg_rnfvm arg_vlxb_v; +static bool trans_vlxb_v(DisasContext *ctx, arg_vlxb_v *a); +typedef arg_rnfvm arg_vlxh_v; +static bool trans_vlxh_v(DisasContext *ctx, arg_vlxh_v *a); +typedef arg_rnfvm arg_vlxw_v; +static bool trans_vlxw_v(DisasContext *ctx, arg_vlxw_v *a); +typedef arg_rnfvm arg_vlxe_v; +static bool trans_vlxe_v(DisasContext *ctx, arg_vlxe_v *a); +typedef arg_rnfvm arg_vlxbu_v; +static bool trans_vlxbu_v(DisasContext *ctx, arg_vlxbu_v *a); +typedef arg_rnfvm arg_vlxhu_v; +static bool trans_vlxhu_v(DisasContext *ctx, arg_vlxhu_v *a); +typedef arg_rnfvm arg_vlxwu_v; +static bool trans_vlxwu_v(DisasContext *ctx, arg_vlxwu_v *a); +typedef arg_rnfvm arg_vsxb_v; +static bool trans_vsxb_v(DisasContext *ctx, arg_vsxb_v *a); +typedef arg_rnfvm arg_vsxh_v; +static bool trans_vsxh_v(DisasContext *ctx, arg_vsxh_v *a); +typedef arg_rnfvm arg_vsxw_v; 
+static bool trans_vsxw_v(DisasContext *ctx, arg_vsxw_v *a); +typedef arg_rnfvm arg_vsxe_v; +static bool trans_vsxe_v(DisasContext *ctx, arg_vsxe_v *a); +typedef arg_rwdvm arg_vamoswapw_v; +static bool trans_vamoswapw_v(DisasContext *ctx, arg_vamoswapw_v *a); +typedef arg_rwdvm arg_vamoaddw_v; +static bool trans_vamoaddw_v(DisasContext *ctx, arg_vamoaddw_v *a); +typedef arg_rwdvm arg_vamoxorw_v; +static bool trans_vamoxorw_v(DisasContext *ctx, arg_vamoxorw_v *a); +typedef arg_rwdvm arg_vamoandw_v; +static bool trans_vamoandw_v(DisasContext *ctx, arg_vamoandw_v *a); +typedef arg_rwdvm arg_vamoorw_v; +static bool trans_vamoorw_v(DisasContext *ctx, arg_vamoorw_v *a); +typedef arg_rwdvm arg_vamominw_v; +static bool trans_vamominw_v(DisasContext *ctx, arg_vamominw_v *a); +typedef arg_rwdvm arg_vamomaxw_v; +static bool trans_vamomaxw_v(DisasContext *ctx, arg_vamomaxw_v *a); +typedef arg_rwdvm arg_vamominuw_v; +static bool trans_vamominuw_v(DisasContext *ctx, arg_vamominuw_v *a); +typedef arg_rwdvm arg_vamomaxuw_v; +static bool trans_vamomaxuw_v(DisasContext *ctx, arg_vamomaxuw_v *a); +typedef arg_rmrr arg_vadd_vv; +static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a); +typedef arg_rmrr arg_vadd_vx; +static bool trans_vadd_vx(DisasContext *ctx, arg_vadd_vx *a); +typedef arg_rmrr arg_vadd_vi; +static bool trans_vadd_vi(DisasContext *ctx, arg_vadd_vi *a); +typedef arg_rmrr arg_vsub_vv; +static bool trans_vsub_vv(DisasContext *ctx, arg_vsub_vv *a); +typedef arg_rmrr arg_vsub_vx; +static bool trans_vsub_vx(DisasContext *ctx, arg_vsub_vx *a); +typedef arg_rmrr arg_vrsub_vx; +static bool trans_vrsub_vx(DisasContext *ctx, arg_vrsub_vx *a); +typedef arg_rmrr arg_vrsub_vi; +static bool trans_vrsub_vi(DisasContext *ctx, arg_vrsub_vi *a); +typedef arg_rmrr arg_vwaddu_vv; +static bool trans_vwaddu_vv(DisasContext *ctx, arg_vwaddu_vv *a); +typedef arg_rmrr arg_vwaddu_vx; +static bool trans_vwaddu_vx(DisasContext *ctx, arg_vwaddu_vx *a); +typedef arg_rmrr arg_vwadd_vv; +static bool trans_vwadd_vv(DisasContext *ctx, arg_vwadd_vv *a); +typedef arg_rmrr arg_vwadd_vx; +static bool trans_vwadd_vx(DisasContext *ctx, arg_vwadd_vx *a); +typedef arg_rmrr arg_vwsubu_vv; +static bool trans_vwsubu_vv(DisasContext *ctx, arg_vwsubu_vv *a); +typedef arg_rmrr arg_vwsubu_vx; +static bool trans_vwsubu_vx(DisasContext *ctx, arg_vwsubu_vx *a); +typedef arg_rmrr arg_vwsub_vv; +static bool trans_vwsub_vv(DisasContext *ctx, arg_vwsub_vv *a); +typedef arg_rmrr arg_vwsub_vx; +static bool trans_vwsub_vx(DisasContext *ctx, arg_vwsub_vx *a); +typedef arg_rmrr arg_vwaddu_wv; +static bool trans_vwaddu_wv(DisasContext *ctx, arg_vwaddu_wv *a); +typedef arg_rmrr arg_vwaddu_wx; +static bool trans_vwaddu_wx(DisasContext *ctx, arg_vwaddu_wx *a); +typedef arg_rmrr arg_vwadd_wv; +static bool trans_vwadd_wv(DisasContext *ctx, arg_vwadd_wv *a); +typedef arg_rmrr arg_vwadd_wx; +static bool trans_vwadd_wx(DisasContext *ctx, arg_vwadd_wx *a); +typedef arg_rmrr arg_vwsubu_wv; +static bool trans_vwsubu_wv(DisasContext *ctx, arg_vwsubu_wv *a); +typedef arg_rmrr arg_vwsubu_wx; +static bool trans_vwsubu_wx(DisasContext *ctx, arg_vwsubu_wx *a); +typedef arg_rmrr arg_vwsub_wv; +static bool trans_vwsub_wv(DisasContext *ctx, arg_vwsub_wv *a); +typedef arg_rmrr arg_vwsub_wx; +static bool trans_vwsub_wx(DisasContext *ctx, arg_vwsub_wx *a); +typedef arg_rmrr arg_vadc_vvm; +static bool trans_vadc_vvm(DisasContext *ctx, arg_vadc_vvm *a); +typedef arg_rmrr arg_vadc_vxm; +static bool trans_vadc_vxm(DisasContext *ctx, arg_vadc_vxm *a); +typedef arg_rmrr 
arg_vadc_vim; +static bool trans_vadc_vim(DisasContext *ctx, arg_vadc_vim *a); +typedef arg_rmrr arg_vmadc_vvm; +static bool trans_vmadc_vvm(DisasContext *ctx, arg_vmadc_vvm *a); +typedef arg_rmrr arg_vmadc_vxm; +static bool trans_vmadc_vxm(DisasContext *ctx, arg_vmadc_vxm *a); +typedef arg_rmrr arg_vmadc_vim; +static bool trans_vmadc_vim(DisasContext *ctx, arg_vmadc_vim *a); +typedef arg_rmrr arg_vsbc_vvm; +static bool trans_vsbc_vvm(DisasContext *ctx, arg_vsbc_vvm *a); +typedef arg_rmrr arg_vsbc_vxm; +static bool trans_vsbc_vxm(DisasContext *ctx, arg_vsbc_vxm *a); +typedef arg_rmrr arg_vmsbc_vvm; +static bool trans_vmsbc_vvm(DisasContext *ctx, arg_vmsbc_vvm *a); +typedef arg_rmrr arg_vmsbc_vxm; +static bool trans_vmsbc_vxm(DisasContext *ctx, arg_vmsbc_vxm *a); +typedef arg_rmrr arg_vand_vv; +static bool trans_vand_vv(DisasContext *ctx, arg_vand_vv *a); +typedef arg_rmrr arg_vand_vx; +static bool trans_vand_vx(DisasContext *ctx, arg_vand_vx *a); +typedef arg_rmrr arg_vand_vi; +static bool trans_vand_vi(DisasContext *ctx, arg_vand_vi *a); +typedef arg_rmrr arg_vor_vv; +static bool trans_vor_vv(DisasContext *ctx, arg_vor_vv *a); +typedef arg_rmrr arg_vor_vx; +static bool trans_vor_vx(DisasContext *ctx, arg_vor_vx *a); +typedef arg_rmrr arg_vor_vi; +static bool trans_vor_vi(DisasContext *ctx, arg_vor_vi *a); +typedef arg_rmrr arg_vxor_vv; +static bool trans_vxor_vv(DisasContext *ctx, arg_vxor_vv *a); +typedef arg_rmrr arg_vxor_vx; +static bool trans_vxor_vx(DisasContext *ctx, arg_vxor_vx *a); +typedef arg_rmrr arg_vxor_vi; +static bool trans_vxor_vi(DisasContext *ctx, arg_vxor_vi *a); +typedef arg_rmrr arg_vsll_vv; +static bool trans_vsll_vv(DisasContext *ctx, arg_vsll_vv *a); +typedef arg_rmrr arg_vsll_vx; +static bool trans_vsll_vx(DisasContext *ctx, arg_vsll_vx *a); +typedef arg_rmrr arg_vsll_vi; +static bool trans_vsll_vi(DisasContext *ctx, arg_vsll_vi *a); +typedef arg_rmrr arg_vsrl_vv; +static bool trans_vsrl_vv(DisasContext *ctx, arg_vsrl_vv *a); +typedef arg_rmrr arg_vsrl_vx; +static bool trans_vsrl_vx(DisasContext *ctx, arg_vsrl_vx *a); +typedef arg_rmrr arg_vsrl_vi; +static bool trans_vsrl_vi(DisasContext *ctx, arg_vsrl_vi *a); +typedef arg_rmrr arg_vsra_vv; +static bool trans_vsra_vv(DisasContext *ctx, arg_vsra_vv *a); +typedef arg_rmrr arg_vsra_vx; +static bool trans_vsra_vx(DisasContext *ctx, arg_vsra_vx *a); +typedef arg_rmrr arg_vsra_vi; +static bool trans_vsra_vi(DisasContext *ctx, arg_vsra_vi *a); +typedef arg_rmrr arg_vnsrl_vv; +static bool trans_vnsrl_vv(DisasContext *ctx, arg_vnsrl_vv *a); +typedef arg_rmrr arg_vnsrl_vx; +static bool trans_vnsrl_vx(DisasContext *ctx, arg_vnsrl_vx *a); +typedef arg_rmrr arg_vnsrl_vi; +static bool trans_vnsrl_vi(DisasContext *ctx, arg_vnsrl_vi *a); +typedef arg_rmrr arg_vnsra_vv; +static bool trans_vnsra_vv(DisasContext *ctx, arg_vnsra_vv *a); +typedef arg_rmrr arg_vnsra_vx; +static bool trans_vnsra_vx(DisasContext *ctx, arg_vnsra_vx *a); +typedef arg_rmrr arg_vnsra_vi; +static bool trans_vnsra_vi(DisasContext *ctx, arg_vnsra_vi *a); +typedef arg_rmrr arg_vmseq_vv; +static bool trans_vmseq_vv(DisasContext *ctx, arg_vmseq_vv *a); +typedef arg_rmrr arg_vmseq_vx; +static bool trans_vmseq_vx(DisasContext *ctx, arg_vmseq_vx *a); +typedef arg_rmrr arg_vmseq_vi; +static bool trans_vmseq_vi(DisasContext *ctx, arg_vmseq_vi *a); +typedef arg_rmrr arg_vmsne_vv; +static bool trans_vmsne_vv(DisasContext *ctx, arg_vmsne_vv *a); +typedef arg_rmrr arg_vmsne_vx; +static bool trans_vmsne_vx(DisasContext *ctx, arg_vmsne_vx *a); +typedef arg_rmrr arg_vmsne_vi; 
+static bool trans_vmsne_vi(DisasContext *ctx, arg_vmsne_vi *a); +typedef arg_rmrr arg_vmsltu_vv; +static bool trans_vmsltu_vv(DisasContext *ctx, arg_vmsltu_vv *a); +typedef arg_rmrr arg_vmsltu_vx; +static bool trans_vmsltu_vx(DisasContext *ctx, arg_vmsltu_vx *a); +typedef arg_rmrr arg_vmslt_vv; +static bool trans_vmslt_vv(DisasContext *ctx, arg_vmslt_vv *a); +typedef arg_rmrr arg_vmslt_vx; +static bool trans_vmslt_vx(DisasContext *ctx, arg_vmslt_vx *a); +typedef arg_rmrr arg_vmsleu_vv; +static bool trans_vmsleu_vv(DisasContext *ctx, arg_vmsleu_vv *a); +typedef arg_rmrr arg_vmsleu_vx; +static bool trans_vmsleu_vx(DisasContext *ctx, arg_vmsleu_vx *a); +typedef arg_rmrr arg_vmsleu_vi; +static bool trans_vmsleu_vi(DisasContext *ctx, arg_vmsleu_vi *a); +typedef arg_rmrr arg_vmsle_vv; +static bool trans_vmsle_vv(DisasContext *ctx, arg_vmsle_vv *a); +typedef arg_rmrr arg_vmsle_vx; +static bool trans_vmsle_vx(DisasContext *ctx, arg_vmsle_vx *a); +typedef arg_rmrr arg_vmsle_vi; +static bool trans_vmsle_vi(DisasContext *ctx, arg_vmsle_vi *a); +typedef arg_rmrr arg_vmsgtu_vx; +static bool trans_vmsgtu_vx(DisasContext *ctx, arg_vmsgtu_vx *a); +typedef arg_rmrr arg_vmsgtu_vi; +static bool trans_vmsgtu_vi(DisasContext *ctx, arg_vmsgtu_vi *a); +typedef arg_rmrr arg_vmsgt_vx; +static bool trans_vmsgt_vx(DisasContext *ctx, arg_vmsgt_vx *a); +typedef arg_rmrr arg_vmsgt_vi; +static bool trans_vmsgt_vi(DisasContext *ctx, arg_vmsgt_vi *a); +typedef arg_rmrr arg_vminu_vv; +static bool trans_vminu_vv(DisasContext *ctx, arg_vminu_vv *a); +typedef arg_rmrr arg_vminu_vx; +static bool trans_vminu_vx(DisasContext *ctx, arg_vminu_vx *a); +typedef arg_rmrr arg_vmin_vv; +static bool trans_vmin_vv(DisasContext *ctx, arg_vmin_vv *a); +typedef arg_rmrr arg_vmin_vx; +static bool trans_vmin_vx(DisasContext *ctx, arg_vmin_vx *a); +typedef arg_rmrr arg_vmaxu_vv; +static bool trans_vmaxu_vv(DisasContext *ctx, arg_vmaxu_vv *a); +typedef arg_rmrr arg_vmaxu_vx; +static bool trans_vmaxu_vx(DisasContext *ctx, arg_vmaxu_vx *a); +typedef arg_rmrr arg_vmax_vv; +static bool trans_vmax_vv(DisasContext *ctx, arg_vmax_vv *a); +typedef arg_rmrr arg_vmax_vx; +static bool trans_vmax_vx(DisasContext *ctx, arg_vmax_vx *a); +typedef arg_rmrr arg_vmul_vv; +static bool trans_vmul_vv(DisasContext *ctx, arg_vmul_vv *a); +typedef arg_rmrr arg_vmul_vx; +static bool trans_vmul_vx(DisasContext *ctx, arg_vmul_vx *a); +typedef arg_rmrr arg_vmulh_vv; +static bool trans_vmulh_vv(DisasContext *ctx, arg_vmulh_vv *a); +typedef arg_rmrr arg_vmulh_vx; +static bool trans_vmulh_vx(DisasContext *ctx, arg_vmulh_vx *a); +typedef arg_rmrr arg_vmulhu_vv; +static bool trans_vmulhu_vv(DisasContext *ctx, arg_vmulhu_vv *a); +typedef arg_rmrr arg_vmulhu_vx; +static bool trans_vmulhu_vx(DisasContext *ctx, arg_vmulhu_vx *a); +typedef arg_rmrr arg_vmulhsu_vv; +static bool trans_vmulhsu_vv(DisasContext *ctx, arg_vmulhsu_vv *a); +typedef arg_rmrr arg_vmulhsu_vx; +static bool trans_vmulhsu_vx(DisasContext *ctx, arg_vmulhsu_vx *a); +typedef arg_rmrr arg_vdivu_vv; +static bool trans_vdivu_vv(DisasContext *ctx, arg_vdivu_vv *a); +typedef arg_rmrr arg_vdivu_vx; +static bool trans_vdivu_vx(DisasContext *ctx, arg_vdivu_vx *a); +typedef arg_rmrr arg_vdiv_vv; +static bool trans_vdiv_vv(DisasContext *ctx, arg_vdiv_vv *a); +typedef arg_rmrr arg_vdiv_vx; +static bool trans_vdiv_vx(DisasContext *ctx, arg_vdiv_vx *a); +typedef arg_rmrr arg_vremu_vv; +static bool trans_vremu_vv(DisasContext *ctx, arg_vremu_vv *a); +typedef arg_rmrr arg_vremu_vx; +static bool trans_vremu_vx(DisasContext *ctx, 
arg_vremu_vx *a); +typedef arg_rmrr arg_vrem_vv; +static bool trans_vrem_vv(DisasContext *ctx, arg_vrem_vv *a); +typedef arg_rmrr arg_vrem_vx; +static bool trans_vrem_vx(DisasContext *ctx, arg_vrem_vx *a); +typedef arg_rmrr arg_vwmulu_vv; +static bool trans_vwmulu_vv(DisasContext *ctx, arg_vwmulu_vv *a); +typedef arg_rmrr arg_vwmulu_vx; +static bool trans_vwmulu_vx(DisasContext *ctx, arg_vwmulu_vx *a); +typedef arg_rmrr arg_vwmulsu_vv; +static bool trans_vwmulsu_vv(DisasContext *ctx, arg_vwmulsu_vv *a); +typedef arg_rmrr arg_vwmulsu_vx; +static bool trans_vwmulsu_vx(DisasContext *ctx, arg_vwmulsu_vx *a); +typedef arg_rmrr arg_vwmul_vv; +static bool trans_vwmul_vv(DisasContext *ctx, arg_vwmul_vv *a); +typedef arg_rmrr arg_vwmul_vx; +static bool trans_vwmul_vx(DisasContext *ctx, arg_vwmul_vx *a); +typedef arg_rmrr arg_vmacc_vv; +static bool trans_vmacc_vv(DisasContext *ctx, arg_vmacc_vv *a); +typedef arg_rmrr arg_vmacc_vx; +static bool trans_vmacc_vx(DisasContext *ctx, arg_vmacc_vx *a); +typedef arg_rmrr arg_vnmsac_vv; +static bool trans_vnmsac_vv(DisasContext *ctx, arg_vnmsac_vv *a); +typedef arg_rmrr arg_vnmsac_vx; +static bool trans_vnmsac_vx(DisasContext *ctx, arg_vnmsac_vx *a); +typedef arg_rmrr arg_vmadd_vv; +static bool trans_vmadd_vv(DisasContext *ctx, arg_vmadd_vv *a); +typedef arg_rmrr arg_vmadd_vx; +static bool trans_vmadd_vx(DisasContext *ctx, arg_vmadd_vx *a); +typedef arg_rmrr arg_vnmsub_vv; +static bool trans_vnmsub_vv(DisasContext *ctx, arg_vnmsub_vv *a); +typedef arg_rmrr arg_vnmsub_vx; +static bool trans_vnmsub_vx(DisasContext *ctx, arg_vnmsub_vx *a); +typedef arg_rmrr arg_vwmaccu_vv; +static bool trans_vwmaccu_vv(DisasContext *ctx, arg_vwmaccu_vv *a); +typedef arg_rmrr arg_vwmaccu_vx; +static bool trans_vwmaccu_vx(DisasContext *ctx, arg_vwmaccu_vx *a); +typedef arg_rmrr arg_vwmacc_vv; +static bool trans_vwmacc_vv(DisasContext *ctx, arg_vwmacc_vv *a); +typedef arg_rmrr arg_vwmacc_vx; +static bool trans_vwmacc_vx(DisasContext *ctx, arg_vwmacc_vx *a); +typedef arg_rmrr arg_vwmaccsu_vv; +static bool trans_vwmaccsu_vv(DisasContext *ctx, arg_vwmaccsu_vv *a); +typedef arg_rmrr arg_vwmaccsu_vx; +static bool trans_vwmaccsu_vx(DisasContext *ctx, arg_vwmaccsu_vx *a); +typedef arg_rmrr arg_vwmaccus_vx; +static bool trans_vwmaccus_vx(DisasContext *ctx, arg_vwmaccus_vx *a); +typedef arg_decode_insn3218 arg_vmv_v_v; +static bool trans_vmv_v_v(DisasContext *ctx, arg_vmv_v_v *a); +typedef arg_decode_insn3218 arg_vmv_v_x; +static bool trans_vmv_v_x(DisasContext *ctx, arg_vmv_v_x *a); +typedef arg_decode_insn3218 arg_vmv_v_i; +static bool trans_vmv_v_i(DisasContext *ctx, arg_vmv_v_i *a); +typedef arg_rmrr arg_vmerge_vvm; +static bool trans_vmerge_vvm(DisasContext *ctx, arg_vmerge_vvm *a); +typedef arg_rmrr arg_vmerge_vxm; +static bool trans_vmerge_vxm(DisasContext *ctx, arg_vmerge_vxm *a); +typedef arg_rmrr arg_vmerge_vim; +static bool trans_vmerge_vim(DisasContext *ctx, arg_vmerge_vim *a); +typedef arg_rmrr arg_vsaddu_vv; +static bool trans_vsaddu_vv(DisasContext *ctx, arg_vsaddu_vv *a); +typedef arg_rmrr arg_vsaddu_vx; +static bool trans_vsaddu_vx(DisasContext *ctx, arg_vsaddu_vx *a); +typedef arg_rmrr arg_vsaddu_vi; +static bool trans_vsaddu_vi(DisasContext *ctx, arg_vsaddu_vi *a); +typedef arg_rmrr arg_vsadd_vv; +static bool trans_vsadd_vv(DisasContext *ctx, arg_vsadd_vv *a); +typedef arg_rmrr arg_vsadd_vx; +static bool trans_vsadd_vx(DisasContext *ctx, arg_vsadd_vx *a); +typedef arg_rmrr arg_vsadd_vi; +static bool trans_vsadd_vi(DisasContext *ctx, arg_vsadd_vi *a); +typedef arg_rmrr 
arg_vssubu_vv; +static bool trans_vssubu_vv(DisasContext *ctx, arg_vssubu_vv *a); +typedef arg_rmrr arg_vssubu_vx; +static bool trans_vssubu_vx(DisasContext *ctx, arg_vssubu_vx *a); +typedef arg_rmrr arg_vssub_vv; +static bool trans_vssub_vv(DisasContext *ctx, arg_vssub_vv *a); +typedef arg_rmrr arg_vssub_vx; +static bool trans_vssub_vx(DisasContext *ctx, arg_vssub_vx *a); +typedef arg_rmrr arg_vaadd_vv; +static bool trans_vaadd_vv(DisasContext *ctx, arg_vaadd_vv *a); +typedef arg_rmrr arg_vaadd_vx; +static bool trans_vaadd_vx(DisasContext *ctx, arg_vaadd_vx *a); +typedef arg_rmrr arg_vaadd_vi; +static bool trans_vaadd_vi(DisasContext *ctx, arg_vaadd_vi *a); +typedef arg_rmrr arg_vasub_vv; +static bool trans_vasub_vv(DisasContext *ctx, arg_vasub_vv *a); +typedef arg_rmrr arg_vasub_vx; +static bool trans_vasub_vx(DisasContext *ctx, arg_vasub_vx *a); +typedef arg_rmrr arg_vsmul_vv; +static bool trans_vsmul_vv(DisasContext *ctx, arg_vsmul_vv *a); +typedef arg_rmrr arg_vsmul_vx; +static bool trans_vsmul_vx(DisasContext *ctx, arg_vsmul_vx *a); +typedef arg_rmrr arg_vwsmaccu_vv; +static bool trans_vwsmaccu_vv(DisasContext *ctx, arg_vwsmaccu_vv *a); +typedef arg_rmrr arg_vwsmaccu_vx; +static bool trans_vwsmaccu_vx(DisasContext *ctx, arg_vwsmaccu_vx *a); +typedef arg_rmrr arg_vwsmacc_vv; +static bool trans_vwsmacc_vv(DisasContext *ctx, arg_vwsmacc_vv *a); +typedef arg_rmrr arg_vwsmacc_vx; +static bool trans_vwsmacc_vx(DisasContext *ctx, arg_vwsmacc_vx *a); +typedef arg_rmrr arg_vwsmaccsu_vv; +static bool trans_vwsmaccsu_vv(DisasContext *ctx, arg_vwsmaccsu_vv *a); +typedef arg_rmrr arg_vwsmaccsu_vx; +static bool trans_vwsmaccsu_vx(DisasContext *ctx, arg_vwsmaccsu_vx *a); +typedef arg_rmrr arg_vwsmaccus_vx; +static bool trans_vwsmaccus_vx(DisasContext *ctx, arg_vwsmaccus_vx *a); +typedef arg_rmrr arg_vssrl_vv; +static bool trans_vssrl_vv(DisasContext *ctx, arg_vssrl_vv *a); +typedef arg_rmrr arg_vssrl_vx; +static bool trans_vssrl_vx(DisasContext *ctx, arg_vssrl_vx *a); +typedef arg_rmrr arg_vssrl_vi; +static bool trans_vssrl_vi(DisasContext *ctx, arg_vssrl_vi *a); +typedef arg_rmrr arg_vssra_vv; +static bool trans_vssra_vv(DisasContext *ctx, arg_vssra_vv *a); +typedef arg_rmrr arg_vssra_vx; +static bool trans_vssra_vx(DisasContext *ctx, arg_vssra_vx *a); +typedef arg_rmrr arg_vssra_vi; +static bool trans_vssra_vi(DisasContext *ctx, arg_vssra_vi *a); +typedef arg_rmrr arg_vnclipu_vv; +static bool trans_vnclipu_vv(DisasContext *ctx, arg_vnclipu_vv *a); +typedef arg_rmrr arg_vnclipu_vx; +static bool trans_vnclipu_vx(DisasContext *ctx, arg_vnclipu_vx *a); +typedef arg_rmrr arg_vnclipu_vi; +static bool trans_vnclipu_vi(DisasContext *ctx, arg_vnclipu_vi *a); +typedef arg_rmrr arg_vnclip_vv; +static bool trans_vnclip_vv(DisasContext *ctx, arg_vnclip_vv *a); +typedef arg_rmrr arg_vnclip_vx; +static bool trans_vnclip_vx(DisasContext *ctx, arg_vnclip_vx *a); +typedef arg_rmrr arg_vnclip_vi; +static bool trans_vnclip_vi(DisasContext *ctx, arg_vnclip_vi *a); +typedef arg_rmrr arg_vfadd_vv; +static bool trans_vfadd_vv(DisasContext *ctx, arg_vfadd_vv *a); +typedef arg_rmrr arg_vfadd_vf; +static bool trans_vfadd_vf(DisasContext *ctx, arg_vfadd_vf *a); +typedef arg_rmrr arg_vfsub_vv; +static bool trans_vfsub_vv(DisasContext *ctx, arg_vfsub_vv *a); +typedef arg_rmrr arg_vfsub_vf; +static bool trans_vfsub_vf(DisasContext *ctx, arg_vfsub_vf *a); +typedef arg_rmrr arg_vfrsub_vf; +static bool trans_vfrsub_vf(DisasContext *ctx, arg_vfrsub_vf *a); +typedef arg_rmrr arg_vfwadd_vv; +static bool trans_vfwadd_vv(DisasContext 
*ctx, arg_vfwadd_vv *a); +typedef arg_rmrr arg_vfwadd_vf; +static bool trans_vfwadd_vf(DisasContext *ctx, arg_vfwadd_vf *a); +typedef arg_rmrr arg_vfwadd_wv; +static bool trans_vfwadd_wv(DisasContext *ctx, arg_vfwadd_wv *a); +typedef arg_rmrr arg_vfwadd_wf; +static bool trans_vfwadd_wf(DisasContext *ctx, arg_vfwadd_wf *a); +typedef arg_rmrr arg_vfwsub_vv; +static bool trans_vfwsub_vv(DisasContext *ctx, arg_vfwsub_vv *a); +typedef arg_rmrr arg_vfwsub_vf; +static bool trans_vfwsub_vf(DisasContext *ctx, arg_vfwsub_vf *a); +typedef arg_rmrr arg_vfwsub_wv; +static bool trans_vfwsub_wv(DisasContext *ctx, arg_vfwsub_wv *a); +typedef arg_rmrr arg_vfwsub_wf; +static bool trans_vfwsub_wf(DisasContext *ctx, arg_vfwsub_wf *a); +typedef arg_rmrr arg_vfmul_vv; +static bool trans_vfmul_vv(DisasContext *ctx, arg_vfmul_vv *a); +typedef arg_rmrr arg_vfmul_vf; +static bool trans_vfmul_vf(DisasContext *ctx, arg_vfmul_vf *a); +typedef arg_rmrr arg_vfdiv_vv; +static bool trans_vfdiv_vv(DisasContext *ctx, arg_vfdiv_vv *a); +typedef arg_rmrr arg_vfdiv_vf; +static bool trans_vfdiv_vf(DisasContext *ctx, arg_vfdiv_vf *a); +typedef arg_rmrr arg_vfrdiv_vf; +static bool trans_vfrdiv_vf(DisasContext *ctx, arg_vfrdiv_vf *a); +typedef arg_rmrr arg_vfwmul_vv; +static bool trans_vfwmul_vv(DisasContext *ctx, arg_vfwmul_vv *a); +typedef arg_rmrr arg_vfwmul_vf; +static bool trans_vfwmul_vf(DisasContext *ctx, arg_vfwmul_vf *a); +typedef arg_rmrr arg_vfmacc_vv; +static bool trans_vfmacc_vv(DisasContext *ctx, arg_vfmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vv; +static bool trans_vfnmacc_vv(DisasContext *ctx, arg_vfnmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vf; +static bool trans_vfnmacc_vf(DisasContext *ctx, arg_vfnmacc_vf *a); +typedef arg_rmrr arg_vfmacc_vf; +static bool trans_vfmacc_vf(DisasContext *ctx, arg_vfmacc_vf *a); +typedef arg_rmrr arg_vfmsac_vv; +static bool trans_vfmsac_vv(DisasContext *ctx, arg_vfmsac_vv *a); +typedef arg_rmrr arg_vfmsac_vf; +static bool trans_vfmsac_vf(DisasContext *ctx, arg_vfmsac_vf *a); +typedef arg_rmrr arg_vfnmsac_vv; +static bool trans_vfnmsac_vv(DisasContext *ctx, arg_vfnmsac_vv *a); +typedef arg_rmrr arg_vfnmsac_vf; +static bool trans_vfnmsac_vf(DisasContext *ctx, arg_vfnmsac_vf *a); +typedef arg_rmrr arg_vfmadd_vv; +static bool trans_vfmadd_vv(DisasContext *ctx, arg_vfmadd_vv *a); +typedef arg_rmrr arg_vfmadd_vf; +static bool trans_vfmadd_vf(DisasContext *ctx, arg_vfmadd_vf *a); +typedef arg_rmrr arg_vfnmadd_vv; +static bool trans_vfnmadd_vv(DisasContext *ctx, arg_vfnmadd_vv *a); +typedef arg_rmrr arg_vfnmadd_vf; +static bool trans_vfnmadd_vf(DisasContext *ctx, arg_vfnmadd_vf *a); +typedef arg_rmrr arg_vfmsub_vv; +static bool trans_vfmsub_vv(DisasContext *ctx, arg_vfmsub_vv *a); +typedef arg_rmrr arg_vfmsub_vf; +static bool trans_vfmsub_vf(DisasContext *ctx, arg_vfmsub_vf *a); +typedef arg_rmrr arg_vfnmsub_vv; +static bool trans_vfnmsub_vv(DisasContext *ctx, arg_vfnmsub_vv *a); +typedef arg_rmrr arg_vfnmsub_vf; +static bool trans_vfnmsub_vf(DisasContext *ctx, arg_vfnmsub_vf *a); +typedef arg_rmrr arg_vfwmacc_vv; +static bool trans_vfwmacc_vv(DisasContext *ctx, arg_vfwmacc_vv *a); +typedef arg_rmrr arg_vfwmacc_vf; +static bool trans_vfwmacc_vf(DisasContext *ctx, arg_vfwmacc_vf *a); +typedef arg_rmrr arg_vfwnmacc_vv; +static bool trans_vfwnmacc_vv(DisasContext *ctx, arg_vfwnmacc_vv *a); +typedef arg_rmrr arg_vfwnmacc_vf; +static bool trans_vfwnmacc_vf(DisasContext *ctx, arg_vfwnmacc_vf *a); +typedef arg_rmrr arg_vfwmsac_vv; +static bool trans_vfwmsac_vv(DisasContext *ctx, arg_vfwmsac_vv 
*a); +typedef arg_rmrr arg_vfwmsac_vf; +static bool trans_vfwmsac_vf(DisasContext *ctx, arg_vfwmsac_vf *a); +typedef arg_rmrr arg_vfwnmsac_vv; +static bool trans_vfwnmsac_vv(DisasContext *ctx, arg_vfwnmsac_vv *a); +typedef arg_rmrr arg_vfwnmsac_vf; +static bool trans_vfwnmsac_vf(DisasContext *ctx, arg_vfwnmsac_vf *a); +typedef arg_rmr arg_vfsqrt_v; +static bool trans_vfsqrt_v(DisasContext *ctx, arg_vfsqrt_v *a); +typedef arg_rmrr arg_vfmin_vv; +static bool trans_vfmin_vv(DisasContext *ctx, arg_vfmin_vv *a); +typedef arg_rmrr arg_vfmin_vf; +static bool trans_vfmin_vf(DisasContext *ctx, arg_vfmin_vf *a); +typedef arg_rmrr arg_vfmax_vv; +static bool trans_vfmax_vv(DisasContext *ctx, arg_vfmax_vv *a); +typedef arg_rmrr arg_vfmax_vf; +static bool trans_vfmax_vf(DisasContext *ctx, arg_vfmax_vf *a); +typedef arg_rmrr arg_vfsgnj_vv; +static bool trans_vfsgnj_vv(DisasContext *ctx, arg_vfsgnj_vv *a); +typedef arg_rmrr arg_vfsgnj_vf; +static bool trans_vfsgnj_vf(DisasContext *ctx, arg_vfsgnj_vf *a); +typedef arg_rmrr arg_vfsgnjn_vv; +static bool trans_vfsgnjn_vv(DisasContext *ctx, arg_vfsgnjn_vv *a); +typedef arg_rmrr arg_vfsgnjn_vf; +static bool trans_vfsgnjn_vf(DisasContext *ctx, arg_vfsgnjn_vf *a); +typedef arg_rmrr arg_vfsgnjx_vv; +static bool trans_vfsgnjx_vv(DisasContext *ctx, arg_vfsgnjx_vv *a); +typedef arg_rmrr arg_vfsgnjx_vf; +static bool trans_vfsgnjx_vf(DisasContext *ctx, arg_vfsgnjx_vf *a); +typedef arg_rmrr arg_vmfeq_vv; +static bool trans_vmfeq_vv(DisasContext *ctx, arg_vmfeq_vv *a); +typedef arg_rmrr arg_vmfeq_vf; +static bool trans_vmfeq_vf(DisasContext *ctx, arg_vmfeq_vf *a); +typedef arg_rmrr arg_vmfne_vv; +static bool trans_vmfne_vv(DisasContext *ctx, arg_vmfne_vv *a); +typedef arg_rmrr arg_vmfne_vf; +static bool trans_vmfne_vf(DisasContext *ctx, arg_vmfne_vf *a); +typedef arg_rmrr arg_vmflt_vv; +static bool trans_vmflt_vv(DisasContext *ctx, arg_vmflt_vv *a); +typedef arg_rmrr arg_vmflt_vf; +static bool trans_vmflt_vf(DisasContext *ctx, arg_vmflt_vf *a); +typedef arg_rmrr arg_vmfle_vv; +static bool trans_vmfle_vv(DisasContext *ctx, arg_vmfle_vv *a); +typedef arg_rmrr arg_vmfle_vf; +static bool trans_vmfle_vf(DisasContext *ctx, arg_vmfle_vf *a); +typedef arg_rmrr arg_vmfgt_vf; +static bool trans_vmfgt_vf(DisasContext *ctx, arg_vmfgt_vf *a); +typedef arg_rmrr arg_vmfge_vf; +static bool trans_vmfge_vf(DisasContext *ctx, arg_vmfge_vf *a); +typedef arg_rmrr arg_vmford_vv; +static bool trans_vmford_vv(DisasContext *ctx, arg_vmford_vv *a); +typedef arg_rmrr arg_vmford_vf; +static bool trans_vmford_vf(DisasContext *ctx, arg_vmford_vf *a); +typedef arg_rmr arg_vfclass_v; +static bool trans_vfclass_v(DisasContext *ctx, arg_vfclass_v *a); +typedef arg_rmrr arg_vfmerge_vfm; +static bool trans_vfmerge_vfm(DisasContext *ctx, arg_vfmerge_vfm *a); +typedef arg_decode_insn3218 arg_vfmv_v_f; +static bool trans_vfmv_v_f(DisasContext *ctx, arg_vfmv_v_f *a); +typedef arg_rmr arg_vfcvt_xu_f_v; +static bool trans_vfcvt_xu_f_v(DisasContext *ctx, arg_vfcvt_xu_f_v *a); +typedef arg_rmr arg_vfcvt_x_f_v; +static bool trans_vfcvt_x_f_v(DisasContext *ctx, arg_vfcvt_x_f_v *a); +typedef arg_rmr arg_vfcvt_f_xu_v; +static bool trans_vfcvt_f_xu_v(DisasContext *ctx, arg_vfcvt_f_xu_v *a); +typedef arg_rmr arg_vfcvt_f_x_v; +static bool trans_vfcvt_f_x_v(DisasContext *ctx, arg_vfcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_xu_f_v; +static bool trans_vfwcvt_xu_f_v(DisasContext *ctx, arg_vfwcvt_xu_f_v *a); +typedef arg_rmr arg_vfwcvt_x_f_v; +static bool trans_vfwcvt_x_f_v(DisasContext *ctx, arg_vfwcvt_x_f_v *a); +typedef 
arg_rmr arg_vfwcvt_f_xu_v; +static bool trans_vfwcvt_f_xu_v(DisasContext *ctx, arg_vfwcvt_f_xu_v *a); +typedef arg_rmr arg_vfwcvt_f_x_v; +static bool trans_vfwcvt_f_x_v(DisasContext *ctx, arg_vfwcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_f_f_v; +static bool trans_vfwcvt_f_f_v(DisasContext *ctx, arg_vfwcvt_f_f_v *a); +typedef arg_rmr arg_vfncvt_xu_f_v; +static bool trans_vfncvt_xu_f_v(DisasContext *ctx, arg_vfncvt_xu_f_v *a); +typedef arg_rmr arg_vfncvt_x_f_v; +static bool trans_vfncvt_x_f_v(DisasContext *ctx, arg_vfncvt_x_f_v *a); +typedef arg_rmr arg_vfncvt_f_xu_v; +static bool trans_vfncvt_f_xu_v(DisasContext *ctx, arg_vfncvt_f_xu_v *a); +typedef arg_rmr arg_vfncvt_f_x_v; +static bool trans_vfncvt_f_x_v(DisasContext *ctx, arg_vfncvt_f_x_v *a); +typedef arg_rmr arg_vfncvt_f_f_v; +static bool trans_vfncvt_f_f_v(DisasContext *ctx, arg_vfncvt_f_f_v *a); +typedef arg_rmrr arg_vredsum_vs; +static bool trans_vredsum_vs(DisasContext *ctx, arg_vredsum_vs *a); +typedef arg_rmrr arg_vredand_vs; +static bool trans_vredand_vs(DisasContext *ctx, arg_vredand_vs *a); +typedef arg_rmrr arg_vredor_vs; +static bool trans_vredor_vs(DisasContext *ctx, arg_vredor_vs *a); +typedef arg_rmrr arg_vredxor_vs; +static bool trans_vredxor_vs(DisasContext *ctx, arg_vredxor_vs *a); +typedef arg_rmrr arg_vredminu_vs; +static bool trans_vredminu_vs(DisasContext *ctx, arg_vredminu_vs *a); +typedef arg_rmrr arg_vredmin_vs; +static bool trans_vredmin_vs(DisasContext *ctx, arg_vredmin_vs *a); +typedef arg_rmrr arg_vredmaxu_vs; +static bool trans_vredmaxu_vs(DisasContext *ctx, arg_vredmaxu_vs *a); +typedef arg_rmrr arg_vredmax_vs; +static bool trans_vredmax_vs(DisasContext *ctx, arg_vredmax_vs *a); +typedef arg_rmrr arg_vwredsumu_vs; +static bool trans_vwredsumu_vs(DisasContext *ctx, arg_vwredsumu_vs *a); +typedef arg_rmrr arg_vwredsum_vs; +static bool trans_vwredsum_vs(DisasContext *ctx, arg_vwredsum_vs *a); +typedef arg_rmrr arg_vfredsum_vs; +static bool trans_vfredsum_vs(DisasContext *ctx, arg_vfredsum_vs *a); +typedef arg_rmrr arg_vfredmin_vs; +static bool trans_vfredmin_vs(DisasContext *ctx, arg_vfredmin_vs *a); +typedef arg_rmrr arg_vfredmax_vs; +static bool trans_vfredmax_vs(DisasContext *ctx, arg_vfredmax_vs *a); +typedef arg_rmrr arg_vfwredsum_vs; +static bool trans_vfwredsum_vs(DisasContext *ctx, arg_vfwredsum_vs *a); +typedef arg_r arg_vmand_mm; +static bool trans_vmand_mm(DisasContext *ctx, arg_vmand_mm *a); +typedef arg_r arg_vmnand_mm; +static bool trans_vmnand_mm(DisasContext *ctx, arg_vmnand_mm *a); +typedef arg_r arg_vmandnot_mm; +static bool trans_vmandnot_mm(DisasContext *ctx, arg_vmandnot_mm *a); +typedef arg_r arg_vmxor_mm; +static bool trans_vmxor_mm(DisasContext *ctx, arg_vmxor_mm *a); +typedef arg_r arg_vmor_mm; +static bool trans_vmor_mm(DisasContext *ctx, arg_vmor_mm *a); +typedef arg_r arg_vmnor_mm; +static bool trans_vmnor_mm(DisasContext *ctx, arg_vmnor_mm *a); +typedef arg_r arg_vmornot_mm; +static bool trans_vmornot_mm(DisasContext *ctx, arg_vmornot_mm *a); +typedef arg_r arg_vmxnor_mm; +static bool trans_vmxnor_mm(DisasContext *ctx, arg_vmxnor_mm *a); +typedef arg_rmr arg_vmpopc_m; +static bool trans_vmpopc_m(DisasContext *ctx, arg_vmpopc_m *a); +typedef arg_rmr arg_vmfirst_m; +static bool trans_vmfirst_m(DisasContext *ctx, arg_vmfirst_m *a); +typedef arg_rmr arg_vmsbf_m; +static bool trans_vmsbf_m(DisasContext *ctx, arg_vmsbf_m *a); +typedef arg_rmr arg_vmsif_m; +static bool trans_vmsif_m(DisasContext *ctx, arg_vmsif_m *a); +typedef arg_rmr arg_vmsof_m; +static bool trans_vmsof_m(DisasContext 
*ctx, arg_vmsof_m *a); +typedef arg_rmr arg_viota_m; +static bool trans_viota_m(DisasContext *ctx, arg_viota_m *a); +typedef arg_decode_insn3219 arg_vid_v; +static bool trans_vid_v(DisasContext *ctx, arg_vid_v *a); +typedef arg_r arg_vext_x_v; +static bool trans_vext_x_v(DisasContext *ctx, arg_vext_x_v *a); +typedef arg_decode_insn3218 arg_vmv_s_x; +static bool trans_vmv_s_x(DisasContext *ctx, arg_vmv_s_x *a); +typedef arg_decode_insn3220 arg_vfmv_f_s; +static bool trans_vfmv_f_s(DisasContext *ctx, arg_vfmv_f_s *a); +typedef arg_decode_insn3218 arg_vfmv_s_f; +static bool trans_vfmv_s_f(DisasContext *ctx, arg_vfmv_s_f *a); +typedef arg_rmrr arg_vslideup_vx; +static bool trans_vslideup_vx(DisasContext *ctx, arg_vslideup_vx *a); +typedef arg_rmrr arg_vslideup_vi; +static bool trans_vslideup_vi(DisasContext *ctx, arg_vslideup_vi *a); +typedef arg_rmrr arg_vslide1up_vx; +static bool trans_vslide1up_vx(DisasContext *ctx, arg_vslide1up_vx *a); +typedef arg_rmrr arg_vslidedown_vx; +static bool trans_vslidedown_vx(DisasContext *ctx, arg_vslidedown_vx *a); +typedef arg_rmrr arg_vslidedown_vi; +static bool trans_vslidedown_vi(DisasContext *ctx, arg_vslidedown_vi *a); +typedef arg_rmrr arg_vslide1down_vx; +static bool trans_vslide1down_vx(DisasContext *ctx, arg_vslide1down_vx *a); +typedef arg_rmrr arg_vrgather_vv; +static bool trans_vrgather_vv(DisasContext *ctx, arg_vrgather_vv *a); +typedef arg_rmrr arg_vrgather_vx; +static bool trans_vrgather_vx(DisasContext *ctx, arg_vrgather_vx *a); +typedef arg_rmrr arg_vrgather_vi; +static bool trans_vrgather_vi(DisasContext *ctx, arg_vrgather_vi *a); +typedef arg_r arg_vcompress_vm; +static bool trans_vcompress_vm(DisasContext *ctx, arg_vcompress_vm *a); +typedef arg_decode_insn3221 arg_vsetvli; +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a); +typedef arg_r arg_vsetvl; +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a); static void decode_insn32_extract_atom_ld(DisasContext *ctx, arg_atomic *a, uint32_t insn) { @@ -378,30 +1112,30 @@ static void decode_insn32_extract_b(DisasContext *ctx, arg_b *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn329 *a, uint32_t insn) +static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) { a->csr = extract32(insn, 20, 12); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_decode_insn32_Fmt_18(DisasContext *ctx, arg_empty *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_28(DisasContext *ctx, arg_empty *a, uint32_t insn) { } -static void decode_insn32_extract_decode_insn32_Fmt_19(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_29(DisasContext *ctx, arg_decode_insn3224 *a, uint32_t insn) { a->pred = extract32(insn, 24, 4); a->succ = extract32(insn, 20, 4); } -static void decode_insn32_extract_hfence_bvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_vvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -427,20 +1161,54 @@ static void 
decode_insn32_extract_r(DisasContext *ctx, arg_r *a, uint32_t insn) a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3213 *a, uint32_t insn) +static void decode_insn32_extract_r1_vm(DisasContext *ctx, arg_decode_insn3219 *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3218 *a, uint32_t insn) +{ + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_nfvm(DisasContext *ctx, arg_r2nfvm *a, uint32_t insn) { + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3212 *a, uint32_t insn) +static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3217 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); a->rm = extract32(insn, 12, 3); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 *a, uint32_t insn) +static void decode_insn32_extract_r2_vm(DisasContext *ctx, arg_rmr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_zimm(DisasContext *ctx, arg_decode_insn3221 *a, uint32_t insn) +{ + a->zimm = extract32(insn, 20, 11); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2rd(DisasContext *ctx, arg_decode_insn3220 *a, uint32_t insn) +{ + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) { a->rs3 = extract32(insn, 27, 5); a->rs2 = extract32(insn, 20, 5); @@ -449,7 +1217,16 @@ static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 * a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a, uint32_t insn) +static void decode_insn32_extract_r_nfvm(DisasContext *ctx, arg_rnfvm *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -457,6 +1234,39 @@ static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a a->rd = extract32(insn, 7, 5); } +static void decode_insn32_extract_r_vm(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_0(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 0; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_1(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 1; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_wdvm(DisasContext *ctx, arg_rwdvm *a, uint32_t insn) +{ + a->wd = extract32(insn, 26, 1); + a->vm = 
extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) { a->imm = deposit32(extract32(insn, 7, 5), 5, 27, sextract32(insn, 25, 7)); @@ -464,12 +1274,12 @@ static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) +static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3223 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -493,18 +1303,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) union { arg_atomic f_atomic; arg_b f_b; - arg_decode_insn3210 f_decode_insn3210; - arg_decode_insn3211 f_decode_insn3211; - arg_decode_insn3212 f_decode_insn3212; - arg_decode_insn3213 f_decode_insn3213; arg_decode_insn3214 f_decode_insn3214; arg_decode_insn3215 f_decode_insn3215; arg_decode_insn3216 f_decode_insn3216; - arg_decode_insn329 f_decode_insn329; + arg_decode_insn3217 f_decode_insn3217; + arg_decode_insn3218 f_decode_insn3218; + arg_decode_insn3219 f_decode_insn3219; + arg_decode_insn3220 f_decode_insn3220; + arg_decode_insn3221 f_decode_insn3221; + arg_decode_insn3222 f_decode_insn3222; + arg_decode_insn3223 f_decode_insn3223; + arg_decode_insn3224 f_decode_insn3224; arg_empty f_empty; arg_i f_i; arg_j f_j; arg_r f_r; + arg_r2nfvm f_r2nfvm; + arg_rmr f_rmr; + arg_rmrr f_rmrr; + arg_rnfvm f_rnfvm; + arg_rwdvm f_rwdvm; arg_s f_s; arg_shift f_shift; arg_u f_u; @@ -517,45 +1335,227 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:96 */ if (trans_lb(ctx, &u.f_i)) return true; return false; case 0x1: /* ........ ........ .001.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:97 */ if (trans_lh(ctx, &u.f_i)) return true; return false; case 0x2: /* ........ ........ .010.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:98 */ if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:99 */ if (trans_lbu(ctx, &u.f_i)) return true; return false; case 0x5: /* ........ ........ .101.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:100 */ if (trans_lhu(ctx, &u.f_i)) return true; return false; } return false; case 0x00000007: /* ........ ........ ........ .0000111 */ - decode_insn32_extract_i(ctx, &u.f_i, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .000.... .0000111 */ + if (trans_vlbu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .000.... 
.0000111 */ + if (trans_vlbuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .000.... .0000111 */ + if (trans_vlb_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .000.... .0000111 */ + if (trans_vlbff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsb_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:156 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:184 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .101.... .0000111 */ + if (trans_vlhu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .101.... .0000111 */ + if (trans_vlhuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlshu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxhu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .101.... .0000111 */ + if (trans_vlh_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .101.... .0000111 */ + if (trans_vlhff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsh_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .110.... 
.0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .110.... .0000111 */ + if (trans_vlwu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .110.... .0000111 */ + if (trans_vlwuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlswu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxwu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .110.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .110.... .0000111 */ + if (trans_vlw_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .110.... .0000111 */ + if (trans_vlwff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsw_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .111.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .111.... .0000111 */ + if (trans_vle_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .111.... .0000111 */ + if (trans_vleff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlse_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000000f: @@ -563,14 +1563,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:123 */ - decode_insn32_extract_decode_insn32_Fmt_19(ctx, &u.f_decode_insn3216, insn); - if (trans_fence(ctx, &u.f_decode_insn3216)) return true; + decode_insn32_extract_decode_insn32_Fmt_29(ctx, &u.f_decode_insn3224, insn); + if (trans_fence(ctx, &u.f_decode_insn3224)) return true; return false; case 0x1: /* ........ ........ .001.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:124 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); if (trans_fence_i(ctx, &u.f_empty)) return true; return false; } @@ -580,7 +1578,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... 
.0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:104 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -590,26 +1587,22 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .001.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:110 */ if (trans_slli(ctx, &u.f_shift)) return true; return false; } return false; case 0x2: /* ........ ........ .010.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:105 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_slti(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:106 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_sltiu(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:107 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_xori(ctx, &u.f_i)) return true; return false; @@ -619,25 +1612,21 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:111 */ if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 01...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:112 */ if (trans_srai(ctx, &u.f_shift)) return true; return false; } return false; case 0x6: /* ........ ........ .110.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:108 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_ori(ctx, &u.f_i)) return true; return false; case 0x7: /* ........ ........ .111.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:109 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -645,7 +1634,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x00000017: /* ........ ........ ........ .0010111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:87 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_auipc(ctx, &u.f_u)) return true; return false; @@ -655,35 +1643,151 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:101 */ if (trans_sb(ctx, &u.f_s)) return true; return false; case 0x1: /* ........ ........ .001.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:102 */ if (trans_sh(ctx, &u.f_s)) return true; return false; case 0x2: /* ........ ........ .010.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:103 */ if (trans_sw(ctx, &u.f_s)) return true; return false; } return false; case 0x00000027: /* ........ ........ ........ .0100111 */ - decode_insn32_extract_s(ctx, &u.f_s, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .000.... 
.0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .000.... .0100111 */ + if (trans_vsb_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .000.... .0100111 */ + if (trans_vssb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:157 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; case 0x3: /* ........ ........ .011.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:185 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .101.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .101.... .0100111 */ + if (trans_vsh_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .101.... .0100111 */ + if (trans_vssh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .110.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .110.... .0100111 */ + if (trans_vsw_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .110.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .110.... .0100111 */ + if (trans_vssw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .110.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .111.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .111.... .0100111 */ + if (trans_vse_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .111.... 
.0100111 */ + if (trans_vsse_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000002f: @@ -691,75 +1795,109 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xf8007000) { case 0x00002000: /* 00000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:146 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoadd_w(ctx, &u.f_atomic)) return true; return false; + case 0x00006000: + /* 00000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoaddw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x08002000: /* 00001... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:145 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoswap_w(ctx, &u.f_atomic)) return true; return false; + case 0x08006000: + /* 00001... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoswapw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x10002000: /* 00010... ........ .010.... .0101111 */ decode_insn32_extract_atom_ld(ctx, &u.f_atomic, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 00010..0 0000.... .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:143 */ if (trans_lr_w(ctx, &u.f_atomic)) return true; return false; } return false; case 0x18002000: /* 00011... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:144 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_sc_w(ctx, &u.f_atomic)) return true; return false; case 0x20002000: /* 00100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:147 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoxor_w(ctx, &u.f_atomic)) return true; return false; + case 0x20006000: + /* 00100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoxorw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x40002000: /* 01000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:149 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoor_w(ctx, &u.f_atomic)) return true; return false; + case 0x40006000: + /* 01000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoorw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x60002000: /* 01100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:148 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoand_w(ctx, &u.f_atomic)) return true; return false; + case 0x60006000: + /* 01100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoandw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x80002000: /* 10000... ........ .010.... 
.0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:150 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomin_w(ctx, &u.f_atomic)) return true; return false; + case 0x80006000: + /* 10000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xa0002000: /* 10100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:151 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomax_w(ctx, &u.f_atomic)) return true; return false; + case 0xa0006000: + /* 10100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xc0002000: /* 11000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:152 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amominu_w(ctx, &u.f_atomic)) return true; return false; + case 0xc0006000: + /* 11000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominuw_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xe0002000: /* 11100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:153 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomaxu_w(ctx, &u.f_atomic)) return true; return false; + case 0xe0006000: + /* 11100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxuw_v(ctx, &u.f_rwdvm)) return true; + return false; } return false; case 0x00000033: @@ -768,163 +1906,136 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe007000) { case 0x00000000: /* 0000000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:113 */ if (trans_add(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 0000000. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:115 */ if (trans_sll(ctx, &u.f_r)) return true; return false; case 0x00002000: /* 0000000. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:116 */ if (trans_slt(ctx, &u.f_r)) return true; return false; case 0x00003000: /* 0000000. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:117 */ if (trans_sltu(ctx, &u.f_r)) return true; return false; case 0x00004000: /* 0000000. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:118 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00005000: /* 0000000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:119 */ if (trans_srl(ctx, &u.f_r)) return true; return false; case 0x00006000: /* 0000000. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:121 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00007000: /* 0000000. ........ .111.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:122 */ if (trans_and(ctx, &u.f_r)) return true; return false; case 0x02000000: /* 0000001. ........ .000.... 
.0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:133 */ if (trans_mul(ctx, &u.f_r)) return true; return false; case 0x02001000: /* 0000001. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:134 */ if (trans_mulh(ctx, &u.f_r)) return true; return false; case 0x02002000: /* 0000001. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:135 */ if (trans_mulhsu(ctx, &u.f_r)) return true; return false; case 0x02003000: /* 0000001. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:136 */ if (trans_mulhu(ctx, &u.f_r)) return true; return false; case 0x02004000: /* 0000001. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:137 */ if (trans_div(ctx, &u.f_r)) return true; return false; case 0x02005000: /* 0000001. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:138 */ if (trans_divu(ctx, &u.f_r)) return true; return false; case 0x02006000: /* 0000001. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:139 */ if (trans_rem(ctx, &u.f_r)) return true; return false; case 0x02007000: /* 0000001. ........ .111.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:140 */ if (trans_remu(ctx, &u.f_r)) return true; return false; case 0x40000000: /* 0100000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:114 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x40005000: /* 0100000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:120 */ if (trans_sra(ctx, &u.f_r)) return true; return false; } return false; case 0x00000037: /* ........ ........ ........ .0110111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:86 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; case 0x00000043: /* ........ ........ ........ .1000011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:158 */ - if (trans_fmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:186 */ - if (trans_fmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x00000047: /* ........ ........ ........ .1000111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:159 */ - if (trans_fmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ 
.1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:187 */ - if (trans_fmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004b: /* ........ ........ ........ .1001011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:160 */ - if (trans_fnmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:188 */ - if (trans_fnmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004f: /* ........ ........ ........ .1001111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:161 */ - if (trans_fnmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:189 */ - if (trans_fnmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; @@ -933,51 +2044,43 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:162 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x1: /* 0000001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:190 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x4: /* 0000100. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:163 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x5: /* 0000101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:191 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x8: /* 0001000. ........ ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:164 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x9: /* 0001001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:192 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0xc: /* 0001100. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:165 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0xd: /* 0001101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:193 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x10: /* 0010000. ........ ........ .1010011 */ @@ -985,17 +2088,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010000. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:167 */ if (trans_fsgnj_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:168 */ if (trans_fsgnjn_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:169 */ if (trans_fsgnjx_s(ctx, &u.f_r)) return true; return false; } @@ -1006,17 +2106,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010001. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:195 */ if (trans_fsgnj_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010001. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:196 */ if (trans_fsgnjn_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:197 */ if (trans_fsgnjx_d(ctx, &u.f_r)) return true; return false; } @@ -1027,12 +2124,10 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010100. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:170 */ if (trans_fmin_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010100. ........ .001.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:171 */ if (trans_fmax_s(ctx, &u.f_r)) return true; return false; } @@ -1043,57 +2138,51 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010101. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:198 */ if (trans_fmin_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010101. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:199 */ if (trans_fmax_d(ctx, &u.f_r)) return true; return false; } return false; case 0x20: /* 0100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x1: /* 01000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:200 */ - if (trans_fcvt_s_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x21: /* 0100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:201 */ - if (trans_fcvt_d_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2c: /* 0101100. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:166 */ - if (trans_fsqrt_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2d: /* 0101101. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:194 */ - if (trans_fsqrt_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; @@ -1103,17 +2192,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010000. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:177 */ if (trans_fle_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:176 */ if (trans_flt_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:175 */ if (trans_feq_s(ctx, &u.f_r)) return true; return false; } @@ -1124,120 +2210,1726 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010001. ........ .000.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:204 */ if (trans_fle_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010001. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:203 */ if (trans_flt_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:202 */ if (trans_feq_d(ctx, &u.f_r)) return true; return false; } return false; case 0x60: /* 1100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:172 */ - if (trans_fcvt_w_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:173 */ - if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x61: /* 1100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:206 */ - if (trans_fcvt_w_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000010 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:207 */ - if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x68: /* 1101000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:179 */ - if (trans_fcvt_s_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:180 */ - if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x69: /* 1101001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:208 */ - if (trans_fcvt_d_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010010 0001.... ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:209 */ - if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x70: /* 1110000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11100000 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:174 */ - if (trans_fmv_x_w(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_x_w(ctx, &u.f_decode_insn3218)) return true; return false; case 0x00001000: /* 11100000 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:178 */ - if (trans_fclass_s(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_s(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x71: /* 1110001. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00001000: /* 11100010 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:205 */ - if (trans_fclass_d(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_d(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x78: /* 1111000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11110000 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:181 */ - if (trans_fmv_w_x(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_w_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x00000057: + /* ........ ........ ........ .1010111 */ + switch (insn & 0x80007000) { + case 0x00000000: + /* 0....... ........ .000.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .000.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .000.... .1010111 */ + if (trans_vadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .000.... .1010111 */ + if (trans_vmadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .000.... .1010111 */ + if (trans_vsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .000.... .1010111 */ + if (trans_vmsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .000.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vvm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .000.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .000.... .1010111 */ + if (trans_vmv_v_v(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00001000: + /* 0....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 0000.0.. ........ .001.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000000.. ........ .001.... .1010111 */ + if (trans_vfadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000010.. ........ .001.... .1010111 */ + if (trans_vfsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x04000000: + /* 0000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x10000000: + /* 0001.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000100.. ........ .001.... .1010111 */ + if (trans_vfmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000110.. ........ .001.... .1010111 */ + if (trans_vfmax_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x14000000: + /* 0001.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000101.. ........ .001.... .1010111 */ + if (trans_vfredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000111.. ........ .001.... .1010111 */ + if (trans_vfredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 0010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001000.. ........ .001.... .1010111 */ + if (trans_vfsgnj_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 001010.. ........ .001.... .1010111 */ + if (trans_vfsgnjx_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 0010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001001.. ........ .001.... .1010111 */ + if (trans_vfsgnjn_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 0011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r2rd(ctx, &u.f_decode_insn3220, insn); + switch (insn & 0x0a0f8000) { + case 0x02000000: + /* 0011001. ....0000 0001.... .1010111 */ + if (trans_vfmv_f_s(ctx, &u.f_decode_insn3220)) return true; + return false; + } + return false; + case 0x60000000: + /* 0110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011000.. ........ .001.... .1010111 */ + if (trans_vmfeq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011010.. ........ .001.... .1010111 */ + if (trans_vmford_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x64000000: + /* 0110.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011001.. ........ .001.... .1010111 */ + if (trans_vmfle_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011011.. ........ .001.... .1010111 */ + if (trans_vmflt_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 0111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011100.. ........ .001.... 
.1010111 */ + if (trans_vmfne_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x00002000: + /* 0....... ........ .010.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000001.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredand_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredxor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredminu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmaxu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0011001. ........ .010.... .1010111 */ + if (trans_vext_x_v(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x14: + /* 010100.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmpopc_m(ctx, &u.f_rmr)) return true; + return false; + case 0x15: + /* 010101.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmfirst_m(ctx, &u.f_rmr)) return true; + return false; + case 0x16: + /* 010110.. ........ .010.... .1010111 */ + switch ((insn >> 15) & 0x1f) { + case 0x1: + /* 010110.. ....0000 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsbf_m(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 010110.. ....0001 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsof_m(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 010110.. ....0001 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsif_m(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 010110.. ....1000 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_viota_m(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 010110.. ....1000 1010.... .1010111 */ + decode_insn32_extract_r1_vm(ctx, &u.f_decode_insn3219, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 010110.0 00001000 1010.... .1010111 */ + if (trans_vid_v(ctx, &u.f_decode_insn3219)) return true; + return false; + } + return false; + } + return false; + case 0x17: + /* 010111.. ........ .010.... 
.1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vcompress_vm(ctx, &u.f_r)) return true; + return false; + case 0x18: + /* 011000.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmandnot_mm(ctx, &u.f_r)) return true; + return false; + case 0x19: + /* 011001.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1a: + /* 011010.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1b: + /* 011011.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1c: + /* 011100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmornot_mm(ctx, &u.f_r)) return true; + return false; + case 0x1d: + /* 011101.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1e: + /* 011110.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1f: + /* 011111.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxnor_mm(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x00003000: + /* 0....... ........ .011.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .011.... .1010111 */ + if (trans_vadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .011.... 
.1010111 */ + if (trans_vmadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .011.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vim(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .011.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .011.... .1010111 */ + if (trans_vmv_v_i(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00004000: + /* 0....... ........ .100.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .100.... .1010111 */ + if (trans_vadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .100.... .1010111 */ + if (trans_vmadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .100.... .1010111 */ + if (trans_vsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .100.... .1010111 */ + if (trans_vmsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .100.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vxm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .100.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .100.... .1010111 */ + if (trans_vmv_v_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00005000: + /* 0....... ........ .101.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmin_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmax_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 001000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnj_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjn_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjx_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 001101.. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .101.... .1010111 */ + if (trans_vfmv_s_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .101.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vfmerge_vfm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .101.... .1010111 */ + if (trans_vfmv_v_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfeq_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfle_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmford_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmflt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfne_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .101.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfgt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfge_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00006000: + /* 0....... ........ .110.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0xd: + /* 001101.. ........ .110.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .110.... .1010111 */ + if (trans_vmv_s_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0xe: + /* 001110.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1up_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1down_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00007000: + /* 0....... ........ .111.... .1010111 */ + decode_insn32_extract_r2_zimm(ctx, &u.f_decode_insn3221, insn); + if (trans_vsetvli(ctx, &u.f_decode_insn3221)) return true; + return false; + case 0x80000000: + /* 1....... ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .000.... .1010111 */ + if (trans_vsaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .000.... .1010111 */ + if (trans_vsadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .000.... .1010111 */ + if (trans_vssubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .000.... .1010111 */ + if (trans_vssub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .000.... .1010111 */ + if (trans_vaadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .000.... .1010111 */ + if (trans_vsll_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .000.... .1010111 */ + if (trans_vasub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .000.... .1010111 */ + if (trans_vsmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .000.... .1010111 */ + if (trans_vsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .000.... .1010111 */ + if (trans_vsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .000.... .1010111 */ + if (trans_vssrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .000.... .1010111 */ + if (trans_vssra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .000.... .1010111 */ + if (trans_vnsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .000.... .1010111 */ + if (trans_vnsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .000.... .1010111 */ + if (trans_vnclipu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .000.... .1010111 */ + if (trans_vnclip_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .000.... 
.1010111 */ + if (trans_vwredsumu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .000.... .1010111 */ + if (trans_vwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .000.... .1010111 */ + if (trans_vwsmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .000.... .1010111 */ + if (trans_vwsmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .000.... .1010111 */ + if (trans_vwsmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80001000: + /* 1....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 1000.0.. ........ .001.... .1010111 */ + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100000.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100010.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch ((insn >> 15) & 0x1f) { + case 0x0: + /* 100010.. ....0000 0001.... .1010111 */ + if (trans_vfcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x1: + /* 100010.. ....0000 1001.... .1010111 */ + if (trans_vfcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 100010.. ....0001 0001.... .1010111 */ + if (trans_vfcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 100010.. ....0001 1001.... .1010111 */ + if (trans_vfcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x8: + /* 100010.. ....0100 0001.... .1010111 */ + if (trans_vfwcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x9: + /* 100010.. ....0100 1001.... .1010111 */ + if (trans_vfwcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0xa: + /* 100010.. ....0101 0001.... .1010111 */ + if (trans_vfwcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0xb: + /* 100010.. ....0101 1001.... .1010111 */ + if (trans_vfwcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0xc: + /* 100010.. ....0110 0001.... .1010111 */ + if (trans_vfwcvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 100010.. ....1000 0001.... .1010111 */ + if (trans_vfncvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 100010.. ....1000 1001.... .1010111 */ + if (trans_vfncvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x12: + /* 100010.. ....1001 0001.... .1010111 */ + if (trans_vfncvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x13: + /* 100010.. ....1001 1001.... .1010111 */ + if (trans_vfncvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x14: + /* 100010.. ....1010 0001.... .1010111 */ + if (trans_vfncvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + } + return false; + case 0x04000000: + /* 1000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch (insn & 0x080f8000) { + case 0x08000000: + /* 100011.. ....0000 0001.... .1010111 */ + if (trans_vfsqrt_v(ctx, &u.f_rmr)) return true; + return false; + case 0x08080000: + /* 100011.. ....1000 0001.... .1010111 */ + if (trans_vfclass_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + case 0x10000000: + /* 1001.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100100.. ........ .001.... 
.1010111 */ + if (trans_vfmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 1010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101000.. ........ .001.... .1010111 */ + if (trans_vfmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101010.. ........ .001.... .1010111 */ + if (trans_vfmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 1010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101001.. ........ .001.... .1010111 */ + if (trans_vfnmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101011.. ........ .001.... .1010111 */ + if (trans_vfnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 1011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101100.. ........ .001.... .1010111 */ + if (trans_vfmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101110.. ........ .001.... .1010111 */ + if (trans_vfmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x34000000: + /* 1011.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101101.. ........ .001.... .1010111 */ + if (trans_vfnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101111.. ........ .001.... .1010111 */ + if (trans_vfnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x40000000: + /* 1100.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110000.. ........ .001.... .1010111 */ + if (trans_vfwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110010.. ........ .001.... .1010111 */ + if (trans_vfwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x44000000: + /* 1100.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x50000000: + /* 1101.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110100.. ........ .001.... .1010111 */ + if (trans_vfwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110110.. ........ .001.... .1010111 */ + if (trans_vfwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x60000000: + /* 1110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111000.. ........ .001.... .1010111 */ + if (trans_vfwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 1111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111100.. ........ .001.... .1010111 */ + if (trans_vfwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111110.. ........ .001.... .1010111 */ + if (trans_vfwmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x74000000: + /* 1111.1.. ........ .001.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111101.. ........ .001.... .1010111 */ + if (trans_vfwnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111111.. ........ .001.... .1010111 */ + if (trans_vfwnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x80002000: + /* 1....... ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .010.... .1010111 */ + if (trans_vdivu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .010.... .1010111 */ + if (trans_vdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .010.... .1010111 */ + if (trans_vremu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .010.... .1010111 */ + if (trans_vrem_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .010.... .1010111 */ + if (trans_vmulhu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .010.... .1010111 */ + if (trans_vmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .010.... .1010111 */ + if (trans_vmulhsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .010.... .1010111 */ + if (trans_vmulh_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .010.... .1010111 */ + if (trans_vmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .010.... .1010111 */ + if (trans_vnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .010.... .1010111 */ + if (trans_vmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .010.... .1010111 */ + if (trans_vnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .010.... .1010111 */ + if (trans_vwaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .010.... .1010111 */ + if (trans_vwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .010.... .1010111 */ + if (trans_vwsubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .010.... .1010111 */ + if (trans_vwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .010.... .1010111 */ + if (trans_vwaddu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .010.... .1010111 */ + if (trans_vwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .010.... .1010111 */ + if (trans_vwsubu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .010.... .1010111 */ + if (trans_vwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .010.... .1010111 */ + if (trans_vwmulu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .010.... .1010111 */ + if (trans_vwmulsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .010.... .1010111 */ + if (trans_vwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .010.... .1010111 */ + if (trans_vwmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ 
.010.... .1010111 */ + if (trans_vwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .010.... .1010111 */ + if (trans_vwmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80003000: + /* 1....... ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .011.... .1010111 */ + if (trans_vsaddu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .011.... .1010111 */ + if (trans_vsadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .011.... .1010111 */ + if (trans_vaadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .011.... .1010111 */ + if (trans_vsll_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .011.... .1010111 */ + if (trans_vsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .011.... .1010111 */ + if (trans_vsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .011.... .1010111 */ + if (trans_vssrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .011.... .1010111 */ + if (trans_vssra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .011.... .1010111 */ + if (trans_vnsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .011.... .1010111 */ + if (trans_vnsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .011.... .1010111 */ + if (trans_vnclipu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .011.... .1010111 */ + if (trans_vnclip_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80004000: + /* 1....... ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .100.... .1010111 */ + if (trans_vsaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .100.... .1010111 */ + if (trans_vsadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .100.... .1010111 */ + if (trans_vssubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .100.... .1010111 */ + if (trans_vssub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .100.... .1010111 */ + if (trans_vaadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .100.... .1010111 */ + if (trans_vsll_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .100.... .1010111 */ + if (trans_vasub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .100.... .1010111 */ + if (trans_vsmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .100.... .1010111 */ + if (trans_vsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .100.... .1010111 */ + if (trans_vsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .100.... .1010111 */ + if (trans_vssrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .100.... .1010111 */ + if (trans_vssra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .100.... 
.1010111 */ + if (trans_vnsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .100.... .1010111 */ + if (trans_vnsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .100.... .1010111 */ + if (trans_vnclipu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .100.... .1010111 */ + if (trans_vnclip_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .100.... .1010111 */ + if (trans_vwsmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .100.... .1010111 */ + if (trans_vwsmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .100.... .1010111 */ + if (trans_vwsmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .100.... .1010111 */ + if (trans_vwsmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80005000: + /* 1....... ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .101.... .1010111 */ + if (trans_vfdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .101.... .1010111 */ + if (trans_vfrdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .101.... .1010111 */ + if (trans_vfmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .101.... .1010111 */ + if (trans_vfrsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .101.... .1010111 */ + if (trans_vfmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .101.... .1010111 */ + if (trans_vfnmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .101.... .1010111 */ + if (trans_vfmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .101.... .1010111 */ + if (trans_vfnmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .101.... .1010111 */ + if (trans_vfmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .101.... .1010111 */ + if (trans_vfnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .101.... .1010111 */ + if (trans_vfmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .101.... .1010111 */ + if (trans_vfnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .101.... .1010111 */ + if (trans_vfwadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .101.... .1010111 */ + if (trans_vfwsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .101.... .1010111 */ + if (trans_vfwadd_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .101.... .1010111 */ + if (trans_vfwsub_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .101.... .1010111 */ + if (trans_vfwmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .101.... .1010111 */ + if (trans_vfwmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .101.... .1010111 */ + if (trans_vfwnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .101.... 
.1010111 */ + if (trans_vfwmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .101.... .1010111 */ + if (trans_vfwnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80006000: + /* 1....... ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .110.... .1010111 */ + if (trans_vdivu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .110.... .1010111 */ + if (trans_vdiv_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .110.... .1010111 */ + if (trans_vremu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .110.... .1010111 */ + if (trans_vrem_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .110.... .1010111 */ + if (trans_vmulhu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .110.... .1010111 */ + if (trans_vmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .110.... .1010111 */ + if (trans_vmulhsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .110.... .1010111 */ + if (trans_vmulh_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .110.... .1010111 */ + if (trans_vmadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .110.... .1010111 */ + if (trans_vnmsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .110.... .1010111 */ + if (trans_vmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .110.... .1010111 */ + if (trans_vnmsac_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .110.... .1010111 */ + if (trans_vwaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .110.... .1010111 */ + if (trans_vwadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .110.... .1010111 */ + if (trans_vwsubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .110.... .1010111 */ + if (trans_vwsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .110.... .1010111 */ + if (trans_vwaddu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .110.... .1010111 */ + if (trans_vwadd_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .110.... .1010111 */ + if (trans_vwsubu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .110.... .1010111 */ + if (trans_vwsub_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .110.... .1010111 */ + if (trans_vwmulu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .110.... .1010111 */ + if (trans_vwmulsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .110.... .1010111 */ + if (trans_vwmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .110.... .1010111 */ + if (trans_vwmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .110.... .1010111 */ + if (trans_vwmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .110.... 
.1010111 */ + if (trans_vwmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .110.... .1010111 */ + if (trans_vwmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80007000: + /* 1....... ........ .111.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x3f) { + case 0x0: + /* 1000000. ........ .111.... .1010111 */ + if (trans_vsetvl(ctx, &u.f_r)) return true; return false; } return false; @@ -1249,32 +3941,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:90 */ if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x1: /* ........ ........ .001.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:91 */ if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x4: /* ........ ........ .100.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:92 */ if (trans_blt(ctx, &u.f_b)) return true; return false; case 0x5: /* ........ ........ .101.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:93 */ if (trans_bge(ctx, &u.f_b)) return true; return false; case 0x6: /* ........ ........ .110.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:94 */ if (trans_bltu(ctx, &u.f_b)) return true; return false; case 0x7: /* ........ ........ .111.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:95 */ if (trans_bgeu(ctx, &u.f_b)) return true; return false; } @@ -1285,14 +3971,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:89 */ if (trans_jalr(ctx, &u.f_i)) return true; return false; } return false; case 0x0000006f: /* ........ ........ ........ .1101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:88 */ decode_insn32_extract_j(ctx, &u.f_j, insn); if (trans_jal(ctx, &u.f_j)) return true; return false; @@ -1304,21 +3988,18 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe000f80) { case 0x00000000: /* 0000000. ........ .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x0: /* 00000000 00000000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:74 */ if (trans_ecall(ctx, &u.f_empty)) return true; return false; case 0x20: /* 00000000 00010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:75 */ if (trans_ebreak(ctx, &u.f_empty)) return true; return false; case 0x40: /* 00000000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:76 */ if (trans_uret(ctx, &u.f_empty)) return true; return false; } @@ -1328,28 +4009,25 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 20) & 0x1f) { case 0x2: /* 00010000 0010.... 
.0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:77 */ if (trans_sret(ctx, &u.f_empty)) return true; return false; } return false; case 0x4: /* 00010000 0100.... .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:83 */ - decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3215, insn); - if (trans_sfence_vm(ctx, &u.f_decode_insn3215)) return true; + decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3223, insn); + if (trans_sfence_vm(ctx, &u.f_decode_insn3223)) return true; return false; case 0x5: /* 00010000 0101.... .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 01010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:79 */ if (trans_wfi(ctx, &u.f_empty)) return true; return false; } @@ -1358,70 +4036,60 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x12000000: /* 0001001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:82 */ - decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3214, insn); - if (trans_sfence_vma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3222, insn); + if (trans_sfence_vma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x22000000: /* 0010001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:81 */ - decode_insn32_extract_hfence_bvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_bvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_vvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_vvma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x30000000: /* 0011000. ........ .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x40: /* 00110000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:78 */ if (trans_mret(ctx, &u.f_empty)) return true; return false; } return false; case 0x62000000: /* 0110001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:80 */ - decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_gvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_gvma(ctx, &u.f_decode_insn3222)) return true; return false; } return false; case 0x1: /* ........ ........ .001.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:125 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrw(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrw(ctx, &u.f_decode_insn3214)) return true; return false; case 0x2: /* ........ ........ .010.... 
.1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:126 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrs(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrs(ctx, &u.f_decode_insn3214)) return true; return false; case 0x3: /* ........ ........ .011.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:127 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrc(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrc(ctx, &u.f_decode_insn3214)) return true; return false; case 0x5: /* ........ ........ .101.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:128 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrwi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrwi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x6: /* ........ ........ .110.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:129 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrsi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrsi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x7: /* ........ ........ .111.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:130 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrci(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrci(ctx, &u.f_decode_insn3214)) return true; return false; } return false; diff --git a/qemu/target/riscv/riscv64/decode_insn16.inc.c b/qemu/target/riscv/riscv64/decode_insn16.inc.c index 719388566f..a3bfbd0d3f 100644 --- a/qemu/target/riscv/riscv64/decode_insn16.inc.c +++ b/qemu/target/riscv/riscv64/decode_insn16.inc.c @@ -1,11 +1,9 @@ /* This file is autogenerated by scripts/decodetree.py. */ -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wredundant-decls" -# ifdef __clang__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#ifdef __clang__ # pragma GCC diagnostic ignored "-Wtypedef-redefinition" -# endif #endif typedef arg_empty arg_illegal; @@ -61,9 +59,7 @@ static bool trans_subw(DisasContext *ctx, arg_subw *a); typedef arg_r arg_addw; static bool trans_addw(DisasContext *ctx, arg_addw *a); -#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE -# pragma GCC diagnostic pop -#endif +#pragma GCC diagnostic pop static void decode_insn16_extract_c_addi16sp(DisasContext *ctx, arg_i *a, uint16_t insn) { @@ -237,30 +233,24 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 000..... 
......00 */ if ((insn & 0x00001fe0) == 0x00000000) { /* 00000000 000...00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:87 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); - ctx->invalid = true; if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:88 */ decode_insn16_extract_c_addi4spn(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000001: /* 000..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:96 */ decode_insn16_extract_ci(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; case 0x00000002: /* 000..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:115 */ decode_insn16_extract_c_shift2(ctx, &u.f_shift, insn); if (trans_slli(ctx, &u.f_shift)) return true; return false; case 0x00002000: /* 001..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:90 */ decode_insn16_extract_cl_d(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; @@ -268,29 +258,24 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 001..... ......01 */ if ((insn & 0x00000f80) == 0x00000000) { /* 001.0000 0.....01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:25 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:26 */ decode_insn16_extract_ci(ctx, &u.f_i, insn); if (trans_addiw(ctx, &u.f_i)) return true; return false; case 0x00002002: /* 001..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:116 */ decode_insn16_extract_c_ldsp(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; case 0x00004000: /* 010..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:91 */ decode_insn16_extract_cl_w(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00004001: /* 010..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:97 */ decode_insn16_extract_c_li(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -298,17 +283,14 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 010..... ......10 */ if ((insn & 0x00000f80) == 0x00000000) { /* 010.0000 0.....10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:118 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:119 */ decode_insn16_extract_c_lwsp(ctx, &u.f_i, insn); if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x00006000: /* 011..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:20 */ decode_insn16_extract_cl_d(ctx, &u.f_i, insn); if (trans_ld(ctx, &u.f_i)) return true; return false; @@ -316,17 +298,14 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 011..... ......01 */ if ((insn & 0x0000107c) == 0x00000000) { /* 0110.... 
.0000001 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:99 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x00000f80) == 0x00000100) { /* 011.0001 0.....01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:100 */ decode_insn16_extract_c_addi16sp(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:101 */ decode_insn16_extract_c_lui(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; @@ -334,11 +313,9 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 011..... ......10 */ if ((insn & 0x00000f80) == 0x00000000) { /* 011.0000 0.....10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:33 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:34 */ decode_insn16_extract_c_ldsp(ctx, &u.f_i, insn); if (trans_ld(ctx, &u.f_i)) return true; return false; @@ -347,19 +324,16 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch ((insn >> 10) & 0x3) { case 0x0: /* 100.00.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:103 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 100.01.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:104 */ decode_insn16_extract_c_shift(ctx, &u.f_shift, insn); if (trans_srai(ctx, &u.f_shift)) return true; return false; case 0x2: /* 100.10.. ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:105 */ decode_insn16_extract_c_andi(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -369,32 +343,26 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) switch (insn & 0x00001060) { case 0x00000000: /* 100011.. .00...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:106 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x00000020: /* 100011.. .01...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:107 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00000040: /* 100011.. .10...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:108 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00000060: /* 100011.. .11...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:109 */ if (trans_and(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 100111.. .00...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:28 */ if (trans_subw(ctx, &u.f_r)) return true; return false; case 0x00001020: /* 100111.. .01...01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:29 */ if (trans_addw(ctx, &u.f_r)) return true; return false; } @@ -408,18 +376,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1000.... 
......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10000000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:122 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_illegal(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1000.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:123 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 0; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:124 */ decode_insn16_extract_c_mv(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -427,18 +392,15 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) /* 1001.... ......10 */ if ((insn & 0x00000ffc) == 0x00000000) { /* 10010000 00000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:127 */ decode_insn16_extract_decode_insn16_Fmt_22(ctx, &u.f_empty, insn); if (trans_ebreak(ctx, &u.f_empty)) return true; } if ((insn & 0x0000007c) == 0x00000000) { /* 1001.... .0000010 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:128 */ decode_insn16_extract_c_jalr(ctx, &u.f_i, insn); u.f_i.rd = 1; if (trans_jalr(ctx, &u.f_i)) return true; } - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:129 */ decode_insn16_extract_cr(ctx, &u.f_r, insn); if (trans_add(ctx, &u.f_r)) return true; return false; @@ -446,56 +408,47 @@ static bool decode_insn16(DisasContext *ctx, uint16_t insn) return false; case 0x0000a000: /* 101..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:92 */ decode_insn16_extract_cs_d(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000a001: /* 101..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:110 */ decode_insn16_extract_cj(ctx, &u.f_j, insn); u.f_j.rd = 0; if (trans_jal(ctx, &u.f_j)) return true; return false; case 0x0000a002: /* 101..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:131 */ decode_insn16_extract_c_sdsp(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; case 0x0000c000: /* 110..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:93 */ decode_insn16_extract_cs_w(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000c001: /* 110..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:111 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x0000c002: /* 110..... ......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:132 */ decode_insn16_extract_c_swsp(ctx, &u.f_s, insn); if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x0000e000: /* 111..... ......00 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:21 */ decode_insn16_extract_cs_d(ctx, &u.f_s, insn); if (trans_sd(ctx, &u.f_s)) return true; return false; case 0x0000e001: /* 111..... ......01 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16.decode:112 */ decode_insn16_extract_cb_z(ctx, &u.f_b, insn); if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x0000e002: /* 111..... 
......10 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn16-64.decode:36 */ decode_insn16_extract_c_sdsp(ctx, &u.f_s, insn); if (trans_sd(ctx, &u.f_s)) return true; return false; diff --git a/qemu/target/riscv/riscv64/decode_insn32.inc.c b/qemu/target/riscv/riscv64/decode_insn32.inc.c index b5d7896091..7ea843c575 100644 --- a/qemu/target/riscv/riscv64/decode_insn32.inc.c +++ b/qemu/target/riscv/riscv64/decode_insn32.inc.c @@ -14,56 +14,70 @@ typedef struct { int rs2; } arg_b; +typedef struct { + int csr; + int rd; + int rs1; +} arg_decode_insn3214; + typedef struct { int rd; int rm; int rs1; int rs2; int rs3; -} arg_decode_insn3210; +} arg_decode_insn3215; typedef struct { int rd; int rm; int rs1; int rs2; -} arg_decode_insn3211; +} arg_decode_insn3216; typedef struct { int rd; int rm; int rs1; -} arg_decode_insn3212; +} arg_decode_insn3217; typedef struct { int rd; int rs1; -} arg_decode_insn3213; +} arg_decode_insn3218; typedef struct { - int rs1; + int rd; + int vm; +} arg_decode_insn3219; + +typedef struct { + int rd; int rs2; -} arg_decode_insn3214; +} arg_decode_insn3220; typedef struct { + int rd; int rs1; -} arg_decode_insn3215; + int zimm; +} arg_decode_insn3221; typedef struct { - int pred; - int succ; -} arg_decode_insn3216; + int rs1; + int rs2; +} arg_decode_insn3222; typedef struct { - int csr; - int rd; int rs1; -} arg_decode_insn329; +} arg_decode_insn3223; + +typedef struct { + int pred; + int succ; +} arg_decode_insn3224; typedef struct { -#ifdef _MSC_VER - int dummy; // MSVC does not allow empty struct -#endif + int : 0; } arg_empty; typedef struct { @@ -83,6 +97,42 @@ typedef struct { int rs2; } arg_r; +typedef struct { + int nf; + int rd; + int rs1; + int vm; +} arg_r2nfvm; + +typedef struct { + int rd; + int rs2; + int vm; +} arg_rmr; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; +} arg_rmrr; + +typedef struct { + int nf; + int rd; + int rs1; + int rs2; + int vm; +} arg_rnfvm; + +typedef struct { + int rd; + int rs1; + int rs2; + int vm; + int wd; +} arg_rwdvm; + typedef struct { int imm; int rs1; @@ -112,13 +162,9 @@ typedef arg_empty arg_mret; static bool trans_mret(DisasContext *ctx, arg_mret *a); typedef arg_empty arg_wfi; static bool trans_wfi(DisasContext *ctx, arg_wfi *a); -typedef arg_decode_insn3214 arg_hfence_gvma; -static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); -typedef arg_decode_insn3214 arg_hfence_bvma; -static bool trans_hfence_bvma(DisasContext *ctx, arg_hfence_bvma *a); -typedef arg_decode_insn3214 arg_sfence_vma; +typedef arg_decode_insn3222 arg_sfence_vma; static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a); -typedef arg_decode_insn3215 arg_sfence_vm; +typedef arg_decode_insn3223 arg_sfence_vm; static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a); typedef arg_u arg_lui; static bool trans_lui(DisasContext *ctx, arg_lui *a); @@ -194,21 +240,21 @@ typedef arg_r arg_or; static bool trans_or(DisasContext *ctx, arg_or *a); typedef arg_r arg_and; static bool trans_and(DisasContext *ctx, arg_and *a); -typedef arg_decode_insn3216 arg_fence; +typedef arg_decode_insn3224 arg_fence; static bool trans_fence(DisasContext *ctx, arg_fence *a); typedef arg_empty arg_fence_i; static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a); -typedef arg_decode_insn329 arg_csrrw; +typedef arg_decode_insn3214 arg_csrrw; static bool trans_csrrw(DisasContext *ctx, arg_csrrw *a); -typedef arg_decode_insn329 arg_csrrs; +typedef arg_decode_insn3214 arg_csrrs; static bool 
trans_csrrs(DisasContext *ctx, arg_csrrs *a); -typedef arg_decode_insn329 arg_csrrc; +typedef arg_decode_insn3214 arg_csrrc; static bool trans_csrrc(DisasContext *ctx, arg_csrrc *a); -typedef arg_decode_insn329 arg_csrrwi; +typedef arg_decode_insn3214 arg_csrrwi; static bool trans_csrrwi(DisasContext *ctx, arg_csrrwi *a); -typedef arg_decode_insn329 arg_csrrsi; +typedef arg_decode_insn3214 arg_csrrsi; static bool trans_csrrsi(DisasContext *ctx, arg_csrrsi *a); -typedef arg_decode_insn329 arg_csrrci; +typedef arg_decode_insn3214 arg_csrrci; static bool trans_csrrci(DisasContext *ctx, arg_csrrci *a); typedef arg_r arg_mul; static bool trans_mul(DisasContext *ctx, arg_mul *a); @@ -252,23 +298,23 @@ typedef arg_i arg_flw; static bool trans_flw(DisasContext *ctx, arg_flw *a); typedef arg_s arg_fsw; static bool trans_fsw(DisasContext *ctx, arg_fsw *a); -typedef arg_decode_insn3210 arg_fmadd_s; +typedef arg_decode_insn3215 arg_fmadd_s; static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a); -typedef arg_decode_insn3210 arg_fmsub_s; +typedef arg_decode_insn3215 arg_fmsub_s; static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a); -typedef arg_decode_insn3210 arg_fnmsub_s; +typedef arg_decode_insn3215 arg_fnmsub_s; static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a); -typedef arg_decode_insn3210 arg_fnmadd_s; +typedef arg_decode_insn3215 arg_fnmadd_s; static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a); -typedef arg_decode_insn3211 arg_fadd_s; +typedef arg_decode_insn3216 arg_fadd_s; static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a); -typedef arg_decode_insn3211 arg_fsub_s; +typedef arg_decode_insn3216 arg_fsub_s; static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a); -typedef arg_decode_insn3211 arg_fmul_s; +typedef arg_decode_insn3216 arg_fmul_s; static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a); -typedef arg_decode_insn3211 arg_fdiv_s; +typedef arg_decode_insn3216 arg_fdiv_s; static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a); -typedef arg_decode_insn3212 arg_fsqrt_s; +typedef arg_decode_insn3217 arg_fsqrt_s; static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a); typedef arg_r arg_fsgnj_s; static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a); @@ -280,11 +326,11 @@ typedef arg_r arg_fmin_s; static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a); typedef arg_r arg_fmax_s; static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a); -typedef arg_decode_insn3212 arg_fcvt_w_s; +typedef arg_decode_insn3217 arg_fcvt_w_s; static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a); -typedef arg_decode_insn3212 arg_fcvt_wu_s; +typedef arg_decode_insn3217 arg_fcvt_wu_s; static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a); -typedef arg_decode_insn3213 arg_fmv_x_w; +typedef arg_decode_insn3218 arg_fmv_x_w; static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a); typedef arg_r arg_feq_s; static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a); @@ -292,35 +338,35 @@ typedef arg_r arg_flt_s; static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a); typedef arg_r arg_fle_s; static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a); -typedef arg_decode_insn3213 arg_fclass_s; +typedef arg_decode_insn3218 arg_fclass_s; static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a); -typedef arg_decode_insn3212 arg_fcvt_s_w; +typedef arg_decode_insn3217 arg_fcvt_s_w; static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a); -typedef arg_decode_insn3212 arg_fcvt_s_wu; +typedef arg_decode_insn3217 
arg_fcvt_s_wu; static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a); -typedef arg_decode_insn3213 arg_fmv_w_x; +typedef arg_decode_insn3218 arg_fmv_w_x; static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a); typedef arg_i arg_fld; static bool trans_fld(DisasContext *ctx, arg_fld *a); typedef arg_s arg_fsd; static bool trans_fsd(DisasContext *ctx, arg_fsd *a); -typedef arg_decode_insn3210 arg_fmadd_d; +typedef arg_decode_insn3215 arg_fmadd_d; static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a); -typedef arg_decode_insn3210 arg_fmsub_d; +typedef arg_decode_insn3215 arg_fmsub_d; static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a); -typedef arg_decode_insn3210 arg_fnmsub_d; +typedef arg_decode_insn3215 arg_fnmsub_d; static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a); -typedef arg_decode_insn3210 arg_fnmadd_d; +typedef arg_decode_insn3215 arg_fnmadd_d; static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a); -typedef arg_decode_insn3211 arg_fadd_d; +typedef arg_decode_insn3216 arg_fadd_d; static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a); -typedef arg_decode_insn3211 arg_fsub_d; +typedef arg_decode_insn3216 arg_fsub_d; static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a); -typedef arg_decode_insn3211 arg_fmul_d; +typedef arg_decode_insn3216 arg_fmul_d; static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a); -typedef arg_decode_insn3211 arg_fdiv_d; +typedef arg_decode_insn3216 arg_fdiv_d; static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a); -typedef arg_decode_insn3212 arg_fsqrt_d; +typedef arg_decode_insn3217 arg_fsqrt_d; static bool trans_fsqrt_d(DisasContext *ctx, arg_fsqrt_d *a); typedef arg_r arg_fsgnj_d; static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a); @@ -332,9 +378,9 @@ typedef arg_r arg_fmin_d; static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a); typedef arg_r arg_fmax_d; static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a); -typedef arg_decode_insn3212 arg_fcvt_s_d; +typedef arg_decode_insn3217 arg_fcvt_s_d; static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_s; +typedef arg_decode_insn3217 arg_fcvt_d_s; static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a); typedef arg_r arg_feq_d; static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a); @@ -342,16 +388,704 @@ typedef arg_r arg_flt_d; static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a); typedef arg_r arg_fle_d; static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a); -typedef arg_decode_insn3213 arg_fclass_d; +typedef arg_decode_insn3218 arg_fclass_d; static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a); -typedef arg_decode_insn3212 arg_fcvt_w_d; +typedef arg_decode_insn3217 arg_fcvt_w_d; static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a); -typedef arg_decode_insn3212 arg_fcvt_wu_d; +typedef arg_decode_insn3217 arg_fcvt_wu_d; static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_w; +typedef arg_decode_insn3217 arg_fcvt_d_w; static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a); -typedef arg_decode_insn3212 arg_fcvt_d_wu; +typedef arg_decode_insn3217 arg_fcvt_d_wu; static bool trans_fcvt_d_wu(DisasContext *ctx, arg_fcvt_d_wu *a); +typedef arg_decode_insn3222 arg_hfence_gvma; +static bool trans_hfence_gvma(DisasContext *ctx, arg_hfence_gvma *a); +typedef arg_decode_insn3222 arg_hfence_vvma; +static bool trans_hfence_vvma(DisasContext *ctx, arg_hfence_vvma *a); +typedef arg_r2nfvm 
arg_vlb_v; +static bool trans_vlb_v(DisasContext *ctx, arg_vlb_v *a); +typedef arg_r2nfvm arg_vlh_v; +static bool trans_vlh_v(DisasContext *ctx, arg_vlh_v *a); +typedef arg_r2nfvm arg_vlw_v; +static bool trans_vlw_v(DisasContext *ctx, arg_vlw_v *a); +typedef arg_r2nfvm arg_vle_v; +static bool trans_vle_v(DisasContext *ctx, arg_vle_v *a); +typedef arg_r2nfvm arg_vlbu_v; +static bool trans_vlbu_v(DisasContext *ctx, arg_vlbu_v *a); +typedef arg_r2nfvm arg_vlhu_v; +static bool trans_vlhu_v(DisasContext *ctx, arg_vlhu_v *a); +typedef arg_r2nfvm arg_vlwu_v; +static bool trans_vlwu_v(DisasContext *ctx, arg_vlwu_v *a); +typedef arg_r2nfvm arg_vlbff_v; +static bool trans_vlbff_v(DisasContext *ctx, arg_vlbff_v *a); +typedef arg_r2nfvm arg_vlhff_v; +static bool trans_vlhff_v(DisasContext *ctx, arg_vlhff_v *a); +typedef arg_r2nfvm arg_vlwff_v; +static bool trans_vlwff_v(DisasContext *ctx, arg_vlwff_v *a); +typedef arg_r2nfvm arg_vleff_v; +static bool trans_vleff_v(DisasContext *ctx, arg_vleff_v *a); +typedef arg_r2nfvm arg_vlbuff_v; +static bool trans_vlbuff_v(DisasContext *ctx, arg_vlbuff_v *a); +typedef arg_r2nfvm arg_vlhuff_v; +static bool trans_vlhuff_v(DisasContext *ctx, arg_vlhuff_v *a); +typedef arg_r2nfvm arg_vlwuff_v; +static bool trans_vlwuff_v(DisasContext *ctx, arg_vlwuff_v *a); +typedef arg_r2nfvm arg_vsb_v; +static bool trans_vsb_v(DisasContext *ctx, arg_vsb_v *a); +typedef arg_r2nfvm arg_vsh_v; +static bool trans_vsh_v(DisasContext *ctx, arg_vsh_v *a); +typedef arg_r2nfvm arg_vsw_v; +static bool trans_vsw_v(DisasContext *ctx, arg_vsw_v *a); +typedef arg_r2nfvm arg_vse_v; +static bool trans_vse_v(DisasContext *ctx, arg_vse_v *a); +typedef arg_rnfvm arg_vlsb_v; +static bool trans_vlsb_v(DisasContext *ctx, arg_vlsb_v *a); +typedef arg_rnfvm arg_vlsh_v; +static bool trans_vlsh_v(DisasContext *ctx, arg_vlsh_v *a); +typedef arg_rnfvm arg_vlsw_v; +static bool trans_vlsw_v(DisasContext *ctx, arg_vlsw_v *a); +typedef arg_rnfvm arg_vlse_v; +static bool trans_vlse_v(DisasContext *ctx, arg_vlse_v *a); +typedef arg_rnfvm arg_vlsbu_v; +static bool trans_vlsbu_v(DisasContext *ctx, arg_vlsbu_v *a); +typedef arg_rnfvm arg_vlshu_v; +static bool trans_vlshu_v(DisasContext *ctx, arg_vlshu_v *a); +typedef arg_rnfvm arg_vlswu_v; +static bool trans_vlswu_v(DisasContext *ctx, arg_vlswu_v *a); +typedef arg_rnfvm arg_vssb_v; +static bool trans_vssb_v(DisasContext *ctx, arg_vssb_v *a); +typedef arg_rnfvm arg_vssh_v; +static bool trans_vssh_v(DisasContext *ctx, arg_vssh_v *a); +typedef arg_rnfvm arg_vssw_v; +static bool trans_vssw_v(DisasContext *ctx, arg_vssw_v *a); +typedef arg_rnfvm arg_vsse_v; +static bool trans_vsse_v(DisasContext *ctx, arg_vsse_v *a); +typedef arg_rnfvm arg_vlxb_v; +static bool trans_vlxb_v(DisasContext *ctx, arg_vlxb_v *a); +typedef arg_rnfvm arg_vlxh_v; +static bool trans_vlxh_v(DisasContext *ctx, arg_vlxh_v *a); +typedef arg_rnfvm arg_vlxw_v; +static bool trans_vlxw_v(DisasContext *ctx, arg_vlxw_v *a); +typedef arg_rnfvm arg_vlxe_v; +static bool trans_vlxe_v(DisasContext *ctx, arg_vlxe_v *a); +typedef arg_rnfvm arg_vlxbu_v; +static bool trans_vlxbu_v(DisasContext *ctx, arg_vlxbu_v *a); +typedef arg_rnfvm arg_vlxhu_v; +static bool trans_vlxhu_v(DisasContext *ctx, arg_vlxhu_v *a); +typedef arg_rnfvm arg_vlxwu_v; +static bool trans_vlxwu_v(DisasContext *ctx, arg_vlxwu_v *a); +typedef arg_rnfvm arg_vsxb_v; +static bool trans_vsxb_v(DisasContext *ctx, arg_vsxb_v *a); +typedef arg_rnfvm arg_vsxh_v; +static bool trans_vsxh_v(DisasContext *ctx, arg_vsxh_v *a); +typedef arg_rnfvm arg_vsxw_v; 
+static bool trans_vsxw_v(DisasContext *ctx, arg_vsxw_v *a); +typedef arg_rnfvm arg_vsxe_v; +static bool trans_vsxe_v(DisasContext *ctx, arg_vsxe_v *a); +typedef arg_rwdvm arg_vamoswapw_v; +static bool trans_vamoswapw_v(DisasContext *ctx, arg_vamoswapw_v *a); +typedef arg_rwdvm arg_vamoaddw_v; +static bool trans_vamoaddw_v(DisasContext *ctx, arg_vamoaddw_v *a); +typedef arg_rwdvm arg_vamoxorw_v; +static bool trans_vamoxorw_v(DisasContext *ctx, arg_vamoxorw_v *a); +typedef arg_rwdvm arg_vamoandw_v; +static bool trans_vamoandw_v(DisasContext *ctx, arg_vamoandw_v *a); +typedef arg_rwdvm arg_vamoorw_v; +static bool trans_vamoorw_v(DisasContext *ctx, arg_vamoorw_v *a); +typedef arg_rwdvm arg_vamominw_v; +static bool trans_vamominw_v(DisasContext *ctx, arg_vamominw_v *a); +typedef arg_rwdvm arg_vamomaxw_v; +static bool trans_vamomaxw_v(DisasContext *ctx, arg_vamomaxw_v *a); +typedef arg_rwdvm arg_vamominuw_v; +static bool trans_vamominuw_v(DisasContext *ctx, arg_vamominuw_v *a); +typedef arg_rwdvm arg_vamomaxuw_v; +static bool trans_vamomaxuw_v(DisasContext *ctx, arg_vamomaxuw_v *a); +typedef arg_rmrr arg_vadd_vv; +static bool trans_vadd_vv(DisasContext *ctx, arg_vadd_vv *a); +typedef arg_rmrr arg_vadd_vx; +static bool trans_vadd_vx(DisasContext *ctx, arg_vadd_vx *a); +typedef arg_rmrr arg_vadd_vi; +static bool trans_vadd_vi(DisasContext *ctx, arg_vadd_vi *a); +typedef arg_rmrr arg_vsub_vv; +static bool trans_vsub_vv(DisasContext *ctx, arg_vsub_vv *a); +typedef arg_rmrr arg_vsub_vx; +static bool trans_vsub_vx(DisasContext *ctx, arg_vsub_vx *a); +typedef arg_rmrr arg_vrsub_vx; +static bool trans_vrsub_vx(DisasContext *ctx, arg_vrsub_vx *a); +typedef arg_rmrr arg_vrsub_vi; +static bool trans_vrsub_vi(DisasContext *ctx, arg_vrsub_vi *a); +typedef arg_rmrr arg_vwaddu_vv; +static bool trans_vwaddu_vv(DisasContext *ctx, arg_vwaddu_vv *a); +typedef arg_rmrr arg_vwaddu_vx; +static bool trans_vwaddu_vx(DisasContext *ctx, arg_vwaddu_vx *a); +typedef arg_rmrr arg_vwadd_vv; +static bool trans_vwadd_vv(DisasContext *ctx, arg_vwadd_vv *a); +typedef arg_rmrr arg_vwadd_vx; +static bool trans_vwadd_vx(DisasContext *ctx, arg_vwadd_vx *a); +typedef arg_rmrr arg_vwsubu_vv; +static bool trans_vwsubu_vv(DisasContext *ctx, arg_vwsubu_vv *a); +typedef arg_rmrr arg_vwsubu_vx; +static bool trans_vwsubu_vx(DisasContext *ctx, arg_vwsubu_vx *a); +typedef arg_rmrr arg_vwsub_vv; +static bool trans_vwsub_vv(DisasContext *ctx, arg_vwsub_vv *a); +typedef arg_rmrr arg_vwsub_vx; +static bool trans_vwsub_vx(DisasContext *ctx, arg_vwsub_vx *a); +typedef arg_rmrr arg_vwaddu_wv; +static bool trans_vwaddu_wv(DisasContext *ctx, arg_vwaddu_wv *a); +typedef arg_rmrr arg_vwaddu_wx; +static bool trans_vwaddu_wx(DisasContext *ctx, arg_vwaddu_wx *a); +typedef arg_rmrr arg_vwadd_wv; +static bool trans_vwadd_wv(DisasContext *ctx, arg_vwadd_wv *a); +typedef arg_rmrr arg_vwadd_wx; +static bool trans_vwadd_wx(DisasContext *ctx, arg_vwadd_wx *a); +typedef arg_rmrr arg_vwsubu_wv; +static bool trans_vwsubu_wv(DisasContext *ctx, arg_vwsubu_wv *a); +typedef arg_rmrr arg_vwsubu_wx; +static bool trans_vwsubu_wx(DisasContext *ctx, arg_vwsubu_wx *a); +typedef arg_rmrr arg_vwsub_wv; +static bool trans_vwsub_wv(DisasContext *ctx, arg_vwsub_wv *a); +typedef arg_rmrr arg_vwsub_wx; +static bool trans_vwsub_wx(DisasContext *ctx, arg_vwsub_wx *a); +typedef arg_rmrr arg_vadc_vvm; +static bool trans_vadc_vvm(DisasContext *ctx, arg_vadc_vvm *a); +typedef arg_rmrr arg_vadc_vxm; +static bool trans_vadc_vxm(DisasContext *ctx, arg_vadc_vxm *a); +typedef arg_rmrr 
arg_vadc_vim; +static bool trans_vadc_vim(DisasContext *ctx, arg_vadc_vim *a); +typedef arg_rmrr arg_vmadc_vvm; +static bool trans_vmadc_vvm(DisasContext *ctx, arg_vmadc_vvm *a); +typedef arg_rmrr arg_vmadc_vxm; +static bool trans_vmadc_vxm(DisasContext *ctx, arg_vmadc_vxm *a); +typedef arg_rmrr arg_vmadc_vim; +static bool trans_vmadc_vim(DisasContext *ctx, arg_vmadc_vim *a); +typedef arg_rmrr arg_vsbc_vvm; +static bool trans_vsbc_vvm(DisasContext *ctx, arg_vsbc_vvm *a); +typedef arg_rmrr arg_vsbc_vxm; +static bool trans_vsbc_vxm(DisasContext *ctx, arg_vsbc_vxm *a); +typedef arg_rmrr arg_vmsbc_vvm; +static bool trans_vmsbc_vvm(DisasContext *ctx, arg_vmsbc_vvm *a); +typedef arg_rmrr arg_vmsbc_vxm; +static bool trans_vmsbc_vxm(DisasContext *ctx, arg_vmsbc_vxm *a); +typedef arg_rmrr arg_vand_vv; +static bool trans_vand_vv(DisasContext *ctx, arg_vand_vv *a); +typedef arg_rmrr arg_vand_vx; +static bool trans_vand_vx(DisasContext *ctx, arg_vand_vx *a); +typedef arg_rmrr arg_vand_vi; +static bool trans_vand_vi(DisasContext *ctx, arg_vand_vi *a); +typedef arg_rmrr arg_vor_vv; +static bool trans_vor_vv(DisasContext *ctx, arg_vor_vv *a); +typedef arg_rmrr arg_vor_vx; +static bool trans_vor_vx(DisasContext *ctx, arg_vor_vx *a); +typedef arg_rmrr arg_vor_vi; +static bool trans_vor_vi(DisasContext *ctx, arg_vor_vi *a); +typedef arg_rmrr arg_vxor_vv; +static bool trans_vxor_vv(DisasContext *ctx, arg_vxor_vv *a); +typedef arg_rmrr arg_vxor_vx; +static bool trans_vxor_vx(DisasContext *ctx, arg_vxor_vx *a); +typedef arg_rmrr arg_vxor_vi; +static bool trans_vxor_vi(DisasContext *ctx, arg_vxor_vi *a); +typedef arg_rmrr arg_vsll_vv; +static bool trans_vsll_vv(DisasContext *ctx, arg_vsll_vv *a); +typedef arg_rmrr arg_vsll_vx; +static bool trans_vsll_vx(DisasContext *ctx, arg_vsll_vx *a); +typedef arg_rmrr arg_vsll_vi; +static bool trans_vsll_vi(DisasContext *ctx, arg_vsll_vi *a); +typedef arg_rmrr arg_vsrl_vv; +static bool trans_vsrl_vv(DisasContext *ctx, arg_vsrl_vv *a); +typedef arg_rmrr arg_vsrl_vx; +static bool trans_vsrl_vx(DisasContext *ctx, arg_vsrl_vx *a); +typedef arg_rmrr arg_vsrl_vi; +static bool trans_vsrl_vi(DisasContext *ctx, arg_vsrl_vi *a); +typedef arg_rmrr arg_vsra_vv; +static bool trans_vsra_vv(DisasContext *ctx, arg_vsra_vv *a); +typedef arg_rmrr arg_vsra_vx; +static bool trans_vsra_vx(DisasContext *ctx, arg_vsra_vx *a); +typedef arg_rmrr arg_vsra_vi; +static bool trans_vsra_vi(DisasContext *ctx, arg_vsra_vi *a); +typedef arg_rmrr arg_vnsrl_vv; +static bool trans_vnsrl_vv(DisasContext *ctx, arg_vnsrl_vv *a); +typedef arg_rmrr arg_vnsrl_vx; +static bool trans_vnsrl_vx(DisasContext *ctx, arg_vnsrl_vx *a); +typedef arg_rmrr arg_vnsrl_vi; +static bool trans_vnsrl_vi(DisasContext *ctx, arg_vnsrl_vi *a); +typedef arg_rmrr arg_vnsra_vv; +static bool trans_vnsra_vv(DisasContext *ctx, arg_vnsra_vv *a); +typedef arg_rmrr arg_vnsra_vx; +static bool trans_vnsra_vx(DisasContext *ctx, arg_vnsra_vx *a); +typedef arg_rmrr arg_vnsra_vi; +static bool trans_vnsra_vi(DisasContext *ctx, arg_vnsra_vi *a); +typedef arg_rmrr arg_vmseq_vv; +static bool trans_vmseq_vv(DisasContext *ctx, arg_vmseq_vv *a); +typedef arg_rmrr arg_vmseq_vx; +static bool trans_vmseq_vx(DisasContext *ctx, arg_vmseq_vx *a); +typedef arg_rmrr arg_vmseq_vi; +static bool trans_vmseq_vi(DisasContext *ctx, arg_vmseq_vi *a); +typedef arg_rmrr arg_vmsne_vv; +static bool trans_vmsne_vv(DisasContext *ctx, arg_vmsne_vv *a); +typedef arg_rmrr arg_vmsne_vx; +static bool trans_vmsne_vx(DisasContext *ctx, arg_vmsne_vx *a); +typedef arg_rmrr arg_vmsne_vi; 
+static bool trans_vmsne_vi(DisasContext *ctx, arg_vmsne_vi *a); +typedef arg_rmrr arg_vmsltu_vv; +static bool trans_vmsltu_vv(DisasContext *ctx, arg_vmsltu_vv *a); +typedef arg_rmrr arg_vmsltu_vx; +static bool trans_vmsltu_vx(DisasContext *ctx, arg_vmsltu_vx *a); +typedef arg_rmrr arg_vmslt_vv; +static bool trans_vmslt_vv(DisasContext *ctx, arg_vmslt_vv *a); +typedef arg_rmrr arg_vmslt_vx; +static bool trans_vmslt_vx(DisasContext *ctx, arg_vmslt_vx *a); +typedef arg_rmrr arg_vmsleu_vv; +static bool trans_vmsleu_vv(DisasContext *ctx, arg_vmsleu_vv *a); +typedef arg_rmrr arg_vmsleu_vx; +static bool trans_vmsleu_vx(DisasContext *ctx, arg_vmsleu_vx *a); +typedef arg_rmrr arg_vmsleu_vi; +static bool trans_vmsleu_vi(DisasContext *ctx, arg_vmsleu_vi *a); +typedef arg_rmrr arg_vmsle_vv; +static bool trans_vmsle_vv(DisasContext *ctx, arg_vmsle_vv *a); +typedef arg_rmrr arg_vmsle_vx; +static bool trans_vmsle_vx(DisasContext *ctx, arg_vmsle_vx *a); +typedef arg_rmrr arg_vmsle_vi; +static bool trans_vmsle_vi(DisasContext *ctx, arg_vmsle_vi *a); +typedef arg_rmrr arg_vmsgtu_vx; +static bool trans_vmsgtu_vx(DisasContext *ctx, arg_vmsgtu_vx *a); +typedef arg_rmrr arg_vmsgtu_vi; +static bool trans_vmsgtu_vi(DisasContext *ctx, arg_vmsgtu_vi *a); +typedef arg_rmrr arg_vmsgt_vx; +static bool trans_vmsgt_vx(DisasContext *ctx, arg_vmsgt_vx *a); +typedef arg_rmrr arg_vmsgt_vi; +static bool trans_vmsgt_vi(DisasContext *ctx, arg_vmsgt_vi *a); +typedef arg_rmrr arg_vminu_vv; +static bool trans_vminu_vv(DisasContext *ctx, arg_vminu_vv *a); +typedef arg_rmrr arg_vminu_vx; +static bool trans_vminu_vx(DisasContext *ctx, arg_vminu_vx *a); +typedef arg_rmrr arg_vmin_vv; +static bool trans_vmin_vv(DisasContext *ctx, arg_vmin_vv *a); +typedef arg_rmrr arg_vmin_vx; +static bool trans_vmin_vx(DisasContext *ctx, arg_vmin_vx *a); +typedef arg_rmrr arg_vmaxu_vv; +static bool trans_vmaxu_vv(DisasContext *ctx, arg_vmaxu_vv *a); +typedef arg_rmrr arg_vmaxu_vx; +static bool trans_vmaxu_vx(DisasContext *ctx, arg_vmaxu_vx *a); +typedef arg_rmrr arg_vmax_vv; +static bool trans_vmax_vv(DisasContext *ctx, arg_vmax_vv *a); +typedef arg_rmrr arg_vmax_vx; +static bool trans_vmax_vx(DisasContext *ctx, arg_vmax_vx *a); +typedef arg_rmrr arg_vmul_vv; +static bool trans_vmul_vv(DisasContext *ctx, arg_vmul_vv *a); +typedef arg_rmrr arg_vmul_vx; +static bool trans_vmul_vx(DisasContext *ctx, arg_vmul_vx *a); +typedef arg_rmrr arg_vmulh_vv; +static bool trans_vmulh_vv(DisasContext *ctx, arg_vmulh_vv *a); +typedef arg_rmrr arg_vmulh_vx; +static bool trans_vmulh_vx(DisasContext *ctx, arg_vmulh_vx *a); +typedef arg_rmrr arg_vmulhu_vv; +static bool trans_vmulhu_vv(DisasContext *ctx, arg_vmulhu_vv *a); +typedef arg_rmrr arg_vmulhu_vx; +static bool trans_vmulhu_vx(DisasContext *ctx, arg_vmulhu_vx *a); +typedef arg_rmrr arg_vmulhsu_vv; +static bool trans_vmulhsu_vv(DisasContext *ctx, arg_vmulhsu_vv *a); +typedef arg_rmrr arg_vmulhsu_vx; +static bool trans_vmulhsu_vx(DisasContext *ctx, arg_vmulhsu_vx *a); +typedef arg_rmrr arg_vdivu_vv; +static bool trans_vdivu_vv(DisasContext *ctx, arg_vdivu_vv *a); +typedef arg_rmrr arg_vdivu_vx; +static bool trans_vdivu_vx(DisasContext *ctx, arg_vdivu_vx *a); +typedef arg_rmrr arg_vdiv_vv; +static bool trans_vdiv_vv(DisasContext *ctx, arg_vdiv_vv *a); +typedef arg_rmrr arg_vdiv_vx; +static bool trans_vdiv_vx(DisasContext *ctx, arg_vdiv_vx *a); +typedef arg_rmrr arg_vremu_vv; +static bool trans_vremu_vv(DisasContext *ctx, arg_vremu_vv *a); +typedef arg_rmrr arg_vremu_vx; +static bool trans_vremu_vx(DisasContext *ctx, 
arg_vremu_vx *a); +typedef arg_rmrr arg_vrem_vv; +static bool trans_vrem_vv(DisasContext *ctx, arg_vrem_vv *a); +typedef arg_rmrr arg_vrem_vx; +static bool trans_vrem_vx(DisasContext *ctx, arg_vrem_vx *a); +typedef arg_rmrr arg_vwmulu_vv; +static bool trans_vwmulu_vv(DisasContext *ctx, arg_vwmulu_vv *a); +typedef arg_rmrr arg_vwmulu_vx; +static bool trans_vwmulu_vx(DisasContext *ctx, arg_vwmulu_vx *a); +typedef arg_rmrr arg_vwmulsu_vv; +static bool trans_vwmulsu_vv(DisasContext *ctx, arg_vwmulsu_vv *a); +typedef arg_rmrr arg_vwmulsu_vx; +static bool trans_vwmulsu_vx(DisasContext *ctx, arg_vwmulsu_vx *a); +typedef arg_rmrr arg_vwmul_vv; +static bool trans_vwmul_vv(DisasContext *ctx, arg_vwmul_vv *a); +typedef arg_rmrr arg_vwmul_vx; +static bool trans_vwmul_vx(DisasContext *ctx, arg_vwmul_vx *a); +typedef arg_rmrr arg_vmacc_vv; +static bool trans_vmacc_vv(DisasContext *ctx, arg_vmacc_vv *a); +typedef arg_rmrr arg_vmacc_vx; +static bool trans_vmacc_vx(DisasContext *ctx, arg_vmacc_vx *a); +typedef arg_rmrr arg_vnmsac_vv; +static bool trans_vnmsac_vv(DisasContext *ctx, arg_vnmsac_vv *a); +typedef arg_rmrr arg_vnmsac_vx; +static bool trans_vnmsac_vx(DisasContext *ctx, arg_vnmsac_vx *a); +typedef arg_rmrr arg_vmadd_vv; +static bool trans_vmadd_vv(DisasContext *ctx, arg_vmadd_vv *a); +typedef arg_rmrr arg_vmadd_vx; +static bool trans_vmadd_vx(DisasContext *ctx, arg_vmadd_vx *a); +typedef arg_rmrr arg_vnmsub_vv; +static bool trans_vnmsub_vv(DisasContext *ctx, arg_vnmsub_vv *a); +typedef arg_rmrr arg_vnmsub_vx; +static bool trans_vnmsub_vx(DisasContext *ctx, arg_vnmsub_vx *a); +typedef arg_rmrr arg_vwmaccu_vv; +static bool trans_vwmaccu_vv(DisasContext *ctx, arg_vwmaccu_vv *a); +typedef arg_rmrr arg_vwmaccu_vx; +static bool trans_vwmaccu_vx(DisasContext *ctx, arg_vwmaccu_vx *a); +typedef arg_rmrr arg_vwmacc_vv; +static bool trans_vwmacc_vv(DisasContext *ctx, arg_vwmacc_vv *a); +typedef arg_rmrr arg_vwmacc_vx; +static bool trans_vwmacc_vx(DisasContext *ctx, arg_vwmacc_vx *a); +typedef arg_rmrr arg_vwmaccsu_vv; +static bool trans_vwmaccsu_vv(DisasContext *ctx, arg_vwmaccsu_vv *a); +typedef arg_rmrr arg_vwmaccsu_vx; +static bool trans_vwmaccsu_vx(DisasContext *ctx, arg_vwmaccsu_vx *a); +typedef arg_rmrr arg_vwmaccus_vx; +static bool trans_vwmaccus_vx(DisasContext *ctx, arg_vwmaccus_vx *a); +typedef arg_decode_insn3218 arg_vmv_v_v; +static bool trans_vmv_v_v(DisasContext *ctx, arg_vmv_v_v *a); +typedef arg_decode_insn3218 arg_vmv_v_x; +static bool trans_vmv_v_x(DisasContext *ctx, arg_vmv_v_x *a); +typedef arg_decode_insn3218 arg_vmv_v_i; +static bool trans_vmv_v_i(DisasContext *ctx, arg_vmv_v_i *a); +typedef arg_rmrr arg_vmerge_vvm; +static bool trans_vmerge_vvm(DisasContext *ctx, arg_vmerge_vvm *a); +typedef arg_rmrr arg_vmerge_vxm; +static bool trans_vmerge_vxm(DisasContext *ctx, arg_vmerge_vxm *a); +typedef arg_rmrr arg_vmerge_vim; +static bool trans_vmerge_vim(DisasContext *ctx, arg_vmerge_vim *a); +typedef arg_rmrr arg_vsaddu_vv; +static bool trans_vsaddu_vv(DisasContext *ctx, arg_vsaddu_vv *a); +typedef arg_rmrr arg_vsaddu_vx; +static bool trans_vsaddu_vx(DisasContext *ctx, arg_vsaddu_vx *a); +typedef arg_rmrr arg_vsaddu_vi; +static bool trans_vsaddu_vi(DisasContext *ctx, arg_vsaddu_vi *a); +typedef arg_rmrr arg_vsadd_vv; +static bool trans_vsadd_vv(DisasContext *ctx, arg_vsadd_vv *a); +typedef arg_rmrr arg_vsadd_vx; +static bool trans_vsadd_vx(DisasContext *ctx, arg_vsadd_vx *a); +typedef arg_rmrr arg_vsadd_vi; +static bool trans_vsadd_vi(DisasContext *ctx, arg_vsadd_vi *a); +typedef arg_rmrr 
arg_vssubu_vv; +static bool trans_vssubu_vv(DisasContext *ctx, arg_vssubu_vv *a); +typedef arg_rmrr arg_vssubu_vx; +static bool trans_vssubu_vx(DisasContext *ctx, arg_vssubu_vx *a); +typedef arg_rmrr arg_vssub_vv; +static bool trans_vssub_vv(DisasContext *ctx, arg_vssub_vv *a); +typedef arg_rmrr arg_vssub_vx; +static bool trans_vssub_vx(DisasContext *ctx, arg_vssub_vx *a); +typedef arg_rmrr arg_vaadd_vv; +static bool trans_vaadd_vv(DisasContext *ctx, arg_vaadd_vv *a); +typedef arg_rmrr arg_vaadd_vx; +static bool trans_vaadd_vx(DisasContext *ctx, arg_vaadd_vx *a); +typedef arg_rmrr arg_vaadd_vi; +static bool trans_vaadd_vi(DisasContext *ctx, arg_vaadd_vi *a); +typedef arg_rmrr arg_vasub_vv; +static bool trans_vasub_vv(DisasContext *ctx, arg_vasub_vv *a); +typedef arg_rmrr arg_vasub_vx; +static bool trans_vasub_vx(DisasContext *ctx, arg_vasub_vx *a); +typedef arg_rmrr arg_vsmul_vv; +static bool trans_vsmul_vv(DisasContext *ctx, arg_vsmul_vv *a); +typedef arg_rmrr arg_vsmul_vx; +static bool trans_vsmul_vx(DisasContext *ctx, arg_vsmul_vx *a); +typedef arg_rmrr arg_vwsmaccu_vv; +static bool trans_vwsmaccu_vv(DisasContext *ctx, arg_vwsmaccu_vv *a); +typedef arg_rmrr arg_vwsmaccu_vx; +static bool trans_vwsmaccu_vx(DisasContext *ctx, arg_vwsmaccu_vx *a); +typedef arg_rmrr arg_vwsmacc_vv; +static bool trans_vwsmacc_vv(DisasContext *ctx, arg_vwsmacc_vv *a); +typedef arg_rmrr arg_vwsmacc_vx; +static bool trans_vwsmacc_vx(DisasContext *ctx, arg_vwsmacc_vx *a); +typedef arg_rmrr arg_vwsmaccsu_vv; +static bool trans_vwsmaccsu_vv(DisasContext *ctx, arg_vwsmaccsu_vv *a); +typedef arg_rmrr arg_vwsmaccsu_vx; +static bool trans_vwsmaccsu_vx(DisasContext *ctx, arg_vwsmaccsu_vx *a); +typedef arg_rmrr arg_vwsmaccus_vx; +static bool trans_vwsmaccus_vx(DisasContext *ctx, arg_vwsmaccus_vx *a); +typedef arg_rmrr arg_vssrl_vv; +static bool trans_vssrl_vv(DisasContext *ctx, arg_vssrl_vv *a); +typedef arg_rmrr arg_vssrl_vx; +static bool trans_vssrl_vx(DisasContext *ctx, arg_vssrl_vx *a); +typedef arg_rmrr arg_vssrl_vi; +static bool trans_vssrl_vi(DisasContext *ctx, arg_vssrl_vi *a); +typedef arg_rmrr arg_vssra_vv; +static bool trans_vssra_vv(DisasContext *ctx, arg_vssra_vv *a); +typedef arg_rmrr arg_vssra_vx; +static bool trans_vssra_vx(DisasContext *ctx, arg_vssra_vx *a); +typedef arg_rmrr arg_vssra_vi; +static bool trans_vssra_vi(DisasContext *ctx, arg_vssra_vi *a); +typedef arg_rmrr arg_vnclipu_vv; +static bool trans_vnclipu_vv(DisasContext *ctx, arg_vnclipu_vv *a); +typedef arg_rmrr arg_vnclipu_vx; +static bool trans_vnclipu_vx(DisasContext *ctx, arg_vnclipu_vx *a); +typedef arg_rmrr arg_vnclipu_vi; +static bool trans_vnclipu_vi(DisasContext *ctx, arg_vnclipu_vi *a); +typedef arg_rmrr arg_vnclip_vv; +static bool trans_vnclip_vv(DisasContext *ctx, arg_vnclip_vv *a); +typedef arg_rmrr arg_vnclip_vx; +static bool trans_vnclip_vx(DisasContext *ctx, arg_vnclip_vx *a); +typedef arg_rmrr arg_vnclip_vi; +static bool trans_vnclip_vi(DisasContext *ctx, arg_vnclip_vi *a); +typedef arg_rmrr arg_vfadd_vv; +static bool trans_vfadd_vv(DisasContext *ctx, arg_vfadd_vv *a); +typedef arg_rmrr arg_vfadd_vf; +static bool trans_vfadd_vf(DisasContext *ctx, arg_vfadd_vf *a); +typedef arg_rmrr arg_vfsub_vv; +static bool trans_vfsub_vv(DisasContext *ctx, arg_vfsub_vv *a); +typedef arg_rmrr arg_vfsub_vf; +static bool trans_vfsub_vf(DisasContext *ctx, arg_vfsub_vf *a); +typedef arg_rmrr arg_vfrsub_vf; +static bool trans_vfrsub_vf(DisasContext *ctx, arg_vfrsub_vf *a); +typedef arg_rmrr arg_vfwadd_vv; +static bool trans_vfwadd_vv(DisasContext 
*ctx, arg_vfwadd_vv *a); +typedef arg_rmrr arg_vfwadd_vf; +static bool trans_vfwadd_vf(DisasContext *ctx, arg_vfwadd_vf *a); +typedef arg_rmrr arg_vfwadd_wv; +static bool trans_vfwadd_wv(DisasContext *ctx, arg_vfwadd_wv *a); +typedef arg_rmrr arg_vfwadd_wf; +static bool trans_vfwadd_wf(DisasContext *ctx, arg_vfwadd_wf *a); +typedef arg_rmrr arg_vfwsub_vv; +static bool trans_vfwsub_vv(DisasContext *ctx, arg_vfwsub_vv *a); +typedef arg_rmrr arg_vfwsub_vf; +static bool trans_vfwsub_vf(DisasContext *ctx, arg_vfwsub_vf *a); +typedef arg_rmrr arg_vfwsub_wv; +static bool trans_vfwsub_wv(DisasContext *ctx, arg_vfwsub_wv *a); +typedef arg_rmrr arg_vfwsub_wf; +static bool trans_vfwsub_wf(DisasContext *ctx, arg_vfwsub_wf *a); +typedef arg_rmrr arg_vfmul_vv; +static bool trans_vfmul_vv(DisasContext *ctx, arg_vfmul_vv *a); +typedef arg_rmrr arg_vfmul_vf; +static bool trans_vfmul_vf(DisasContext *ctx, arg_vfmul_vf *a); +typedef arg_rmrr arg_vfdiv_vv; +static bool trans_vfdiv_vv(DisasContext *ctx, arg_vfdiv_vv *a); +typedef arg_rmrr arg_vfdiv_vf; +static bool trans_vfdiv_vf(DisasContext *ctx, arg_vfdiv_vf *a); +typedef arg_rmrr arg_vfrdiv_vf; +static bool trans_vfrdiv_vf(DisasContext *ctx, arg_vfrdiv_vf *a); +typedef arg_rmrr arg_vfwmul_vv; +static bool trans_vfwmul_vv(DisasContext *ctx, arg_vfwmul_vv *a); +typedef arg_rmrr arg_vfwmul_vf; +static bool trans_vfwmul_vf(DisasContext *ctx, arg_vfwmul_vf *a); +typedef arg_rmrr arg_vfmacc_vv; +static bool trans_vfmacc_vv(DisasContext *ctx, arg_vfmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vv; +static bool trans_vfnmacc_vv(DisasContext *ctx, arg_vfnmacc_vv *a); +typedef arg_rmrr arg_vfnmacc_vf; +static bool trans_vfnmacc_vf(DisasContext *ctx, arg_vfnmacc_vf *a); +typedef arg_rmrr arg_vfmacc_vf; +static bool trans_vfmacc_vf(DisasContext *ctx, arg_vfmacc_vf *a); +typedef arg_rmrr arg_vfmsac_vv; +static bool trans_vfmsac_vv(DisasContext *ctx, arg_vfmsac_vv *a); +typedef arg_rmrr arg_vfmsac_vf; +static bool trans_vfmsac_vf(DisasContext *ctx, arg_vfmsac_vf *a); +typedef arg_rmrr arg_vfnmsac_vv; +static bool trans_vfnmsac_vv(DisasContext *ctx, arg_vfnmsac_vv *a); +typedef arg_rmrr arg_vfnmsac_vf; +static bool trans_vfnmsac_vf(DisasContext *ctx, arg_vfnmsac_vf *a); +typedef arg_rmrr arg_vfmadd_vv; +static bool trans_vfmadd_vv(DisasContext *ctx, arg_vfmadd_vv *a); +typedef arg_rmrr arg_vfmadd_vf; +static bool trans_vfmadd_vf(DisasContext *ctx, arg_vfmadd_vf *a); +typedef arg_rmrr arg_vfnmadd_vv; +static bool trans_vfnmadd_vv(DisasContext *ctx, arg_vfnmadd_vv *a); +typedef arg_rmrr arg_vfnmadd_vf; +static bool trans_vfnmadd_vf(DisasContext *ctx, arg_vfnmadd_vf *a); +typedef arg_rmrr arg_vfmsub_vv; +static bool trans_vfmsub_vv(DisasContext *ctx, arg_vfmsub_vv *a); +typedef arg_rmrr arg_vfmsub_vf; +static bool trans_vfmsub_vf(DisasContext *ctx, arg_vfmsub_vf *a); +typedef arg_rmrr arg_vfnmsub_vv; +static bool trans_vfnmsub_vv(DisasContext *ctx, arg_vfnmsub_vv *a); +typedef arg_rmrr arg_vfnmsub_vf; +static bool trans_vfnmsub_vf(DisasContext *ctx, arg_vfnmsub_vf *a); +typedef arg_rmrr arg_vfwmacc_vv; +static bool trans_vfwmacc_vv(DisasContext *ctx, arg_vfwmacc_vv *a); +typedef arg_rmrr arg_vfwmacc_vf; +static bool trans_vfwmacc_vf(DisasContext *ctx, arg_vfwmacc_vf *a); +typedef arg_rmrr arg_vfwnmacc_vv; +static bool trans_vfwnmacc_vv(DisasContext *ctx, arg_vfwnmacc_vv *a); +typedef arg_rmrr arg_vfwnmacc_vf; +static bool trans_vfwnmacc_vf(DisasContext *ctx, arg_vfwnmacc_vf *a); +typedef arg_rmrr arg_vfwmsac_vv; +static bool trans_vfwmsac_vv(DisasContext *ctx, arg_vfwmsac_vv 
*a); +typedef arg_rmrr arg_vfwmsac_vf; +static bool trans_vfwmsac_vf(DisasContext *ctx, arg_vfwmsac_vf *a); +typedef arg_rmrr arg_vfwnmsac_vv; +static bool trans_vfwnmsac_vv(DisasContext *ctx, arg_vfwnmsac_vv *a); +typedef arg_rmrr arg_vfwnmsac_vf; +static bool trans_vfwnmsac_vf(DisasContext *ctx, arg_vfwnmsac_vf *a); +typedef arg_rmr arg_vfsqrt_v; +static bool trans_vfsqrt_v(DisasContext *ctx, arg_vfsqrt_v *a); +typedef arg_rmrr arg_vfmin_vv; +static bool trans_vfmin_vv(DisasContext *ctx, arg_vfmin_vv *a); +typedef arg_rmrr arg_vfmin_vf; +static bool trans_vfmin_vf(DisasContext *ctx, arg_vfmin_vf *a); +typedef arg_rmrr arg_vfmax_vv; +static bool trans_vfmax_vv(DisasContext *ctx, arg_vfmax_vv *a); +typedef arg_rmrr arg_vfmax_vf; +static bool trans_vfmax_vf(DisasContext *ctx, arg_vfmax_vf *a); +typedef arg_rmrr arg_vfsgnj_vv; +static bool trans_vfsgnj_vv(DisasContext *ctx, arg_vfsgnj_vv *a); +typedef arg_rmrr arg_vfsgnj_vf; +static bool trans_vfsgnj_vf(DisasContext *ctx, arg_vfsgnj_vf *a); +typedef arg_rmrr arg_vfsgnjn_vv; +static bool trans_vfsgnjn_vv(DisasContext *ctx, arg_vfsgnjn_vv *a); +typedef arg_rmrr arg_vfsgnjn_vf; +static bool trans_vfsgnjn_vf(DisasContext *ctx, arg_vfsgnjn_vf *a); +typedef arg_rmrr arg_vfsgnjx_vv; +static bool trans_vfsgnjx_vv(DisasContext *ctx, arg_vfsgnjx_vv *a); +typedef arg_rmrr arg_vfsgnjx_vf; +static bool trans_vfsgnjx_vf(DisasContext *ctx, arg_vfsgnjx_vf *a); +typedef arg_rmrr arg_vmfeq_vv; +static bool trans_vmfeq_vv(DisasContext *ctx, arg_vmfeq_vv *a); +typedef arg_rmrr arg_vmfeq_vf; +static bool trans_vmfeq_vf(DisasContext *ctx, arg_vmfeq_vf *a); +typedef arg_rmrr arg_vmfne_vv; +static bool trans_vmfne_vv(DisasContext *ctx, arg_vmfne_vv *a); +typedef arg_rmrr arg_vmfne_vf; +static bool trans_vmfne_vf(DisasContext *ctx, arg_vmfne_vf *a); +typedef arg_rmrr arg_vmflt_vv; +static bool trans_vmflt_vv(DisasContext *ctx, arg_vmflt_vv *a); +typedef arg_rmrr arg_vmflt_vf; +static bool trans_vmflt_vf(DisasContext *ctx, arg_vmflt_vf *a); +typedef arg_rmrr arg_vmfle_vv; +static bool trans_vmfle_vv(DisasContext *ctx, arg_vmfle_vv *a); +typedef arg_rmrr arg_vmfle_vf; +static bool trans_vmfle_vf(DisasContext *ctx, arg_vmfle_vf *a); +typedef arg_rmrr arg_vmfgt_vf; +static bool trans_vmfgt_vf(DisasContext *ctx, arg_vmfgt_vf *a); +typedef arg_rmrr arg_vmfge_vf; +static bool trans_vmfge_vf(DisasContext *ctx, arg_vmfge_vf *a); +typedef arg_rmrr arg_vmford_vv; +static bool trans_vmford_vv(DisasContext *ctx, arg_vmford_vv *a); +typedef arg_rmrr arg_vmford_vf; +static bool trans_vmford_vf(DisasContext *ctx, arg_vmford_vf *a); +typedef arg_rmr arg_vfclass_v; +static bool trans_vfclass_v(DisasContext *ctx, arg_vfclass_v *a); +typedef arg_rmrr arg_vfmerge_vfm; +static bool trans_vfmerge_vfm(DisasContext *ctx, arg_vfmerge_vfm *a); +typedef arg_decode_insn3218 arg_vfmv_v_f; +static bool trans_vfmv_v_f(DisasContext *ctx, arg_vfmv_v_f *a); +typedef arg_rmr arg_vfcvt_xu_f_v; +static bool trans_vfcvt_xu_f_v(DisasContext *ctx, arg_vfcvt_xu_f_v *a); +typedef arg_rmr arg_vfcvt_x_f_v; +static bool trans_vfcvt_x_f_v(DisasContext *ctx, arg_vfcvt_x_f_v *a); +typedef arg_rmr arg_vfcvt_f_xu_v; +static bool trans_vfcvt_f_xu_v(DisasContext *ctx, arg_vfcvt_f_xu_v *a); +typedef arg_rmr arg_vfcvt_f_x_v; +static bool trans_vfcvt_f_x_v(DisasContext *ctx, arg_vfcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_xu_f_v; +static bool trans_vfwcvt_xu_f_v(DisasContext *ctx, arg_vfwcvt_xu_f_v *a); +typedef arg_rmr arg_vfwcvt_x_f_v; +static bool trans_vfwcvt_x_f_v(DisasContext *ctx, arg_vfwcvt_x_f_v *a); +typedef 
arg_rmr arg_vfwcvt_f_xu_v; +static bool trans_vfwcvt_f_xu_v(DisasContext *ctx, arg_vfwcvt_f_xu_v *a); +typedef arg_rmr arg_vfwcvt_f_x_v; +static bool trans_vfwcvt_f_x_v(DisasContext *ctx, arg_vfwcvt_f_x_v *a); +typedef arg_rmr arg_vfwcvt_f_f_v; +static bool trans_vfwcvt_f_f_v(DisasContext *ctx, arg_vfwcvt_f_f_v *a); +typedef arg_rmr arg_vfncvt_xu_f_v; +static bool trans_vfncvt_xu_f_v(DisasContext *ctx, arg_vfncvt_xu_f_v *a); +typedef arg_rmr arg_vfncvt_x_f_v; +static bool trans_vfncvt_x_f_v(DisasContext *ctx, arg_vfncvt_x_f_v *a); +typedef arg_rmr arg_vfncvt_f_xu_v; +static bool trans_vfncvt_f_xu_v(DisasContext *ctx, arg_vfncvt_f_xu_v *a); +typedef arg_rmr arg_vfncvt_f_x_v; +static bool trans_vfncvt_f_x_v(DisasContext *ctx, arg_vfncvt_f_x_v *a); +typedef arg_rmr arg_vfncvt_f_f_v; +static bool trans_vfncvt_f_f_v(DisasContext *ctx, arg_vfncvt_f_f_v *a); +typedef arg_rmrr arg_vredsum_vs; +static bool trans_vredsum_vs(DisasContext *ctx, arg_vredsum_vs *a); +typedef arg_rmrr arg_vredand_vs; +static bool trans_vredand_vs(DisasContext *ctx, arg_vredand_vs *a); +typedef arg_rmrr arg_vredor_vs; +static bool trans_vredor_vs(DisasContext *ctx, arg_vredor_vs *a); +typedef arg_rmrr arg_vredxor_vs; +static bool trans_vredxor_vs(DisasContext *ctx, arg_vredxor_vs *a); +typedef arg_rmrr arg_vredminu_vs; +static bool trans_vredminu_vs(DisasContext *ctx, arg_vredminu_vs *a); +typedef arg_rmrr arg_vredmin_vs; +static bool trans_vredmin_vs(DisasContext *ctx, arg_vredmin_vs *a); +typedef arg_rmrr arg_vredmaxu_vs; +static bool trans_vredmaxu_vs(DisasContext *ctx, arg_vredmaxu_vs *a); +typedef arg_rmrr arg_vredmax_vs; +static bool trans_vredmax_vs(DisasContext *ctx, arg_vredmax_vs *a); +typedef arg_rmrr arg_vwredsumu_vs; +static bool trans_vwredsumu_vs(DisasContext *ctx, arg_vwredsumu_vs *a); +typedef arg_rmrr arg_vwredsum_vs; +static bool trans_vwredsum_vs(DisasContext *ctx, arg_vwredsum_vs *a); +typedef arg_rmrr arg_vfredsum_vs; +static bool trans_vfredsum_vs(DisasContext *ctx, arg_vfredsum_vs *a); +typedef arg_rmrr arg_vfredmin_vs; +static bool trans_vfredmin_vs(DisasContext *ctx, arg_vfredmin_vs *a); +typedef arg_rmrr arg_vfredmax_vs; +static bool trans_vfredmax_vs(DisasContext *ctx, arg_vfredmax_vs *a); +typedef arg_rmrr arg_vfwredsum_vs; +static bool trans_vfwredsum_vs(DisasContext *ctx, arg_vfwredsum_vs *a); +typedef arg_r arg_vmand_mm; +static bool trans_vmand_mm(DisasContext *ctx, arg_vmand_mm *a); +typedef arg_r arg_vmnand_mm; +static bool trans_vmnand_mm(DisasContext *ctx, arg_vmnand_mm *a); +typedef arg_r arg_vmandnot_mm; +static bool trans_vmandnot_mm(DisasContext *ctx, arg_vmandnot_mm *a); +typedef arg_r arg_vmxor_mm; +static bool trans_vmxor_mm(DisasContext *ctx, arg_vmxor_mm *a); +typedef arg_r arg_vmor_mm; +static bool trans_vmor_mm(DisasContext *ctx, arg_vmor_mm *a); +typedef arg_r arg_vmnor_mm; +static bool trans_vmnor_mm(DisasContext *ctx, arg_vmnor_mm *a); +typedef arg_r arg_vmornot_mm; +static bool trans_vmornot_mm(DisasContext *ctx, arg_vmornot_mm *a); +typedef arg_r arg_vmxnor_mm; +static bool trans_vmxnor_mm(DisasContext *ctx, arg_vmxnor_mm *a); +typedef arg_rmr arg_vmpopc_m; +static bool trans_vmpopc_m(DisasContext *ctx, arg_vmpopc_m *a); +typedef arg_rmr arg_vmfirst_m; +static bool trans_vmfirst_m(DisasContext *ctx, arg_vmfirst_m *a); +typedef arg_rmr arg_vmsbf_m; +static bool trans_vmsbf_m(DisasContext *ctx, arg_vmsbf_m *a); +typedef arg_rmr arg_vmsif_m; +static bool trans_vmsif_m(DisasContext *ctx, arg_vmsif_m *a); +typedef arg_rmr arg_vmsof_m; +static bool trans_vmsof_m(DisasContext 
*ctx, arg_vmsof_m *a); +typedef arg_rmr arg_viota_m; +static bool trans_viota_m(DisasContext *ctx, arg_viota_m *a); +typedef arg_decode_insn3219 arg_vid_v; +static bool trans_vid_v(DisasContext *ctx, arg_vid_v *a); +typedef arg_r arg_vext_x_v; +static bool trans_vext_x_v(DisasContext *ctx, arg_vext_x_v *a); +typedef arg_decode_insn3218 arg_vmv_s_x; +static bool trans_vmv_s_x(DisasContext *ctx, arg_vmv_s_x *a); +typedef arg_decode_insn3220 arg_vfmv_f_s; +static bool trans_vfmv_f_s(DisasContext *ctx, arg_vfmv_f_s *a); +typedef arg_decode_insn3218 arg_vfmv_s_f; +static bool trans_vfmv_s_f(DisasContext *ctx, arg_vfmv_s_f *a); +typedef arg_rmrr arg_vslideup_vx; +static bool trans_vslideup_vx(DisasContext *ctx, arg_vslideup_vx *a); +typedef arg_rmrr arg_vslideup_vi; +static bool trans_vslideup_vi(DisasContext *ctx, arg_vslideup_vi *a); +typedef arg_rmrr arg_vslide1up_vx; +static bool trans_vslide1up_vx(DisasContext *ctx, arg_vslide1up_vx *a); +typedef arg_rmrr arg_vslidedown_vx; +static bool trans_vslidedown_vx(DisasContext *ctx, arg_vslidedown_vx *a); +typedef arg_rmrr arg_vslidedown_vi; +static bool trans_vslidedown_vi(DisasContext *ctx, arg_vslidedown_vi *a); +typedef arg_rmrr arg_vslide1down_vx; +static bool trans_vslide1down_vx(DisasContext *ctx, arg_vslide1down_vx *a); +typedef arg_rmrr arg_vrgather_vv; +static bool trans_vrgather_vv(DisasContext *ctx, arg_vrgather_vv *a); +typedef arg_rmrr arg_vrgather_vx; +static bool trans_vrgather_vx(DisasContext *ctx, arg_vrgather_vx *a); +typedef arg_rmrr arg_vrgather_vi; +static bool trans_vrgather_vi(DisasContext *ctx, arg_vrgather_vi *a); +typedef arg_r arg_vcompress_vm; +static bool trans_vcompress_vm(DisasContext *ctx, arg_vcompress_vm *a); +typedef arg_decode_insn3221 arg_vsetvli; +static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a); +typedef arg_r arg_vsetvl; +static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a); typedef arg_i arg_lwu; static bool trans_lwu(DisasContext *ctx, arg_lwu *a); typedef arg_i arg_ld; @@ -408,25 +1142,43 @@ typedef arg_atomic arg_amominu_d; static bool trans_amominu_d(DisasContext *ctx, arg_amominu_d *a); typedef arg_atomic arg_amomaxu_d; static bool trans_amomaxu_d(DisasContext *ctx, arg_amomaxu_d *a); -typedef arg_decode_insn3212 arg_fcvt_l_s; +typedef arg_rwdvm arg_vamoswapd_v; +static bool trans_vamoswapd_v(DisasContext *ctx, arg_vamoswapd_v *a); +typedef arg_rwdvm arg_vamoaddd_v; +static bool trans_vamoaddd_v(DisasContext *ctx, arg_vamoaddd_v *a); +typedef arg_rwdvm arg_vamoxord_v; +static bool trans_vamoxord_v(DisasContext *ctx, arg_vamoxord_v *a); +typedef arg_rwdvm arg_vamoandd_v; +static bool trans_vamoandd_v(DisasContext *ctx, arg_vamoandd_v *a); +typedef arg_rwdvm arg_vamoord_v; +static bool trans_vamoord_v(DisasContext *ctx, arg_vamoord_v *a); +typedef arg_rwdvm arg_vamomind_v; +static bool trans_vamomind_v(DisasContext *ctx, arg_vamomind_v *a); +typedef arg_rwdvm arg_vamomaxd_v; +static bool trans_vamomaxd_v(DisasContext *ctx, arg_vamomaxd_v *a); +typedef arg_rwdvm arg_vamominud_v; +static bool trans_vamominud_v(DisasContext *ctx, arg_vamominud_v *a); +typedef arg_rwdvm arg_vamomaxud_v; +static bool trans_vamomaxud_v(DisasContext *ctx, arg_vamomaxud_v *a); +typedef arg_decode_insn3217 arg_fcvt_l_s; static bool trans_fcvt_l_s(DisasContext *ctx, arg_fcvt_l_s *a); -typedef arg_decode_insn3212 arg_fcvt_lu_s; +typedef arg_decode_insn3217 arg_fcvt_lu_s; static bool trans_fcvt_lu_s(DisasContext *ctx, arg_fcvt_lu_s *a); -typedef arg_decode_insn3212 arg_fcvt_s_l; +typedef arg_decode_insn3217 
arg_fcvt_s_l; static bool trans_fcvt_s_l(DisasContext *ctx, arg_fcvt_s_l *a); -typedef arg_decode_insn3212 arg_fcvt_s_lu; +typedef arg_decode_insn3217 arg_fcvt_s_lu; static bool trans_fcvt_s_lu(DisasContext *ctx, arg_fcvt_s_lu *a); -typedef arg_decode_insn3212 arg_fcvt_l_d; +typedef arg_decode_insn3217 arg_fcvt_l_d; static bool trans_fcvt_l_d(DisasContext *ctx, arg_fcvt_l_d *a); -typedef arg_decode_insn3212 arg_fcvt_lu_d; +typedef arg_decode_insn3217 arg_fcvt_lu_d; static bool trans_fcvt_lu_d(DisasContext *ctx, arg_fcvt_lu_d *a); -typedef arg_decode_insn3213 arg_fmv_x_d; +typedef arg_decode_insn3218 arg_fmv_x_d; static bool trans_fmv_x_d(DisasContext *ctx, arg_fmv_x_d *a); -typedef arg_decode_insn3212 arg_fcvt_d_l; +typedef arg_decode_insn3217 arg_fcvt_d_l; static bool trans_fcvt_d_l(DisasContext *ctx, arg_fcvt_d_l *a); -typedef arg_decode_insn3212 arg_fcvt_d_lu; +typedef arg_decode_insn3217 arg_fcvt_d_lu; static bool trans_fcvt_d_lu(DisasContext *ctx, arg_fcvt_d_lu *a); -typedef arg_decode_insn3213 arg_fmv_d_x; +typedef arg_decode_insn3218 arg_fmv_d_x; static bool trans_fmv_d_x(DisasContext *ctx, arg_fmv_d_x *a); static void decode_insn32_extract_atom_ld(DisasContext *ctx, arg_atomic *a, uint32_t insn) @@ -454,30 +1206,30 @@ static void decode_insn32_extract_b(DisasContext *ctx, arg_b *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn329 *a, uint32_t insn) +static void decode_insn32_extract_csr(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) { a->csr = extract32(insn, 20, 12); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_decode_insn32_Fmt_18(DisasContext *ctx, arg_empty *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_28(DisasContext *ctx, arg_empty *a, uint32_t insn) { } -static void decode_insn32_extract_decode_insn32_Fmt_19(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) +static void decode_insn32_extract_decode_insn32_Fmt_29(DisasContext *ctx, arg_decode_insn3224 *a, uint32_t insn) { a->pred = extract32(insn, 24, 4); a->succ = extract32(insn, 20, 4); } -static void decode_insn32_extract_hfence_bvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_hfence_gvma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void decode_insn32_extract_hfence_vvma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -503,20 +1255,54 @@ static void decode_insn32_extract_r(DisasContext *ctx, arg_r *a, uint32_t insn) a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3213 *a, uint32_t insn) +static void decode_insn32_extract_r1_vm(DisasContext *ctx, arg_decode_insn3219 *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2(DisasContext *ctx, arg_decode_insn3218 *a, uint32_t insn) +{ + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_nfvm(DisasContext *ctx, arg_r2nfvm *a, uint32_t insn) { + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); a->rs1 = extract32(insn, 15, 5); a->rd = extract32(insn, 7, 5); } 
-static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3212 *a, uint32_t insn) +static void decode_insn32_extract_r2_rm(DisasContext *ctx, arg_decode_insn3217 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); a->rm = extract32(insn, 12, 3); a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 *a, uint32_t insn) +static void decode_insn32_extract_r2_vm(DisasContext *ctx, arg_rmr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2_zimm(DisasContext *ctx, arg_decode_insn3221 *a, uint32_t insn) +{ + a->zimm = extract32(insn, 20, 11); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r2rd(DisasContext *ctx, arg_decode_insn3220 *a, uint32_t insn) +{ + a->rs2 = extract32(insn, 20, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) { a->rs3 = extract32(insn, 27, 5); a->rs2 = extract32(insn, 20, 5); @@ -525,7 +1311,16 @@ static void decode_insn32_extract_r4_rm(DisasContext *ctx, arg_decode_insn3210 * a->rd = extract32(insn, 7, 5); } -static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a, uint32_t insn) +static void decode_insn32_extract_r_nfvm(DisasContext *ctx, arg_rnfvm *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->nf = ex_plus_1(ctx, extract32(insn, 29, 3)); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3216 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -533,6 +1328,39 @@ static void decode_insn32_extract_r_rm(DisasContext *ctx, arg_decode_insn3211 *a a->rd = extract32(insn, 7, 5); } +static void decode_insn32_extract_r_vm(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_0(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 0; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_vm_1(DisasContext *ctx, arg_rmrr *a, uint32_t insn) +{ + a->vm = 1; + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + +static void decode_insn32_extract_r_wdvm(DisasContext *ctx, arg_rwdvm *a, uint32_t insn) +{ + a->wd = extract32(insn, 26, 1); + a->vm = extract32(insn, 25, 1); + a->rs2 = extract32(insn, 20, 5); + a->rs1 = extract32(insn, 15, 5); + a->rd = extract32(insn, 7, 5); +} + static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) { a->imm = deposit32(extract32(insn, 7, 5), 5, 27, sextract32(insn, 25, 7)); @@ -540,12 +1368,12 @@ static void decode_insn32_extract_s(DisasContext *ctx, arg_s *a, uint32_t insn) a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3215 *a, uint32_t insn) +static void decode_insn32_extract_sfence_vm(DisasContext *ctx, arg_decode_insn3223 *a, uint32_t insn) { a->rs1 = extract32(insn, 15, 5); } -static void decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3214 *a, uint32_t insn) +static void 
decode_insn32_extract_sfence_vma(DisasContext *ctx, arg_decode_insn3222 *a, uint32_t insn) { a->rs2 = extract32(insn, 20, 5); a->rs1 = extract32(insn, 15, 5); @@ -576,18 +1404,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) union { arg_atomic f_atomic; arg_b f_b; - arg_decode_insn3210 f_decode_insn3210; - arg_decode_insn3211 f_decode_insn3211; - arg_decode_insn3212 f_decode_insn3212; - arg_decode_insn3213 f_decode_insn3213; arg_decode_insn3214 f_decode_insn3214; arg_decode_insn3215 f_decode_insn3215; arg_decode_insn3216 f_decode_insn3216; - arg_decode_insn329 f_decode_insn329; + arg_decode_insn3217 f_decode_insn3217; + arg_decode_insn3218 f_decode_insn3218; + arg_decode_insn3219 f_decode_insn3219; + arg_decode_insn3220 f_decode_insn3220; + arg_decode_insn3221 f_decode_insn3221; + arg_decode_insn3222 f_decode_insn3222; + arg_decode_insn3223 f_decode_insn3223; + arg_decode_insn3224 f_decode_insn3224; arg_empty f_empty; arg_i f_i; arg_j f_j; arg_r f_r; + arg_r2nfvm f_r2nfvm; + arg_rmr f_rmr; + arg_rmrr f_rmrr; + arg_rnfvm f_rnfvm; + arg_rwdvm f_rwdvm; arg_s f_s; arg_shift f_shift; arg_u f_u; @@ -600,55 +1436,235 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:96 */ if (trans_lb(ctx, &u.f_i)) return true; return false; case 0x1: /* ........ ........ .001.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:97 */ if (trans_lh(ctx, &u.f_i)) return true; return false; case 0x2: /* ........ ........ .010.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:98 */ if (trans_lw(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:28 */ if (trans_ld(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:99 */ if (trans_lbu(ctx, &u.f_i)) return true; return false; case 0x5: /* ........ ........ .101.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:100 */ if (trans_lhu(ctx, &u.f_i)) return true; return false; case 0x6: /* ........ ........ .110.... .0000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:27 */ if (trans_lwu(ctx, &u.f_i)) return true; return false; } return false; case 0x00000007: /* ........ ........ ........ .0000111 */ - decode_insn32_extract_i(ctx, &u.f_i, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .000.... .0000111 */ + if (trans_vlbu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .000.... .0000111 */ + if (trans_vlbuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .000.... 
.0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxbu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .000.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .000.... .0000111 */ + if (trans_vlb_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .000.... .0000111 */ + if (trans_vlbff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsb_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .000.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:156 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_flw(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:184 */ + decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_fld(ctx, &u.f_i)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .101.... .0000111 */ + if (trans_vlhu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .101.... .0000111 */ + if (trans_vlhuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlshu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxhu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .101.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .101.... .0000111 */ + if (trans_vlh_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .101.... .0000111 */ + if (trans_vlhff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsh_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .101.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .110.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .110.... .0000111 */ + if (trans_vlwu_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .110.... 
.0000111 */ + if (trans_vlwuff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlswu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxwu_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x4: + /* ...100.. ........ .110.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...100.0 0000.... .110.... .0000111 */ + if (trans_vlw_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...100.1 0000.... .110.... .0000111 */ + if (trans_vlwff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x6: + /* ...110.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlsw_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x7: + /* ...111.. ........ .110.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0000111 */ + switch ((insn >> 26) & 0x7) { + case 0x0: + /* ...000.. ........ .111.... .0000111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* ...000.0 0000.... .111.... .0000111 */ + if (trans_vle_v(ctx, &u.f_r2nfvm)) return true; + return false; + case 0x10: + /* ...000.1 0000.... .111.... .0000111 */ + if (trans_vleff_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ...010.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlse_v(ctx, &u.f_rnfvm)) return true; + return false; + case 0x3: + /* ...011.. ........ .111.... .0000111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vlxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000000f: @@ -656,14 +1672,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:123 */ - decode_insn32_extract_decode_insn32_Fmt_19(ctx, &u.f_decode_insn3216, insn); - if (trans_fence(ctx, &u.f_decode_insn3216)) return true; + decode_insn32_extract_decode_insn32_Fmt_29(ctx, &u.f_decode_insn3224, insn); + if (trans_fence(ctx, &u.f_decode_insn3224)) return true; return false; case 0x1: /* ........ ........ .001.... .0001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:124 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); if (trans_fence_i(ctx, &u.f_empty)) return true; return false; } @@ -673,7 +1687,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:104 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_addi(ctx, &u.f_i)) return true; return false; @@ -683,26 +1696,22 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .001.... 
.0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:110 */ if (trans_slli(ctx, &u.f_shift)) return true; return false; } return false; case 0x2: /* ........ ........ .010.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:105 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_slti(ctx, &u.f_i)) return true; return false; case 0x3: /* ........ ........ .011.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:106 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_sltiu(ctx, &u.f_i)) return true; return false; case 0x4: /* ........ ........ .100.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:107 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_xori(ctx, &u.f_i)) return true; return false; @@ -712,25 +1721,21 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 30) & 0x3) { case 0x0: /* 00...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:111 */ if (trans_srli(ctx, &u.f_shift)) return true; return false; case 0x1: /* 01...... ........ .101.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:112 */ if (trans_srai(ctx, &u.f_shift)) return true; return false; } return false; case 0x6: /* ........ ........ .110.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:108 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_ori(ctx, &u.f_i)) return true; return false; case 0x7: /* ........ ........ .111.... .0010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:109 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_andi(ctx, &u.f_i)) return true; return false; @@ -738,7 +1743,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x00000017: /* ........ ........ ........ .0010111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:87 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_auipc(ctx, &u.f_u)) return true; return false; @@ -747,7 +1751,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:30 */ decode_insn32_extract_i(ctx, &u.f_i, insn); if (trans_addiw(ctx, &u.f_i)) return true; return false; @@ -757,7 +1760,6 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ .001.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:31 */ if (trans_slliw(ctx, &u.f_shift)) return true; return false; } @@ -768,12 +1770,10 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ .101.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:32 */ if (trans_srliw(ctx, &u.f_shift)) return true; return false; case 0x20: /* 0100000. ........ .101.... .0011011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:33 */ if (trans_sraiw(ctx, &u.f_shift)) return true; return false; } @@ -786,40 +1786,155 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... 
.0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:101 */ if (trans_sb(ctx, &u.f_s)) return true; return false; case 0x1: /* ........ ........ .001.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:102 */ if (trans_sh(ctx, &u.f_s)) return true; return false; case 0x2: /* ........ ........ .010.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:103 */ if (trans_sw(ctx, &u.f_s)) return true; return false; case 0x3: /* ........ ........ .011.... .0100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:29 */ if (trans_sd(ctx, &u.f_s)) return true; return false; } return false; case 0x00000027: /* ........ ........ ........ .0100111 */ - decode_insn32_extract_s(ctx, &u.f_s, insn); switch ((insn >> 12) & 0x7) { + case 0x0: + /* ........ ........ .000.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .000.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .000.... .0100111 */ + if (trans_vsb_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .000.... .0100111 */ + if (trans_vssb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .000.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxb_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; case 0x2: /* ........ ........ .010.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:157 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsw(ctx, &u.f_s)) return true; return false; case 0x3: /* ........ ........ .011.... .0100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:185 */ + decode_insn32_extract_s(ctx, &u.f_s, insn); if (trans_fsd(ctx, &u.f_s)) return true; return false; + case 0x5: + /* ........ ........ .101.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .101.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .101.... .0100111 */ + if (trans_vsh_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .101.... .0100111 */ + if (trans_vssh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .101.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxh_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x6: + /* ........ ........ .110.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .110.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .110.... .0100111 */ + if (trans_vsw_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .110.... 
.0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .110.... .0100111 */ + if (trans_vssw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .110.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxw_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x7: + /* ........ ........ .111.... .0100111 */ + switch ((insn >> 26) & 0x3) { + case 0x0: + /* ....00.. ........ .111.... .0100111 */ + decode_insn32_extract_r2_nfvm(ctx, &u.f_r2nfvm, insn); + switch (insn & 0x11f00000) { + case 0x00000000: + /* ...000.0 0000.... .111.... .0100111 */ + if (trans_vse_v(ctx, &u.f_r2nfvm)) return true; + return false; + } + return false; + case 0x2: + /* ....10.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + switch ((insn >> 28) & 0x1) { + case 0x0: + /* ...010.. ........ .111.... .0100111 */ + if (trans_vsse_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; + case 0x3: + /* ....11.. ........ .111.... .0100111 */ + decode_insn32_extract_r_nfvm(ctx, &u.f_rnfvm, insn); + if (trans_vsxe_v(ctx, &u.f_rnfvm)) return true; + return false; + } + return false; } return false; case 0x0000002f: @@ -827,35 +1942,50 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xf8007000) { case 0x00002000: /* 00000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:146 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoadd_w(ctx, &u.f_atomic)) return true; return false; case 0x00003000: /* 00000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:51 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoadd_d(ctx, &u.f_atomic)) return true; return false; + case 0x00006000: + /* 00000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoaddw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x00007000: + /* 00000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoaddd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x08002000: /* 00001... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:145 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoswap_w(ctx, &u.f_atomic)) return true; return false; case 0x08003000: /* 00001... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:50 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoswap_d(ctx, &u.f_atomic)) return true; return false; + case 0x08006000: + /* 00001... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoswapw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x08007000: + /* 00001... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoswapd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x10002000: /* 00010... ........ .010.... .0101111 */ decode_insn32_extract_atom_ld(ctx, &u.f_atomic, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 00010..0 0000.... .010.... 
.0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:143 */ if (trans_lr_w(ctx, &u.f_atomic)) return true; return false; } @@ -866,107 +1996,160 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 20) & 0x1f) { case 0x0: /* 00010..0 0000.... .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:48 */ if (trans_lr_d(ctx, &u.f_atomic)) return true; return false; } return false; case 0x18002000: /* 00011... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:144 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_sc_w(ctx, &u.f_atomic)) return true; return false; case 0x18003000: /* 00011... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:49 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_sc_d(ctx, &u.f_atomic)) return true; return false; case 0x20002000: /* 00100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:147 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoxor_w(ctx, &u.f_atomic)) return true; return false; case 0x20003000: /* 00100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:52 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoxor_d(ctx, &u.f_atomic)) return true; return false; + case 0x20006000: + /* 00100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoxorw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x20007000: + /* 00100... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoxord_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x40002000: /* 01000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:149 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoor_w(ctx, &u.f_atomic)) return true; return false; case 0x40003000: /* 01000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:54 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoor_d(ctx, &u.f_atomic)) return true; return false; + case 0x40006000: + /* 01000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoorw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x40007000: + /* 01000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoord_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x60002000: /* 01100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:148 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoand_w(ctx, &u.f_atomic)) return true; return false; case 0x60003000: /* 01100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:53 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amoand_d(ctx, &u.f_atomic)) return true; return false; + case 0x60006000: + /* 01100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoandw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x60007000: + /* 01100... 
........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamoandd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0x80002000: /* 10000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:150 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomin_w(ctx, &u.f_atomic)) return true; return false; case 0x80003000: /* 10000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:55 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomin_d(ctx, &u.f_atomic)) return true; return false; + case 0x80006000: + /* 10000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0x80007000: + /* 10000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomind_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xa0002000: /* 10100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:151 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomax_w(ctx, &u.f_atomic)) return true; return false; case 0xa0003000: /* 10100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:56 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomax_d(ctx, &u.f_atomic)) return true; return false; + case 0xa0006000: + /* 10100... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0xa0007000: + /* 10100... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxd_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xc0002000: /* 11000... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:152 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amominu_w(ctx, &u.f_atomic)) return true; return false; case 0xc0003000: /* 11000... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:57 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amominu_d(ctx, &u.f_atomic)) return true; return false; + case 0xc0006000: + /* 11000... ........ .110.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominuw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0xc0007000: + /* 11000... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamominud_v(ctx, &u.f_rwdvm)) return true; + return false; case 0xe0002000: /* 11100... ........ .010.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:153 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomaxu_w(ctx, &u.f_atomic)) return true; return false; case 0xe0003000: /* 11100... ........ .011.... .0101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:58 */ decode_insn32_extract_atom_st(ctx, &u.f_atomic, insn); if (trans_amomaxu_d(ctx, &u.f_atomic)) return true; return false; + case 0xe0006000: + /* 11100... ........ .110.... 
.0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxuw_v(ctx, &u.f_rwdvm)) return true; + return false; + case 0xe0007000: + /* 11100... ........ .111.... .0101111 */ + decode_insn32_extract_r_wdvm(ctx, &u.f_rwdvm, insn); + if (trans_vamomaxud_v(ctx, &u.f_rwdvm)) return true; + return false; } return false; case 0x00000033: @@ -975,99 +2158,80 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe007000) { case 0x00000000: /* 0000000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:113 */ if (trans_add(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 0000000. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:115 */ if (trans_sll(ctx, &u.f_r)) return true; return false; case 0x00002000: /* 0000000. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:116 */ if (trans_slt(ctx, &u.f_r)) return true; return false; case 0x00003000: /* 0000000. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:117 */ if (trans_sltu(ctx, &u.f_r)) return true; return false; case 0x00004000: /* 0000000. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:118 */ if (trans_xor(ctx, &u.f_r)) return true; return false; case 0x00005000: /* 0000000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:119 */ if (trans_srl(ctx, &u.f_r)) return true; return false; case 0x00006000: /* 0000000. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:121 */ if (trans_or(ctx, &u.f_r)) return true; return false; case 0x00007000: /* 0000000. ........ .111.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:122 */ if (trans_and(ctx, &u.f_r)) return true; return false; case 0x02000000: /* 0000001. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:133 */ if (trans_mul(ctx, &u.f_r)) return true; return false; case 0x02001000: /* 0000001. ........ .001.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:134 */ if (trans_mulh(ctx, &u.f_r)) return true; return false; case 0x02002000: /* 0000001. ........ .010.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:135 */ if (trans_mulhsu(ctx, &u.f_r)) return true; return false; case 0x02003000: /* 0000001. ........ .011.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:136 */ if (trans_mulhu(ctx, &u.f_r)) return true; return false; case 0x02004000: /* 0000001. ........ .100.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:137 */ if (trans_div(ctx, &u.f_r)) return true; return false; case 0x02005000: /* 0000001. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:138 */ if (trans_divu(ctx, &u.f_r)) return true; return false; case 0x02006000: /* 0000001. ........ .110.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:139 */ if (trans_rem(ctx, &u.f_r)) return true; return false; case 0x02007000: /* 0000001. ........ .111.... 
.0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:140 */ if (trans_remu(ctx, &u.f_r)) return true; return false; case 0x40000000: /* 0100000. ........ .000.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:114 */ if (trans_sub(ctx, &u.f_r)) return true; return false; case 0x40005000: /* 0100000. ........ .101.... .0110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:120 */ if (trans_sra(ctx, &u.f_r)) return true; return false; } return false; case 0x00000037: /* ........ ........ ........ .0110111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:86 */ decode_insn32_extract_u(ctx, &u.f_u, insn); if (trans_lui(ctx, &u.f_u)) return true; return false; @@ -1077,117 +2241,99 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe007000) { case 0x00000000: /* 0000000. ........ .000.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:34 */ if (trans_addw(ctx, &u.f_r)) return true; return false; case 0x00001000: /* 0000000. ........ .001.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:36 */ if (trans_sllw(ctx, &u.f_r)) return true; return false; case 0x00005000: /* 0000000. ........ .101.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:37 */ if (trans_srlw(ctx, &u.f_r)) return true; return false; case 0x02000000: /* 0000001. ........ .000.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:41 */ if (trans_mulw(ctx, &u.f_r)) return true; return false; case 0x02004000: /* 0000001. ........ .100.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:42 */ if (trans_divw(ctx, &u.f_r)) return true; return false; case 0x02005000: /* 0000001. ........ .101.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:43 */ if (trans_divuw(ctx, &u.f_r)) return true; return false; case 0x02006000: /* 0000001. ........ .110.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:44 */ if (trans_remw(ctx, &u.f_r)) return true; return false; case 0x02007000: /* 0000001. ........ .111.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:45 */ if (trans_remuw(ctx, &u.f_r)) return true; return false; case 0x40000000: /* 0100000. ........ .000.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:35 */ if (trans_subw(ctx, &u.f_r)) return true; return false; case 0x40005000: /* 0100000. ........ .101.... .0111011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:38 */ if (trans_sraw(ctx, &u.f_r)) return true; return false; } return false; case 0x00000043: /* ........ ........ ........ .1000011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:158 */ - if (trans_fmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ 
.1000011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:186 */ - if (trans_fmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x00000047: /* ........ ........ ........ .1000111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:159 */ - if (trans_fmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1000111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:187 */ - if (trans_fmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004b: /* ........ ........ ........ .1001011 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:160 */ - if (trans_fnmsub_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:188 */ - if (trans_fnmsub_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmsub_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; case 0x0000004f: /* ........ ........ ........ .1001111 */ - decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3210, insn); + decode_insn32_extract_r4_rm(ctx, &u.f_decode_insn3215, insn); switch ((insn >> 25) & 0x3) { case 0x0: /* .....00. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:161 */ - if (trans_fnmadd_s(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_s(ctx, &u.f_decode_insn3215)) return true; return false; case 0x1: /* .....01. ........ ........ .1001111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:189 */ - if (trans_fnmadd_d(ctx, &u.f_decode_insn3210)) return true; + if (trans_fnmadd_d(ctx, &u.f_decode_insn3215)) return true; return false; } return false; @@ -1196,51 +2342,43 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 25) & 0x7f) { case 0x0: /* 0000000. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:162 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x1: /* 0000001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:190 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fadd_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fadd_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x4: /* 0000100. ........ ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:163 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x5: /* 0000101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:191 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fsub_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fsub_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x8: /* 0001000. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:164 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0x9: /* 0001001. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:192 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fmul_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fmul_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0xc: /* 0001100. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:165 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_s(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_s(ctx, &u.f_decode_insn3216)) return true; return false; case 0xd: /* 0001101. ........ ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:193 */ - decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3211, insn); - if (trans_fdiv_d(ctx, &u.f_decode_insn3211)) return true; + decode_insn32_extract_r_rm(ctx, &u.f_decode_insn3216, insn); + if (trans_fdiv_d(ctx, &u.f_decode_insn3216)) return true; return false; case 0x10: /* 0010000. ........ ........ .1010011 */ @@ -1248,17 +2386,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010000. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:167 */ if (trans_fsgnj_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:168 */ if (trans_fsgnjn_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:169 */ if (trans_fsgnjx_s(ctx, &u.f_r)) return true; return false; } @@ -1269,17 +2404,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010001. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:195 */ if (trans_fsgnj_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010001. ........ .001.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:196 */ if (trans_fsgnjn_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 0010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:197 */ if (trans_fsgnjx_d(ctx, &u.f_r)) return true; return false; } @@ -1290,12 +2422,10 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010100. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:170 */ if (trans_fmin_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010100. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:171 */ if (trans_fmax_s(ctx, &u.f_r)) return true; return false; } @@ -1306,57 +2436,51 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 0010101. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:198 */ if (trans_fmin_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 0010101. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:199 */ if (trans_fmax_d(ctx, &u.f_r)) return true; return false; } return false; case 0x20: /* 0100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x1: /* 01000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:200 */ - if (trans_fcvt_s_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x21: /* 0100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:201 */ - if (trans_fcvt_d_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2c: /* 0101100. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:166 */ - if (trans_fsqrt_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x2d: /* 0101101. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 01011010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:194 */ - if (trans_fsqrt_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fsqrt_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; @@ -1366,17 +2490,14 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010000. ........ .000.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:177 */ if (trans_fle_s(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010000. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:176 */ if (trans_flt_s(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010000. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:175 */ if (trans_feq_s(ctx, &u.f_r)) return true; return false; } @@ -1387,176 +2508,1772 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* 1010001. ........ .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:204 */ if (trans_fle_d(ctx, &u.f_r)) return true; return false; case 0x1: /* 1010001. ........ .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:203 */ if (trans_flt_d(ctx, &u.f_r)) return true; return false; case 0x2: /* 1010001. ........ .010.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:202 */ if (trans_feq_d(ctx, &u.f_r)) return true; return false; } return false; case 0x60: /* 1100000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:172 */ - if (trans_fcvt_w_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:173 */ - if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11000000 0010.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:61 */ - if (trans_fcvt_l_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_l_s(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11000000 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:62 */ - if (trans_fcvt_lu_s(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_lu_s(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x61: /* 1100001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11000010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:206 */ - if (trans_fcvt_w_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_w_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11000010 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:207 */ - if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_wu_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11000010 0010.... ........ 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:67 */ - if (trans_fcvt_l_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_l_d(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11000010 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:68 */ - if (trans_fcvt_lu_d(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_lu_d(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x68: /* 1101000. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010000 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:179 */ - if (trans_fcvt_s_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010000 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:180 */ - if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_wu(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11010000 0010.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:63 */ - if (trans_fcvt_s_l(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_l(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11010000 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:64 */ - if (trans_fcvt_s_lu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_s_lu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x69: /* 1101001. ........ ........ .1010011 */ - decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3212, insn); + decode_insn32_extract_r2_rm(ctx, &u.f_decode_insn3217, insn); switch ((insn >> 20) & 0x1f) { case 0x0: /* 11010010 0000.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:208 */ - if (trans_fcvt_d_w(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_w(ctx, &u.f_decode_insn3217)) return true; return false; case 0x1: /* 11010010 0001.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:209 */ - if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_wu(ctx, &u.f_decode_insn3217)) return true; return false; case 0x2: /* 11010010 0010.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:70 */ - if (trans_fcvt_d_l(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_l(ctx, &u.f_decode_insn3217)) return true; return false; case 0x3: /* 11010010 0011.... ........ .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:71 */ - if (trans_fcvt_d_lu(ctx, &u.f_decode_insn3212)) return true; + if (trans_fcvt_d_lu(ctx, &u.f_decode_insn3217)) return true; return false; } return false; case 0x70: /* 1110000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11100000 0000.... .000.... 
.1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:174 */ - if (trans_fmv_x_w(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_x_w(ctx, &u.f_decode_insn3218)) return true; return false; case 0x00001000: /* 11100000 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:178 */ - if (trans_fclass_s(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_s(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x71: /* 1110001. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11100010 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:69 */ - if (trans_fmv_x_d(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_x_d(ctx, &u.f_decode_insn3218)) return true; return false; case 0x00001000: /* 11100010 0000.... .001.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:205 */ - if (trans_fclass_d(ctx, &u.f_decode_insn3213)) return true; + if (trans_fclass_d(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x78: /* 1111000. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11110000 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:181 */ - if (trans_fmv_w_x(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_w_x(ctx, &u.f_decode_insn3218)) return true; return false; } return false; case 0x79: /* 1111001. ........ ........ .1010011 */ - decode_insn32_extract_r2(ctx, &u.f_decode_insn3213, insn); + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); switch (insn & 0x01f07000) { case 0x00000000: /* 11110010 0000.... .000.... .1010011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32-64.decode:72 */ - if (trans_fmv_d_x(ctx, &u.f_decode_insn3213)) return true; + if (trans_fmv_d_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x00000057: + /* ........ ........ ........ .1010111 */ + switch (insn & 0x80007000) { + case 0x00000000: + /* 0....... ........ .000.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .000.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .000.... .1010111 */ + if (trans_vadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .000.... .1010111 */ + if (trans_vmadc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .000.... .1010111 */ + if (trans_vsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .000.... .1010111 */ + if (trans_vmsbc_vvm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .000.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vvm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .000.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .000.... .1010111 */ + if (trans_vmv_v_v(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .000.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00001000: + /* 0....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 0000.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000000.. ........ .001.... .1010111 */ + if (trans_vfadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000010.. ........ .001.... .1010111 */ + if (trans_vfsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x04000000: + /* 0000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x10000000: + /* 0001.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000100.. ........ .001.... .1010111 */ + if (trans_vfmin_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000110.. ........ .001.... .1010111 */ + if (trans_vfmax_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x14000000: + /* 0001.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 000101.. ........ .001.... .1010111 */ + if (trans_vfredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000111.. ........ .001.... .1010111 */ + if (trans_vfredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 0010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001000.. ........ .001.... .1010111 */ + if (trans_vfsgnj_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 001010.. ........ .001.... .1010111 */ + if (trans_vfsgnjx_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 0010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 001001.. ........ .001.... .1010111 */ + if (trans_vfsgnjn_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 0011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r2rd(ctx, &u.f_decode_insn3220, insn); + switch (insn & 0x0a0f8000) { + case 0x02000000: + /* 0011001. ....0000 0001.... .1010111 */ + if (trans_vfmv_f_s(ctx, &u.f_decode_insn3220)) return true; + return false; + } + return false; + case 0x60000000: + /* 0110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011000.. ........ .001.... .1010111 */ + if (trans_vmfeq_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011010.. ........ .001.... .1010111 */ + if (trans_vmford_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x64000000: + /* 0110.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011001.. ........ .001.... .1010111 */ + if (trans_vmfle_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 011011.. ........ .001.... 
.1010111 */ + if (trans_vmflt_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 0111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 011100.. ........ .001.... .1010111 */ + if (trans_vmfne_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x00002000: + /* 0....... ........ .010.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 000001.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredand_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredxor_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredminu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmin_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmaxu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vredmax_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0011001. ........ .010.... .1010111 */ + if (trans_vext_x_v(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x14: + /* 010100.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmpopc_m(ctx, &u.f_rmr)) return true; + return false; + case 0x15: + /* 010101.. ........ .010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmfirst_m(ctx, &u.f_rmr)) return true; + return false; + case 0x16: + /* 010110.. ........ .010.... .1010111 */ + switch ((insn >> 15) & 0x1f) { + case 0x1: + /* 010110.. ....0000 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsbf_m(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 010110.. ....0001 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsof_m(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 010110.. ....0001 1010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_vmsif_m(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 010110.. ....1000 0010.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + if (trans_viota_m(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 010110.. ....1000 1010.... .1010111 */ + decode_insn32_extract_r1_vm(ctx, &u.f_decode_insn3219, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 010110.0 00001000 1010.... 
.1010111 */ + if (trans_vid_v(ctx, &u.f_decode_insn3219)) return true; + return false; + } + return false; + } + return false; + case 0x17: + /* 010111.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vcompress_vm(ctx, &u.f_r)) return true; + return false; + case 0x18: + /* 011000.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmandnot_mm(ctx, &u.f_r)) return true; + return false; + case 0x19: + /* 011001.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1a: + /* 011010.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1b: + /* 011011.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1c: + /* 011100.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmornot_mm(ctx, &u.f_r)) return true; + return false; + case 0x1d: + /* 011101.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnand_mm(ctx, &u.f_r)) return true; + return false; + case 0x1e: + /* 011110.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmnor_mm(ctx, &u.f_r)) return true; + return false; + case 0x1f: + /* 011111.. ........ .010.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + if (trans_vmxnor_mm(ctx, &u.f_r)) return true; + return false; + } + return false; + case 0x00003000: + /* 0....... ........ .011.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .011.... .1010111 */ + if (trans_vadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .011.... 
.1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .011.... .1010111 */ + if (trans_vmadc_vim(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .011.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vim(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .011.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .011.... .1010111 */ + if (trans_vmv_v_i(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00004000: + /* 0....... ........ .100.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 000011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vminu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 000101.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmin_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmaxu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 000111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmax_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vand_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 001011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vxor_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 001100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vrgather_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 001110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslideup_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslidedown_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 010000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100001. ........ .100.... .1010111 */ + if (trans_vadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x11: + /* 010001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100011. ........ .100.... .1010111 */ + if (trans_vmadc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x12: + /* 010010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100101. ........ .100.... .1010111 */ + if (trans_vsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x13: + /* 010011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_1(ctx, &u.f_rmrr, insn); + switch ((insn >> 25) & 0x1) { + case 0x1: + /* 0100111. ........ .100.... .1010111 */ + if (trans_vmsbc_vxm(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .100.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vmerge_vxm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .100.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .100.... .1010111 */ + if (trans_vmv_v_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmseq_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsne_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsltu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmslt_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsleu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .100.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsle_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 011110.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgtu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmsgt_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00005000: + /* 0....... ........ .101.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 000000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 000010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 000100.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmin_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 000110.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfmax_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 001000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnj_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 001001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjn_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 001010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfsgnjx_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 001101.. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .101.... .1010111 */ + if (trans_vfmv_s_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0x17: + /* 010111.. ........ .101.... .1010111 */ + switch ((insn >> 25) & 0x1) { + case 0x0: + /* 0101110. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm_0(ctx, &u.f_rmrr, insn); + if (trans_vfmerge_vfm(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 0101111. ........ .101.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x1f) { + case 0x0: + /* 01011110 0000.... .101.... .1010111 */ + if (trans_vfmv_v_f(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + } + return false; + case 0x18: + /* 011000.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfeq_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x19: + /* 011001.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfle_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 011010.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmford_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 011011.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmflt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 011100.. ........ .101.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfne_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 011101.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfgt_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 011111.. ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vmfge_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00006000: + /* 0....... ........ .110.... .1010111 */ + switch ((insn >> 26) & 0x1f) { + case 0xd: + /* 001101.. ........ .110.... .1010111 */ + decode_insn32_extract_r2(ctx, &u.f_decode_insn3218, insn); + switch ((insn >> 20) & 0x3f) { + case 0x20: + /* 00110110 0000.... .110.... .1010111 */ + if (trans_vmv_s_x(ctx, &u.f_decode_insn3218)) return true; + return false; + } + return false; + case 0xe: + /* 001110.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1up_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 001111.. ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vslide1down_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x00007000: + /* 0....... ........ .111.... .1010111 */ + decode_insn32_extract_r2_zimm(ctx, &u.f_decode_insn3221, insn); + if (trans_vsetvli(ctx, &u.f_decode_insn3221)) return true; + return false; + case 0x80000000: + /* 1....... ........ .000.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .000.... .1010111 */ + if (trans_vsaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .000.... .1010111 */ + if (trans_vsadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .000.... .1010111 */ + if (trans_vssubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .000.... .1010111 */ + if (trans_vssub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .000.... .1010111 */ + if (trans_vaadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .000.... .1010111 */ + if (trans_vsll_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .000.... .1010111 */ + if (trans_vasub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .000.... .1010111 */ + if (trans_vsmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .000.... .1010111 */ + if (trans_vsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .000.... .1010111 */ + if (trans_vsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .000.... .1010111 */ + if (trans_vssrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .000.... .1010111 */ + if (trans_vssra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .000.... .1010111 */ + if (trans_vnsrl_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .000.... .1010111 */ + if (trans_vnsra_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .000.... .1010111 */ + if (trans_vnclipu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .000.... 
.1010111 */ + if (trans_vnclip_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .000.... .1010111 */ + if (trans_vwredsumu_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .000.... .1010111 */ + if (trans_vwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .000.... .1010111 */ + if (trans_vwsmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .000.... .1010111 */ + if (trans_vwsmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .000.... .1010111 */ + if (trans_vwsmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80001000: + /* 1....... ........ .001.... .1010111 */ + switch (insn & 0x74000000) { + case 0x00000000: + /* 1000.0.. ........ .001.... .1010111 */ + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100000.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100010.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch ((insn >> 15) & 0x1f) { + case 0x0: + /* 100010.. ....0000 0001.... .1010111 */ + if (trans_vfcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x1: + /* 100010.. ....0000 1001.... .1010111 */ + if (trans_vfcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x2: + /* 100010.. ....0001 0001.... .1010111 */ + if (trans_vfcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x3: + /* 100010.. ....0001 1001.... .1010111 */ + if (trans_vfcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x8: + /* 100010.. ....0100 0001.... .1010111 */ + if (trans_vfwcvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x9: + /* 100010.. ....0100 1001.... .1010111 */ + if (trans_vfwcvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0xa: + /* 100010.. ....0101 0001.... .1010111 */ + if (trans_vfwcvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0xb: + /* 100010.. ....0101 1001.... .1010111 */ + if (trans_vfwcvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0xc: + /* 100010.. ....0110 0001.... .1010111 */ + if (trans_vfwcvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x10: + /* 100010.. ....1000 0001.... .1010111 */ + if (trans_vfncvt_xu_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x11: + /* 100010.. ....1000 1001.... .1010111 */ + if (trans_vfncvt_x_f_v(ctx, &u.f_rmr)) return true; + return false; + case 0x12: + /* 100010.. ....1001 0001.... .1010111 */ + if (trans_vfncvt_f_xu_v(ctx, &u.f_rmr)) return true; + return false; + case 0x13: + /* 100010.. ....1001 1001.... .1010111 */ + if (trans_vfncvt_f_x_v(ctx, &u.f_rmr)) return true; + return false; + case 0x14: + /* 100010.. ....1010 0001.... .1010111 */ + if (trans_vfncvt_f_f_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + } + return false; + case 0x04000000: + /* 1000.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r2_vm(ctx, &u.f_rmr, insn); + switch (insn & 0x080f8000) { + case 0x08000000: + /* 100011.. ....0000 0001.... .1010111 */ + if (trans_vfsqrt_v(ctx, &u.f_rmr)) return true; + return false; + case 0x08080000: + /* 100011.. ....1000 0001.... .1010111 */ + if (trans_vfclass_v(ctx, &u.f_rmr)) return true; + return false; + } + return false; + case 0x10000000: + /* 1001.0.. ........ .001.... 
.1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 100100.. ........ .001.... .1010111 */ + if (trans_vfmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x20000000: + /* 1010.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101000.. ........ .001.... .1010111 */ + if (trans_vfmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101010.. ........ .001.... .1010111 */ + if (trans_vfmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x24000000: + /* 1010.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101001.. ........ .001.... .1010111 */ + if (trans_vfnmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101011.. ........ .001.... .1010111 */ + if (trans_vfnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x30000000: + /* 1011.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101100.. ........ .001.... .1010111 */ + if (trans_vfmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101110.. ........ .001.... .1010111 */ + if (trans_vfmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x34000000: + /* 1011.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 101101.. ........ .001.... .1010111 */ + if (trans_vfnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 101111.. ........ .001.... .1010111 */ + if (trans_vfnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x40000000: + /* 1100.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110000.. ........ .001.... .1010111 */ + if (trans_vfwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110010.. ........ .001.... .1010111 */ + if (trans_vfwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x44000000: + /* 1100.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + if (trans_vfwredsum_vs(ctx, &u.f_rmrr)) return true; + return false; + case 0x50000000: + /* 1101.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 110100.. ........ .001.... .1010111 */ + if (trans_vfwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 110110.. ........ .001.... .1010111 */ + if (trans_vfwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x60000000: + /* 1110.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111000.. ........ .001.... .1010111 */ + if (trans_vfwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x70000000: + /* 1111.0.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111100.. ........ .001.... .1010111 */ + if (trans_vfwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111110.. ........ .001.... 
.1010111 */ + if (trans_vfwmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x74000000: + /* 1111.1.. ........ .001.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 27) & 0x1) { + case 0x0: + /* 111101.. ........ .001.... .1010111 */ + if (trans_vfwnmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 111111.. ........ .001.... .1010111 */ + if (trans_vfwnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + } + return false; + case 0x80002000: + /* 1....... ........ .010.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .010.... .1010111 */ + if (trans_vdivu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .010.... .1010111 */ + if (trans_vdiv_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .010.... .1010111 */ + if (trans_vremu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .010.... .1010111 */ + if (trans_vrem_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .010.... .1010111 */ + if (trans_vmulhu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .010.... .1010111 */ + if (trans_vmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .010.... .1010111 */ + if (trans_vmulhsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .010.... .1010111 */ + if (trans_vmulh_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .010.... .1010111 */ + if (trans_vmadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .010.... .1010111 */ + if (trans_vnmsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .010.... .1010111 */ + if (trans_vmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .010.... .1010111 */ + if (trans_vnmsac_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .010.... .1010111 */ + if (trans_vwaddu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .010.... .1010111 */ + if (trans_vwadd_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .010.... .1010111 */ + if (trans_vwsubu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .010.... .1010111 */ + if (trans_vwsub_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .010.... .1010111 */ + if (trans_vwaddu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .010.... .1010111 */ + if (trans_vwadd_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .010.... .1010111 */ + if (trans_vwsubu_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .010.... .1010111 */ + if (trans_vwsub_wv(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .010.... .1010111 */ + if (trans_vwmulu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .010.... .1010111 */ + if (trans_vwmulsu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .010.... 
.1010111 */ + if (trans_vwmul_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .010.... .1010111 */ + if (trans_vwmaccu_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .010.... .1010111 */ + if (trans_vwmacc_vv(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .010.... .1010111 */ + if (trans_vwmaccsu_vv(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80003000: + /* 1....... ........ .011.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .011.... .1010111 */ + if (trans_vsaddu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .011.... .1010111 */ + if (trans_vsadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .011.... .1010111 */ + if (trans_vaadd_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .011.... .1010111 */ + if (trans_vsll_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .011.... .1010111 */ + if (trans_vsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .011.... .1010111 */ + if (trans_vsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .011.... .1010111 */ + if (trans_vssrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .011.... .1010111 */ + if (trans_vssra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .011.... .1010111 */ + if (trans_vnsrl_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .011.... .1010111 */ + if (trans_vnsra_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .011.... .1010111 */ + if (trans_vnclipu_vi(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .011.... .1010111 */ + if (trans_vnclip_vi(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80004000: + /* 1....... ........ .100.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .100.... .1010111 */ + if (trans_vsaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .100.... .1010111 */ + if (trans_vsadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .100.... .1010111 */ + if (trans_vssubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .100.... .1010111 */ + if (trans_vssub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .100.... .1010111 */ + if (trans_vaadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .100.... .1010111 */ + if (trans_vsll_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .100.... .1010111 */ + if (trans_vasub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .100.... .1010111 */ + if (trans_vsmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .100.... .1010111 */ + if (trans_vsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .100.... .1010111 */ + if (trans_vsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .100.... 
.1010111 */ + if (trans_vssrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .100.... .1010111 */ + if (trans_vssra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .100.... .1010111 */ + if (trans_vnsrl_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .100.... .1010111 */ + if (trans_vnsra_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .100.... .1010111 */ + if (trans_vnclipu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .100.... .1010111 */ + if (trans_vnclip_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .100.... .1010111 */ + if (trans_vwsmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .100.... .1010111 */ + if (trans_vwsmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .100.... .1010111 */ + if (trans_vwsmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .100.... .1010111 */ + if (trans_vwsmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80005000: + /* 1....... ........ .101.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .101.... .1010111 */ + if (trans_vfdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .101.... .1010111 */ + if (trans_vfrdiv_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .101.... .1010111 */ + if (trans_vfmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .101.... .1010111 */ + if (trans_vfrsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x8: + /* 101000.. ........ .101.... .1010111 */ + if (trans_vfmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .101.... .1010111 */ + if (trans_vfnmadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xa: + /* 101010.. ........ .101.... .1010111 */ + if (trans_vfmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .101.... .1010111 */ + if (trans_vfnmsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xc: + /* 101100.. ........ .101.... .1010111 */ + if (trans_vfmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .101.... .1010111 */ + if (trans_vfnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xe: + /* 101110.. ........ .101.... .1010111 */ + if (trans_vfmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .101.... .1010111 */ + if (trans_vfnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .101.... .1010111 */ + if (trans_vfwadd_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .101.... .1010111 */ + if (trans_vfwsub_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .101.... .1010111 */ + if (trans_vfwadd_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .101.... .1010111 */ + if (trans_vfwsub_wf(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .101.... .1010111 */ + if (trans_vfwmul_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .101.... 
.1010111 */ + if (trans_vfwmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .101.... .1010111 */ + if (trans_vfwnmacc_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .101.... .1010111 */ + if (trans_vfwmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .101.... .1010111 */ + if (trans_vfwnmsac_vf(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80006000: + /* 1....... ........ .110.... .1010111 */ + decode_insn32_extract_r_vm(ctx, &u.f_rmrr, insn); + switch ((insn >> 26) & 0x1f) { + case 0x0: + /* 100000.. ........ .110.... .1010111 */ + if (trans_vdivu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1: + /* 100001.. ........ .110.... .1010111 */ + if (trans_vdiv_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x2: + /* 100010.. ........ .110.... .1010111 */ + if (trans_vremu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x3: + /* 100011.. ........ .110.... .1010111 */ + if (trans_vrem_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x4: + /* 100100.. ........ .110.... .1010111 */ + if (trans_vmulhu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x5: + /* 100101.. ........ .110.... .1010111 */ + if (trans_vmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x6: + /* 100110.. ........ .110.... .1010111 */ + if (trans_vmulhsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x7: + /* 100111.. ........ .110.... .1010111 */ + if (trans_vmulh_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x9: + /* 101001.. ........ .110.... .1010111 */ + if (trans_vmadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xb: + /* 101011.. ........ .110.... .1010111 */ + if (trans_vnmsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xd: + /* 101101.. ........ .110.... .1010111 */ + if (trans_vmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0xf: + /* 101111.. ........ .110.... .1010111 */ + if (trans_vnmsac_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x10: + /* 110000.. ........ .110.... .1010111 */ + if (trans_vwaddu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x11: + /* 110001.. ........ .110.... .1010111 */ + if (trans_vwadd_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x12: + /* 110010.. ........ .110.... .1010111 */ + if (trans_vwsubu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x13: + /* 110011.. ........ .110.... .1010111 */ + if (trans_vwsub_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x14: + /* 110100.. ........ .110.... .1010111 */ + if (trans_vwaddu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x15: + /* 110101.. ........ .110.... .1010111 */ + if (trans_vwadd_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x16: + /* 110110.. ........ .110.... .1010111 */ + if (trans_vwsubu_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x17: + /* 110111.. ........ .110.... .1010111 */ + if (trans_vwsub_wx(ctx, &u.f_rmrr)) return true; + return false; + case 0x18: + /* 111000.. ........ .110.... .1010111 */ + if (trans_vwmulu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1a: + /* 111010.. ........ .110.... .1010111 */ + if (trans_vwmulsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1b: + /* 111011.. ........ .110.... .1010111 */ + if (trans_vwmul_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1c: + /* 111100.. ........ .110.... 
.1010111 */ + if (trans_vwmaccu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1d: + /* 111101.. ........ .110.... .1010111 */ + if (trans_vwmacc_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1e: + /* 111110.. ........ .110.... .1010111 */ + if (trans_vwmaccsu_vx(ctx, &u.f_rmrr)) return true; + return false; + case 0x1f: + /* 111111.. ........ .110.... .1010111 */ + if (trans_vwmaccus_vx(ctx, &u.f_rmrr)) return true; + return false; + } + return false; + case 0x80007000: + /* 1....... ........ .111.... .1010111 */ + decode_insn32_extract_r(ctx, &u.f_r, insn); + switch ((insn >> 25) & 0x3f) { + case 0x0: + /* 1000000. ........ .111.... .1010111 */ + if (trans_vsetvl(ctx, &u.f_r)) return true; return false; } return false; @@ -1568,32 +4285,26 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:90 */ if (trans_beq(ctx, &u.f_b)) return true; return false; case 0x1: /* ........ ........ .001.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:91 */ if (trans_bne(ctx, &u.f_b)) return true; return false; case 0x4: /* ........ ........ .100.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:92 */ if (trans_blt(ctx, &u.f_b)) return true; return false; case 0x5: /* ........ ........ .101.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:93 */ if (trans_bge(ctx, &u.f_b)) return true; return false; case 0x6: /* ........ ........ .110.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:94 */ if (trans_bltu(ctx, &u.f_b)) return true; return false; case 0x7: /* ........ ........ .111.... .1100011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:95 */ if (trans_bgeu(ctx, &u.f_b)) return true; return false; } @@ -1604,14 +4315,12 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0x7) { case 0x0: /* ........ ........ .000.... .1100111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:89 */ if (trans_jalr(ctx, &u.f_i)) return true; return false; } return false; case 0x0000006f: /* ........ ........ ........ .1101111 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:88 */ decode_insn32_extract_j(ctx, &u.f_j, insn); if (trans_jal(ctx, &u.f_j)) return true; return false; @@ -1623,21 +4332,18 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch (insn & 0xfe000f80) { case 0x00000000: /* 0000000. ........ 
.0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x0: /* 00000000 00000000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:74 */ if (trans_ecall(ctx, &u.f_empty)) return true; return false; case 0x20: /* 00000000 00010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:75 */ if (trans_ebreak(ctx, &u.f_empty)) return true; return false; case 0x40: /* 00000000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:76 */ if (trans_uret(ctx, &u.f_empty)) return true; return false; } @@ -1647,28 +4353,25 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) switch ((insn >> 20) & 0x1f) { case 0x2: /* 00010000 0010.... .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:77 */ if (trans_sret(ctx, &u.f_empty)) return true; return false; } return false; case 0x4: /* 00010000 0100.... .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:83 */ - decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3215, insn); - if (trans_sfence_vm(ctx, &u.f_decode_insn3215)) return true; + decode_insn32_extract_sfence_vm(ctx, &u.f_decode_insn3223, insn); + if (trans_sfence_vm(ctx, &u.f_decode_insn3223)) return true; return false; case 0x5: /* 00010000 0101.... .0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x1f) { case 0x0: /* 00010000 01010000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:79 */ if (trans_wfi(ctx, &u.f_empty)) return true; return false; } @@ -1677,70 +4380,60 @@ static bool decode_insn32(DisasContext *ctx, uint32_t insn) return false; case 0x12000000: /* 0001001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:82 */ - decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3214, insn); - if (trans_sfence_vma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_sfence_vma(ctx, &u.f_decode_insn3222, insn); + if (trans_sfence_vma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x22000000: /* 0010001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:81 */ - decode_insn32_extract_hfence_bvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_bvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_vvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_vvma(ctx, &u.f_decode_insn3222)) return true; return false; case 0x30000000: /* 0011000. ........ 
.0000000 01110011 */ - decode_insn32_extract_decode_insn32_Fmt_18(ctx, &u.f_empty, insn); + decode_insn32_extract_decode_insn32_Fmt_28(ctx, &u.f_empty, insn); switch ((insn >> 15) & 0x3ff) { case 0x40: /* 00110000 00100000 00000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:78 */ if (trans_mret(ctx, &u.f_empty)) return true; return false; } return false; case 0x62000000: /* 0110001. ........ .0000000 01110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:80 */ - decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3214, insn); - if (trans_hfence_gvma(ctx, &u.f_decode_insn3214)) return true; + decode_insn32_extract_hfence_gvma(ctx, &u.f_decode_insn3222, insn); + if (trans_hfence_gvma(ctx, &u.f_decode_insn3222)) return true; return false; } return false; case 0x1: /* ........ ........ .001.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:125 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrw(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrw(ctx, &u.f_decode_insn3214)) return true; return false; case 0x2: /* ........ ........ .010.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:126 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrs(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrs(ctx, &u.f_decode_insn3214)) return true; return false; case 0x3: /* ........ ........ .011.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:127 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrc(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrc(ctx, &u.f_decode_insn3214)) return true; return false; case 0x5: /* ........ ........ .101.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:128 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrwi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrwi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x6: /* ........ ........ .110.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:129 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrsi(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrsi(ctx, &u.f_decode_insn3214)) return true; return false; case 0x7: /* ........ ........ .111.... .1110011 */ - /* /home/me/projects/unicorn2/qemu-5.0.0-build/target/riscv/insn32.decode:130 */ - decode_insn32_extract_csr(ctx, &u.f_decode_insn329, insn); - if (trans_csrrci(ctx, &u.f_decode_insn329)) return true; + decode_insn32_extract_csr(ctx, &u.f_decode_insn3214, insn); + if (trans_csrrci(ctx, &u.f_decode_insn3214)) return true; return false; } return false; diff --git a/qemu/target/riscv/translate.c b/qemu/target/riscv/translate.c index 792bc12fd0..37f11cc481 100644 --- a/qemu/target/riscv/translate.c +++ b/qemu/target/riscv/translate.c @@ -56,6 +56,13 @@ typedef struct DisasContext { to reset this known value. 
*/ int frm; bool ext_ifencei; + /* vector extension */ + bool vill; + uint8_t lmul; + uint8_t sew; + uint16_t vlen; + uint16_t mlen; + bool vl_eq_vlmax; // Unicorn struct uc_struct *uc; @@ -557,6 +564,11 @@ static void decode_RV32_64C(DisasContext *ctx, uint16_t opcode) } } +static int ex_plus_1(DisasContext *ctx, int nf) +{ + return nf + 1; +} + #define EX_SH(amount) \ static int ex_shift_##amount(DisasContext *ctx, int imm) \ { \ @@ -733,6 +745,8 @@ static bool gen_shift(DisasContext *ctx, arg_r *a, #include "insn_trans/trans_rva.inc.c" #include "insn_trans/trans_rvf.inc.c" #include "insn_trans/trans_rvd.inc.c" +#include "insn_trans/trans_rvh.inc.c" +#include "insn_trans/trans_rvv.inc.c" #include "insn_trans/trans_privileged.inc.c" /* Include the auto-generated decoder for 16 bit insn */ @@ -779,13 +793,14 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) DisasContext *ctx = container_of(dcbase, DisasContext, base); CPURISCVState *env = cs->env_ptr; RISCVCPU *cpu = RISCV_CPU(cs); + uint32_t tb_flags = ctx->base.tb->flags; // unicorn setup ctx->uc = cs->uc; ctx->pc_succ_insn = ctx->base.pc_first; - ctx->mem_idx = ctx->base.tb->flags & TB_FLAGS_MMU_MASK; - ctx->mstatus_fs = ctx->base.tb->flags & TB_FLAGS_MSTATUS_FS; + ctx->mem_idx = tb_flags & TB_FLAGS_MMU_MASK; + ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS; ctx->priv_ver = env->priv_ver; if (riscv_has_ext(env, RVH)) { @@ -807,6 +822,12 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->misa = env->misa; ctx->frm = -1; /* unknown rounding mode */ ctx->ext_ifencei = cpu->cfg.ext_ifencei; + ctx->vlen = cpu->cfg.vlen; + ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL); + ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW); + ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL); + ctx->mlen = 1 << (ctx->sew + 3 - ctx->lmul); + ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX); } static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -965,6 +986,7 @@ void riscv_translate_init(struct uc_struct *uc) } tcg_ctx->cpu_pc = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, pc), "pc"); + tcg_ctx->cpu_vl = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, vl), "vl"); tcg_ctx->load_res = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, load_res), "load_res"); tcg_ctx->load_val = tcg_global_mem_new(tcg_ctx, tcg_ctx->cpu_env, offsetof(CPURISCVState, load_val), diff --git a/qemu/target/riscv/vector_helper.c b/qemu/target/riscv/vector_helper.c new file mode 100644 index 0000000000..1c726edf0a --- /dev/null +++ b/qemu/target/riscv/vector_helper.c @@ -0,0 +1,4913 @@ +/* + * RISC-V Vector Extension Helpers for QEMU. + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/memop.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" +#include "tcg/tcg-gvec-desc.h" +#include "internals.h" +#include + +target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, + target_ulong s2) +{ + int vlmax, vl; + RISCVCPU *cpu = env_archcpu(env); + uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); + uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); + bool vill = FIELD_EX64(s2, VTYPE, VILL); + target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED); + + if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { + /* only set vill bit. */ + FIELD_DP64(0, VTYPE, VILL, 1, env->vtype); + env->vl = 0; + env->vstart = 0; + return 0; + } + + vlmax = vext_get_vlmax(cpu, s2); + if (s1 <= vlmax) { + vl = s1; + } else { + vl = vlmax; + } + env->vl = vl; + env->vtype = s2; + env->vstart = 0; + return vl; +} + +/* + * Note that vector data is stored in host-endian 64-bit chunks, + * so addressing units smaller than that needs a host-endian fixup. + */ +#ifdef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H1_2(x) ((x) ^ 6) +#define H1_4(x) ((x) ^ 4) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#define H8(x) ((x)) +#else +#define H1(x) (x) +#define H1_2(x) (x) +#define H1_4(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#define H8(x) (x) +#endif + +static inline uint32_t vext_nf(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, NF); +} + +static inline uint32_t vext_mlen(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, MLEN); +} + +static inline uint32_t vext_vm(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VM); +} + +static inline uint32_t vext_lmul(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, LMUL); +} + +static uint32_t vext_wd(uint32_t desc) +{ + return (simd_data(desc) >> 11) & 0x1; +} + +/* + * Get vector group length in bytes. Its range is [64, 2048]. + * + * As simd_desc support at most 256, the max vlen is 512 bits. + * So vlen in bytes is encoded as maxsz. + */ +static inline uint32_t vext_maxsz(uint32_t desc) +{ + return simd_maxsz(desc) << vext_lmul(desc); +} + +/* + * This function checks watchpoint before real load operation. + * + * In softmmu mode, the TLB API probe_access is enough for watchpoint check. + * In user mode, there is no watchpoint support now. + * + * It will trigger an exception if there is no mapping in TLB + * and page table walk can't fill the TLB entry. Then the guest + * software can return here after process the exception or never return. + */ +static void probe_pages(CPURISCVState *env, target_ulong addr, + target_ulong len, uintptr_t ra, + MMUAccessType access_type) +{ + target_ulong pagelen = -(addr | TARGET_PAGE_MASK); + target_ulong curlen = MIN(pagelen, len); + + probe_access(env, addr, curlen, access_type, + cpu_mmu_index(env, false), ra); + if (len > curlen) { + addr += curlen; + curlen = len - curlen; + probe_access(env, addr, curlen, access_type, + cpu_mmu_index(env, false), ra); + } +} + +#ifdef HOST_WORDS_BIGENDIAN +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) +{ + /* + * Split the remaining range to two parts. + * The first part is in the last uint64_t unit. + * The second part start from the next uint64_t unit. 
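+ * For example (illustrative values, not from the patch): with cnt = 5 active
+ * bytes and tot = 16 total bytes, part1 = 8 - (5 % 8) = 3 bytes are cleared in
+ * the current uint64_t and part2 = 16 - 5 - 3 = 8 bytes in the next one.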
+ */ + int part1 = 0, part2 = tot - cnt; + if (cnt % 8) { + part1 = 8 - (cnt % 8); + part2 = tot - cnt - part1; + memset(QEMU_ALIGN_PTR_DOWN(tail, 8), 0, part1); + memset(QEMU_ALIGN_PTR_UP(tail, 8), 0, part2); + } else { + memset(tail, 0, part2); + } +} +#else +static void vext_clear(void *tail, uint32_t cnt, uint32_t tot) +{ + memset(tail, 0, tot - cnt); +} +#endif + +static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int8_t *cur = ((int8_t *)vd + H1(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int16_t *cur = ((int16_t *)vd + H2(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int32_t *cur = ((int32_t *)vd + H4(idx)); + vext_clear(cur, cnt, tot); +} + +static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot) +{ + int64_t *cur = (int64_t *)vd + idx; + vext_clear(cur, cnt, tot); +} + +static inline void vext_set_elem_mask(void *v0, int mlen, int index, + uint8_t value) +{ + int idx = (index * mlen) / 64; + int pos = (index * mlen) % 64; + uint64_t old = ((uint64_t *)v0)[idx]; + ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value); +} + +static inline int vext_elem_mask(void *v0, int mlen, int index) +{ + int idx = (index * mlen) / 64; + int pos = (index * mlen) % 64; + return (((uint64_t *)v0)[idx] >> pos) & 1; +} + +/* elements operations for load and store */ +typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, + uint32_t idx, void *vd, uintptr_t retaddr); +typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot); + +#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF) \ +static void NAME(CPURISCVState *env, abi_ptr addr, \ + uint32_t idx, void *vd, uintptr_t retaddr)\ +{ \ + MTYPE data; \ + ETYPE *cur = ((ETYPE *)vd + H(idx)); \ + data = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ + *cur = data; \ +} \ + +GEN_VEXT_LD_ELEM(ldb_b, int8_t, int8_t, H1, ldsb) +GEN_VEXT_LD_ELEM(ldb_h, int8_t, int16_t, H2, ldsb) +GEN_VEXT_LD_ELEM(ldb_w, int8_t, int32_t, H4, ldsb) +GEN_VEXT_LD_ELEM(ldb_d, int8_t, int64_t, H8, ldsb) +GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw) +GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw) +GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw) +GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl) +GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl) +GEN_VEXT_LD_ELEM(lde_b, int8_t, int8_t, H1, ldsb) +GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw) +GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl) +GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq) +GEN_VEXT_LD_ELEM(ldbu_b, uint8_t, uint8_t, H1, ldub) +GEN_VEXT_LD_ELEM(ldbu_h, uint8_t, uint16_t, H2, ldub) +GEN_VEXT_LD_ELEM(ldbu_w, uint8_t, uint32_t, H4, ldub) +GEN_VEXT_LD_ELEM(ldbu_d, uint8_t, uint64_t, H8, ldub) +GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw) +GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw) +GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw) +GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl) +GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl) + +#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ +static void NAME(CPURISCVState *env, abi_ptr addr, \ + uint32_t idx, void *vd, uintptr_t retaddr)\ +{ \ + ETYPE data = *((ETYPE *)vd + H(idx)); \ + cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ +} + +GEN_VEXT_ST_ELEM(stb_b, int8_t, H1, stb) +GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb) +GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb) +GEN_VEXT_ST_ELEM(stb_d, int64_t, 
H8, stb) +GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw) +GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw) +GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw) +GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl) +GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl) +GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) +GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) +GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) +GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) + +/* + *** stride: access vector element from strided memory + */ +static void +vext_ldst_stride(void *vd, void *v0, target_ulong base, + target_ulong stride, CPURISCVState *env, + uint32_t desc, uint32_t vm, + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, base + stride * i, nf * msz, ra, access_type); + } + /* do real access */ + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + target_ulong addr = base + stride * i + k * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ + target_ulong stride, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vm = vext_vm(desc); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_STRIDE(vlsb_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LD_STRIDE(vlsb_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LD_STRIDE(vlsb_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LD_STRIDE(vlsb_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LD_STRIDE(vlsh_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LD_STRIDE(vlsh_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LD_STRIDE(vlsh_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LD_STRIDE(vlsw_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LD_STRIDE(vlsw_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LD_STRIDE(vlse_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LD_STRIDE(vlse_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LD_STRIDE(vlse_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LD_STRIDE(vlse_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + target_ulong stride, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vm = vext_vm(desc); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ + 
NULL, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_STORE); \ +} + +GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t, int8_t, stb_b) +GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t, int16_t, stb_h) +GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t, int32_t, stb_w) +GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t, int64_t, stb_d) +GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h) +GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w) +GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d) +GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w) +GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d) +GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t, int8_t, ste_b) +GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h) +GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w) +GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d) + +/* + *** unit-stride: access elements stored contiguously in memory + */ + +/* unmasked unit-stride load and store operation*/ +static void +vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, + vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access */ + probe_pages(env, base, env->vl * nf * msz, ra, access_type); + /* load bytes from guest memory */ + for (i = 0; i < env->vl; i++) { + k = 0; + while (k < nf) { + target_ulong addr = base + (i * nf + k) * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +/* + * masked unit-stride load and store operation will be a special case of stride, + * stride = NF * sizeof (MTYPE) + */ + +#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} \ + \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_US(vlb_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LD_US(vlb_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LD_US(vlb_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LD_US(vlb_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LD_US(vlh_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LD_US(vlh_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LD_US(vlh_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LD_US(vlw_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LD_US(vlw_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LD_US(vle_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LD_US(vle_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LD_US(vle_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LD_US(vle_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LD_US(vlbu_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LD_US(vlbu_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LD_US(vlbu_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LD_US(vlbu_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LD_US(vlhu_v_d, 
uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN) \ +void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \ + vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ + NULL, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_STORE); \ +} \ + \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_us(vd, base, env, desc, STORE_FN, NULL, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\ +} + +GEN_VEXT_ST_US(vsb_v_b, int8_t, int8_t , stb_b) +GEN_VEXT_ST_US(vsb_v_h, int8_t, int16_t, stb_h) +GEN_VEXT_ST_US(vsb_v_w, int8_t, int32_t, stb_w) +GEN_VEXT_ST_US(vsb_v_d, int8_t, int64_t, stb_d) +GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h) +GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w) +GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d) +GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w) +GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d) +GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t , ste_b) +GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h) +GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w) +GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d) + +/* + *** index: access vector element from indexed memory + */ +typedef target_ulong vext_get_index_addr(target_ulong base, + uint32_t idx, void *vs2); + +#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ +static target_ulong NAME(target_ulong base, \ + uint32_t idx, void *vs2) \ +{ \ + return (base + *((ETYPE *)vs2 + H(idx))); \ +} + +GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t, H1) +GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2) +GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4) +GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8) + +static inline void +vext_ldst_index(void *vd, void *v0, target_ulong base, + void *vs2, CPURISCVState *env, uint32_t desc, + vext_get_index_addr get_index_addr, + vext_ldst_elem_fn *ldst_elem, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra, + MMUAccessType access_type) +{ + uint32_t i, k; + uint32_t nf = vext_nf(desc); + uint32_t vm = vext_vm(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra, + access_type); + } + /* load bytes from guest memory */ + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + abi_ptr addr = get_index_addr(base, i, vs2) + k * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (clear_elem) { + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } + } +} + +#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ + LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC(), MMU_DATA_LOAD); \ +} + +GEN_VEXT_LD_INDEX(vlxb_v_b, int8_t, int8_t, idx_b, ldb_b, clearb) +GEN_VEXT_LD_INDEX(vlxb_v_h, int8_t, int16_t, idx_h, ldb_h, clearh) +GEN_VEXT_LD_INDEX(vlxb_v_w, int8_t, int32_t, idx_w, ldb_w, 
clearl) +GEN_VEXT_LD_INDEX(vlxb_v_d, int8_t, int64_t, idx_d, ldb_d, clearq) +GEN_VEXT_LD_INDEX(vlxh_v_h, int16_t, int16_t, idx_h, ldh_h, clearh) +GEN_VEXT_LD_INDEX(vlxh_v_w, int16_t, int32_t, idx_w, ldh_w, clearl) +GEN_VEXT_LD_INDEX(vlxh_v_d, int16_t, int64_t, idx_d, ldh_d, clearq) +GEN_VEXT_LD_INDEX(vlxw_v_w, int32_t, int32_t, idx_w, ldw_w, clearl) +GEN_VEXT_LD_INDEX(vlxw_v_d, int32_t, int64_t, idx_d, ldw_d, clearq) +GEN_VEXT_LD_INDEX(vlxe_v_b, int8_t, int8_t, idx_b, lde_b, clearb) +GEN_VEXT_LD_INDEX(vlxe_v_h, int16_t, int16_t, idx_h, lde_h, clearh) +GEN_VEXT_LD_INDEX(vlxe_v_w, int32_t, int32_t, idx_w, lde_w, clearl) +GEN_VEXT_LD_INDEX(vlxe_v_d, int64_t, int64_t, idx_d, lde_d, clearq) +GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t, uint8_t, idx_b, ldbu_b, clearb) +GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t, uint16_t, idx_h, ldbu_h, clearh) +GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t, uint32_t, idx_w, ldbu_w, clearl) +GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t, uint64_t, idx_d, ldbu_d, clearq) +GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh) +GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl) +GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq) +GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl) +GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq) + +#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ + STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\ + GETPC(), MMU_DATA_STORE); \ +} + +GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t, int8_t, idx_b, stb_b) +GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t, int16_t, idx_h, stb_h) +GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t, int32_t, idx_w, stb_w) +GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t, int64_t, idx_d, stb_d) +GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h) +GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w) +GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d) +GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w) +GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d) +GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b) +GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h) +GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w) +GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d) + +/* + *** unit-stride fault-only-fisrt load instructions + */ +static inline void +vext_ldff(void *vd, void *v0, target_ulong base, + CPURISCVState *env, uint32_t desc, + vext_ldst_elem_fn *ldst_elem, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra) +{ + void *host; + uint32_t i, k, vl = 0; + uint32_t mlen = vext_mlen(desc); + uint32_t nf = vext_nf(desc); + uint32_t vm = vext_vm(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + target_ulong addr, offset, remain; + + /* probe every access*/ + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + addr = base + nf * i * msz; + if (i == 0) { + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); + } else { + /* if it triggers an exception, no need to check watchpoint */ + remain = nf * msz; + while (remain > 0) { + offset = -(addr | TARGET_PAGE_MASK); + host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, + cpu_mmu_index(env, false)); + if (host) { +#ifdef CONFIG_USER_ONLY + if (page_check_range(addr, nf * msz, PAGE_READ) < 0) { + vl = i; + goto ProbeSuccess; + } 
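+                    /*
+                     * Descriptive note: fault-only-first semantics require only
+                     * element 0 to complete. When probing element i > 0 fails
+                     * here, the access does not trap; vl is recorded as i and,
+                     * after ProbeSuccess, env->vl is truncated to that value so
+                     * the elements loaded so far are kept.
+                     */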
+#else + probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); +#endif + } else { + vl = i; + goto ProbeSuccess; + } + if (remain <= offset) { + break; + } + remain -= offset; + addr += offset; + } + } + } +ProbeSuccess: + /* load bytes from guest memory */ + if (vl != 0) { + env->vl = vl; + } + for (i = 0; i < env->vl; i++) { + k = 0; + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + while (k < nf) { + target_ulong addr = base + (i * nf + k) * msz; + ldst_elem(env, addr, i + k * vlmax, vd, ra); + k++; + } + } + /* clear tail elements */ + if (vl != 0) { + return; + } + for (k = 0; k < nf; k++) { + clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz); + } +} + +#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN, \ + sizeof(ETYPE), sizeof(MTYPE), GETPC()); \ +} + +GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b, clearb) +GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h, clearh) +GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w, clearl) +GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d, clearq) +GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h, clearh) +GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w, clearl) +GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d, clearq) +GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w, clearl) +GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d, clearq) +GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b, clearb) +GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h, clearh) +GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w, clearl) +GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d, clearq) +GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b, clearb) +GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h, clearh) +GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w, clearl) +GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d, clearq) +GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh) +GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl) +GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq) +GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl) +GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq) + +/* + *** Vector AMO Operations (Zvamo) + */ +typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr, + uint32_t wd, uint32_t idx, CPURISCVState *env, + uintptr_t retaddr); + +/* no atomic opreation for vector atomic insructions */ +#define DO_SWAP(N, M) (M) +#define DO_AND(N, M) (N & M) +#define DO_XOR(N, M) (N ^ M) +#define DO_OR(N, M) (N | M) +#define DO_ADD(N, M) (N + M) + +#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \ +static void \ +vext_##NAME##_noatomic_op(void *vs3, target_ulong addr, \ + uint32_t wd, uint32_t idx, \ + CPURISCVState *env, uintptr_t retaddr)\ +{ \ + typedef int##ESZ##_t ETYPE; \ + typedef int##MSZ##_t MTYPE; \ + typedef uint##MSZ##_t UMTYPE UNICORN_UNUSED; \ + ETYPE *pe3 = (ETYPE *)vs3 + H(idx); \ + MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3; \ + \ + cpu_st##SUF##_data(env, addr, DO_OP(a, b)); \ + if (wd) { \ + *pe3 = a; \ + } \ +} + +/* Signed min/max */ +#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) +#define DO_MIN(N, M) ((N) >= (M) ? 
(M) : (N)) + +/* Unsigned min/max */ +#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) +#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) + +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w, 32, 32, H4, DO_XOR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w, 32, 32, H4, DO_AND, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w, 32, 32, H4, DO_OR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w, 32, 32, H4, DO_MIN, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w, 32, 32, H4, DO_MAX, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l) +#ifdef TARGET_RISCV64 +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d, 64, 32, H8, DO_ADD, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d, 64, 64, H8, DO_ADD, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d, 64, 32, H8, DO_XOR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d, 64, 64, H8, DO_XOR, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d, 64, 32, H8, DO_AND, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d, 64, 64, H8, DO_AND, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d, 64, 32, H8, DO_OR, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d, 64, 64, H8, DO_OR, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d, 64, 32, H8, DO_MIN, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d, 64, 64, H8, DO_MIN, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d, 64, 32, H8, DO_MAX, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d, 64, 64, H8, DO_MAX, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l) +GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q) +#endif + +static inline void +vext_amo_noatomic(void *vs3, void *v0, target_ulong base, + void *vs2, CPURISCVState *env, uint32_t desc, + vext_get_index_addr get_index_addr, + vext_amo_noatomic_fn *noatomic_op, + clear_fn *clear_elem, + uint32_t esz, uint32_t msz, uintptr_t ra) +{ + uint32_t i; + target_long addr; + uint32_t wd = vext_wd(desc); + uint32_t vm = vext_vm(desc); + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = vext_maxsz(desc) / esz; + + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD); + probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE); + } + for (i = 0; i < env->vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + addr = get_index_addr(base, i, vs2); + noatomic_op(vs3, addr, wd, i, env, ra); + } + clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz); +} + +#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN) \ +void HELPER(NAME)(void *vs3, void *v0, target_ulong base, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_amo_noatomic(vs3, v0, base, vs2, env, desc, \ + INDEX_FN, vext_##NAME##_noatomic_op, \ + CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE), \ + GETPC()); \ +} + +#ifdef TARGET_RISCV64 +GEN_VEXT_AMO(vamoswapw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoswapd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoaddw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoaddd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoxorw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoxord_v_d, int64_t, int64_t, idx_d, clearq) 
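+/*
+ * Illustrative sketch of what the two generator macros above produce
+ * (simplified: the real GEN_VEXT_AMO_NOATOMIC_OP body keeps the ETYPE/MTYPE
+ * typedefs and the unused retaddr parameter). For vamoaddd_v_d they expand
+ * roughly to:
+ *
+ *   static void vext_vamoaddd_v_d_noatomic_op(void *vs3, target_ulong addr,
+ *                                             uint32_t wd, uint32_t idx,
+ *                                             CPURISCVState *env,
+ *                                             uintptr_t retaddr)
+ *   {
+ *       int64_t *pe3 = (int64_t *)vs3 + H8(idx);
+ *       int64_t a = cpu_ldq_data(env, addr), b = *pe3;
+ *
+ *       cpu_stq_data(env, addr, a + b);     // DO_ADD on memory and vd element
+ *       if (wd) {
+ *           *pe3 = a;                       // write old memory value back to vd
+ *       }
+ *   }
+ *
+ *   void helper_vamoaddd_v_d(void *vs3, void *v0, target_ulong base,
+ *                            void *vs2, CPURISCVState *env, uint32_t desc)
+ *   {
+ *       vext_amo_noatomic(vs3, v0, base, vs2, env, desc, idx_d,
+ *                         vext_vamoaddd_v_d_noatomic_op, clearq,
+ *                         sizeof(int64_t), sizeof(int64_t), GETPC());
+ *   }
+ */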
+GEN_VEXT_AMO(vamoandw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoandd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoorw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamoord_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomind_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxw_v_d, int32_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxd_v_d, int64_t, int64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq) +GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq) +#endif +GEN_VEXT_AMO(vamoswapw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoaddw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoxorw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoandw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamoorw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl) +GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl) +GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) + +/* + *** Vector Integer Arithmetic Instructions + */ + +/* expand macro args before macro */ +#define RVVCALL(macro, ...) macro(__VA_ARGS__) + +/* (TD, T1, T2, TX1, TX2) */ +#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t +#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t +#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t +#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t +#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t +#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t +#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t +#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t +#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t +#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t +#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t +#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t +#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t +#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t +#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t +#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t +#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t +#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t +#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t +#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t +#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t +#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t +#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t +#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t +#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t + +/* operation of two vector elements */ +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); + +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, 
HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1); \ +} +#define DO_SUB(N, M) (N - M) +#define DO_RSUB(N, M) (M - N) + +RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) +RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) + +static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivv2_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, vs1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVV */ +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) + +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); + +/* + * (T1)s1 gives the real operator type. + * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 
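+ * For example (illustrative), vwadd_vx_b uses WOP_SSS_B, so TD = int16_t,
+ * T1 = int8_t and TX1 = int16_t: (TX1)(T1)s1 first truncates the scalar to
+ * int8_t and then sign-extends it to int16_t before the widening add.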
+ */ +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ +} + +RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) +RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) +RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) +RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) +RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) +RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) + +static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, uint32_t desc, + uint32_t esz, uint32_t dsz, + opivx2_fn fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, s1, vs2, i); + } + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate the helpers for OPIVX */ +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq) + +void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + *(uint8_t *)((char*)d + i) = (uint8_t)b - *(uint8_t *)((char*)a + i); + } +} + +void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + *(uint16_t *)((char*)d + i) = (uint16_t)b - *(uint16_t *)((char*)a + i); + } +} + +void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + *(uint32_t *)((char*)d + i) = (uint32_t)b - *(uint32_t *)((char*)a + i); + } +} + +void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + *(uint64_t *)((char*)d + i) = b - *(uint64_t *)((char*)a + i); + } +} + +/* Vector Widening Integer Add/Subtract */ +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t +#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t +#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t +#define WOP_SSS_W int64_t, 
int32_t, int32_t, int64_t, int64_t +#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t +#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t +#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t +#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t +#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t +#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t +RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) +RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) +RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) +RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) +RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) +RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) +RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) +GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq) +GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh) +GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl) +GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq) + +RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwaddu_wx_b, 
WOP_WUUU_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) +RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) +RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) +RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) +RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) +RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) +RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) +GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq) +GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh) +GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl) +GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq) + +/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ +#define DO_VADC(N, M, C) (N + M + C) +#define DO_VSBC(N, M, C) (N - M - C) + +#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC, clearb) +GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh) +GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl) +GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq) + +GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC, clearb) +GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh) +GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl) +GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq) + +#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC, clearb) +GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, 
DO_VADC, clearh) +GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl) +GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq) + +GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC, clearb) +GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh) +GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl) +GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq) + +#ifdef _MSC_VER + #define DO_MADC(N, M, C) (C ? ((N) + (M) + 1) <= (N) : \ + ((N) + (M)) < (N)) +#else + #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ + (__typeof(N))(N + M) < N) +#endif +#define DO_MSBC(N, M, C) (C ? N <= M : N < M) + +#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) +GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) + +GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) +GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) + +#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + uint8_t carry = vext_elem_mask(v0, mlen, i); \ + \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) +GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) + +GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) +GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) + +/* Vector Bitwise Logical Instructions */ +RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) +RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) +RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) +RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) +RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) +RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) +RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) +RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) +RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) +RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) +GEN_VEXT_VV(vand_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vand_vv_h, 2, 2, 
clearh) +GEN_VEXT_VV(vand_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vand_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vor_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vor_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vor_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vor_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) +RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) +RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) +RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) +RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) +RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) +RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) +RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) +RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) +RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) +GEN_VEXT_VX(vand_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vand_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vand_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vand_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vor_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vor_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vor_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vor_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq) + +/* Vector Single-Width Bit Shift Instructions */ +#define DO_SLL(N, M) (N << (M)) +#define DO_SRL(N, M) (N >> (M)) + +/* generate the helpers for shift instructions with two vector operators */ +#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TS1); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) + +/* generate the helpers for shift instructions with one vector and one scalar */ +#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t 
mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(TD); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) + +GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) +GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) +GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) +GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) + +/* Vector Narrowing Integer Right Shift Instructions */ +GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) +GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) +GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) +GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) + +/* Vector Integer Comparison Instructions */ +#define DO_MSEQ(N, M) (N == M) +#define DO_MSNE(N, M) (N != M) +#define DO_MSLT(N, M) (N < M) +#define DO_MSLE(N, M) (N <= M) +#define DO_MSGT(N, M) (N > M) + +#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) +GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) + +GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) +GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 
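The DO_MADC and DO_MSBC macros defined a little earlier detect carry-out and borrow-out purely from wraparound of the narrow addition (the _MSC_VER variant exists only because MSVC lacks __typeof). A minimal standalone sketch of that trick, separate from this patch (carry_out8 is an illustrative name, not a helper from the patch), checks it exhaustively against the carry bit computed at wider precision:

/* Not part of the patch: exhaustive check of the DO_MADC-style carry test. */
#include <assert.h>
#include <stdint.h>

static int carry_out8(uint8_t n, uint8_t m, int cin)
{
    /* Same idea as DO_MADC: with carry-in, n + m + 1 carries iff the
     * truncated sum is <= n; without carry-in, iff it is < n. */
    return cin ? (uint8_t)(n + m + 1) <= n : (uint8_t)(n + m) < n;
}

int main(void)
{
    for (unsigned n = 0; n < 256; n++) {
        for (unsigned m = 0; m < 256; m++) {
            for (int c = 0; c <= 1; c++) {
                /* Reference: bit 8 of the widened sum is the carry-out. */
                assert(carry_out8(n, m, c) == (int)((n + m + c) >> 8));
            }
        }
    }
    return 0;
}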
+GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) +GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) + +GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) + +#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)(target_long)s1)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) +GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) + +GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) +GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) + +GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) +GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) + +GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) +GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) + +GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) + +GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) +GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) + +/* Vector Integer Min/Max Instructions */ +RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) +RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_b, 
OP_SSS_B, H1, H1, H1, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) +RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) +RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) +RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) +RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) +GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) +RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) +RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) +RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) +RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) +RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) +GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq) + +/* Vector Single-Width Integer Multiply Instructions */ +#define DO_MUL(N, M) (N * M) +RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) +GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq) + +static int8_t do_mulh_b(int8_t s2, int8_t s1) +{ + return (int16_t)s2 * (int16_t)s1 >> 8; +} + +static int16_t do_mulh_h(int16_t s2, int16_t s1) +{ + return (int32_t)s2 * (int32_t)s1 >> 16; +} + +static int32_t do_mulh_w(int32_t s2, int32_t s1) +{ + return (int64_t)s2 * (int64_t)s1 >> 32; +} + 
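For element widths below 64 bits, the do_mulh_* helpers above take the high half of a signed product simply by multiplying in a type twice as wide and shifting down; only the 64-bit case (do_mulh_d, next) needs muls64. A standalone sketch of the 8-bit case, outside this patch (mulh_b is a local copy of the do_mulh_b expression and assumes the usual arithmetic right shift on signed values, as the helper itself does), verifies that the high and low bytes reassemble the exact product:

/* Not part of the patch: check the widening high-half multiply at 8 bits. */
#include <assert.h>
#include <stdint.h>

static int8_t mulh_b(int8_t s2, int8_t s1)
{
    /* Same expression as do_mulh_b above. */
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

int main(void)
{
    for (int a = -128; a <= 127; a++) {
        for (int b = -128; b <= 127; b++) {
            int p = a * b;            /* exact 16-bit product */
            int lo = p & 0xff;        /* unsigned low byte of the product */
            /* high byte * 256 plus low byte must reconstruct the product */
            assert(mulh_b((int8_t)a, (int8_t)b) * 256 + lo == p);
        }
    }
    return 0;
}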
+static int64_t do_mulh_d(int64_t s2, int64_t s1) +{ + uint64_t hi_64, lo_64; + + muls64(&lo_64, &hi_64, s1, s2); + return hi_64; +} + +static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) +{ + return (uint16_t)s2 * (uint16_t)s1 >> 8; +} + +static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) +{ + return (uint32_t)s2 * (uint32_t)s1 >> 16; +} + +static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) +{ + return (uint64_t)s2 * (uint64_t)s1 >> 32; +} + +static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) +{ + uint64_t hi_64, lo_64; + + mulu64(&lo_64, &hi_64, s2, s1); + return hi_64; +} + +static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) +{ + return (int16_t)s2 * (uint16_t)s1 >> 8; +} + +static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) +{ + return (int32_t)s2 * (uint32_t)s1 >> 16; +} + +static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) +{ + return (int64_t)s2 * (uint64_t)s1 >> 32; +} + +/* + * Let A = signed operand, + * B = unsigned operand + * P = mulu64(A, B), unsigned product + * + * LET X = 2 ** 64 - A, 2's complement of A + * SP = signed product + * THEN + * IF A < 0 + * SP = -X * B + * = -(2 ** 64 - A) * B + * = A * B - 2 ** 64 * B + * = P - 2 ** 64 * B + * ELSE + * SP = P + * THEN + * HI_P -= (A < 0 ? B : 0) + */ + +static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) +{ + uint64_t hi_64, lo_64; + + mulu64(&lo_64, &hi_64, s2, s1); + + hi_64 -= s2 < 0 ? s1 : 0; + return hi_64; +} + +RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) +RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) +RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) +RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) +RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) +RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) +RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) +RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) +RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) +RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) +RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) +RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) +GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) +RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) +RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) +RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) +RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) +RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) +RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) +RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) +RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) +RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) +RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) +RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) +RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) +RVVCALL(OPIVX2, 
vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) +GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq) + +/* Vector Integer Divide Instructions */ +#ifdef _MSC_VER + #define DO_DIVU(N, M) (unlikely(M == 0) ? (UINT64_MAX) : (N) / (M)) + #define DO_REMU(N, M) (unlikely(M == 0) ? (N) : (N) % (M)) + #define DO_DIV(N, M) (unlikely(M == 0) ? (-1) :\ + unlikely((N == -(N)) && (M == -1)) ? (N) : (N) / (M)) + #define DO_REM(N, M) (unlikely(M == 0) ? (N) :\ + unlikely((N == -(N)) && (M == -1)) ? 0 : (N) % (M)) +#else + #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) + #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) + #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) + #define DO_REM(N, M) (unlikely(M == 0) ? N :\ + unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) +#endif + +RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) +RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) +RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) +RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) +RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) +RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) +RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) +RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) +RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) +RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) +GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) +RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) +RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) +RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) +RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_h, 
OP_UUU_H, H2, H2, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) +RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) +RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) +RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) +RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) +RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) +GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vremu_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq) + +/* Vector Widening Integer Multiply Instructions */ +RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) +RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) +GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq) + +RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) +RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) +GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq) + +/* Vector Single-Width Integer Multiply-Add Instructions */ +#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, d); \ +} + +#define DO_MACC(N, M, D) (M * N + D) +#define DO_NMSAC(N, M, D) (-(M * N) + D) +#define DO_MADD(N, M, D) (M * D + N) +#define DO_NMSUB(N, M, D) (-(M * D) + N) +RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, 
DO_MACC) +RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) +RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) +RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) +RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) +RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) +RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) +GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq) +GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb) +GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq) + +#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ +} + +RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) +RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) +RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) +RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) +RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) +RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) +RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) +RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) +GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq) +GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb) +GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh) +GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl) +GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq) + +/* Vector Widening Integer Multiply-Add Instructions */ +RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 
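The comment above do_mulhsu_d derives why the high half of a signed-by-unsigned product can be recovered from the plain unsigned high half by subtracting the unsigned operand whenever the signed operand is negative. A standalone sketch of the same correction at 8 bits, where it can be checked exhaustively (mulhsu_b_ref and mulhsu_b_corrected are invented names for this illustration, and the final cast assumes ordinary two's-complement narrowing):

/* Not part of the patch: the do_mulhsu_d correction, demonstrated at 8 bits. */
#include <assert.h>
#include <stdint.h>

static int8_t mulhsu_b_ref(int8_t a, uint8_t b)
{
    /* Exact widened signed * unsigned product, then take the high byte. */
    return (int16_t)a * (uint16_t)b >> 8;
}

static int8_t mulhsu_b_corrected(int8_t a, uint8_t b)
{
    uint16_t p = (uint16_t)((uint8_t)a * b);  /* unsigned product P */
    uint8_t hi = p >> 8;                      /* unsigned high byte HI_P */
    hi -= a < 0 ? b : 0;                      /* HI_P -= (A < 0 ? B : 0) */
    return (int8_t)hi;                        /* reinterpret as signed */
}

int main(void)
{
    for (int a = -128; a <= 127; a++) {
        for (int b = 0; b <= 255; b++) {
            assert(mulhsu_b_ref((int8_t)a, (uint8_t)b) ==
                   mulhsu_b_corrected((int8_t)a, (uint8_t)b));
        }
    }
    return 0;
}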
+RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) +RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) +GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq) +GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq) + +RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) +RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) +GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq) +GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh) +GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl) +GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq) + +/* Vector Integer Merge and Move Instructions */ +#define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + *((ETYPE *)vd + H(i)) = s1; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1, clearb) +GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh) +GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl) +GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1, clearb) +GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh) +GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl) +GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE *vt = (!vext_elem_mask(v0, mlen, i) ? 
vs2 : vs1); \ + *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq) + +#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 : \ + (ETYPE)(target_long)s1); \ + *((ETYPE *)vd + H(i)) = d; \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb) +GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh) +GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl) +GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq) + +/* + *** Vector Fixed-Point Arithmetic Instructions + */ + +/* Vector Single-Width Saturating Add and Subtract */ + +/* + * As fixed point instructions probably have round mode and saturation, + * define common macros for fixed point here. + */ +typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, + CPURISCVState *env, int vxrm); + +#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ +} + +static inline void +vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, + opivv2_rm_fn *fn) +{ + for (uint32_t i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, vs1, vs2, i, env, vxrm); + } +} + +static inline void +vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, + uint32_t desc, uint32_t esz, uint32_t dsz, + opivv2_rm_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + + switch (env->vxrm) { + case 0: /* rnu */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 0, fn); + break; + case 1: /* rne */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 1, fn); + break; + case 2: /* rdn */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 2, fn); + break; + default: /* rod */ + vext_vv_rm_1(vd, v0, vs1, vs2, + env, vl, vm, mlen, 3, fn); + break; + } + + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate helpers for fixed point instructions with OPIVV format */ +#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t res = a + b; + if (res < a) { + res = UINT8_MAX; + env->vxsat = 0x1; + } + return res; +} + +static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, + uint16_t b) +{ + uint16_t res = a + b; + if (res < a) { + res = UINT16_MAX; + env->vxsat = 0x1; + } + return res; +} + 
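saddu8 and saddu16 above rely on the fact that an unsigned addition overflowed exactly when the truncated result is smaller than one of the operands; on overflow the result is clamped to the type maximum and the sticky vxsat flag is set. A minimal standalone check of that rule, outside this patch (saddu8_ref is an invented stand-in that reports saturation through a pointer instead of CPURISCVState):

/* Not part of the patch: exhaustive check of the unsigned saturating add. */
#include <assert.h>
#include <stdint.h>

static uint8_t saddu8_ref(uint8_t a, uint8_t b, int *sat)
{
    uint8_t res = a + b;
    if (res < a) {        /* wrapped around, so the true sum is > UINT8_MAX */
        *sat = 1;
        return UINT8_MAX;
    }
    return res;
}

int main(void)
{
    for (unsigned a = 0; a < 256; a++) {
        for (unsigned b = 0; b < 256; b++) {
            int sat = 0;
            unsigned wide = a + b;    /* reference sum at full precision */
            uint8_t r = saddu8_ref((uint8_t)a, (uint8_t)b, &sat);
            assert(r == (wide > UINT8_MAX ? UINT8_MAX : wide));
            assert(sat == (wide > UINT8_MAX));
        }
    }
    return 0;
}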
+static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, + uint32_t b) +{ + uint32_t res = a + b; + if (res < a) { + res = UINT32_MAX; + env->vxsat = 0x1; + } + return res; +} + +static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, + uint64_t b) +{ + uint64_t res = a + b; + if (res < a) { + res = UINT64_MAX; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) +RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) +RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) +RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) +GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq) + +typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, + CPURISCVState *env, int vxrm); + +#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ +} + +static inline void +vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, + uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, + opivx2_rm_fn *fn) +{ + for (uint32_t i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + fn(vd, s1, vs2, i, env, vxrm); + } +} + +static inline void +vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, + uint32_t desc, uint32_t esz, uint32_t dsz, + opivx2_rm_fn *fn, clear_fn *clearfn) +{ + uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + + switch (env->vxrm) { + case 0: /* rnu */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 0, fn); + break; + case 1: /* rne */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 1, fn); + break; + case 2: /* rdn */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 2, fn); + break; + default: /* rod */ + vext_vx_rm_1(vd, v0, s1, vs2, + env, vl, vm, mlen, 3, fn); + break; + } + + clearfn(vd, vl, vl * dsz, vlmax * dsz); +} + +/* generate helpers for fixed point instructions with OPIVX format */ +#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ + do_##NAME, CLEAR_FN); \ +} + +RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) +RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) +RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) +RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) +GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq) + +static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + int8_t res = a + b; + if ((res ^ a) & (res ^ b) & INT8_MIN) { + res = a > 0 ? INT8_MAX : INT8_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + int16_t res = a + b; + if ((res ^ a) & (res ^ b) & INT16_MIN) { + res = a > 0 ? 
INT16_MAX : INT16_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int32_t res = a + b; + if ((res ^ a) & (res ^ b) & INT32_MIN) { + res = a > 0 ? INT32_MAX : INT32_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a + b; + if ((res ^ a) & (res ^ b) & INT64_MIN) { + res = a > 0 ? INT64_MAX : INT64_MIN; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) +RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) +RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) +RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) +GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) +RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) +RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) +RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) +GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq) + +static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, + uint16_t b) +{ + uint16_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, + uint32_t b) +{ + uint32_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, + uint64_t b) +{ + uint64_t res = a - b; + if (res > a) { + res = 0; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) +RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) +RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) +RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) +GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) +RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) +RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) +RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) +GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq) + +static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + int8_t res = a - b; + if ((res ^ a) & (a ^ b) & INT8_MIN) { + res = a > 0 ? INT8_MAX : INT8_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + int16_t res = a - b; + if ((res ^ a) & (a ^ b) & INT16_MIN) { + res = a > 0 ? INT16_MAX : INT16_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int32_t res = a - b; + if ((res ^ a) & (a ^ b) & INT32_MIN) { + res = a > 0 ? 
INT32_MAX : INT32_MIN; + env->vxsat = 0x1; + } + return res; +} + +static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a - b; + if ((res ^ a) & (a ^ b) & INT64_MIN) { + res = a > 0 ? INT64_MAX : INT64_MIN; + env->vxsat = 0x1; + } + return res; +} + +RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) +RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) +RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) +RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) +GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) +RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) +RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) +RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) +GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq) + +/* Vector Single-Width Averaging Add and Subtract */ +static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) +{ + uint8_t d = extract64(v, shift, 1); + uint8_t d1; + uint64_t D1, D2; + + if (shift == 0 || shift > 64) { + return 0; + } + + d1 = extract64(v, shift - 1, 1); + D1 = extract64(v, 0, shift); + if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ + return d1; + } else if (vxrm == 1) { /* round-to-nearest-even */ + if (shift > 1) { + D2 = extract64(v, 0, shift - 1); + return d1 & ((D2 != 0) | d); + } else { + return d1 & d; + } + } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ + return !d & (D1 != 0); + } + return 0; /* round-down (truncate) */ +} + +static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int64_t res = (int64_t)a + b; + uint8_t round = get_round(vxrm, res, 1); + + return (res >> 1) + round; +} + +static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = a + b; + uint8_t round = get_round(vxrm, res, 1); + int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. 
*/ + return ((res >> 1) ^ over) + round; +} + +RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) +RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) +GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) +RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) +GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq) + +static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + int64_t res = (int64_t)a - b; + uint8_t round = get_round(vxrm, res, 1); + + return (res >> 1) + round; +} + +static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + int64_t res = (int64_t)a - b; + uint8_t round = get_round(vxrm, res, 1); + int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; + + /* With signed overflow, bit 64 is inverse of bit 63. */ + return ((res >> 1) ^ over) + round; +} + +RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) +RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) +GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) +RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) +GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq) + +/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ +static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + uint8_t round; + int16_t res; + + res = (int16_t)a * (int16_t)b; + round = get_round(vxrm, res, 7); + res = (res >> 7) + round; + + if (res > INT8_MAX) { + env->vxsat = 0x1; + return INT8_MAX; + } else if (res < INT8_MIN) { + env->vxsat = 0x1; + return INT8_MIN; + } else { + return res; + } +} + +static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + uint8_t round; + int32_t res; + + res = (int32_t)a * (int32_t)b; + round = get_round(vxrm, res, 15); + res = (res >> 15) + round; + + if (res > INT16_MAX) { + env->vxsat = 0x1; + return INT16_MAX; + } else if (res < INT16_MIN) { + env->vxsat = 0x1; + return INT16_MIN; + } else { + return res; + } +} + +static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + uint8_t round; + int64_t res; + + res = (int64_t)a * (int64_t)b; + round = get_round(vxrm, res, 31); + res = (res >> 31) + round; + + if (res > INT32_MAX) { + env->vxsat = 0x1; + return INT32_MAX; + } else if (res < INT32_MIN) { + env->vxsat = 0x1; + return INT32_MIN; + } else { + return res; + } +} + +static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + uint8_t round; + uint64_t hi_64, lo_64; + int64_t 
res; + + if (a == INT64_MIN && b == INT64_MIN) { + env->vxsat = 1; + return INT64_MAX; + } + + muls64(&lo_64, &hi_64, a, b); + round = get_round(vxrm, lo_64, 63); + /* + * Cannot overflow, as there are always + * 2 sign bits after multiply. + */ + res = (hi_64 << 1) | (lo_64 >> 63); + if (round) { + if (res == INT64_MAX) { + env->vxsat = 1; + } else { + res += 1; + } + } + return res; +} + +RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) +RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) +RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) +RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) +GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) +RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) +RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) +RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) +GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) + +/* Vector Widening Saturating Scaled Multiply-Add */ +static inline uint16_t +vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, + uint16_t c) +{ + uint8_t round; + uint16_t res = (uint16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return saddu16(env, vxrm, c, res); +} + +static inline uint32_t +vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, + uint32_t c) +{ + uint8_t round; + uint32_t res = (uint32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return saddu32(env, vxrm, c, res); +} + +static inline uint64_t +vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, + uint64_t c) +{ + uint8_t round; + uint64_t res = (uint64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return saddu64(env, vxrm, c, res); +} + +#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ +} + +RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) +RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) +GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq) + +#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static inline void \ +do_##NAME(void *vd, target_long s1, void *vs2, int i, \ + CPURISCVState *env, int vxrm) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ +} + +RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) +RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) +GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + 
round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return sadd16(env, vxrm, c, res); +} + +static inline int32_t +vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return sadd32(env, vxrm, c, res); + +} + +static inline int64_t +vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return sadd64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) +RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) +RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) +GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) +RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) +RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) +GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) +{ + uint8_t round; + int16_t res = a * (int16_t)b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) +{ + uint8_t round; + int32_t res = a * (int32_t)b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) +{ + uint8_t round; + int64_t res = a * (int64_t)b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) +RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) +GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh) +GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl) +GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) +RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) +GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq) + +static inline int16_t +vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) +{ + uint8_t round; + int16_t res = (int16_t)a * b; + + round = get_round(vxrm, res, 4); + res = (res >> 4) + round; + return ssub16(env, vxrm, c, res); +} + +static inline int32_t +vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) +{ + uint8_t round; + int32_t res = (int32_t)a * b; + + round = get_round(vxrm, res, 8); + res = (res >> 8) + round; + return ssub32(env, vxrm, c, res); +} + +static inline int64_t +vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) +{ + uint8_t round; + int64_t res = (int64_t)a * b; + + round = get_round(vxrm, res, 16); + res = (res >> 16) + round; + return ssub64(env, vxrm, c, res); +} + +RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, 
WOP_SUS_B, H2, H1, vwsmaccus8) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) +RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) +GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) +GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) +GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq) + +/* Vector Single-Width Scaling Shift Instructions */ +static inline uint8_t +vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +{ + uint8_t round, shift = b & 0x7; + uint8_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint16_t +vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) +{ + uint8_t round, shift = b & 0xf; + uint16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint32_t +vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) +{ + uint8_t round, shift = b & 0x1f; + uint32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline uint64_t +vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) +{ + uint8_t round, shift = b & 0x3f; + uint64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) +RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) +RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) +RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) +GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) +RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) +RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) +RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) +GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq) + +static inline int8_t +vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) +{ + uint8_t round, shift = b & 0x7; + int8_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int16_t +vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +{ + uint8_t round, shift = b & 0xf; + int16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int32_t +vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +{ + uint8_t round, shift = b & 0x1f; + int32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} +static inline int64_t +vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + return res; +} + +RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) +RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) +RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) +RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) +GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl) +GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq) + +RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) +RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) +RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, 
H4, H4, vssra32) +RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) +GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl) +GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq) + +/* Vector Narrowing Fixed-Point Clip Instructions */ +static inline int8_t +vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) +{ + uint8_t round, shift = b & 0xf; + int16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT8_MAX) { + env->vxsat = 0x1; + return INT8_MAX; + } else if (res < INT8_MIN) { + env->vxsat = 0x1; + return INT8_MIN; + } else { + return res; + } +} + +static inline int16_t +vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) +{ + uint8_t round, shift = b & 0x1f; + int32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT16_MAX) { + env->vxsat = 0x1; + return INT16_MAX; + } else if (res < INT16_MIN) { + env->vxsat = 0x1; + return INT16_MIN; + } else { + return res; + } +} + +static inline int32_t +vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > INT32_MAX) { + env->vxsat = 0x1; + return INT32_MAX; + } else if (res < INT32_MIN) { + env->vxsat = 0x1; + return INT32_MIN; + } else { + return res; + } +} + +RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) +RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) +RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) +GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl) + +RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) +RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) +RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) +GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl) + +static inline uint8_t +vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) +{ + uint8_t round, shift = b & 0xf; + uint16_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT8_MAX) { + env->vxsat = 0x1; + return UINT8_MAX; + } else { + return res; + } +} + +static inline uint16_t +vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) +{ + uint8_t round, shift = b & 0x1f; + uint32_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT16_MAX) { + env->vxsat = 0x1; + return UINT16_MAX; + } else { + return res; + } +} + +static inline uint32_t +vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) +{ + uint8_t round, shift = b & 0x3f; + int64_t res; + + round = get_round(vxrm, a, shift); + res = (a >> shift) + round; + if (res > UINT32_MAX) { + env->vxsat = 0x1; + return UINT32_MAX; + } else { + return res; + } +} + +RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) +RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) +RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) +GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb) +GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh) +GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl) + +RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) +RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) +RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 
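
All of the scaling-shift and clip helpers above share the same shape: fetch the vxrm rounding increment from get_round(), perform the narrowing shift, then saturate into the destination type, setting env->vxsat when the value does not fit. The standalone sketch below shows that arithmetic for the signed byte case, assuming the round-to-nearest-up mode (vxrm == 0); the name clip8_rnu and its free-standing form are illustrative only and not part of the helper set in this patch.

#include <stdint.h>

/*
 * clip8_rnu: hypothetical standalone version of the vnclip8 arithmetic,
 * hard-coded to round-to-nearest-up (vxrm == 0).
 */
static int8_t clip8_rnu(int16_t a, unsigned shift)
{
    /* rnu: the rounding increment is the last bit shifted out */
    int round = shift ? (a >> (shift - 1)) & 1 : 0;
    int16_t res = (int16_t)((a >> shift) + round);

    if (res > INT8_MAX) {
        return INT8_MAX;   /* the real helper also sets env->vxsat here */
    } else if (res < INT8_MIN) {
        return INT8_MIN;   /* likewise */
    }
    return (int8_t)res;
}

/* e.g. clip8_rnu(0x0128, 4) == 0x13 (rounds up); clip8_rnu(0x7FFF, 4) saturates to INT8_MAX */
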
+GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb) +GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh) +GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl) + +/* + *** Vector Float Point Arithmetic Instructions + */ +/* Vector Single-Width Floating-Point Add/Subtract Instructions */ +#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ +} + +#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs1, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) +RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) +RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) +GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq) + +#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ +} + +#define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, s1, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) +RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) +RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) +GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) +RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) +RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) +GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) +RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) +RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) +GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq) + +static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) +{ + return float16_sub(b, a, s); +} + +static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) +{ + return float32_sub(b, a, s); +} + +static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) +{ + return float64_sub(b, a, s); +} + +RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) +RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) +RVVCALL(OPFVF2, 
vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) +GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Add/Subtract Instructions */ +static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_add(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_add(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} + +RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) +RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) +GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) +RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) +GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq) + +static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_sub(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_sub(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} + +RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) +RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) +GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) +RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) +GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq) + +static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_add(a, float16_to_float32(b, true, s), s); +} + +static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) +{ + return float64_add(a, float32_to_float64(b, s), s); +} + +RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) +RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) +GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) +RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) +GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq) + +static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) +{ + return float32_sub(a, float16_to_float32(b, true, s), s); +} + +static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) +{ + return float64_sub(a, float32_to_float64(b, s), s); +} + +RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) +RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) +GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) +RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) +GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq) + +/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ +RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) +RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) +RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) +GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) +RVVCALL(OPFVF2, 
vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) +RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) +GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) +RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) +RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) +GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) +RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) +RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) +GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq) + +static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) +{ + return float16_div(b, a, s); +} + +static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) +{ + return float32_div(b, a, s); +} + +static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) +{ + return float64_div(b, a, s); +} + +RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) +RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) +RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) +GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Multiply */ +static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) +{ + return float32_mul(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), s); +} + +static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) +{ + return float64_mul(float32_to_float64(a, s), + float32_to_float64(b, s), s); + +} +RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) +RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) +GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq) +RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) +RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) +GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq) + +/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ +#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = *((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ +} + +static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, 0, s); +} + +static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, 0, s); +} + +static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, 0, s); +} + +RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) +RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) +RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) +GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq) + +#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + TD d = 
*((TD *)vd + HD(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ +} + +RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) +RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) +RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) +GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq) + +static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) +RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) +RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) +GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) +RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) +RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) +GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq) + +static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, float_muladd_negate_c, s); +} + +static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, float_muladd_negate_c, s); +} + +static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) +RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) +RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) +GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) +RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) +RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) +GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq) + +static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(a, b, d, float_muladd_negate_product, s); +} + +static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(a, b, d, float_muladd_negate_product, s); +} + +static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(a, b, d, float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) +RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) +RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) +GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) +RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) +RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, 
fnmsac64) +GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq) + +static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, 0, s); +} + +static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, 0, s); +} + +static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, 0, s); +} + +RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) +RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) +RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) +GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) +RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) +RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) +GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq) + +static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) +RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) +RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) +GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) +RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) +RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) +GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq) + +static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, float_muladd_negate_c, s); +} + +static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) +{ + return float32_muladd(d, b, a, float_muladd_negate_c, s); +} + +static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) +RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) +RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) +GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) +RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) +RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) +GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq) + +static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) +{ + return float16_muladd(d, b, a, float_muladd_negate_product, s); +} + +static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, 
float_status *s) +{ + return float32_muladd(d, b, a, float_muladd_negate_product, s); +} + +static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) +{ + return float64_muladd(d, b, a, float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) +RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) +RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) +GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq) +RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) +RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) +RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) +GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq) + +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ +static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, 0, s); +} + +static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, 0, s); +} + +RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) +RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) +GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) +RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) +GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq) + +static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_c | float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) +RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) +GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) +RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) +GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq) + +static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_c, s); +} + +static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_c, s); +} + +RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) +RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) +GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) +RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) +GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq) + +static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +{ + return 
float32_muladd(float16_to_float32(a, true, s), + float16_to_float32(b, true, s), d, + float_muladd_negate_product, s); +} + +static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +{ + return float64_muladd(float32_to_float64(a, s), + float32_to_float64(b, s), d, + float_muladd_negate_product, s); +} + +RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) +RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) +GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl) +GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq) +RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) +RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) +GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl) +GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq) + +/* Vector Floating-Point Square-Root Instruction */ +/* (TD, T2, TX2) */ +#define OP_UU_H uint16_t, uint16_t, uint16_t +#define OP_UU_W uint32_t, uint32_t, uint32_t +#define OP_UU_D uint64_t, uint64_t, uint64_t + +#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, void *vs2, int i, \ + CPURISCVState *env) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ +} + +#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + if (vl == 0) { \ + return; \ + } \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs2, i, env); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) +RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) +RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) +GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq) + +/* Vector Floating-Point MIN/MAX Instructions */ +RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) +RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) +RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) +GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) +RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) +RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) +GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq) + +RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) +RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) +RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) +GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) +RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) +RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) +GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq) + +/* Vector Floating-Point Sign-Injection Instructions */ +static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(b, 0, 15, a); +} + +static uint32_t 
fsgnj32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(b, 0, 31, a); +} + +static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(b, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) +RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) +RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) +GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) +RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) +RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) +GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq) + +static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(~b, 0, 15, a); +} + +static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(~b, 0, 31, a); +} + +static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(~b, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) +RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) +RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) +GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) +RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) +RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) +GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq) + +static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 15, a); +} + +static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 31, a); +} + +static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) +{ + return deposit64(b ^ a, 0, 63, a); +} + +RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) +RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) +RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) +GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh) +GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl) +GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq) +RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) +RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) +RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) +GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh) +GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl) +GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq) + +/* Vector Floating-Point Compare Instructions */ +#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, s1, &env->fp_status)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = 
float16_compare_quiet(a, b, s); + return compare == float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) +GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) + +#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + vext_set_elem_mask(vd, mlen, i, \ + DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) +GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) +GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) + +static bool vmfne16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +static bool vmfne32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +static bool vmfne64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare_quiet(a, b, s); + return compare != float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) +GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) +GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) +GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) +GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) +GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) + +static bool float16_lt(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_less; +} + +GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) +GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) +GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) +GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) +GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) +GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) + +static bool float16_le(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_less || + compare == float_relation_equal; +} + +GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) +GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) +GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) +GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) +GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) +GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) + +static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_greater; +} + +static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare(a, b, s); + return compare == float_relation_greater; +} + +static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare(a, b, s); + return compare == float_relation_greater; +} + +GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 
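
Each generated compare helper writes a single mask bit per element (spaced mlen bits apart in vd) for the first vl elements and then clears the remaining bits up to vlmax, which is the body/tail pattern of the GEN_VEXT_CMP_VV_ENV and GEN_VEXT_CMP_VF loops above. A minimal sketch of that pattern over a byte-packed bit array follows; the names set_mask_bit and cmp_lt_mask are hypothetical, mlen is taken as 1, the per-element v0 masking is omitted, and a plain C `<` stands in for the softfloat float*_compare calls.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* hypothetical: set or clear one mask bit, packed LSB-first */
static void set_mask_bit(uint8_t *mask, size_t i, bool v)
{
    if (v) {
        mask[i / 8] |= (uint8_t)(1u << (i % 8));
    } else {
        mask[i / 8] &= (uint8_t)~(1u << (i % 8));
    }
}

/* body elements receive the predicate result, tail elements are cleared */
static void cmp_lt_mask(uint8_t *mask, const float *s2, const float *s1,
                        size_t vl, size_t vlmax)
{
    for (size_t i = 0; i < vl; i++) {
        set_mask_bit(mask, i, s2[i] < s1[i]);   /* vmflt-style predicate */
    }
    for (size_t i = vl; i < vlmax; i++) {
        set_mask_bit(mask, i, 0);               /* tail is zeroed */
    }
}
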
+GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) +GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) + +static bool vmfge16(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +static bool vmfge32(uint32_t a, uint32_t b, float_status *s) +{ + FloatRelation compare = float32_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +static bool vmfge64(uint64_t a, uint64_t b, float_status *s) +{ + FloatRelation compare = float64_compare(a, b, s); + return compare == float_relation_greater || + compare == float_relation_equal; +} + +GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) +GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) +GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) + +static bool float16_unordered_quiet(uint16_t a, uint16_t b, float_status *s) +{ + FloatRelation compare = float16_compare_quiet(a, b, s); + return compare == float_relation_unordered; +} + +GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) +GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) +GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) +GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) + +/* Vector Floating-Point Classify Instruction */ +#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2); \ +} + +#define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + do_##NAME(vd, vs2, i); \ + } \ + CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ +} + +target_ulong fclass_h(uint64_t frs1) +{ + float16 f = frs1; + bool sign = float16_is_neg(f); + + if (float16_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float16_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float16_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float16_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +target_ulong fclass_s(uint64_t frs1) +{ + float32 f = frs1; + bool sign = float32_is_neg(f); + + if (float32_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float32_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float32_is_zero_or_denormal(f)) { + return sign ? 1 << 2 : 1 << 5; + } else if (float32_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +target_ulong fclass_d(uint64_t frs1) +{ + float64 f = frs1; + bool sign = float64_is_neg(f); + + if (float64_is_infinity(f)) { + return sign ? 1 << 0 : 1 << 7; + } else if (float64_is_zero(f)) { + return sign ? 1 << 3 : 1 << 4; + } else if (float64_is_zero_or_denormal(f)) { + return sign ? 
1 << 2 : 1 << 5; + } else if (float64_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; + } else { + return sign ? 1 << 1 : 1 << 6; + } +} + +RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) +RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) +RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) +GEN_VEXT_V(vfclass_v_h, 2, 2, clearh) +GEN_VEXT_V(vfclass_v_w, 4, 4, clearl) +GEN_VEXT_V(vfclass_v_d, 8, 8, clearq) + +/* Vector Floating-Point Merge Instruction */ +#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t esz = sizeof(ETYPE); \ + uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ + *((ETYPE *)vd + H(i)) \ + = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1); \ + } \ + CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ +} + +GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh) +GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl) +GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq) + +/* Single-Width Floating-Point/Integer Type-Convert Instructions */ +/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ +RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) +RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) +RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq) + +/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ +RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) +RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) +RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) +GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq) + +/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ +RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) +RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) +RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq) + +/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ +RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) +RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) +RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) +GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl) +GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq) + +/* Widening Floating-Point/Integer Type-Convert Instructions */ +/* (TD, T2, TX2) */ +#define WOP_UU_H uint32_t, uint16_t, uint16_t +#define WOP_UU_W uint64_t, uint32_t, uint32_t +/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ +RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) +RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq) + +/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. 
*/ +RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) +RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq) + +/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ +RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) +RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq) + +/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ +RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) +RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq) + +/* + * vfwcvt.f.f.v vd, vs2, vm # + * Convert single-width float to double-width float. + */ +static uint32_t vfwcvtffv16(uint16_t a, float_status *s) +{ + return float16_to_float32(a, true, s); +} + +RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) +RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl) +GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq) + +/* Narrowing Floating-Point/Integer Type-Convert Instructions */ +/* (TD, T2, TX2) */ +#define NOP_UU_H uint16_t, uint32_t, uint32_t +#define NOP_UU_W uint32_t, uint64_t, uint64_t +/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ +RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16) +RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32) +GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl) + +/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ +RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16) +RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32) +GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl) + +/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ +RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16) +RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl) + +/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ +RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16) +RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl) + +/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ +static uint16_t vfncvtffv16(uint32_t a, float_status *s) +{ + return float32_to_float16(a, true, s); +} + +RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) +RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) +GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh) +GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl) + +/* + *** Vector Reduction Operations + */ +/* Vector Single-Width Integer Reduction Instructions */ +#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ + TD s1 = *((TD *)vs1 + HD(0)); \ + \ + for (i = 0; i < vl; i++) { \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + s1 = OP(s1, (TD)s2); \ + } \ + *((TD *)vd + HD(0)) = s1; \ + CLEAR_FN(vd, 1, sizeof(TD), tot); \ +} + +/* vd[0] = sum(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb) +GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh) +GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl) +GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq) + +/* vd[0] = maxu(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb) +GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh) +GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl) +GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq) + +/* vd[0] = max(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb) +GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh) +GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl) +GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq) + +/* vd[0] = minu(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb) +GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh) +GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl) +GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq) + +/* vd[0] = min(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb) +GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh) +GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl) +GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq) + +/* vd[0] = and(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb) +GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh) +GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl) +GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq) + +/* vd[0] = or(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb) +GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh) +GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl) +GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq) + +/* vd[0] = xor(vs1[0], vs2[*]) */ +GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb) +GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh) +GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl) +GEN_VEXT_RED(vredxor_vs_d, int64_t, 
int64_t, H8, H8, DO_XOR, clearq) + +/* Vector Widening Integer Reduction Instructions */ +/* signed sum reduction into double-width accumulator */ +GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh) +GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl) +GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq) + +/* Unsigned sum reduction into double-width accumulator */ +GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh) +GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl) +GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq) + +/* Vector Single-Width Floating-Point Reduction Instructions */ +#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ + TD s1 = *((TD *)vs1 + HD(0)); \ + \ + for (i = 0; i < vl; i++) { \ + TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + s1 = OP(s1, (TD)s2, &env->fp_status); \ + } \ + *((TD *)vd + HD(0)) = s1; \ + CLEAR_FN(vd, 1, sizeof(TD), tot); \ +} + +/* Unordered sum */ +GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh) +GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl) +GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq) + +/* Maximum value */ +GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh) +GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl) +GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq) + +/* Minimum value */ +GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh) +GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl) +GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq) + +/* Vector Widening Floating-Point Reduction Instructions */ +/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ +void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, + void *vs2, CPURISCVState *env, uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; + uint32_t s1 = *((uint32_t *)vs1 + H4(0)); + + for (i = 0; i < vl; i++) { + uint16_t s2 = *((uint16_t *)vs2 + H2(i)); + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), + &env->fp_status); + } + *((uint32_t *)vd + H4(0)) = s1; + clearl(vd, 1, sizeof(uint32_t), tot); +} + +void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, + void *vs2, CPURISCVState *env, uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t i; + uint32_t tot = env_archcpu(env)->cfg.vlen / 8; + uint64_t s1 = *((uint64_t *)vs1); + + for (i = 0; i < vl; i++) { + uint32_t s2 = *((uint32_t *)vs2 + H4(i)); + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), + &env->fp_status); + } + *((uint64_t *)vd) = s1; + clearq(vd, 1, sizeof(uint64_t), tot); +} + +/* + *** Vector Mask Operations + */ +/* Vector Mask-Register Logical Instructions 
*/ +#define GEN_VEXT_MASK_VV(NAME, OP) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vl = env->vl; \ + uint32_t i; \ + int a, b; \ + \ + for (i = 0; i < vl; i++) { \ + a = vext_elem_mask(vs1, mlen, i); \ + b = vext_elem_mask(vs2, mlen, i); \ + vext_set_elem_mask(vd, mlen, i, OP(b, a)); \ + } \ + for (; i < vlmax; i++) { \ + vext_set_elem_mask(vd, mlen, i, 0); \ + } \ +} + +#define DO_NAND(N, M) (!(N & M)) +#define DO_ANDNOT(N, M) (N & !M) +#define DO_NOR(N, M) (!(N | M)) +#define DO_ORNOT(N, M) (N | !M) +#define DO_XNOR(N, M) (!(N ^ M)) + +GEN_VEXT_MASK_VV(vmand_mm, DO_AND) +GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) +GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) +GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) +GEN_VEXT_MASK_VV(vmor_mm, DO_OR) +GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) +GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) +GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) + +/* Vector mask population count vmpopc */ +target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + target_ulong cnt = 0; + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + + for (i = 0; i < vl; i++) { + if (vm || vext_elem_mask(v0, mlen, i)) { + if (vext_elem_mask(vs2, mlen, i)) { + cnt++; + } + } + } + return cnt; +} + +/* vmfirst find-first-set mask bit*/ +target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + + for (i = 0; i < vl; i++) { + if (vm || vext_elem_mask(v0, mlen, i)) { + if (vext_elem_mask(vs2, mlen, i)) { + return i; + } + } + } + return -1LL; +} + +enum set_mask_type { + ONLY_FIRST = 1, + INCLUDE_FIRST, + BEFORE_FIRST, +}; + +static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc, enum set_mask_type type) +{ + uint32_t mlen = vext_mlen(desc); + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + int i; + bool first_mask_bit = false; + + for (i = 0; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, mlen, i)) { + continue; + } + /* write a zero to all following active elements */ + if (first_mask_bit) { + vext_set_elem_mask(vd, mlen, i, 0); + continue; + } + if (vext_elem_mask(vs2, mlen, i)) { + first_mask_bit = true; + if (type == BEFORE_FIRST) { + vext_set_elem_mask(vd, mlen, i, 0); + } else { + vext_set_elem_mask(vd, mlen, i, 1); + } + } else { + if (type == ONLY_FIRST) { + vext_set_elem_mask(vd, mlen, i, 0); + } else { + vext_set_elem_mask(vd, mlen, i, 1); + } + } + } + for (; i < vlmax; i++) { + vext_set_elem_mask(vd, mlen, i, 0); + } +} + +void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); +} + +void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); +} + +void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); +} + +/* Vector Iota Instruction */ +#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = 
vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t sum = 0; \ + int i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = sum; \ + if (vext_elem_mask(vs2, mlen, i)) { \ + sum++; \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb) +GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh) +GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl) +GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq) + +/* Vector Element Index Instruction */ +#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + int i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = i; \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb) +GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh) +GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl) +GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq) + +/* + *** Vector Permutation Instructions + */ + +/* Vector Slide Instructions */ +#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + target_ulong offset = s1, i; \ + \ + for (i = offset; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + target_ulong offset = s1, i; \ + \ + for (i = 0; i < vl; ++i) { \ + target_ulong j = i + offset; \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(i)) = j >= vlmax ? 
0 : *((ETYPE *)vs2 + H(j)); \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (i == 0) { \ + *((ETYPE *)vd + H(i)) = s1; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (i == vl - 1) { \ + *((ETYPE *)vd + H(i)) = s1; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq) + +/* Vector Register Gather Instruction */ +#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t index, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + index = *((ETYPE *)vs1 + H(i)); \ + if (index >= vlmax) { \ + *((ETYPE *)vd + H(i)) = 0; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; */ +GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb) +GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh) +GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl) +GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq) + +#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t index = s1, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, mlen, i)) { \ + continue; \ + } \ + if (index >= vlmax) { \ + *((ETYPE *)vd + H(i)) = 0; \ + } else { \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ + } \ + } \ + CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ +GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb) +GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh) +GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl) +GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq) + +/* Vector Compress Instruction */ +#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t mlen = vext_mlen(desc); \ + uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ + uint32_t vl = env->vl; \ + uint32_t num = 0, i; \ + \ + for (i = 0; i < vl; i++) { \ + if (!vext_elem_mask(vs1, mlen, i)) { \ + continue; \ + } \ + *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ + num++; \ + } \ + CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE)); \ +} + +/* Compress into vd elements of vs2 where vs1 is enabled */ +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl) +GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq) diff --git a/qemu/target/s390x/cpu_features_def.inc.h b/qemu/target/s390x/cpu_features_def.inc.h index 31dff0d84e..5942f81f16 100644 --- a/qemu/target/s390x/cpu_features_def.inc.h +++ b/qemu/target/s390x/cpu_features_def.inc.h @@ -107,6 +107,7 @@ DEF_FEAT(DEFLATE_BASE, "deflate-base", STFL, 151, "Deflate-conversion facility ( DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, "Vector-Packed-Decimal-Enhancement Facility") DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, "Message-security-assist-extension-9 facility (excluding subfunctions)") DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility") +DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility") /* Features exposed via SCLP SCCB Byte 80 - 98 (bit numbers relative to byte-80) */ DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility") @@ -310,7 +311,7 @@ DEF_FEAT(PCC_CMAC_ETDEA_192, "pcc-cmac-etdea-128", PCC, 10, "PCC Compute-Last-Bl DEF_FEAT(PCC_CMAC_TDEA, "pcc-cmac-etdea-192", PCC, 11, "PCC Compute-Last-Block-CMAC-Using-EncryptedTDEA-192") DEF_FEAT(PCC_CMAC_AES_128, "pcc-cmac-aes-128", PCC, 18, "PCC Compute-Last-Block-CMAC-Using-AES-128") DEF_FEAT(PCC_CMAC_AES_192, "pcc-cmac-aes-192", PCC, 19, "PCC Compute-Last-Block-CMAC-Using-AES-192") -DEF_FEAT(PCC_CMAC_AES_256, "pcc-cmac-eaes-256", PCC, 20, "PCC Compute-Last-Block-CMAC-Using-AES-256") +DEF_FEAT(PCC_CMAC_AES_256, "pcc-cmac-aes-256", PCC, 20, "PCC Compute-Last-Block-CMAC-Using-AES-256") 
DEF_FEAT(PCC_CMAC_EAES_128, "pcc-cmac-eaes-128", PCC, 26, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-128") DEF_FEAT(PCC_CMAC_EAES_192, "pcc-cmac-eaes-192", PCC, 27, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-192") DEF_FEAT(PCC_CMAC_EAES_256, "pcc-cmac-eaes-256", PCC, 28, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-256") diff --git a/qemu/target/s390x/fpu_helper.c b/qemu/target/s390x/fpu_helper.c index 0fc39d7138..241260f605 100644 --- a/qemu/target/s390x/fpu_helper.c +++ b/qemu/target/s390x/fpu_helper.c @@ -112,7 +112,7 @@ static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr) } } -int float_comp_to_cc(CPUS390XState *env, int float_compare) +int float_comp_to_cc(CPUS390XState *env, FloatRelation float_compare) { switch (float_compare) { case float_relation_equal: @@ -368,7 +368,7 @@ uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al, /* 32-bit FP compare */ uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float32_compare_quiet(f1, f2, &env->fpu_status); + FloatRelation cmp = float32_compare_quiet(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -376,7 +376,7 @@ uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2) /* 64-bit FP compare */ uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float64_compare_quiet(f1, f2, &env->fpu_status); + FloatRelation cmp = float64_compare_quiet(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -385,9 +385,9 @@ uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) uint32_t HELPER(cxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { - int cmp = float128_compare_quiet(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + FloatRelation cmp = float128_compare_quiet(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -675,7 +675,7 @@ uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, /* 32-bit FP compare and signal */ uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float32_compare(f1, f2, &env->fpu_status); + FloatRelation cmp = float32_compare(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -683,7 +683,7 @@ uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2) /* 64-bit FP compare and signal */ uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) { - int cmp = float64_compare(f1, f2, &env->fpu_status); + FloatRelation cmp = float64_compare(f1, f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } @@ -692,9 +692,9 @@ uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { - int cmp = float128_compare(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + FloatRelation cmp = float128_compare(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); } diff --git a/qemu/target/s390x/gen-features.c b/qemu/target/s390x/gen-features.c index 6278845b12..8ddeebc544 100644 --- a/qemu/target/s390x/gen-features.c +++ b/qemu/target/s390x/gen-features.c @@ -562,6 +562,7 @@ static uint16_t 
full_GEN15_GA1[] = { S390_FEAT_GROUP_MSA_EXT_9, S390_FEAT_GROUP_MSA_EXT_9_PCKMO, S390_FEAT_ETOKEN, + S390_FEAT_UNPACK, }; /* Default features (in order of release) diff --git a/qemu/target/s390x/helper.h b/qemu/target/s390x/helper.h index abd8dd2a97..ddcce6de88 100644 --- a/qemu/target/s390x/helper.h +++ b/qemu/target/s390x/helper.h @@ -202,10 +202,6 @@ DEF_HELPER_FLAGS_4(gvec_vmlo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vmlo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_3(gvec_vpopct8, TCG_CALL_NO_RWG, void, ptr, cptr, i32) DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32) -DEF_HELPER_FLAGS_4(gvec_verllv8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) -DEF_HELPER_FLAGS_4(gvec_verllv16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) -DEF_HELPER_FLAGS_4(gvec_verll8, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) -DEF_HELPER_FLAGS_4(gvec_verll16, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) diff --git a/qemu/target/s390x/insn-data.def b/qemu/target/s390x/insn-data.def index 1660c4d1f8..5ff795fa13 100644 --- a/qemu/target/s390x/insn-data.def +++ b/qemu/target/s390x/insn-data.def @@ -798,7 +798,7 @@ /* SQUARE ROOT */ F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP) F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP) - F(0xb316, SQXBR, RRE, Z, x2h, x2l, new, x1, sqxb, 0, IF_BFP) + F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_P, x1, sqxb, 0, IF_BFP) F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP) F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP) @@ -1147,8 +1147,8 @@ /* VECTOR POPULATION COUNT */ F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC) /* VECTOR ELEMENT ROTATE LEFT LOGICAL */ - F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, verllv, 0, IF_VEC) - F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, verll, 0, IF_VEC) + F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) /* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */ F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC) /* VECTOR ELEMENT SHIFT LEFT */ diff --git a/qemu/target/s390x/internal.h b/qemu/target/s390x/internal.h index 82cf8726be..cec0957fb4 100644 --- a/qemu/target/s390x/internal.h +++ b/qemu/target/s390x/internal.h @@ -11,6 +11,7 @@ #define S390X_INTERNAL_H #include "cpu.h" +#include "fpu/softfloat.h" #ifndef CONFIG_USER_ONLY QEMU_PACK(typedef struct LowCore { @@ -268,7 +269,7 @@ uint32_t set_cc_nz_f128(float128 v); uint8_t s390_softfloat_exc_to_ieee(unsigned int exc); int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3); void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode); -int float_comp_to_cc(CPUS390XState *env, int float_compare); +int float_comp_to_cc(CPUS390XState *env, FloatRelation float_compare); uint16_t float32_dcmask(CPUS390XState *env, float32 f1); uint16_t float64_dcmask(CPUS390XState *env, float64 f1); uint16_t float128_dcmask(CPUS390XState *env, float128 f1); diff --git a/qemu/target/s390x/translate.c b/qemu/target/s390x/translate.c index e41a3b73b0..dec5f4139c 100644 --- a/qemu/target/s390x/translate.c +++ b/qemu/target/s390x/translate.c @@ -3936,8 +3936,7 @@ static DisasJumpType op_risbg(DisasContext *s, DisasOps *o) pmask = 0x00000000ffffffffull; break; default: - // g_assert_not_reached(); - break; + 
g_assert_not_reached(); } /* MASK is the set of bits to be inserted from R2. diff --git a/qemu/target/s390x/translate_vx.inc.c b/qemu/target/s390x/translate_vx.inc.c index 568b6a2acb..bdf0aecf34 100644 --- a/qemu/target/s390x/translate_vx.inc.c +++ b/qemu/target/s390x/translate_vx.inc.c @@ -233,8 +233,8 @@ static void get_vec_element_ptr_i64(TCGContext *tcg_ctx, TCGv_ptr ptr, uint8_t r #define gen_gvec_mov(tcg_ctx, v1, v2) \ tcg_gen_gvec_mov(tcg_ctx, 0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \ 16) -#define gen_gvec_dup64i(tcg_ctx, v1, c) \ - tcg_gen_gvec_dup64i(tcg_ctx, vec_full_reg_offset(v1), 16, 16, c) +#define gen_gvec_dup_imm(tcg_ctx, es, v1, c) \ + tcg_gen_gvec_dup_imm(tcg_ctx, es, vec_full_reg_offset(v1), 16, 16, c); #define gen_gvec_fn_2(tcg_ctx, fn, es, v1, v2) \ tcg_gen_gvec_##fn(tcg_ctx, es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ 16, 16) @@ -318,31 +318,6 @@ static void gen_gvec128_4_i64(TCGContext *tcg_ctx, gen_gvec128_4_i64_fn fn, uint tcg_temp_free_i64(tcg_ctx, cl); } -static void gen_gvec_dupi(TCGContext *tcg_ctx, uint8_t es, uint8_t reg, uint64_t c) -{ - switch (es) { - case ES_8: - tcg_gen_gvec_dup8i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, c); - break; - case ES_16: - tcg_gen_gvec_dup16i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, c); - break; - case ES_32: - tcg_gen_gvec_dup32i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, c); - break; - case ES_64: - gen_gvec_dup64i(tcg_ctx, reg, c); - break; - default: - g_assert_not_reached(); - } -} - -static void zero_vec(TCGContext *tcg_ctx, uint8_t reg) -{ - tcg_gen_gvec_dup8i(tcg_ctx, vec_full_reg_offset(reg), 16, 16, 0); -} - static void gen_addi2_i64(TCGContext *tcg_ctx, TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, uint64_t b) { @@ -400,8 +375,8 @@ static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o) * Masks for both 64 bit elements of the vector are the same. * Trust tcg to produce a good constant loading. 
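 * For instance, i2 == 0xffff (both halves equal 0xff) yields
 * generate_byte_mask(0xff) == 0xffffffffffffffffull replicated into
 * both doublewords by a single dup_imm, and i2 == 0 yields all zeroes.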
*/ - gen_gvec_dup64i(tcg_ctx, get_field(s, v1), - generate_byte_mask(i2 & 0xff)); + gen_gvec_dup_imm(tcg_ctx, ES_64, get_field(s, v1), + generate_byte_mask(i2 & 0xff)); } else { TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); @@ -437,7 +412,7 @@ static DisasJumpType op_vgm(DisasContext *s, DisasOps *o) } } - gen_gvec_dupi(tcg_ctx, es, get_field(s, v1), mask); + gen_gvec_dup_imm(tcg_ctx, es, get_field(s, v1), mask); return DISAS_NEXT; } @@ -598,7 +573,7 @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o) t = tcg_temp_new_i64(tcg_ctx); tcg_gen_qemu_ld_i64(tcg_ctx, t, o->addr1, get_mem_index(s), MO_TE | es); - zero_vec(tcg_ctx, get_field(s, v1)); + gen_gvec_dup_imm(tcg_ctx, es, get_field(s, v1), 0); write_vec_element_i64(tcg_ctx, t, get_field(s, v1), enr, es); tcg_temp_free_i64(tcg_ctx, t); return DISAS_NEXT; @@ -917,7 +892,7 @@ static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o) return DISAS_NORETURN; } - gen_gvec_dupi(tcg_ctx, es, get_field(s, v1), data); + gen_gvec_dup_imm(tcg_ctx, es, get_field(s, v1), data); return DISAS_NEXT; } @@ -1414,7 +1389,7 @@ static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o) read_vec_element_i32(tcg_ctx, tmp, get_field(s, v2), i, ES_32); tcg_gen_add2_i32(tcg_ctx, tmp, sum, sum, sum, tmp, tmp); } - zero_vec(tcg_ctx, get_field(s, v1)); + gen_gvec_dup_imm(tcg_ctx, ES_32, get_field(s, v1), 0); write_vec_element_i32(tcg_ctx, sum, get_field(s, v1), 1, ES_32); tcg_temp_free_i32(tcg_ctx, tmp); @@ -1910,65 +1885,6 @@ static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o) return DISAS_NEXT; } -static void gen_rll_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); - - tcg_gen_andi_i32(tcg_ctx, t0, b, 31); - tcg_gen_rotl_i32(tcg_ctx, d, a, t0); - tcg_temp_free_i32(tcg_ctx, t0); -} - -static void gen_rll_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t0 = tcg_temp_new_i64(tcg_ctx); - - tcg_gen_andi_i64(tcg_ctx, t0, b, 63); - tcg_gen_rotl_i64(tcg_ctx, d, a, t0); - tcg_temp_free_i64(tcg_ctx, t0); -} - -static DisasJumpType op_verllv(DisasContext *s, DisasOps *o) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - const uint8_t es = get_field(s, m4); - static const GVecGen3 g[4] = { - { .fno = gen_helper_gvec_verllv8, }, - { .fno = gen_helper_gvec_verllv16, }, - { .fni4 = gen_rll_i32, }, - { .fni8 = gen_rll_i64, }, - }; - - if (es > ES_64) { - gen_program_exception(s, PGM_SPECIFICATION); - return DISAS_NORETURN; - } - - gen_gvec_3(tcg_ctx, get_field(s, v1), get_field(s, v2), - get_field(s, v3), &g[es]); - return DISAS_NEXT; -} - -static DisasJumpType op_verll(DisasContext *s, DisasOps *o) -{ - TCGContext *tcg_ctx = s->uc->tcg_ctx; - const uint8_t es = get_field(s, m4); - static const GVecGen2s g[4] = { - { .fno = gen_helper_gvec_verll8, }, - { .fno = gen_helper_gvec_verll16, }, - { .fni4 = gen_rll_i32, }, - { .fni8 = gen_rll_i64, }, - }; - - if (es > ES_64) { - gen_program_exception(s, PGM_SPECIFICATION); - return DISAS_NORETURN; - } - gen_gvec_2s(tcg_ctx, get_field(s, v1), get_field(s, v3), o->addr1, - &g[es]); - return DISAS_NEXT; -} - static void gen_rim_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c) { TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); @@ -2035,6 +1951,9 @@ static DisasJumpType op_vesv(DisasContext *s, DisasOps *o) case 0x70: gen_gvec_fn_3(tcg_ctx, shlv, es, v1, v2, v3); break; + case 0x73: + gen_gvec_fn_3(tcg_ctx, rotlv, es, v1, v2, v3); + break; case 0x7a: gen_gvec_fn_3(tcg_ctx, sarv, es, v1, v2, v3); break; @@ -2067,6 +1986,9 @@ static 
DisasJumpType op_ves(DisasContext *s, DisasOps *o) case 0x30: gen_gvec_fn_2i(tcg_ctx, shli, es, v1, v3, d2); break; + case 0x33: + gen_gvec_fn_2i(tcg_ctx, rotli, es, v1, v3, d2); + break; case 0x3a: gen_gvec_fn_2i(tcg_ctx, sari, es, v1, v3, d2); break; @@ -2084,6 +2006,9 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o) case 0x30: gen_gvec_fn_2s(tcg_ctx, shls, es, v1, v3, shift); break; + case 0x33: + gen_gvec_fn_2s(tcg_ctx, rotls, es, v1, v3, shift); + break; case 0x3a: gen_gvec_fn_2s(tcg_ctx, sars, es, v1, v3, shift); break; diff --git a/qemu/target/s390x/vec_fpu_helper.c b/qemu/target/s390x/vec_fpu_helper.c index e87ef56f04..09cb61fbeb 100644 --- a/qemu/target/s390x/vec_fpu_helper.c +++ b/qemu/target/s390x/vec_fpu_helper.c @@ -174,7 +174,7 @@ void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env, env->cc_op = wfc64(v1, v2, env, true, GETPC()); } -typedef int (*vfc64_fn)(float64 a, float64 b, float_status *status); +typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) { diff --git a/qemu/target/s390x/vec_int_helper.c b/qemu/target/s390x/vec_int_helper.c index b81441395c..a4e486a8b8 100644 --- a/qemu/target/s390x/vec_int_helper.c +++ b/qemu/target/s390x/vec_int_helper.c @@ -515,37 +515,6 @@ void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ DEF_VPOPCT(8) DEF_VPOPCT(16) -#define DEF_VERLLV(BITS) \ -void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \ - uint32_t desc) \ -{ \ - int i; \ - \ - for (i = 0; i < (128 / BITS); i++) { \ - const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ - const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ - \ - s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \ - } \ -} -DEF_VERLLV(8) -DEF_VERLLV(16) - -#define DEF_VERLL(BITS) \ -void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \ - uint32_t desc) \ -{ \ - int i; \ - \ - for (i = 0; i < (128 / BITS); i++) { \ - const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ - \ - s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \ - } \ -} -DEF_VERLL(8) -DEF_VERLL(16) - #define DEF_VERIM(BITS) \ void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ uint32_t desc) \ diff --git a/qemu/target/sparc/fop_helper.c b/qemu/target/sparc/fop_helper.c index 9eb9b75718..e6dd3fc313 100644 --- a/qemu/target/sparc/fop_helper.c +++ b/qemu/target/sparc/fop_helper.c @@ -264,7 +264,7 @@ void helper_fsqrtq(CPUSPARCState *env) #define GEN_FCMP(name, size, reg1, reg2, FS, E) \ target_ulong glue(helper_, name) (CPUSPARCState *env) \ { \ - int ret; \ + FloatRelation ret; \ target_ulong fsr; \ if (E) { \ ret = glue(size, _compare)(reg1, reg2, &env->fp_status); \ @@ -295,7 +295,7 @@ void helper_fsqrtq(CPUSPARCState *env) #define GEN_FCMP_T(name, size, FS, E) \ target_ulong glue(helper_, name)(CPUSPARCState *env, size src1, size src2)\ { \ - int ret; \ + FloatRelation ret; \ target_ulong fsr; \ if (E) { \ ret = glue(size, _compare)(src1, src2, &env->fp_status); \ diff --git a/qemu/target/tricore/translate.c b/qemu/target/tricore/translate.c index 75188b8be6..d8d9584787 100644 --- a/qemu/target/tricore/translate.c +++ b/qemu/target/tricore/translate.c @@ -52,7 +52,6 @@ static const char *regnames_d[] = { typedef struct DisasContext { DisasContextBase base; - CPUTriCoreState *env; target_ulong pc; // CCOp cc_op; /* Current CC operation 
*/ target_ulong pc_succ_insn; diff --git a/qemu/tcg/README b/qemu/tcg/README index bfa2e4ed24..a64f67809b 100644 --- a/qemu/tcg/README +++ b/qemu/tcg/README @@ -605,10 +605,11 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. * shri_vec v0, v1, i2 * sari_vec v0, v1, i2 +* rotli_vec v0, v1, i2 * shrs_vec v0, v1, s2 * sars_vec v0, v1, s2 - Similarly for logical and arithmetic right shift. + Similarly for logical and arithmetic right shift, and left rotate. * shlv_vec v0, v1, v2 @@ -620,8 +621,10 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. * shrv_vec v0, v1, v2 * sarv_vec v0, v1, v2 +* rotlv_vec v0, v1, v2 +* rotrv_vec v0, v1, v2 - Similarly for logical and arithmetic right shift. + Similarly for logical and arithmetic right shift, and rotates. * cmp_vec v0, v1, v2, cond diff --git a/qemu/tcg/aarch64/tcg-target.h b/qemu/tcg/aarch64/tcg-target.h index 13993a70e5..e7673bb032 100644 --- a/qemu/tcg/aarch64/tcg-target.h +++ b/qemu/tcg/aarch64/tcg-target.h @@ -137,6 +137,9 @@ typedef enum { #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec 1 #define TCG_TARGET_HAS_abs_vec 1 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 1 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 1 diff --git a/qemu/tcg/aarch64/tcg-target.inc.c b/qemu/tcg/aarch64/tcg-target.inc.c index 50c9e595bb..c1f5483651 100644 --- a/qemu/tcg/aarch64/tcg-target.inc.c +++ b/qemu/tcg/aarch64/tcg-target.inc.c @@ -557,6 +557,7 @@ typedef enum { I3614_SSHR = 0x0f000400, I3614_SSRA = 0x0f001400, I3614_SHL = 0x0f005400, + I3614_SLI = 0x2f005400, I3614_USHR = 0x2f000400, I3614_USRA = 0x2f001400, @@ -1504,11 +1505,21 @@ static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, static inline void tcg_out_mb(TCGContext *s, TCGArg a0) { static const uint32_t sync[] = { - [0 ... 
TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, - [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, - [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, - [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, - [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [0] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_LD | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, + [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [TCG_MO_LD_ST | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_LD_ST | TCG_MO_LD_LD | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_ST_LD | TCG_MO_LD_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_ST] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST | TCG_MO_LD_ST | TCG_MO_ST_LD] = DMB_ISH | DMB_LD | DMB_ST, }; tcg_out32(s, sync[a0 & TCG_MO_ALL]); } @@ -1659,9 +1670,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, tcg_insn_unit **label_ptr, int mem_index, bool is_read) { -#ifdef TARGET_ARM - struct uc_struct *uc = s->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = s->uc; unsigned a_bits = get_alignment_bits(opc); unsigned s_bits = opc & MO_SIZE; unsigned a_mask = (1u << a_bits) - 1; @@ -2422,6 +2431,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sari_vec: tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2); break; + case INDEX_op_aa64_sli_vec: + tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece)); + break; case INDEX_op_shlv_vec: tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2); break; @@ -2509,8 +2521,11 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_shlv_vec: case INDEX_op_bitsel_vec: return 1; + case INDEX_op_rotli_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: return -1; case INDEX_op_mul_vec: case INDEX_op_smax_vec: @@ -2528,14 +2543,24 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne TCGArg a0, ...) { va_list va; - TCGv_vec v0, v1, v2, t1; + TCGv_vec v0, v1, v2, t1, t2; + TCGArg a2; va_start(va, a0); v0 = temp_tcgv_vec(tcg_ctx, arg_temp(a0)); v1 = temp_tcgv_vec(tcg_ctx, arg_temp(va_arg(va, TCGArg))); - v2 = temp_tcgv_vec(tcg_ctx, arg_temp(va_arg(va, TCGArg))); + a2 = va_arg(va, TCGArg); + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); switch (opc) { + case INDEX_op_rotli_vec: + t1 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_shri_vec(tcg_ctx, vece, t1, v1, -a2 & ((8 << vece) - 1)); + vec_gen_4(tcg_ctx, INDEX_op_aa64_sli_vec, type, vece, + tcgv_vec_arg(tcg_ctx, v0), tcgv_vec_arg(tcg_ctx, t1), tcgv_vec_arg(tcg_ctx, v1), a2); + tcg_temp_free_vec(tcg_ctx, t1); + break; + case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: /* Right shifts are negative left shifts for AArch64. */ @@ -2548,6 +2573,35 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne tcg_temp_free_vec(tcg_ctx, t1); break; + case INDEX_op_rotlv_vec: + t1 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_dupi_vec(tcg_ctx, vece, t1, 8 << vece); + tcg_gen_sub_vec(tcg_ctx, vece, t1, v2, t1); + /* Right shifts are negative left shifts for AArch64. 
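+ * Read loosely, the expansion below computes
+ *     t1 = v1 << (v2 - width)     // negative count, i.e. v1 >> (width - v2)
+ *     v0 = (v1 << v2) | t1
+ * which is the element-wise left rotate.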
*/ + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, t1), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, t1)); + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, v0), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v2)); + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t1); + tcg_temp_free_vec(tcg_ctx, t1); + break; + + case INDEX_op_rotrv_vec: + t1 = tcg_temp_new_vec(tcg_ctx, type); + t2 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_neg_vec(tcg_ctx, vece, t1, v2); + tcg_gen_dupi_vec(tcg_ctx, vece, t2, 8 << vece); + tcg_gen_add_vec(tcg_ctx, vece, t2, t1, t2); + /* Right shifts are negative left shifts for AArch64. */ + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, t1), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, t1)); + vec_gen_3(tcg_ctx, INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(tcg_ctx, t2), + tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, t2)); + tcg_gen_or_vec(tcg_ctx, vece, v0, t1, t2); + tcg_temp_free_vec(tcg_ctx, t1); + tcg_temp_free_vec(tcg_ctx, t2); + break; + default: g_assert_not_reached(); } @@ -2568,6 +2622,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } }; static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } }; static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } }; + static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } }; static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } }; static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } }; static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } }; @@ -2762,6 +2817,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) return &w_w_wZ; case INDEX_op_bitsel_vec: return &w_w_w_w; + case INDEX_op_aa64_sli_vec: + return &w_0_w; default: return NULL; diff --git a/qemu/tcg/aarch64/tcg-target.opc.h b/qemu/tcg/aarch64/tcg-target.opc.h index 26bfd9c460..bce30accd9 100644 --- a/qemu/tcg/aarch64/tcg-target.opc.h +++ b/qemu/tcg/aarch64/tcg-target.opc.h @@ -12,3 +12,4 @@ */ DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC) +DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC) diff --git a/qemu/tcg/arm/tcg-target.inc.c b/qemu/tcg/arm/tcg-target.inc.c index 467d063690..8884968fb3 100644 --- a/qemu/tcg/arm/tcg-target.inc.c +++ b/qemu/tcg/arm/tcg-target.inc.c @@ -1235,9 +1235,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4); static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, MemOp opc, int mem_index, bool is_load) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif int cmp_off = (is_load ? 
offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); int fast_off = TLB_MASK_TABLE_OFS(mem_index); diff --git a/qemu/tcg/i386/tcg-target.h b/qemu/tcg/i386/tcg-target.h index 24ba5d19be..8508e68e7c 100644 --- a/qemu/tcg/i386/tcg-target.h +++ b/qemu/tcg/i386/tcg-target.h @@ -183,6 +183,9 @@ extern bool have_avx2; #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_neg_vec 0 #define TCG_TARGET_HAS_abs_vec 1 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 1 #define TCG_TARGET_HAS_shs_vec 1 #define TCG_TARGET_HAS_shv_vec have_avx2 diff --git a/qemu/tcg/i386/tcg-target.inc.c b/qemu/tcg/i386/tcg-target.inc.c index 15cc1c05d9..9cb46fe1be 100644 --- a/qemu/tcg/i386/tcg-target.inc.c +++ b/qemu/tcg/i386/tcg-target.inc.c @@ -1704,9 +1704,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, int mem_index, MemOp opc, tcg_insn_unit **label_ptr, int which) { -#ifdef TARGET_ARM - struct uc_struct *uc = s->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = s->uc; const TCGReg r0 = TCG_REG_L0; const TCGReg r1 = TCG_REG_L1; TCGType ttype = TCG_TYPE_I32; @@ -3195,6 +3193,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_shls_vec: case INDEX_op_shrs_vec: case INDEX_op_sars_vec: + case INDEX_op_rotls_vec: case INDEX_op_cmp_vec: case INDEX_op_x86_shufps_vec: case INDEX_op_x86_blend_vec: @@ -3233,6 +3232,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_xor_vec: case INDEX_op_andc_vec: return 1; + case INDEX_op_rotli_vec: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: return -1; @@ -3259,12 +3259,17 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign return vece >= MO_16; case INDEX_op_sars_vec: return vece >= MO_16 && vece <= MO_32; + case INDEX_op_rotls_vec: + return vece >= MO_16 ? -1 : 0; case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: return have_avx2 && vece >= MO_32; case INDEX_op_sarv_vec: return have_avx2 && vece == MO_32; + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: + return have_avx2 && vece >= MO_32 ? -1 : 0; case INDEX_op_mul_vec: if (vece == MO_8) { @@ -3293,7 +3298,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign } } -static void expand_vec_shi(TCGContext *tcg_ctx, TCGType type, unsigned vece, bool shr, +static void expand_vec_shi(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCGOpcode opc, TCGv_vec v0, TCGv_vec v1, TCGArg imm) { TCGv_vec t1, t2; @@ -3303,26 +3308,31 @@ static void expand_vec_shi(TCGContext *tcg_ctx, TCGType type, unsigned vece, boo t1 = tcg_temp_new_vec(tcg_ctx, type); t2 = tcg_temp_new_vec(tcg_ctx, type); - /* Unpack to W, shift, and repack. Tricky bits: - (1) Use punpck*bw x,x to produce DDCCBBAA, - i.e. duplicate in other half of the 16-bit lane. - (2) For right-shift, add 8 so that the high half of - the lane becomes zero. For left-shift, we must - shift up and down again. - (3) Step 2 leaves high half zero such that PACKUSWB - (pack with unsigned saturation) does not modify - the quantity. */ + /* + * Unpack to W, shift, and repack. Tricky bits: + * (1) Use punpck*bw x,x to produce DDCCBBAA, + * i.e. duplicate in other half of the 16-bit lane. + * (2) For right-shift, add 8 so that the high half of the lane + * becomes zero. For left-shift, and left-rotate, we must + * shift up and down again. 
+ * (3) Step 2 leaves high half zero such that PACKUSWB + * (pack with unsigned saturation) does not modify + * the quantity. + */ vec_gen_3(tcg_ctx, INDEX_op_x86_punpckl_vec, type, MO_8, tcgv_vec_arg(tcg_ctx, t1), tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v1)); vec_gen_3(tcg_ctx, INDEX_op_x86_punpckh_vec, type, MO_8, tcgv_vec_arg(tcg_ctx, t2), tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v1)); - if (shr) { - tcg_gen_shri_vec(tcg_ctx, MO_16, t1, t1, imm + 8); - tcg_gen_shri_vec(tcg_ctx, MO_16, t2, t2, imm + 8); + if (opc != INDEX_op_rotli_vec) { + imm += 8; + } + if (opc == INDEX_op_shri_vec) { + tcg_gen_shri_vec(tcg_ctx, MO_16, t1, t1, imm); + tcg_gen_shri_vec(tcg_ctx, MO_16, t2, t2, imm); } else { - tcg_gen_shli_vec(tcg_ctx, MO_16, t1, t1, imm + 8); - tcg_gen_shli_vec(tcg_ctx, MO_16, t2, t2, imm + 8); + tcg_gen_shli_vec(tcg_ctx, MO_16, t1, t1, imm); + tcg_gen_shli_vec(tcg_ctx, MO_16, t2, t2, imm); tcg_gen_shri_vec(tcg_ctx, MO_16, t1, t1, 8); tcg_gen_shri_vec(tcg_ctx, MO_16, t2, t2, 8); } @@ -3389,6 +3399,61 @@ static void expand_vec_sari(TCGContext *tcg_ctx, TCGType type, unsigned vece, } } +static void expand_vec_rotli(TCGContext *tcg_ctx, TCGType type, unsigned vece, + TCGv_vec v0, TCGv_vec v1, TCGArg imm) +{ + TCGv_vec t; + + if (vece == MO_8) { + expand_vec_shi(tcg_ctx, type, vece, INDEX_op_rotli_vec, v0, v1, imm); + return; + } + + t = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_shli_vec(tcg_ctx, vece, t, v1, imm); + tcg_gen_shri_vec(tcg_ctx, vece, v0, v1, (8 << vece) - imm); + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t); + tcg_temp_free_vec(tcg_ctx, t); +} + +static void expand_vec_rotls(TCGContext *tcg_ctx, TCGType type, unsigned vece, + TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh) +{ + TCGv_i32 rsh; + TCGv_vec t; + + tcg_debug_assert(vece != MO_8); + + t = tcg_temp_new_vec(tcg_ctx, type); + rsh = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_neg_i32(tcg_ctx, rsh, lsh); + tcg_gen_andi_i32(tcg_ctx, rsh, rsh, (8 << vece) - 1); + tcg_gen_shls_vec(tcg_ctx, vece, t, v1, lsh); + tcg_gen_shrs_vec(tcg_ctx, vece, v0, v1, rsh); + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t); + tcg_temp_free_vec(tcg_ctx, t); + tcg_temp_free_i32(tcg_ctx, rsh); +} + +static void expand_vec_rotv(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec v1, TCGv_vec sh, bool right) +{ + TCGv_vec t = tcg_temp_new_vec(tcg_ctx, type); + + tcg_gen_dupi_vec(tcg_ctx, vece, t, 8 << vece); + tcg_gen_sub_vec(tcg_ctx, vece, t, t, sh); + if (right) { + tcg_gen_shlv_vec(tcg_ctx, vece, t, v1, t); + tcg_gen_shrv_vec(tcg_ctx, vece, v0, v1, sh); + } else { + tcg_gen_shrv_vec(tcg_ctx, vece, t, v1, t); + tcg_gen_shlv_vec(tcg_ctx, vece, v0, v1, sh); + } + tcg_gen_or_vec(tcg_ctx, vece, v0, v0, t); + tcg_temp_free_vec(tcg_ctx, t); +} + static void expand_vec_mul(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGv_vec v2) { @@ -3598,13 +3663,30 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne switch (opc) { case INDEX_op_shli_vec: case INDEX_op_shri_vec: - expand_vec_shi(tcg_ctx, type, vece, opc == INDEX_op_shri_vec, v0, v1, a2); + expand_vec_shi(tcg_ctx, type, vece, opc, v0, v1, a2); break; case INDEX_op_sari_vec: expand_vec_sari(tcg_ctx, type, vece, v0, v1, a2); break; + case INDEX_op_rotli_vec: + expand_vec_rotli(tcg_ctx, type, vece, v0, v1, a2); + break; + + case INDEX_op_rotls_vec: + expand_vec_rotls(tcg_ctx, type, vece, v0, v1, temp_tcgv_i32(tcg_ctx, arg_temp(a2))); + break; + + case INDEX_op_rotlv_vec: + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); + 
expand_vec_rotv(tcg_ctx, type, vece, v0, v1, v2, false); + break; + case INDEX_op_rotrv_vec: + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); + expand_vec_rotv(tcg_ctx, type, vece, v0, v1, v2, true); + break; + case INDEX_op_mul_vec: v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); expand_vec_mul(tcg_ctx, type, vece, v0, v1, v2); diff --git a/qemu/tcg/mips/tcg-target.inc.c b/qemu/tcg/mips/tcg-target.inc.c index ed5a9356c3..addf4c661d 100644 --- a/qemu/tcg/mips/tcg-target.inc.c +++ b/qemu/tcg/mips/tcg-target.inc.c @@ -1215,9 +1215,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, TCGReg addrh, TCGMemOpIdx oi, tcg_insn_unit *label_ptr[2], bool is_load) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif MemOp opc = get_memop(oi); unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); diff --git a/qemu/tcg/ppc/tcg-target.h b/qemu/tcg/ppc/tcg-target.h index 4fa21f0e71..be5b2901c3 100644 --- a/qemu/tcg/ppc/tcg-target.h +++ b/qemu/tcg/ppc/tcg-target.h @@ -161,6 +161,9 @@ extern bool have_vsx; #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec have_isa_3_00 #define TCG_TARGET_HAS_abs_vec 0 +#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_rots_vec 0 +#define TCG_TARGET_HAS_rotv_vec 1 #define TCG_TARGET_HAS_shi_vec 0 #define TCG_TARGET_HAS_shs_vec 0 #define TCG_TARGET_HAS_shv_vec 1 diff --git a/qemu/tcg/ppc/tcg-target.inc.c b/qemu/tcg/ppc/tcg-target.inc.c index 00b7942901..a74e02c9d8 100644 --- a/qemu/tcg/ppc/tcg-target.inc.c +++ b/qemu/tcg/ppc/tcg-target.inc.c @@ -1885,9 +1885,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc, TCGReg addrlo, TCGReg addrhi, int mem_index, bool is_read) { -#ifdef TARGET_ARM - struct uc_struct *uc = s->uc; -#endif + UNICORN_UNUSED struct uc_struct *uc = s->uc; int cmp_off = (is_read ? offsetof(CPUTLBEntry, addr_read) @@ -2623,21 +2621,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_shl_i32: if (const_args[2]) { - tcg_out_shli32(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_shli32(s, args[0], args[1], args[2] & 31); } else { tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_shr_i32: if (const_args[2]) { - tcg_out_shri32(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_shri32(s, args[0], args[1], args[2] & 31); } else { tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); } break; case INDEX_op_sar_i32: if (const_args[2]) { - tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31)); } else { tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); } @@ -2709,14 +2710,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_shl_i64: if (const_args[2]) { - tcg_out_shli64(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. */ + tcg_out_shli64(s, args[0], args[1], args[2] & 63); } else { tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); } break; case INDEX_op_shr_i64: if (const_args[2]) { - tcg_out_shri64(s, args[0], args[1], args[2]); + /* Limit immediate shift count lest we create an illegal insn. 
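+                  (A constant count of 64 or more cannot be encoded in
+                   the shift field; TCG leaves the result of such a
+                   shift undefined, so masking the count is safe.)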
*/ + tcg_out_shri64(s, args[0], args[1], args[2] & 63); } else { tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); } @@ -3008,6 +3011,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: return vece <= MO_32 || have_isa_2_07; case INDEX_op_ssadd_vec: case INDEX_op_sssub_vec: @@ -3018,6 +3022,7 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign case INDEX_op_shli_vec: case INDEX_op_shri_vec: case INDEX_op_sari_vec: + case INDEX_op_rotli_vec: return vece <= MO_32 || have_isa_2_07 ? -1 : 0; case INDEX_op_neg_vec: return vece >= MO_32 && have_isa_3_00; @@ -3032,6 +3037,8 @@ int tcg_can_emit_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsign return 0; case INDEX_op_bitsel_vec: return have_vsx; + case INDEX_op_rotrv_vec: + return -1; default: return 0; } @@ -3314,7 +3321,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ppc_pkum_vec: insn = pkum_op[vece]; break; - case INDEX_op_ppc_rotl_vec: + case INDEX_op_rotlv_vec: insn = rotl_op[vece]; break; case INDEX_op_ppc_msum_vec: @@ -3422,7 +3429,7 @@ static void expand_vec_mul(TCGContext *tcg_ctx, TCGType type, unsigned vece, TCG t3 = tcg_temp_new_vec(tcg_ctx, type); t4 = tcg_temp_new_vec(tcg_ctx, type); tcg_gen_dupi_vec(tcg_ctx, MO_8, t4, -16); - vec_gen_3(tcg_ctx, INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(tcg_ctx, t1), + vec_gen_3(tcg_ctx, INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(tcg_ctx, t1), tcgv_vec_arg(tcg_ctx, v2), tcgv_vec_arg(tcg_ctx, t4)); vec_gen_3(tcg_ctx, INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(tcg_ctx, t2), tcgv_vec_arg(tcg_ctx, v1), tcgv_vec_arg(tcg_ctx, v2)); @@ -3447,7 +3454,7 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne TCGArg a0, ...) 
{ va_list va; - TCGv_vec v0, v1, v2; + TCGv_vec v0, v1, v2, t0; TCGArg a2; va_start(va, a0); @@ -3465,6 +3472,9 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne case INDEX_op_sari_vec: expand_vec_shi(tcg_ctx, type, vece, v0, v1, a2, INDEX_op_sarv_vec); break; + case INDEX_op_rotli_vec: + expand_vec_shi(tcg_ctx, type, vece, v0, v1, a2, INDEX_op_rotlv_vec); + break; case INDEX_op_cmp_vec: v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); expand_vec_cmp(tcg_ctx, type, vece, v0, v1, v2, va_arg(va, TCGArg)); @@ -3473,6 +3483,13 @@ void tcg_expand_vec_op(TCGContext *tcg_ctx, TCGOpcode opc, TCGType type, unsigne v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); expand_vec_mul(tcg_ctx, type, vece, v0, v1, v2); break; + case INDEX_op_rotlv_vec: + v2 = temp_tcgv_vec(tcg_ctx, arg_temp(a2)); + t0 = tcg_temp_new_vec(tcg_ctx, type); + tcg_gen_neg_vec(tcg_ctx, vece, t0, v2); + tcg_gen_rotlv_vec(tcg_ctx, vece, v0, v1, t0); + tcg_temp_free_vec(tcg_ctx, t0); + break; default: g_assert_not_reached(); } @@ -3677,12 +3694,13 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: case INDEX_op_ppc_mrgh_vec: case INDEX_op_ppc_mrgl_vec: case INDEX_op_ppc_muleu_vec: case INDEX_op_ppc_mulou_vec: case INDEX_op_ppc_pkum_vec: - case INDEX_op_ppc_rotl_vec: case INDEX_op_dup2_vec: return &v_v_v; case INDEX_op_not_vec: diff --git a/qemu/tcg/ppc/tcg-target.opc.h b/qemu/tcg/ppc/tcg-target.opc.h index 1373f77e82..db514403c3 100644 --- a/qemu/tcg/ppc/tcg-target.opc.h +++ b/qemu/tcg/ppc/tcg-target.opc.h @@ -30,4 +30,3 @@ DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC) DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC) DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC) DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC) -DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC) diff --git a/qemu/tcg/riscv/tcg-target.inc.c b/qemu/tcg/riscv/tcg-target.inc.c index 2a5d3347d3..3d34141092 100644 --- a/qemu/tcg/riscv/tcg-target.inc.c +++ b/qemu/tcg/riscv/tcg-target.inc.c @@ -502,10 +502,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, break; case R_RISCV_JAL: return reloc_jimm20(code_ptr, (tcg_insn_unit *)value); - break; case R_RISCV_CALL: return reloc_call(code_ptr, (tcg_insn_unit *)value); - break; default: tcg_abort(); } @@ -970,9 +968,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, TCGReg addrh, TCGMemOpIdx oi, tcg_insn_unit **label_ptr, bool is_load) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif MemOp opc = get_memop(oi); unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); diff --git a/qemu/tcg/s390/tcg-target.inc.c b/qemu/tcg/s390/tcg-target.inc.c index c8fa20046f..3d64a675eb 100644 --- a/qemu/tcg/s390/tcg-target.inc.c +++ b/qemu/tcg/s390/tcg-target.inc.c @@ -1547,9 +1547,7 @@ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19)); static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, int mem_index, bool is_ld) { -#ifdef TARGET_ARM struct uc_struct *uc = s->uc; -#endif unsigned s_bits = opc & MO_SIZE; unsigned a_bits = get_alignment_bits(opc); unsigned s_mask = (1 << s_bits) - 1; diff --git a/qemu/tcg/sparc/tcg-target.inc.c b/qemu/tcg/sparc/tcg-target.inc.c index d4bc69d3b5..cf5533e8f4 100644 --- a/qemu/tcg/sparc/tcg-target.inc.c +++ b/qemu/tcg/sparc/tcg-target.inc.c @@ -1083,9 +1083,7 @@ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12)); static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, MemOp opc, int which) { -#ifdef TARGET_ARM struct 
uc_struct *uc = s->uc; -#endif int fast_off = TLB_MASK_TABLE_OFS(mem_index); int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); int table_off = fast_off + offsetof(CPUTLBDescFast, table); diff --git a/qemu/tcg/tcg-op-gvec.c b/qemu/tcg/tcg-op-gvec.c index cab429c44a..13e4b287b8 100644 --- a/qemu/tcg/tcg-op-gvec.c +++ b/qemu/tcg/tcg-op-gvec.c @@ -325,11 +325,35 @@ void tcg_gen_gvec_5_ptr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint3 in units of LNSZ. This limits the expansion of inline code. */ static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz) { - if (oprsz % lnsz == 0) { - uint32_t lnct = oprsz / lnsz; - return lnct >= 1 && lnct <= MAX_UNROLL; + uint32_t q, r; + + if (oprsz < lnsz) { + return false; } - return false; + + q = oprsz / lnsz; + r = oprsz % lnsz; + tcg_debug_assert((r & 7) == 0); + + if (lnsz < 16) { + /* For sizes below 16, accept no remainder. */ + if (r != 0) { + return false; + } + } else { + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + * In addition, expand_clr needs to handle a multiple of 8. + * Thus we can handle the tail with one more operation per + * diminishing power of 2. + */ + q += ctpop32(r); + } + + return q <= MAX_UNROLL; + } static void expand_clr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t maxsz); @@ -404,22 +428,31 @@ static void gen_dup_i64(TCGContext *tcg_ctx, unsigned vece, TCGv_i64 out, TCGv_i static TCGType choose_vector_type(TCGContext *tcg_ctx, const TCGOpcode *list, unsigned vece, uint32_t size, bool prefer_i64) { - if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) { - /* - * Recall that ARM SVE allows vector sizes that are not a - * power of 2, but always a multiple of 16. The intent is - * that e.g. size == 80 would be expanded with 2x32 + 1x16. - * It is hard to imagine a case in which v256 is supported - * but v128 is not, but check anyway. - */ - if (tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V256, vece) - && (size % 32 == 0 - || tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece))) { - return TCG_TYPE_V256; - } - } - if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16) - && tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece)) { + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + * It is hard to imagine a case in which v256 is supported + * but v128 is not, but check anyway. + * In addition, expand_clr needs to handle a multiple of 8. 
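+ * Concretely, returning V256 for such a size also requires that a
+ * 16-byte remainder be expandable with V128 and an 8-byte remainder
+ * with V64, which is what the (size & 16) and (size & 8) checks
+ * below verify.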
+ */ + if (TCG_TARGET_HAS_v256 && + check_size_impl(size, 32) && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V256, vece) && + (!(size & 16) || + (TCG_TARGET_HAS_v128 && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece))) && + (!(size & 8) || + (TCG_TARGET_HAS_v64 && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V64, vece)))) { + return TCG_TYPE_V256; + } + if (TCG_TARGET_HAS_v128 && + check_size_impl(size, 16) && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V128, vece) && + (!(size & 8) || + (TCG_TARGET_HAS_v64 && + tcg_can_emit_vecop_list(tcg_ctx, list, TCG_TYPE_V64, vece)))) { return TCG_TYPE_V128; } if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8) @@ -434,6 +467,18 @@ static void do_dup_store(TCGContext *tcg_ctx, TCGType type, uint32_t dofs, uint3 { uint32_t i = 0; + tcg_debug_assert(oprsz >= 8); + + /* + * This may be expand_clr for the tail of an operation, e.g. + * oprsz == 8 && maxsz == 64. The first 8 bytes of this store + * are misaligned wrt the maximum vector size, so do that first. + */ + if (dofs & 8) { + tcg_gen_stl_vec(tcg_ctx, t_vec, tcg_ctx->cpu_env, dofs + i, TCG_TYPE_V64); + i += 8; + } + switch (type) { case TCG_TYPE_V256: /* @@ -621,17 +666,22 @@ static void expand_clr(TCGContext *tcg_ctx, uint32_t dofs, uint32_t maxsz) /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */ static void expand_2_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - void (*fni)(TCGContext *, TCGv_i32, TCGv_i32)) + bool load_dest, void (*fni)(TCGContext *, TCGv_i32, TCGv_i32)) { TCGv_i32 t0 = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 t1 = tcg_temp_new_i32(tcg_ctx); uint32_t i; for (i = 0; i < oprsz; i += 4) { tcg_gen_ld_i32(tcg_ctx, t0, tcg_ctx->cpu_env, aofs + i); - fni(tcg_ctx, t0, t0); - tcg_gen_st_i32(tcg_ctx, t0, tcg_ctx->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_i32(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); + } + fni(tcg_ctx, t1, t0); + tcg_gen_st_i32(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); } tcg_temp_free_i32(tcg_ctx, t0); + tcg_temp_free_i32(tcg_ctx, t1); } static void expand_2i_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, @@ -751,17 +801,22 @@ static void expand_4_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */ static void expand_2_i64(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - void (*fni)(TCGContext *, TCGv_i64, TCGv_i64)) + bool load_dest, void (*fni)(TCGContext *, TCGv_i64, TCGv_i64)) { TCGv_i64 t0 = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 t1 = tcg_temp_new_i64(tcg_ctx); uint32_t i; for (i = 0; i < oprsz; i += 8) { tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, aofs + i); - fni(tcg_ctx, t0, t0); - tcg_gen_st_i64(tcg_ctx, t0, tcg_ctx->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_i64(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); + } + fni(tcg_ctx, t1, t0); + tcg_gen_st_i64(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); } tcg_temp_free_i64(tcg_ctx, t0); + tcg_temp_free_i64(tcg_ctx, t1); } static void expand_2i_i64(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t oprsz, @@ -882,17 +937,23 @@ static void expand_4_i64(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint /* Expand OPSZ bytes worth of two-operand operations using host vectors. 
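 * When load_dest is true, the current destination contents are first
 * loaded into a second temporary and handed to fni as its output
 * operand, so the callback can fold them into the result (needed by
 * operations that accumulate into vd).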
*/ static void expand_2_vec(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, + bool load_dest, void (*fni)(TCGContext *, unsigned, TCGv_vec, TCGv_vec)) { TCGv_vec t0 = tcg_temp_new_vec(tcg_ctx, type); + TCGv_vec t1 = tcg_temp_new_vec(tcg_ctx, type); uint32_t i; for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(tcg_ctx, t0, tcg_ctx->cpu_env, aofs + i); - fni(tcg_ctx, vece, t0, t0); - tcg_gen_st_vec(tcg_ctx, t0, tcg_ctx->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_vec(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); + } + fni(tcg_ctx, vece, t1, t0); + tcg_gen_st_vec(tcg_ctx, t1, tcg_ctx->cpu_env, dofs + i); } tcg_temp_free_vec(tcg_ctx, t0); + tcg_temp_free_vec(tcg_ctx, t1); } /* Expand OPSZ bytes worth of two-vector operands and an immediate operand @@ -1046,7 +1107,8 @@ void tcg_gen_gvec_2(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, * that e.g. size == 80 would be expanded with 2x32 + 1x16. */ some = QEMU_ALIGN_DOWN(oprsz, 32); - expand_2_vec(tcg_ctx, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv); + expand_2_vec(tcg_ctx, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, + g->load_dest, g->fniv); if (some == oprsz) { break; } @@ -1056,17 +1118,19 @@ void tcg_gen_gvec_2(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, maxsz -= some; /* fallthru */ case TCG_TYPE_V128: - expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv); + expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, + g->load_dest, g->fniv); break; case TCG_TYPE_V64: - expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv); + expand_2_vec(tcg_ctx, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, + g->load_dest, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_2_i64(tcg_ctx, dofs, aofs, oprsz, g->fni8); + expand_2_i64(tcg_ctx, dofs, aofs, oprsz, g->load_dest, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_2_i32(tcg_ctx, dofs, aofs, oprsz, g->fni4); + expand_2_i32(tcg_ctx, dofs, aofs, oprsz, g->load_dest, g->fni4); } else { assert(g->fno != NULL); tcg_gen_gvec_2_ool(tcg_ctx, dofs, aofs, oprsz, maxsz, g->data, g->fno); @@ -1543,32 +1607,11 @@ void tcg_gen_gvec_dup_mem(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uin } } -void tcg_gen_gvec_dup64i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint64_t x) -{ - check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_64, dofs, oprsz, maxsz, NULL, NULL, x); -} - -void tcg_gen_gvec_dup32i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint32_t x) -{ - check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_32, dofs, oprsz, maxsz, NULL, NULL, x); -} - -void tcg_gen_gvec_dup16i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint16_t x) -{ - check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_16, dofs, oprsz, maxsz, NULL, NULL, x); -} - -void tcg_gen_gvec_dup8i(TCGContext *tcg_ctx, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, uint8_t x) +void tcg_gen_gvec_dup_imm(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t oprsz, + uint32_t maxsz, uint64_t x) { check_size_align(oprsz, maxsz, dofs); - do_dup(tcg_ctx, MO_8, dofs, oprsz, maxsz, NULL, NULL, x); + do_dup(tcg_ctx, vece, dofs, oprsz, maxsz, NULL, NULL, x); } void tcg_gen_gvec_not(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, @@ -2321,7 +2364,7 @@ void tcg_gen_gvec_xor(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_ }; if (aofs == 
bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, 0); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, 0); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2338,7 +2381,7 @@ void tcg_gen_gvec_andc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 }; if (aofs == bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, 0); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, 0); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2355,7 +2398,7 @@ void tcg_gen_gvec_orc(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_ }; if (aofs == bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, -1); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, -1); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2406,7 +2449,7 @@ void tcg_gen_gvec_eqv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_ }; if (aofs == bofs) { - tcg_gen_gvec_dup8i(tcg_ctx, dofs, oprsz, maxsz, -1); + tcg_gen_gvec_dup_imm(tcg_ctx, MO_64, dofs, oprsz, maxsz, -1); } else { tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g); } @@ -2654,6 +2697,74 @@ void tcg_gen_gvec_sari(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 } } +void tcg_gen_vec_rotl8i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c) +{ + uint64_t mask = dup_const(MO_8, 0xff << c); + + tcg_gen_shli_i64(tcg_ctx, d, a, c); + tcg_gen_shri_i64(tcg_ctx, a, a, 8 - c); + tcg_gen_andi_i64(tcg_ctx, d, d, mask); + tcg_gen_andi_i64(tcg_ctx, a, a, ~mask); + tcg_gen_or_i64(tcg_ctx, d, d, a); +} + +void tcg_gen_vec_rotl16i_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, int64_t c) +{ + uint64_t mask = dup_const(MO_16, 0xffff << c); + + tcg_gen_shli_i64(tcg_ctx, d, a, c); + tcg_gen_shri_i64(tcg_ctx, a, a, 16 - c); + tcg_gen_andi_i64(tcg_ctx, d, d, mask); + tcg_gen_andi_i64(tcg_ctx, a, a, ~mask); + tcg_gen_or_i64(tcg_ctx, d, d, a); +} + +void tcg_gen_gvec_rotli(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen2i g[4] = { + { .fni8 = tcg_gen_vec_rotl8i_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl8i, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = tcg_gen_vec_rotl16i_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl16i, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotli_i32, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl32i, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotli_i64, + .fniv = tcg_gen_rotli_vec, + .fno = gen_helper_gvec_rotl64i, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(shift >= 0 && shift < (8 << vece)); + if (shift == 0) { + tcg_gen_gvec_mov(tcg_ctx, vece, dofs, aofs, oprsz, maxsz); + } else { + tcg_gen_gvec_2i(tcg_ctx, dofs, aofs, oprsz, maxsz, shift, &g[vece]); + } +} + +void tcg_gen_gvec_rotri(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_debug_assert(vece <= MO_64); + tcg_debug_assert(shift >= 0 && shift < (8 << vece)); + tcg_gen_gvec_rotli(tcg_ctx, vece, dofs, aofs, -shift & ((8 << vece) - 1), + oprsz, maxsz); +} + /* * Specialized generation vector shifts by a non-constant scalar. 
*/ @@ -2868,6 +2979,28 @@ void tcg_gen_gvec_sars(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 do_gvec_shifts(tcg_ctx, vece, dofs, aofs, shift, oprsz, maxsz, &g); } +void tcg_gen_gvec_rotls(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen2sh g = { + .fni4 = tcg_gen_rotl_i32, + .fni8 = tcg_gen_rotl_i64, + .fniv_s = tcg_gen_rotls_vec, + .fniv_v = tcg_gen_rotlv_vec, + .fno = { + gen_helper_gvec_rotl8i, + gen_helper_gvec_rotl16i, + gen_helper_gvec_rotl32i, + gen_helper_gvec_rotl64i, + }, + .s_list = { INDEX_op_rotls_vec, 0 }, + .v_list = { INDEX_op_rotlv_vec, 0 }, + }; + + tcg_debug_assert(vece <= MO_64); + do_gvec_shifts(tcg_ctx, vece, dofs, aofs, shift, oprsz, maxsz, &g); +} + /* * Expand D = A << (B % element bits) * @@ -3063,6 +3196,128 @@ void tcg_gen_gvec_sarv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32 tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g[vece]); } +/* + * Similarly for rotates. + */ + +static void tcg_gen_rotlv_mod_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_dupi_vec(tcg_ctx, vece, t, (8 << vece) - 1); + tcg_gen_and_vec(tcg_ctx, vece, t, t, b); + tcg_gen_rotlv_vec(tcg_ctx, vece, d, a, t); + tcg_temp_free_vec(tcg_ctx, t); +} + +static void tcg_gen_rotl_mod_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, t, b, 31); + tcg_gen_rotl_i32(tcg_ctx, d, a, t); + tcg_temp_free_i32(tcg_ctx, t); +} + +static void tcg_gen_rotl_mod_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_andi_i64(tcg_ctx, t, b, 63); + tcg_gen_rotl_i64(tcg_ctx, d, a, t); + tcg_temp_free_i64(tcg_ctx, t); +} + +void tcg_gen_gvec_rotlv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotl_mod_i32, + .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotl_mod_i64, + .fniv = tcg_gen_rotlv_mod_vec, + .fno = gen_helper_gvec_rotl64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + +static void tcg_gen_rotrv_mod_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec d, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(tcg_ctx, d); + + tcg_gen_dupi_vec(tcg_ctx, vece, t, (8 << vece) - 1); + tcg_gen_and_vec(tcg_ctx, vece, t, t, b); + tcg_gen_rotrv_vec(tcg_ctx, vece, d, a, t); + tcg_temp_free_vec(tcg_ctx, t); +} + +static void tcg_gen_rotr_mod_i32(TCGContext *tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(tcg_ctx); + + tcg_gen_andi_i32(tcg_ctx, t, b, 31); + tcg_gen_rotr_i32(tcg_ctx, d, a, t); + tcg_temp_free_i32(tcg_ctx, t); +} + +static void tcg_gen_rotr_mod_i64(TCGContext *tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(tcg_ctx); + 
+ tcg_gen_andi_i64(tcg_ctx, t, b, 63); + tcg_gen_rotr_i64(tcg_ctx, d, a, t); + tcg_temp_free_i64(tcg_ctx, t); +} + +void tcg_gen_gvec_rotrv(TCGContext *tcg_ctx, unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 }; + static const GVecGen3 g[4] = { + { .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr8v, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr16v, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = tcg_gen_rotr_mod_i32, + .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr32v, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = tcg_gen_rotr_mod_i64, + .fniv = tcg_gen_rotrv_mod_vec, + .fno = gen_helper_gvec_rotr64v, + .opt_opc = vecop_list, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, + }; + + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3(tcg_ctx, dofs, aofs, bofs, oprsz, maxsz, &g[vece]); +} + /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */ static void expand_cmp_i32(TCGContext *tcg_ctx, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, TCGCond cond) diff --git a/qemu/tcg/tcg-op-vec.c b/qemu/tcg/tcg-op-vec.c index 99343962ac..02d3e22564 100644 --- a/qemu/tcg/tcg-op-vec.c +++ b/qemu/tcg/tcg-op-vec.c @@ -547,6 +547,18 @@ void tcg_gen_sari_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a do_shifti(tcg_ctx, INDEX_op_sari_vec, vece, r, a, i); } +void tcg_gen_rotli_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) +{ + do_shifti(tcg_ctx, INDEX_op_rotli_vec, vece, r, a, i); +} + +void tcg_gen_rotri_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i) +{ + int bits = 8 << vece; + tcg_debug_assert(i >= 0 && i < bits); + do_shifti(tcg_ctx, INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1)); +} + void tcg_gen_cmp_vec(TCGContext *tcg_ctx, TCGCond cond, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { @@ -647,7 +659,9 @@ static void do_minmax(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a TCGv_vec b, TCGOpcode opc, TCGCond cond) { if (!do_op3(tcg_ctx, vece, r, a, b, opc)) { + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); tcg_gen_cmpsel_vec(tcg_ctx, cond, vece, r, a, b, a, b); + tcg_swap_vecop_list(hold_list); } } @@ -686,8 +700,18 @@ void tcg_gen_sarv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a do_op3_nofail(tcg_ctx, vece, r, a, b, INDEX_op_sarv_vec); } +void tcg_gen_rotlv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3_nofail(tcg_ctx, vece, r, a, b, INDEX_op_rotlv_vec); +} + +void tcg_gen_rotrv_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) +{ + do_op3_nofail(tcg_ctx, vece, r, a, b, INDEX_op_rotrv_vec); +} + static void do_shifts(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, - TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v) + TCGv_i32 s, TCGOpcode opc) { TCGTemp *rt = tcgv_vec_temp(tcg_ctx, r); TCGTemp *at = tcgv_vec_temp(tcg_ctx, a); @@ -696,48 +720,41 @@ static void do_shifts(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a TCGArg ai = temp_arg(at); TCGArg si = temp_arg(st); TCGType type = rt->base_type; - const TCGOpcode *hold_list; int can; tcg_debug_assert(at->base_type >= type); - tcg_assert_listed_vecop(opc_s); - hold_list = tcg_swap_vecop_list(NULL); - can = tcg_can_emit_vec_op(tcg_ctx, opc_s, type, vece); + tcg_assert_listed_vecop(opc); + 
can = tcg_can_emit_vec_op(tcg_ctx, opc, type, vece); if (can > 0) { - vec_gen_3(tcg_ctx, opc_s, type, vece, ri, ai, si); + vec_gen_3(tcg_ctx, opc, type, vece, ri, ai, si); } else if (can < 0) { - tcg_expand_vec_op(tcg_ctx, opc_s, type, vece, ri, ai, si); + const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL); + tcg_expand_vec_op(tcg_ctx, opc, type, vece, ri, ai, si); + tcg_swap_vecop_list(hold_list); } else { - TCGv_vec vec_s = tcg_temp_new_vec(tcg_ctx, type); - - if (vece == MO_64) { - TCGv_i64 s64 = tcg_temp_new_i64(tcg_ctx); - tcg_gen_extu_i32_i64(tcg_ctx, s64, s); - tcg_gen_dup_i64_vec(tcg_ctx, MO_64, vec_s, s64); - tcg_temp_free_i64(tcg_ctx, s64); - } else { - tcg_gen_dup_i32_vec(tcg_ctx, vece, vec_s, s); - } - do_op3_nofail(tcg_ctx, vece, r, a, vec_s, opc_v); - tcg_temp_free_vec(tcg_ctx, vec_s); + g_assert_not_reached(); } - tcg_swap_vecop_list(hold_list); } void tcg_gen_shls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { - do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec); + do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shls_vec); } void tcg_gen_shrs_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { - do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec); + do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_shrs_vec); } void tcg_gen_sars_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b) { - do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec); + do_shifts(tcg_ctx, vece, r, a, b, INDEX_op_sars_vec); +} + +void tcg_gen_rotls_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s) +{ + do_shifts(tcg_ctx, vece, r, a, s, INDEX_op_rotls_vec); } void tcg_gen_bitsel_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec r, TCGv_vec a, diff --git a/qemu/tcg/tcg-op.c b/qemu/tcg/tcg-op.c index 8a5865dfe8..d2d44666cb 100644 --- a/qemu/tcg/tcg-op.c +++ b/qemu/tcg/tcg-op.c @@ -538,9 +538,9 @@ void tcg_gen_rotl_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 } } -void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +void tcg_gen_rotli_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { - tcg_debug_assert(arg2 < 32); + tcg_debug_assert(arg2 >= 0 && arg2 < 32); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(tcg_ctx, ret, arg1); @@ -578,9 +578,9 @@ void tcg_gen_rotr_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 } } -void tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +void tcg_gen_rotri_i32(TCGContext *tcg_ctx, TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { - tcg_debug_assert(arg2 < 32); + tcg_debug_assert(arg2 >= 0 && arg2 < 32); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(tcg_ctx, ret, arg1); @@ -2000,9 +2000,9 @@ void tcg_gen_rotl_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 #endif } -void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +void tcg_gen_rotli_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { - tcg_debug_assert(arg2 < 64); + tcg_debug_assert(arg2 >= 0 && arg2 < 64); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(tcg_ctx, ret, arg1); @@ -2041,9 +2041,9 @@ void tcg_gen_rotr_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 #endif } -void tcg_gen_rotri_i64(TCGContext *tcg_ctx, TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +void tcg_gen_rotri_i64(TCGContext *tcg_ctx, 
TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { - tcg_debug_assert(arg2 < 64); + tcg_debug_assert(arg2 >= 0 && arg2 < 64); /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(tcg_ctx, ret, arg1); diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index 1c9353032e..3d23487176 100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -1411,6 +1411,13 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: return have_vec && TCG_TARGET_HAS_shv_vec; + case INDEX_op_rotli_vec: + return have_vec && TCG_TARGET_HAS_roti_vec; + case INDEX_op_rotls_vec: + return have_vec && TCG_TARGET_HAS_rots_vec; + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: + return have_vec && TCG_TARGET_HAS_rotv_vec; case INDEX_op_ssadd_vec: case INDEX_op_usadd_vec: case INDEX_op_sssub_vec: @@ -2779,34 +2786,68 @@ static bool liveness_pass_2(TCGContext *s) } /* Outputs become available. */ - for (i = 0; i < nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); + if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { + arg_ts = arg_temp(op->args[0]); dir_ts = arg_ts->state_ptr; - if (!dir_ts) { - continue; + if (dir_ts) { + op->args[0] = temp_arg(dir_ts); + changes = true; + + /* The output is now live and modified. */ + arg_ts->state = 0; + + if (NEED_SYNC_ARG(0)) { + TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 + ? INDEX_op_st_i32 + : INDEX_op_st_i64); + TCGOp *sop = tcg_op_insert_after(s, op, sopc); + TCGTemp *out_ts = dir_ts; + + if (IS_DEAD_ARG(0)) { + out_ts = arg_temp(op->args[1]); + arg_ts->state = TS_DEAD; + tcg_op_remove(s, op); + } else { + arg_ts->state = TS_MEM; + } + + sop->args[0] = temp_arg(out_ts); + sop->args[1] = temp_arg(arg_ts->mem_base); + sop->args[2] = arg_ts->mem_offset; + } else { + tcg_debug_assert(!IS_DEAD_ARG(0)); + } } - op->args[i] = temp_arg(dir_ts); - changes = true; + } else { + for (i = 0; i < nb_oargs; i++) { + arg_ts = arg_temp(op->args[i]); + dir_ts = arg_ts->state_ptr; + if (!dir_ts) { + continue; + } + op->args[i] = temp_arg(dir_ts); + changes = true; - /* The output is now live and modified. */ - arg_ts->state = 0; + /* The output is now live and modified. */ + arg_ts->state = 0; - /* Sync outputs upon their last write. */ - if (NEED_SYNC_ARG(i)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_st_i32 - : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc); + /* Sync outputs upon their last write. */ + if (NEED_SYNC_ARG(i)) { + TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 + ? INDEX_op_st_i32 + : INDEX_op_st_i64); + TCGOp *sop = tcg_op_insert_after(s, op, sopc); - sop->args[0] = temp_arg(dir_ts); - sop->args[1] = temp_arg(arg_ts->mem_base); - sop->args[2] = arg_ts->mem_offset; + sop->args[0] = temp_arg(dir_ts); + sop->args[1] = temp_arg(arg_ts->mem_base); + sop->args[2] = arg_ts->mem_offset; - arg_ts->state = TS_MEM; - } - /* Drop outputs that are dead. */ - if (IS_DEAD_ARG(i)) { - arg_ts->state = TS_DEAD; + arg_ts->state = TS_MEM; + } + /* Drop outputs that are dead. 
*/ + if (IS_DEAD_ARG(i)) { + arg_ts->state = TS_DEAD; + } } } } diff --git a/qemu/tricore.h b/qemu/tricore.h index e378868526..30cc3e9a50 100644 --- a/qemu/tricore.h +++ b/qemu/tricore.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_tricore #define tcg_gen_shr_i64 tcg_gen_shr_i64_tricore #define tcg_gen_st_i64 tcg_gen_st_i64_tricore +#define tcg_gen_add_i64 tcg_gen_add_i64_tricore +#define tcg_gen_sub_i64 tcg_gen_sub_i64_tricore #define tcg_gen_xor_i64 tcg_gen_xor_i64_tricore +#define tcg_gen_neg_i64 tcg_gen_neg_i64_tricore #define cpu_icount_to_ns cpu_icount_to_ns_tricore #define cpu_is_stopped cpu_is_stopped_tricore #define cpu_get_ticks cpu_get_ticks_tricore @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_tricore #define floatx80_mul floatx80_mul_tricore #define floatx80_div floatx80_div_tricore +#define floatx80_modrem floatx80_modrem_tricore +#define floatx80_mod floatx80_mod_tricore #define floatx80_rem floatx80_rem_tricore #define floatx80_sqrt floatx80_sqrt_tricore #define floatx80_eq floatx80_eq_tricore @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_tricore #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_tricore #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_tricore +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_tricore #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_tricore #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_tricore #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_tricore @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_tricore #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_tricore #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_tricore +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_tricore +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_tricore #define tcg_gen_gvec_sari tcg_gen_gvec_sari_tricore +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_tricore +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_tricore #define tcg_gen_gvec_shls tcg_gen_gvec_shls_tricore #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_tricore #define tcg_gen_gvec_sars tcg_gen_gvec_sars_tricore +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_tricore #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_tricore #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_tricore #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_tricore +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_tricore +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_tricore #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_tricore #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_tricore #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_tricore @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_tricore #define tcg_gen_shri_vec tcg_gen_shri_vec_tricore #define tcg_gen_sari_vec tcg_gen_sari_vec_tricore +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_tricore +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_tricore #define tcg_gen_cmp_vec tcg_gen_cmp_vec_tricore #define tcg_gen_add_vec tcg_gen_add_vec_tricore #define tcg_gen_sub_vec tcg_gen_sub_vec_tricore @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_tricore #define tcg_gen_shrv_vec tcg_gen_shrv_vec_tricore #define tcg_gen_sarv_vec tcg_gen_sarv_vec_tricore +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_tricore +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_tricore #define tcg_gen_shls_vec tcg_gen_shls_vec_tricore #define tcg_gen_shrs_vec tcg_gen_shrs_vec_tricore #define tcg_gen_sars_vec tcg_gen_sars_vec_tricore +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_tricore #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_tricore 
#define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_tricore #define tb_htable_lookup tb_htable_lookup_tricore @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_tricore #define cpu_loop_exit_atomic cpu_loop_exit_atomic_tricore #define tlb_init tlb_init_tricore +#define tlb_destroy tlb_destroy_tricore #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_tricore #define tlb_flush tlb_flush_tricore #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_tricore @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_tricore #define get_page_addr_code_hostp get_page_addr_code_hostp_tricore #define get_page_addr_code get_page_addr_code_tricore +#define probe_access_flags probe_access_flags_tricore #define probe_access probe_access_tricore #define tlb_vaddr_to_host tlb_vaddr_to_host_tricore #define helper_ret_ldub_mmu helper_ret_ldub_mmu_tricore @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_tricore #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_tricore #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_tricore -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_tricore -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_tricore -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_tricore -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_tricore +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_tricore +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_tricore +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_tricore +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_tricore +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_tricore +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_tricore +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_tricore +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_tricore #define cpu_ldub_data_ra cpu_ldub_data_ra_tricore #define cpu_ldsb_data_ra cpu_ldsb_data_ra_tricore -#define cpu_lduw_data_ra cpu_lduw_data_ra_tricore -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_tricore -#define cpu_ldl_data_ra cpu_ldl_data_ra_tricore -#define cpu_ldq_data_ra cpu_ldq_data_ra_tricore +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_tricore +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_tricore +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_tricore +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_tricore +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_tricore +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_tricore +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_tricore +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_tricore #define cpu_ldub_data cpu_ldub_data_tricore #define cpu_ldsb_data cpu_ldsb_data_tricore -#define cpu_lduw_data cpu_lduw_data_tricore -#define cpu_ldsw_data cpu_ldsw_data_tricore -#define cpu_ldl_data cpu_ldl_data_tricore -#define cpu_ldq_data cpu_ldq_data_tricore +#define cpu_lduw_be_data cpu_lduw_be_data_tricore +#define cpu_lduw_le_data cpu_lduw_le_data_tricore +#define cpu_ldsw_be_data cpu_ldsw_be_data_tricore +#define cpu_ldsw_le_data cpu_ldsw_le_data_tricore +#define cpu_ldl_be_data cpu_ldl_be_data_tricore +#define cpu_ldl_le_data cpu_ldl_le_data_tricore +#define cpu_ldq_le_data cpu_ldq_le_data_tricore +#define cpu_ldq_be_data cpu_ldq_be_data_tricore #define helper_ret_stb_mmu helper_ret_stb_mmu_tricore #define helper_le_stw_mmu helper_le_stw_mmu_tricore #define helper_be_stw_mmu helper_be_stw_mmu_tricore @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_tricore #define helper_be_stq_mmu helper_be_stq_mmu_tricore #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_tricore -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_tricore 
-#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_tricore -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_tricore +#define cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_tricore +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_tricore +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_tricore +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_tricore +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_tricore +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_tricore #define cpu_stb_data_ra cpu_stb_data_ra_tricore -#define cpu_stw_data_ra cpu_stw_data_ra_tricore -#define cpu_stl_data_ra cpu_stl_data_ra_tricore -#define cpu_stq_data_ra cpu_stq_data_ra_tricore +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_tricore +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_tricore +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_tricore +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_tricore +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_tricore +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_tricore #define cpu_stb_data cpu_stb_data_tricore -#define cpu_stw_data cpu_stw_data_tricore -#define cpu_stl_data cpu_stl_data_tricore -#define cpu_stq_data cpu_stq_data_tricore +#define cpu_stw_be_data cpu_stw_be_data_tricore +#define cpu_stw_le_data cpu_stw_le_data_tricore +#define cpu_stl_be_data cpu_stl_be_data_tricore +#define cpu_stl_le_data cpu_stl_le_data_tricore +#define cpu_stq_be_data cpu_stq_be_data_tricore +#define cpu_stq_le_data cpu_stq_le_data_tricore #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_tricore #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_tricore #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_tricore @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_tricore #define cpu_ldl_code cpu_ldl_code_tricore #define cpu_ldq_code cpu_ldq_code_tricore +#define cpu_interrupt_handler cpu_interrupt_handler_tricore #define helper_div_i32 helper_div_i32_tricore #define helper_rem_i32 helper_rem_i32_tricore #define helper_divu_i32 helper_divu_i32_tricore @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_tricore #define helper_gvec_sar32i helper_gvec_sar32i_tricore #define helper_gvec_sar64i helper_gvec_sar64i_tricore +#define helper_gvec_rotl8i helper_gvec_rotl8i_tricore +#define helper_gvec_rotl16i helper_gvec_rotl16i_tricore +#define helper_gvec_rotl32i helper_gvec_rotl32i_tricore +#define helper_gvec_rotl64i helper_gvec_rotl64i_tricore #define helper_gvec_shl8v helper_gvec_shl8v_tricore #define helper_gvec_shl16v helper_gvec_shl16v_tricore #define helper_gvec_shl32v helper_gvec_shl32v_tricore @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_tricore #define helper_gvec_sar32v helper_gvec_sar32v_tricore #define helper_gvec_sar64v helper_gvec_sar64v_tricore +#define helper_gvec_rotl8v helper_gvec_rotl8v_tricore +#define helper_gvec_rotl16v helper_gvec_rotl16v_tricore +#define helper_gvec_rotl32v helper_gvec_rotl32v_tricore +#define helper_gvec_rotl64v helper_gvec_rotl64v_tricore +#define helper_gvec_rotr8v helper_gvec_rotr8v_tricore +#define helper_gvec_rotr16v helper_gvec_rotr16v_tricore +#define helper_gvec_rotr32v helper_gvec_rotr32v_tricore +#define helper_gvec_rotr64v helper_gvec_rotr64v_tricore #define helper_gvec_eq8 helper_gvec_eq8_tricore #define helper_gvec_ne8 helper_gvec_ne8_tricore #define helper_gvec_lt8 helper_gvec_lt8_tricore diff --git a/qemu/util/guest-random.c b/qemu/util/guest-random.c index 7c1fe7be4f..9a0f300ba4 100644 --- a/qemu/util/guest-random.c +++ b/qemu/util/guest-random.c @@ -78,4 +78,3 @@ void 
qemu_guest_random_seed_thread_part2(uint64_t seed) } #endif } - diff --git a/qemu/x86_64.h b/qemu/x86_64.h index 0118257e9e..cec72b9667 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -42,7 +42,10 @@ #define tcg_gen_shl_i64 tcg_gen_shl_i64_x86_64 #define tcg_gen_shr_i64 tcg_gen_shr_i64_x86_64 #define tcg_gen_st_i64 tcg_gen_st_i64_x86_64 +#define tcg_gen_add_i64 tcg_gen_add_i64_x86_64 +#define tcg_gen_sub_i64 tcg_gen_sub_i64_x86_64 #define tcg_gen_xor_i64 tcg_gen_xor_i64_x86_64 +#define tcg_gen_neg_i64 tcg_gen_neg_i64_x86_64 #define cpu_icount_to_ns cpu_icount_to_ns_x86_64 #define cpu_is_stopped cpu_is_stopped_x86_64 #define cpu_get_ticks cpu_get_ticks_x86_64 @@ -374,6 +377,8 @@ #define floatx80_sub floatx80_sub_x86_64 #define floatx80_mul floatx80_mul_x86_64 #define floatx80_div floatx80_div_x86_64 +#define floatx80_modrem floatx80_modrem_x86_64 +#define floatx80_mod floatx80_mod_x86_64 #define floatx80_rem floatx80_rem_x86_64 #define floatx80_sqrt floatx80_sqrt_x86_64 #define floatx80_eq floatx80_eq_x86_64 @@ -648,6 +653,7 @@ #define tcg_gen_gvec_dup_i32 tcg_gen_gvec_dup_i32_x86_64 #define tcg_gen_gvec_dup_i64 tcg_gen_gvec_dup_i64_x86_64 #define tcg_gen_gvec_dup_mem tcg_gen_gvec_dup_mem_x86_64 +#define tcg_gen_gvec_dup_imm tcg_gen_gvec_dup_imm_x86_64 #define tcg_gen_gvec_dup64i tcg_gen_gvec_dup64i_x86_64 #define tcg_gen_gvec_dup32i tcg_gen_gvec_dup32i_x86_64 #define tcg_gen_gvec_dup16i tcg_gen_gvec_dup16i_x86_64 @@ -702,13 +708,20 @@ #define tcg_gen_gvec_shri tcg_gen_gvec_shri_x86_64 #define tcg_gen_vec_sar8i_i64 tcg_gen_vec_sar8i_i64_x86_64 #define tcg_gen_vec_sar16i_i64 tcg_gen_vec_sar16i_i64_x86_64 +#define tcg_gen_vec_rotl8i_i64 tcg_gen_vec_rotl8i_i64_x86_64 +#define tcg_gen_vec_rotl16i_i64 tcg_gen_vec_rotl16i_i64_x86_64 #define tcg_gen_gvec_sari tcg_gen_gvec_sari_x86_64 +#define tcg_gen_gvec_rotli tcg_gen_gvec_rotli_x86_64 +#define tcg_gen_gvec_rotri tcg_gen_gvec_rotri_x86_64 #define tcg_gen_gvec_shls tcg_gen_gvec_shls_x86_64 #define tcg_gen_gvec_shrs tcg_gen_gvec_shrs_x86_64 #define tcg_gen_gvec_sars tcg_gen_gvec_sars_x86_64 +#define tcg_gen_gvec_rotls tcg_gen_gvec_rotls_x86_64 #define tcg_gen_gvec_shlv tcg_gen_gvec_shlv_x86_64 #define tcg_gen_gvec_shrv tcg_gen_gvec_shrv_x86_64 #define tcg_gen_gvec_sarv tcg_gen_gvec_sarv_x86_64 +#define tcg_gen_gvec_rotlv tcg_gen_gvec_rotlv_x86_64 +#define tcg_gen_gvec_rotrv tcg_gen_gvec_rotrv_x86_64 #define tcg_gen_gvec_cmp tcg_gen_gvec_cmp_x86_64 #define tcg_gen_gvec_bitsel tcg_gen_gvec_bitsel_x86_64 #define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_x86_64 @@ -745,6 +758,8 @@ #define tcg_gen_shli_vec tcg_gen_shli_vec_x86_64 #define tcg_gen_shri_vec tcg_gen_shri_vec_x86_64 #define tcg_gen_sari_vec tcg_gen_sari_vec_x86_64 +#define tcg_gen_rotli_vec tcg_gen_rotli_vec_x86_64 +#define tcg_gen_rotri_vec tcg_gen_rotri_vec_x86_64 #define tcg_gen_cmp_vec tcg_gen_cmp_vec_x86_64 #define tcg_gen_add_vec tcg_gen_add_vec_x86_64 #define tcg_gen_sub_vec tcg_gen_sub_vec_x86_64 @@ -760,9 +775,12 @@ #define tcg_gen_shlv_vec tcg_gen_shlv_vec_x86_64 #define tcg_gen_shrv_vec tcg_gen_shrv_vec_x86_64 #define tcg_gen_sarv_vec tcg_gen_sarv_vec_x86_64 +#define tcg_gen_rotlv_vec tcg_gen_rotlv_vec_x86_64 +#define tcg_gen_rotrv_vec tcg_gen_rotrv_vec_x86_64 #define tcg_gen_shls_vec tcg_gen_shls_vec_x86_64 #define tcg_gen_shrs_vec tcg_gen_shrs_vec_x86_64 #define tcg_gen_sars_vec tcg_gen_sars_vec_x86_64 +#define tcg_gen_rotls_vec tcg_gen_rotls_vec_x86_64 #define tcg_gen_bitsel_vec tcg_gen_bitsel_vec_x86_64 #define tcg_gen_cmpsel_vec tcg_gen_cmpsel_vec_x86_64 #define 
tb_htable_lookup tb_htable_lookup_x86_64 @@ -774,6 +792,7 @@ #define cpu_loop_exit_restore cpu_loop_exit_restore_x86_64 #define cpu_loop_exit_atomic cpu_loop_exit_atomic_x86_64 #define tlb_init tlb_init_x86_64 +#define tlb_destroy tlb_destroy_x86_64 #define tlb_flush_by_mmuidx tlb_flush_by_mmuidx_x86_64 #define tlb_flush tlb_flush_x86_64 #define tlb_flush_by_mmuidx_all_cpus tlb_flush_by_mmuidx_all_cpus_x86_64 @@ -794,6 +813,7 @@ #define tlb_set_page tlb_set_page_x86_64 #define get_page_addr_code_hostp get_page_addr_code_hostp_x86_64 #define get_page_addr_code get_page_addr_code_x86_64 +#define probe_access_flags probe_access_flags_x86_64 #define probe_access probe_access_x86_64 #define tlb_vaddr_to_host tlb_vaddr_to_host_x86_64 #define helper_ret_ldub_mmu helper_ret_ldub_mmu_x86_64 @@ -810,22 +830,34 @@ #define helper_be_ldsl_mmu helper_be_ldsl_mmu_x86_64 #define cpu_ldub_mmuidx_ra cpu_ldub_mmuidx_ra_x86_64 #define cpu_ldsb_mmuidx_ra cpu_ldsb_mmuidx_ra_x86_64 -#define cpu_lduw_mmuidx_ra cpu_lduw_mmuidx_ra_x86_64 -#define cpu_ldsw_mmuidx_ra cpu_ldsw_mmuidx_ra_x86_64 -#define cpu_ldl_mmuidx_ra cpu_ldl_mmuidx_ra_x86_64 -#define cpu_ldq_mmuidx_ra cpu_ldq_mmuidx_ra_x86_64 +#define cpu_lduw_be_mmuidx_ra cpu_lduw_be_mmuidx_ra_x86_64 +#define cpu_lduw_le_mmuidx_ra cpu_lduw_le_mmuidx_ra_x86_64 +#define cpu_ldsw_be_mmuidx_ra cpu_ldsw_be_mmuidx_ra_x86_64 +#define cpu_ldsw_le_mmuidx_ra cpu_ldsw_le_mmuidx_ra_x86_64 +#define cpu_ldl_be_mmuidx_ra cpu_ldl_be_mmuidx_ra_x86_64 +#define cpu_ldl_le_mmuidx_ra cpu_ldl_le_mmuidx_ra_x86_64 +#define cpu_ldq_be_mmuidx_ra cpu_ldq_be_mmuidx_ra_x86_64 +#define cpu_ldq_le_mmuidx_ra cpu_ldq_le_mmuidx_ra_x86_64 #define cpu_ldub_data_ra cpu_ldub_data_ra_x86_64 #define cpu_ldsb_data_ra cpu_ldsb_data_ra_x86_64 -#define cpu_lduw_data_ra cpu_lduw_data_ra_x86_64 -#define cpu_ldsw_data_ra cpu_ldsw_data_ra_x86_64 -#define cpu_ldl_data_ra cpu_ldl_data_ra_x86_64 -#define cpu_ldq_data_ra cpu_ldq_data_ra_x86_64 +#define cpu_lduw_be_data_ra cpu_lduw_be_data_ra_x86_64 +#define cpu_lduw_le_data_ra cpu_lduw_le_data_ra_x86_64 +#define cpu_ldsw_be_data_ra cpu_ldsw_be_data_ra_x86_64 +#define cpu_ldsw_le_data_ra cpu_ldsw_le_data_ra_x86_64 +#define cpu_ldl_be_data_ra cpu_ldl_be_data_ra_x86_64 +#define cpu_ldl_le_data_ra cpu_ldl_le_data_ra_x86_64 +#define cpu_ldq_be_data_ra cpu_ldq_be_data_ra_x86_64 +#define cpu_ldq_le_data_ra cpu_ldq_le_data_ra_x86_64 #define cpu_ldub_data cpu_ldub_data_x86_64 #define cpu_ldsb_data cpu_ldsb_data_x86_64 -#define cpu_lduw_data cpu_lduw_data_x86_64 -#define cpu_ldsw_data cpu_ldsw_data_x86_64 -#define cpu_ldl_data cpu_ldl_data_x86_64 -#define cpu_ldq_data cpu_ldq_data_x86_64 +#define cpu_lduw_be_data cpu_lduw_be_data_x86_64 +#define cpu_lduw_le_data cpu_lduw_le_data_x86_64 +#define cpu_ldsw_be_data cpu_ldsw_be_data_x86_64 +#define cpu_ldsw_le_data cpu_ldsw_le_data_x86_64 +#define cpu_ldl_be_data cpu_ldl_be_data_x86_64 +#define cpu_ldl_le_data cpu_ldl_le_data_x86_64 +#define cpu_ldq_le_data cpu_ldq_le_data_x86_64 +#define cpu_ldq_be_data cpu_ldq_be_data_x86_64 #define helper_ret_stb_mmu helper_ret_stb_mmu_x86_64 #define helper_le_stw_mmu helper_le_stw_mmu_x86_64 #define helper_be_stw_mmu helper_be_stw_mmu_x86_64 @@ -834,17 +866,26 @@ #define helper_le_stq_mmu helper_le_stq_mmu_x86_64 #define helper_be_stq_mmu helper_be_stq_mmu_x86_64 #define cpu_stb_mmuidx_ra cpu_stb_mmuidx_ra_x86_64 -#define cpu_stw_mmuidx_ra cpu_stw_mmuidx_ra_x86_64 -#define cpu_stl_mmuidx_ra cpu_stl_mmuidx_ra_x86_64 -#define cpu_stq_mmuidx_ra cpu_stq_mmuidx_ra_x86_64 +#define 
cpu_stw_be_mmuidx_ra cpu_stw_be_mmuidx_ra_x86_64 +#define cpu_stw_le_mmuidx_ra cpu_stw_le_mmuidx_ra_x86_64 +#define cpu_stl_be_mmuidx_ra cpu_stl_be_mmuidx_ra_x86_64 +#define cpu_stl_le_mmuidx_ra cpu_stl_le_mmuidx_ra_x86_64 +#define cpu_stq_be_mmuidx_ra cpu_stq_be_mmuidx_ra_x86_64 +#define cpu_stq_le_mmuidx_ra cpu_stq_le_mmuidx_ra_x86_64 #define cpu_stb_data_ra cpu_stb_data_ra_x86_64 -#define cpu_stw_data_ra cpu_stw_data_ra_x86_64 -#define cpu_stl_data_ra cpu_stl_data_ra_x86_64 -#define cpu_stq_data_ra cpu_stq_data_ra_x86_64 +#define cpu_stw_be_data_ra cpu_stw_be_data_ra_x86_64 +#define cpu_stw_le_data_ra cpu_stw_le_data_ra_x86_64 +#define cpu_stl_be_data_ra cpu_stl_be_data_ra_x86_64 +#define cpu_stl_le_data_ra cpu_stl_le_data_ra_x86_64 +#define cpu_stq_be_data_ra cpu_stq_be_data_ra_x86_64 +#define cpu_stq_le_data_ra cpu_stq_le_data_ra_x86_64 #define cpu_stb_data cpu_stb_data_x86_64 -#define cpu_stw_data cpu_stw_data_x86_64 -#define cpu_stl_data cpu_stl_data_x86_64 -#define cpu_stq_data cpu_stq_data_x86_64 +#define cpu_stw_be_data cpu_stw_be_data_x86_64 +#define cpu_stw_le_data cpu_stw_le_data_x86_64 +#define cpu_stl_be_data cpu_stl_be_data_x86_64 +#define cpu_stl_le_data cpu_stl_le_data_x86_64 +#define cpu_stq_be_data cpu_stq_be_data_x86_64 +#define cpu_stq_le_data cpu_stq_le_data_x86_64 #define helper_atomic_cmpxchgb_mmu helper_atomic_cmpxchgb_mmu_x86_64 #define helper_atomic_xchgb_mmu helper_atomic_xchgb_mmu_x86_64 #define helper_atomic_fetch_addb_mmu helper_atomic_fetch_addb_mmu_x86_64 @@ -1101,6 +1142,7 @@ #define cpu_lduw_code cpu_lduw_code_x86_64 #define cpu_ldl_code cpu_ldl_code_x86_64 #define cpu_ldq_code cpu_ldq_code_x86_64 +#define cpu_interrupt_handler cpu_interrupt_handler_x86_64 #define helper_div_i32 helper_div_i32_x86_64 #define helper_rem_i32 helper_rem_i32_x86_64 #define helper_divu_i32 helper_divu_i32_x86_64 @@ -1185,6 +1227,10 @@ #define helper_gvec_sar16i helper_gvec_sar16i_x86_64 #define helper_gvec_sar32i helper_gvec_sar32i_x86_64 #define helper_gvec_sar64i helper_gvec_sar64i_x86_64 +#define helper_gvec_rotl8i helper_gvec_rotl8i_x86_64 +#define helper_gvec_rotl16i helper_gvec_rotl16i_x86_64 +#define helper_gvec_rotl32i helper_gvec_rotl32i_x86_64 +#define helper_gvec_rotl64i helper_gvec_rotl64i_x86_64 #define helper_gvec_shl8v helper_gvec_shl8v_x86_64 #define helper_gvec_shl16v helper_gvec_shl16v_x86_64 #define helper_gvec_shl32v helper_gvec_shl32v_x86_64 @@ -1197,6 +1243,14 @@ #define helper_gvec_sar16v helper_gvec_sar16v_x86_64 #define helper_gvec_sar32v helper_gvec_sar32v_x86_64 #define helper_gvec_sar64v helper_gvec_sar64v_x86_64 +#define helper_gvec_rotl8v helper_gvec_rotl8v_x86_64 +#define helper_gvec_rotl16v helper_gvec_rotl16v_x86_64 +#define helper_gvec_rotl32v helper_gvec_rotl32v_x86_64 +#define helper_gvec_rotl64v helper_gvec_rotl64v_x86_64 +#define helper_gvec_rotr8v helper_gvec_rotr8v_x86_64 +#define helper_gvec_rotr16v helper_gvec_rotr16v_x86_64 +#define helper_gvec_rotr32v helper_gvec_rotr32v_x86_64 +#define helper_gvec_rotr64v helper_gvec_rotr64v_x86_64 #define helper_gvec_eq8 helper_gvec_eq8_x86_64 #define helper_gvec_ne8 helper_gvec_ne8_x86_64 #define helper_gvec_lt8 helper_gvec_lt8_x86_64 @@ -1419,6 +1473,7 @@ #define helper_xgetbv helper_xgetbv_x86_64 #define helper_xsetbv helper_xsetbv_x86_64 #define update_mxcsr_status update_mxcsr_status_x86_64 +#define update_mxcsr_from_sse_status update_mxcsr_from_sse_status_x86_64 #define helper_ldmxcsr helper_ldmxcsr_x86_64 #define helper_enter_mmx helper_enter_mmx_x86_64 #define helper_emms 
helper_emms_x86_64 diff --git a/symbols.sh b/symbols.sh index 11866f9334..4424fb4319 100755 --- a/symbols.sh +++ b/symbols.sh @@ -42,7 +42,10 @@ tcg_gen_sar_i64 \ tcg_gen_shl_i64 \ tcg_gen_shr_i64 \ tcg_gen_st_i64 \ +tcg_gen_add_i64 \ +tcg_gen_sub_i64 \ tcg_gen_xor_i64 \ +tcg_gen_neg_i64 \ cpu_icount_to_ns \ cpu_is_stopped \ cpu_get_ticks \ @@ -374,6 +377,8 @@ floatx80_add \ floatx80_sub \ floatx80_mul \ floatx80_div \ +floatx80_modrem \ +floatx80_mod \ floatx80_rem \ floatx80_sqrt \ floatx80_eq \ @@ -648,6 +653,7 @@ tcg_gen_gvec_mov \ tcg_gen_gvec_dup_i32 \ tcg_gen_gvec_dup_i64 \ tcg_gen_gvec_dup_mem \ +tcg_gen_gvec_dup_imm \ tcg_gen_gvec_dup64i \ tcg_gen_gvec_dup32i \ tcg_gen_gvec_dup16i \ @@ -702,13 +708,20 @@ tcg_gen_vec_shr16i_i64 \ tcg_gen_gvec_shri \ tcg_gen_vec_sar8i_i64 \ tcg_gen_vec_sar16i_i64 \ +tcg_gen_vec_rotl8i_i64 \ +tcg_gen_vec_rotl16i_i64 \ tcg_gen_gvec_sari \ +tcg_gen_gvec_rotli \ +tcg_gen_gvec_rotri \ tcg_gen_gvec_shls \ tcg_gen_gvec_shrs \ tcg_gen_gvec_sars \ +tcg_gen_gvec_rotls \ tcg_gen_gvec_shlv \ tcg_gen_gvec_shrv \ tcg_gen_gvec_sarv \ +tcg_gen_gvec_rotlv \ +tcg_gen_gvec_rotrv \ tcg_gen_gvec_cmp \ tcg_gen_gvec_bitsel \ tcg_can_emit_vecop_list \ @@ -745,6 +758,8 @@ tcg_gen_abs_vec \ tcg_gen_shli_vec \ tcg_gen_shri_vec \ tcg_gen_sari_vec \ +tcg_gen_rotli_vec \ +tcg_gen_rotri_vec \ tcg_gen_cmp_vec \ tcg_gen_add_vec \ tcg_gen_sub_vec \ @@ -760,9 +775,12 @@ tcg_gen_umax_vec \ tcg_gen_shlv_vec \ tcg_gen_shrv_vec \ tcg_gen_sarv_vec \ +tcg_gen_rotlv_vec \ +tcg_gen_rotrv_vec \ tcg_gen_shls_vec \ tcg_gen_shrs_vec \ tcg_gen_sars_vec \ +tcg_gen_rotls_vec \ tcg_gen_bitsel_vec \ tcg_gen_cmpsel_vec \ tb_htable_lookup \ @@ -774,6 +792,7 @@ cpu_loop_exit \ cpu_loop_exit_restore \ cpu_loop_exit_atomic \ tlb_init \ +tlb_destroy \ tlb_flush_by_mmuidx \ tlb_flush \ tlb_flush_by_mmuidx_all_cpus \ @@ -794,6 +813,7 @@ tlb_set_page_with_attrs \ tlb_set_page \ get_page_addr_code_hostp \ get_page_addr_code \ +probe_access_flags \ probe_access \ tlb_vaddr_to_host \ helper_ret_ldub_mmu \ @@ -810,22 +830,34 @@ helper_le_ldsl_mmu \ helper_be_ldsl_mmu \ cpu_ldub_mmuidx_ra \ cpu_ldsb_mmuidx_ra \ -cpu_lduw_mmuidx_ra \ -cpu_ldsw_mmuidx_ra \ -cpu_ldl_mmuidx_ra \ -cpu_ldq_mmuidx_ra \ +cpu_lduw_be_mmuidx_ra \ +cpu_lduw_le_mmuidx_ra \ +cpu_ldsw_be_mmuidx_ra \ +cpu_ldsw_le_mmuidx_ra \ +cpu_ldl_be_mmuidx_ra \ +cpu_ldl_le_mmuidx_ra \ +cpu_ldq_be_mmuidx_ra \ +cpu_ldq_le_mmuidx_ra \ cpu_ldub_data_ra \ cpu_ldsb_data_ra \ -cpu_lduw_data_ra \ -cpu_ldsw_data_ra \ -cpu_ldl_data_ra \ -cpu_ldq_data_ra \ +cpu_lduw_be_data_ra \ +cpu_lduw_le_data_ra \ +cpu_ldsw_be_data_ra \ +cpu_ldsw_le_data_ra \ +cpu_ldl_be_data_ra \ +cpu_ldl_le_data_ra \ +cpu_ldq_be_data_ra \ +cpu_ldq_le_data_ra \ cpu_ldub_data \ cpu_ldsb_data \ -cpu_lduw_data \ -cpu_ldsw_data \ -cpu_ldl_data \ -cpu_ldq_data \ +cpu_lduw_be_data \ +cpu_lduw_le_data \ +cpu_ldsw_be_data \ +cpu_ldsw_le_data \ +cpu_ldl_be_data \ +cpu_ldl_le_data \ +cpu_ldq_le_data \ +cpu_ldq_be_data \ helper_ret_stb_mmu \ helper_le_stw_mmu \ helper_be_stw_mmu \ @@ -834,17 +866,26 @@ helper_be_stl_mmu \ helper_le_stq_mmu \ helper_be_stq_mmu \ cpu_stb_mmuidx_ra \ -cpu_stw_mmuidx_ra \ -cpu_stl_mmuidx_ra \ -cpu_stq_mmuidx_ra \ +cpu_stw_be_mmuidx_ra \ +cpu_stw_le_mmuidx_ra \ +cpu_stl_be_mmuidx_ra \ +cpu_stl_le_mmuidx_ra \ +cpu_stq_be_mmuidx_ra \ +cpu_stq_le_mmuidx_ra \ cpu_stb_data_ra \ -cpu_stw_data_ra \ -cpu_stl_data_ra \ -cpu_stq_data_ra \ +cpu_stw_be_data_ra \ +cpu_stw_le_data_ra \ +cpu_stl_be_data_ra \ +cpu_stl_le_data_ra \ +cpu_stq_be_data_ra \ +cpu_stq_le_data_ra \ cpu_stb_data \ 
-cpu_stw_data \ -cpu_stl_data \ -cpu_stq_data \ +cpu_stw_be_data \ +cpu_stw_le_data \ +cpu_stl_be_data \ +cpu_stl_le_data \ +cpu_stq_be_data \ +cpu_stq_le_data \ helper_atomic_cmpxchgb_mmu \ helper_atomic_xchgb_mmu \ helper_atomic_fetch_addb_mmu \ @@ -1101,6 +1142,7 @@ cpu_ldub_code \ cpu_lduw_code \ cpu_ldl_code \ cpu_ldq_code \ +cpu_interrupt_handler \ helper_div_i32 \ helper_rem_i32 \ helper_divu_i32 \ @@ -1185,6 +1227,10 @@ helper_gvec_sar8i \ helper_gvec_sar16i \ helper_gvec_sar32i \ helper_gvec_sar64i \ +helper_gvec_rotl8i \ +helper_gvec_rotl16i \ +helper_gvec_rotl32i \ +helper_gvec_rotl64i \ helper_gvec_shl8v \ helper_gvec_shl16v \ helper_gvec_shl32v \ @@ -1197,6 +1243,14 @@ helper_gvec_sar8v \ helper_gvec_sar16v \ helper_gvec_sar32v \ helper_gvec_sar64v \ +helper_gvec_rotl8v \ +helper_gvec_rotl16v \ +helper_gvec_rotl32v \ +helper_gvec_rotl64v \ +helper_gvec_rotr8v \ +helper_gvec_rotr16v \ +helper_gvec_rotr32v \ +helper_gvec_rotr64v \ helper_gvec_eq8 \ helper_gvec_ne8 \ helper_gvec_lt8 \ @@ -1422,6 +1476,7 @@ helper_xrstor \ helper_xgetbv \ helper_xsetbv \ update_mxcsr_status \ +update_mxcsr_from_sse_status \ helper_ldmxcsr \ helper_enter_mmx \ helper_emms \ @@ -2604,6 +2659,102 @@ cmtst_op \ sri_op \ usra_op \ ssra_op \ +gen_gvec_ceq0 \ +gen_gvec_cge0 \ +gen_gvec_cgt0 \ +gen_gvec_cle0 \ +gen_gvec_clt0 \ +gen_gvec_cmtst \ +gen_gvec_mla \ +gen_gvec_mls \ +gen_gvec_saba \ +gen_gvec_sabd \ +gen_gvec_sli \ +gen_gvec_sqadd_qc \ +gen_gvec_sqrdmlah_qc \ +gen_gvec_sqrdmlsh_qc \ +gen_gvec_sqsub_qc \ +gen_gvec_sri \ +gen_gvec_srshr \ +gen_gvec_srsra \ +gen_gvec_sshl \ +gen_gvec_ssra \ +gen_gvec_uaba \ +gen_gvec_uabd \ +gen_gvec_uqadd_qc \ +gen_gvec_uqsub_qc \ +gen_gvec_urshr \ +gen_gvec_ursra \ +gen_gvec_ushl \ +gen_gvec_usra \ +helper_crypto_rax1 \ +helper_crypto_sha1c \ +helper_crypto_sha1m \ +helper_crypto_sha1p \ +helper_crypto_sha1su0 \ +helper_crypto_sm3tt1a \ +helper_crypto_sm3tt1b \ +helper_crypto_sm3tt2a \ +helper_crypto_sm3tt2b \ +helper_gvec_ceq0_b \ +helper_gvec_ceq0_h \ +helper_gvec_cge0_b \ +helper_gvec_cge0_h \ +helper_gvec_cgt0_b \ +helper_gvec_cgt0_h \ +helper_gvec_cle0_b \ +helper_gvec_cle0_h \ +helper_gvec_clt0_b \ +helper_gvec_clt0_h \ +helper_gvec_fabd_s \ +helper_gvec_saba_b \ +helper_gvec_saba_d \ +helper_gvec_saba_h \ +helper_gvec_saba_s \ +helper_gvec_sabd_b \ +helper_gvec_sabd_d \ +helper_gvec_sabd_h \ +helper_gvec_sabd_s \ +helper_gvec_sli_b \ +helper_gvec_sli_d \ +helper_gvec_sli_h \ +helper_gvec_sli_s \ +helper_gvec_sri_b \ +helper_gvec_sri_d \ +helper_gvec_sri_h \ +helper_gvec_sri_s \ +helper_gvec_srshr_b \ +helper_gvec_srshr_d \ +helper_gvec_srshr_h \ +helper_gvec_srshr_s \ +helper_gvec_srsra_b \ +helper_gvec_srsra_d \ +helper_gvec_srsra_h \ +helper_gvec_srsra_s \ +helper_gvec_ssra_b \ +helper_gvec_ssra_d \ +helper_gvec_ssra_h \ +helper_gvec_ssra_s \ +helper_gvec_uaba_b \ +helper_gvec_uaba_d \ +helper_gvec_uaba_h \ +helper_gvec_uaba_s \ +helper_gvec_uabd_b \ +helper_gvec_uabd_d \ +helper_gvec_uabd_h \ +helper_gvec_uabd_s \ +helper_gvec_urshr_b \ +helper_gvec_urshr_d \ +helper_gvec_urshr_h \ +helper_gvec_urshr_s \ +helper_gvec_ursra_b \ +helper_gvec_ursra_d \ +helper_gvec_ursra_h \ +helper_gvec_ursra_s \ +helper_gvec_usra_b \ +helper_gvec_usra_d \ +helper_gvec_usra_h \ +helper_gvec_usra_s \ " aarch64_SYMBOLS=" @@ -2930,6 +3081,11 @@ helper_v7m_tt \ arm_v7m_mmu_idx_all \ arm_v7m_mmu_idx_for_secstate_and_priv \ arm_v7m_mmu_idx_for_secstate \ +mte_probe1 \ +mte_check1 \ +mte_checkN \ +gen_helper_mte_check1 \ +gen_helper_mte_checkN \ helper_neon_qadd_u8 \ 
helper_neon_qadd_u16 \ helper_neon_qadd_u32 \ @@ -3169,6 +3325,21 @@ helper_autda \ helper_autdb \ helper_xpaci \ helper_xpacd \ +helper_mte_check1 \ +helper_mte_checkN \ +helper_mte_check_zva \ +helper_irg \ +helper_addsubg \ +helper_ldg \ +helper_stg \ +helper_stg_parallel \ +helper_stg_stub \ +helper_st2g \ +helper_st2g_parallel \ +helper_st2g_stub \ +helper_ldgm \ +helper_stgm \ +helper_stzgm_tags \ arm_is_psci_call \ arm_handle_psci_call \ helper_sve_predtest1 \ @@ -4061,6 +4232,7 @@ a64_translate_init \ gen_a64_set_pc_im \ unallocated_encoding \ new_tmp_a64 \ +new_tmp_a64_local \ new_tmp_a64_zero \ cpu_reg \ cpu_reg_sp \ @@ -4381,6 +4553,7 @@ helper_sret \ helper_mret \ helper_wfi \ helper_tlb_flush \ +helper_hyp_tlb_flush \ pmp_hart_has_privs \ pmpcfg_csr_write \ pmpcfg_csr_read \ @@ -4401,6 +4574,1008 @@ helper_fcvt_d_lu \ gen_helper_tlb_flush \ riscv_fpr_regnames \ riscv_int_regnames \ +fclass_d \ +fclass_h \ +fclass_s \ +helper_vaadd_vv_b \ +helper_vaadd_vv_d \ +helper_vaadd_vv_h \ +helper_vaadd_vv_w \ +helper_vaadd_vx_b \ +helper_vaadd_vx_d \ +helper_vaadd_vx_h \ +helper_vaadd_vx_w \ +helper_vadc_vvm_b \ +helper_vadc_vvm_d \ +helper_vadc_vvm_h \ +helper_vadc_vvm_w \ +helper_vadc_vxm_b \ +helper_vadc_vxm_d \ +helper_vadc_vxm_h \ +helper_vadc_vxm_w \ +helper_vadd_vv_b \ +helper_vadd_vv_d \ +helper_vadd_vv_h \ +helper_vadd_vv_w \ +helper_vadd_vx_b \ +helper_vadd_vx_d \ +helper_vadd_vx_h \ +helper_vadd_vx_w \ +helper_vamoaddw_v_w \ +helper_vamoandw_v_w \ +helper_vamomaxuw_v_w \ +helper_vamomaxw_v_w \ +helper_vamominuw_v_w \ +helper_vamominw_v_w \ +helper_vamoorw_v_w \ +helper_vamoswapw_v_w \ +helper_vamoxorw_v_w \ +helper_vand_vv_b \ +helper_vand_vv_d \ +helper_vand_vv_h \ +helper_vand_vv_w \ +helper_vand_vx_b \ +helper_vand_vx_d \ +helper_vand_vx_h \ +helper_vand_vx_w \ +helper_vasub_vv_b \ +helper_vasub_vv_d \ +helper_vasub_vv_h \ +helper_vasub_vv_w \ +helper_vasub_vx_b \ +helper_vasub_vx_d \ +helper_vasub_vx_h \ +helper_vasub_vx_w \ +helper_vcompress_vm_b \ +helper_vcompress_vm_d \ +helper_vcompress_vm_h \ +helper_vcompress_vm_w \ +helper_vdiv_vv_b \ +helper_vdiv_vv_d \ +helper_vdiv_vv_h \ +helper_vdiv_vv_w \ +helper_vdiv_vx_b \ +helper_vdiv_vx_d \ +helper_vdiv_vx_h \ +helper_vdiv_vx_w \ +helper_vdivu_vv_b \ +helper_vdivu_vv_d \ +helper_vdivu_vv_h \ +helper_vdivu_vv_w \ +helper_vdivu_vx_b \ +helper_vdivu_vx_d \ +helper_vdivu_vx_h \ +helper_vdivu_vx_w \ +helper_vec_rsubs16 \ +helper_vec_rsubs32 \ +helper_vec_rsubs64 \ +helper_vec_rsubs8 \ +helper_vfadd_vf_d \ +helper_vfadd_vf_h \ +helper_vfadd_vf_w \ +helper_vfadd_vv_d \ +helper_vfadd_vv_h \ +helper_vfadd_vv_w \ +helper_vfclass_v_d \ +helper_vfclass_v_h \ +helper_vfclass_v_w \ +helper_vfcvt_f_x_v_d \ +helper_vfcvt_f_x_v_h \ +helper_vfcvt_f_x_v_w \ +helper_vfcvt_f_xu_v_d \ +helper_vfcvt_f_xu_v_h \ +helper_vfcvt_f_xu_v_w \ +helper_vfcvt_x_f_v_d \ +helper_vfcvt_x_f_v_h \ +helper_vfcvt_x_f_v_w \ +helper_vfcvt_xu_f_v_d \ +helper_vfcvt_xu_f_v_h \ +helper_vfcvt_xu_f_v_w \ +helper_vfdiv_vf_d \ +helper_vfdiv_vf_h \ +helper_vfdiv_vf_w \ +helper_vfdiv_vv_d \ +helper_vfdiv_vv_h \ +helper_vfdiv_vv_w \ +helper_vfmacc_vf_d \ +helper_vfmacc_vf_h \ +helper_vfmacc_vf_w \ +helper_vfmacc_vv_d \ +helper_vfmacc_vv_h \ +helper_vfmacc_vv_w \ +helper_vfmadd_vf_d \ +helper_vfmadd_vf_h \ +helper_vfmadd_vf_w \ +helper_vfmadd_vv_d \ +helper_vfmadd_vv_h \ +helper_vfmadd_vv_w \ +helper_vfmax_vf_d \ +helper_vfmax_vf_h \ +helper_vfmax_vf_w \ +helper_vfmax_vv_d \ +helper_vfmax_vv_h \ +helper_vfmax_vv_w \ +helper_vfmerge_vfm_d \ +helper_vfmerge_vfm_h \ 
+helper_vfmerge_vfm_w \ +helper_vfmin_vf_d \ +helper_vfmin_vf_h \ +helper_vfmin_vf_w \ +helper_vfmin_vv_d \ +helper_vfmin_vv_h \ +helper_vfmin_vv_w \ +helper_vfmsac_vf_d \ +helper_vfmsac_vf_h \ +helper_vfmsac_vf_w \ +helper_vfmsac_vv_d \ +helper_vfmsac_vv_h \ +helper_vfmsac_vv_w \ +helper_vfmsub_vf_d \ +helper_vfmsub_vf_h \ +helper_vfmsub_vf_w \ +helper_vfmsub_vv_d \ +helper_vfmsub_vv_h \ +helper_vfmsub_vv_w \ +helper_vfmul_vf_d \ +helper_vfmul_vf_h \ +helper_vfmul_vf_w \ +helper_vfmul_vv_d \ +helper_vfmul_vv_h \ +helper_vfmul_vv_w \ +helper_vfncvt_f_f_v_h \ +helper_vfncvt_f_f_v_w \ +helper_vfncvt_f_x_v_h \ +helper_vfncvt_f_x_v_w \ +helper_vfncvt_f_xu_v_h \ +helper_vfncvt_f_xu_v_w \ +helper_vfncvt_x_f_v_h \ +helper_vfncvt_x_f_v_w \ +helper_vfncvt_xu_f_v_h \ +helper_vfncvt_xu_f_v_w \ +helper_vfnmacc_vf_d \ +helper_vfnmacc_vf_h \ +helper_vfnmacc_vf_w \ +helper_vfnmacc_vv_d \ +helper_vfnmacc_vv_h \ +helper_vfnmacc_vv_w \ +helper_vfnmadd_vf_d \ +helper_vfnmadd_vf_h \ +helper_vfnmadd_vf_w \ +helper_vfnmadd_vv_d \ +helper_vfnmadd_vv_h \ +helper_vfnmadd_vv_w \ +helper_vfnmsac_vf_d \ +helper_vfnmsac_vf_h \ +helper_vfnmsac_vf_w \ +helper_vfnmsac_vv_d \ +helper_vfnmsac_vv_h \ +helper_vfnmsac_vv_w \ +helper_vfnmsub_vf_d \ +helper_vfnmsub_vf_h \ +helper_vfnmsub_vf_w \ +helper_vfnmsub_vv_d \ +helper_vfnmsub_vv_h \ +helper_vfnmsub_vv_w \ +helper_vfrdiv_vf_d \ +helper_vfrdiv_vf_h \ +helper_vfrdiv_vf_w \ +helper_vfredmax_vs_d \ +helper_vfredmax_vs_h \ +helper_vfredmax_vs_w \ +helper_vfredmin_vs_d \ +helper_vfredmin_vs_h \ +helper_vfredmin_vs_w \ +helper_vfredsum_vs_d \ +helper_vfredsum_vs_h \ +helper_vfredsum_vs_w \ +helper_vfrsub_vf_d \ +helper_vfrsub_vf_h \ +helper_vfrsub_vf_w \ +helper_vfsgnj_vf_d \ +helper_vfsgnj_vf_h \ +helper_vfsgnj_vf_w \ +helper_vfsgnj_vv_d \ +helper_vfsgnj_vv_h \ +helper_vfsgnj_vv_w \ +helper_vfsgnjn_vf_d \ +helper_vfsgnjn_vf_h \ +helper_vfsgnjn_vf_w \ +helper_vfsgnjn_vv_d \ +helper_vfsgnjn_vv_h \ +helper_vfsgnjn_vv_w \ +helper_vfsgnjx_vf_d \ +helper_vfsgnjx_vf_h \ +helper_vfsgnjx_vf_w \ +helper_vfsgnjx_vv_d \ +helper_vfsgnjx_vv_h \ +helper_vfsgnjx_vv_w \ +helper_vfsqrt_v_d \ +helper_vfsqrt_v_h \ +helper_vfsqrt_v_w \ +helper_vfsub_vf_d \ +helper_vfsub_vf_h \ +helper_vfsub_vf_w \ +helper_vfsub_vv_d \ +helper_vfsub_vv_h \ +helper_vfsub_vv_w \ +helper_vfwadd_vf_h \ +helper_vfwadd_vf_w \ +helper_vfwadd_vv_h \ +helper_vfwadd_vv_w \ +helper_vfwadd_wf_h \ +helper_vfwadd_wf_w \ +helper_vfwadd_wv_h \ +helper_vfwadd_wv_w \ +helper_vfwcvt_f_f_v_h \ +helper_vfwcvt_f_f_v_w \ +helper_vfwcvt_f_x_v_h \ +helper_vfwcvt_f_x_v_w \ +helper_vfwcvt_f_xu_v_h \ +helper_vfwcvt_f_xu_v_w \ +helper_vfwcvt_x_f_v_h \ +helper_vfwcvt_x_f_v_w \ +helper_vfwcvt_xu_f_v_h \ +helper_vfwcvt_xu_f_v_w \ +helper_vfwmacc_vf_h \ +helper_vfwmacc_vf_w \ +helper_vfwmacc_vv_h \ +helper_vfwmacc_vv_w \ +helper_vfwmsac_vf_h \ +helper_vfwmsac_vf_w \ +helper_vfwmsac_vv_h \ +helper_vfwmsac_vv_w \ +helper_vfwmul_vf_h \ +helper_vfwmul_vf_w \ +helper_vfwmul_vv_h \ +helper_vfwmul_vv_w \ +helper_vfwnmacc_vf_h \ +helper_vfwnmacc_vf_w \ +helper_vfwnmacc_vv_h \ +helper_vfwnmacc_vv_w \ +helper_vfwnmsac_vf_h \ +helper_vfwnmsac_vf_w \ +helper_vfwnmsac_vv_h \ +helper_vfwnmsac_vv_w \ +helper_vfwredsum_vs_h \ +helper_vfwredsum_vs_w \ +helper_vfwsub_vf_h \ +helper_vfwsub_vf_w \ +helper_vfwsub_vv_h \ +helper_vfwsub_vv_w \ +helper_vfwsub_wf_h \ +helper_vfwsub_wf_w \ +helper_vfwsub_wv_h \ +helper_vfwsub_wv_w \ +helper_vid_v_b \ +helper_vid_v_d \ +helper_vid_v_h \ +helper_vid_v_w \ +helper_viota_m_b \ +helper_viota_m_d \ +helper_viota_m_h \ 
+helper_viota_m_w \ +helper_vlb_v_b \ +helper_vlb_v_b_mask \ +helper_vlb_v_d \ +helper_vlb_v_d_mask \ +helper_vlb_v_h \ +helper_vlb_v_h_mask \ +helper_vlb_v_w \ +helper_vlb_v_w_mask \ +helper_vlbff_v_b \ +helper_vlbff_v_d \ +helper_vlbff_v_h \ +helper_vlbff_v_w \ +helper_vlbu_v_b \ +helper_vlbu_v_b_mask \ +helper_vlbu_v_d \ +helper_vlbu_v_d_mask \ +helper_vlbu_v_h \ +helper_vlbu_v_h_mask \ +helper_vlbu_v_w \ +helper_vlbu_v_w_mask \ +helper_vlbuff_v_b \ +helper_vlbuff_v_d \ +helper_vlbuff_v_h \ +helper_vlbuff_v_w \ +helper_vle_v_b \ +helper_vle_v_b_mask \ +helper_vle_v_d \ +helper_vle_v_d_mask \ +helper_vle_v_h \ +helper_vle_v_h_mask \ +helper_vle_v_w \ +helper_vle_v_w_mask \ +helper_vleff_v_b \ +helper_vleff_v_d \ +helper_vleff_v_h \ +helper_vleff_v_w \ +helper_vlh_v_d \ +helper_vlh_v_d_mask \ +helper_vlh_v_h \ +helper_vlh_v_h_mask \ +helper_vlh_v_w \ +helper_vlh_v_w_mask \ +helper_vlhff_v_d \ +helper_vlhff_v_h \ +helper_vlhff_v_w \ +helper_vlhu_v_d \ +helper_vlhu_v_d_mask \ +helper_vlhu_v_h \ +helper_vlhu_v_h_mask \ +helper_vlhu_v_w \ +helper_vlhu_v_w_mask \ +helper_vlhuff_v_d \ +helper_vlhuff_v_h \ +helper_vlhuff_v_w \ +helper_vlsb_v_b \ +helper_vlsb_v_d \ +helper_vlsb_v_h \ +helper_vlsb_v_w \ +helper_vlsbu_v_b \ +helper_vlsbu_v_d \ +helper_vlsbu_v_h \ +helper_vlsbu_v_w \ +helper_vlse_v_b \ +helper_vlse_v_d \ +helper_vlse_v_h \ +helper_vlse_v_w \ +helper_vlsh_v_d \ +helper_vlsh_v_h \ +helper_vlsh_v_w \ +helper_vlshu_v_d \ +helper_vlshu_v_h \ +helper_vlshu_v_w \ +helper_vlsw_v_d \ +helper_vlsw_v_w \ +helper_vlswu_v_d \ +helper_vlswu_v_w \ +helper_vlw_v_d \ +helper_vlw_v_d_mask \ +helper_vlw_v_w \ +helper_vlw_v_w_mask \ +helper_vlwff_v_d \ +helper_vlwff_v_w \ +helper_vlwu_v_d \ +helper_vlwu_v_d_mask \ +helper_vlwu_v_w \ +helper_vlwu_v_w_mask \ +helper_vlwuff_v_d \ +helper_vlwuff_v_w \ +helper_vlxb_v_b \ +helper_vlxb_v_d \ +helper_vlxb_v_h \ +helper_vlxb_v_w \ +helper_vlxbu_v_b \ +helper_vlxbu_v_d \ +helper_vlxbu_v_h \ +helper_vlxbu_v_w \ +helper_vlxe_v_b \ +helper_vlxe_v_d \ +helper_vlxe_v_h \ +helper_vlxe_v_w \ +helper_vlxh_v_d \ +helper_vlxh_v_h \ +helper_vlxh_v_w \ +helper_vlxhu_v_d \ +helper_vlxhu_v_h \ +helper_vlxhu_v_w \ +helper_vlxw_v_d \ +helper_vlxw_v_w \ +helper_vlxwu_v_d \ +helper_vlxwu_v_w \ +helper_vmacc_vv_b \ +helper_vmacc_vv_d \ +helper_vmacc_vv_h \ +helper_vmacc_vv_w \ +helper_vmacc_vx_b \ +helper_vmacc_vx_d \ +helper_vmacc_vx_h \ +helper_vmacc_vx_w \ +helper_vmadc_vvm_b \ +helper_vmadc_vvm_d \ +helper_vmadc_vvm_h \ +helper_vmadc_vvm_w \ +helper_vmadc_vxm_b \ +helper_vmadc_vxm_d \ +helper_vmadc_vxm_h \ +helper_vmadc_vxm_w \ +helper_vmadd_vv_b \ +helper_vmadd_vv_d \ +helper_vmadd_vv_h \ +helper_vmadd_vv_w \ +helper_vmadd_vx_b \ +helper_vmadd_vx_d \ +helper_vmadd_vx_h \ +helper_vmadd_vx_w \ +helper_vmand_mm \ +helper_vmandnot_mm \ +helper_vmax_vv_b \ +helper_vmax_vv_d \ +helper_vmax_vv_h \ +helper_vmax_vv_w \ +helper_vmax_vx_b \ +helper_vmax_vx_d \ +helper_vmax_vx_h \ +helper_vmax_vx_w \ +helper_vmaxu_vv_b \ +helper_vmaxu_vv_d \ +helper_vmaxu_vv_h \ +helper_vmaxu_vv_w \ +helper_vmaxu_vx_b \ +helper_vmaxu_vx_d \ +helper_vmaxu_vx_h \ +helper_vmaxu_vx_w \ +helper_vmerge_vvm_b \ +helper_vmerge_vvm_d \ +helper_vmerge_vvm_h \ +helper_vmerge_vvm_w \ +helper_vmerge_vxm_b \ +helper_vmerge_vxm_d \ +helper_vmerge_vxm_h \ +helper_vmerge_vxm_w \ +helper_vmfeq_vf_d \ +helper_vmfeq_vf_h \ +helper_vmfeq_vf_w \ +helper_vmfeq_vv_d \ +helper_vmfeq_vv_h \ +helper_vmfeq_vv_w \ +helper_vmfge_vf_d \ +helper_vmfge_vf_h \ +helper_vmfge_vf_w \ +helper_vmfgt_vf_d \ +helper_vmfgt_vf_h \ 
+helper_vmfgt_vf_w \ +helper_vmfirst_m \ +helper_vmfle_vf_d \ +helper_vmfle_vf_h \ +helper_vmfle_vf_w \ +helper_vmfle_vv_d \ +helper_vmfle_vv_h \ +helper_vmfle_vv_w \ +helper_vmflt_vf_d \ +helper_vmflt_vf_h \ +helper_vmflt_vf_w \ +helper_vmflt_vv_d \ +helper_vmflt_vv_h \ +helper_vmflt_vv_w \ +helper_vmfne_vf_d \ +helper_vmfne_vf_h \ +helper_vmfne_vf_w \ +helper_vmfne_vv_d \ +helper_vmfne_vv_h \ +helper_vmfne_vv_w \ +helper_vmford_vf_d \ +helper_vmford_vf_h \ +helper_vmford_vf_w \ +helper_vmford_vv_d \ +helper_vmford_vv_h \ +helper_vmford_vv_w \ +helper_vmin_vv_b \ +helper_vmin_vv_d \ +helper_vmin_vv_h \ +helper_vmin_vv_w \ +helper_vmin_vx_b \ +helper_vmin_vx_d \ +helper_vmin_vx_h \ +helper_vmin_vx_w \ +helper_vminu_vv_b \ +helper_vminu_vv_d \ +helper_vminu_vv_h \ +helper_vminu_vv_w \ +helper_vminu_vx_b \ +helper_vminu_vx_d \ +helper_vminu_vx_h \ +helper_vminu_vx_w \ +helper_vmnand_mm \ +helper_vmnor_mm \ +helper_vmor_mm \ +helper_vmornot_mm \ +helper_vmpopc_m \ +helper_vmsbc_vvm_b \ +helper_vmsbc_vvm_d \ +helper_vmsbc_vvm_h \ +helper_vmsbc_vvm_w \ +helper_vmsbc_vxm_b \ +helper_vmsbc_vxm_d \ +helper_vmsbc_vxm_h \ +helper_vmsbc_vxm_w \ +helper_vmsbf_m \ +helper_vmseq_vv_b \ +helper_vmseq_vv_d \ +helper_vmseq_vv_h \ +helper_vmseq_vv_w \ +helper_vmseq_vx_b \ +helper_vmseq_vx_d \ +helper_vmseq_vx_h \ +helper_vmseq_vx_w \ +helper_vmsgt_vx_b \ +helper_vmsgt_vx_d \ +helper_vmsgt_vx_h \ +helper_vmsgt_vx_w \ +helper_vmsgtu_vx_b \ +helper_vmsgtu_vx_d \ +helper_vmsgtu_vx_h \ +helper_vmsgtu_vx_w \ +helper_vmsif_m \ +helper_vmsle_vv_b \ +helper_vmsle_vv_d \ +helper_vmsle_vv_h \ +helper_vmsle_vv_w \ +helper_vmsle_vx_b \ +helper_vmsle_vx_d \ +helper_vmsle_vx_h \ +helper_vmsle_vx_w \ +helper_vmsleu_vv_b \ +helper_vmsleu_vv_d \ +helper_vmsleu_vv_h \ +helper_vmsleu_vv_w \ +helper_vmsleu_vx_b \ +helper_vmsleu_vx_d \ +helper_vmsleu_vx_h \ +helper_vmsleu_vx_w \ +helper_vmslt_vv_b \ +helper_vmslt_vv_d \ +helper_vmslt_vv_h \ +helper_vmslt_vv_w \ +helper_vmslt_vx_b \ +helper_vmslt_vx_d \ +helper_vmslt_vx_h \ +helper_vmslt_vx_w \ +helper_vmsltu_vv_b \ +helper_vmsltu_vv_d \ +helper_vmsltu_vv_h \ +helper_vmsltu_vv_w \ +helper_vmsltu_vx_b \ +helper_vmsltu_vx_d \ +helper_vmsltu_vx_h \ +helper_vmsltu_vx_w \ +helper_vmsne_vv_b \ +helper_vmsne_vv_d \ +helper_vmsne_vv_h \ +helper_vmsne_vv_w \ +helper_vmsne_vx_b \ +helper_vmsne_vx_d \ +helper_vmsne_vx_h \ +helper_vmsne_vx_w \ +helper_vmsof_m \ +helper_vmul_vv_b \ +helper_vmul_vv_d \ +helper_vmul_vv_h \ +helper_vmul_vv_w \ +helper_vmul_vx_b \ +helper_vmul_vx_d \ +helper_vmul_vx_h \ +helper_vmul_vx_w \ +helper_vmulh_vv_b \ +helper_vmulh_vv_d \ +helper_vmulh_vv_h \ +helper_vmulh_vv_w \ +helper_vmulh_vx_b \ +helper_vmulh_vx_d \ +helper_vmulh_vx_h \ +helper_vmulh_vx_w \ +helper_vmulhsu_vv_b \ +helper_vmulhsu_vv_d \ +helper_vmulhsu_vv_h \ +helper_vmulhsu_vv_w \ +helper_vmulhsu_vx_b \ +helper_vmulhsu_vx_d \ +helper_vmulhsu_vx_h \ +helper_vmulhsu_vx_w \ +helper_vmulhu_vv_b \ +helper_vmulhu_vv_d \ +helper_vmulhu_vv_h \ +helper_vmulhu_vv_w \ +helper_vmulhu_vx_b \ +helper_vmulhu_vx_d \ +helper_vmulhu_vx_h \ +helper_vmulhu_vx_w \ +helper_vmv_v_v_b \ +helper_vmv_v_v_d \ +helper_vmv_v_v_h \ +helper_vmv_v_v_w \ +helper_vmv_v_x_b \ +helper_vmv_v_x_d \ +helper_vmv_v_x_h \ +helper_vmv_v_x_w \ +helper_vmxnor_mm \ +helper_vmxor_mm \ +helper_vnclip_vv_b \ +helper_vnclip_vv_h \ +helper_vnclip_vv_w \ +helper_vnclip_vx_b \ +helper_vnclip_vx_h \ +helper_vnclip_vx_w \ +helper_vnclipu_vv_b \ +helper_vnclipu_vv_h \ +helper_vnclipu_vv_w \ +helper_vnclipu_vx_b \ +helper_vnclipu_vx_h \ 
+helper_vnclipu_vx_w \ +helper_vnmsac_vv_b \ +helper_vnmsac_vv_d \ +helper_vnmsac_vv_h \ +helper_vnmsac_vv_w \ +helper_vnmsac_vx_b \ +helper_vnmsac_vx_d \ +helper_vnmsac_vx_h \ +helper_vnmsac_vx_w \ +helper_vnmsub_vv_b \ +helper_vnmsub_vv_d \ +helper_vnmsub_vv_h \ +helper_vnmsub_vv_w \ +helper_vnmsub_vx_b \ +helper_vnmsub_vx_d \ +helper_vnmsub_vx_h \ +helper_vnmsub_vx_w \ +helper_vnsra_vv_b \ +helper_vnsra_vv_h \ +helper_vnsra_vv_w \ +helper_vnsra_vx_b \ +helper_vnsra_vx_h \ +helper_vnsra_vx_w \ +helper_vnsrl_vv_b \ +helper_vnsrl_vv_h \ +helper_vnsrl_vv_w \ +helper_vnsrl_vx_b \ +helper_vnsrl_vx_h \ +helper_vnsrl_vx_w \ +helper_vor_vv_b \ +helper_vor_vv_d \ +helper_vor_vv_h \ +helper_vor_vv_w \ +helper_vor_vx_b \ +helper_vor_vx_d \ +helper_vor_vx_h \ +helper_vor_vx_w \ +helper_vredand_vs_b \ +helper_vredand_vs_d \ +helper_vredand_vs_h \ +helper_vredand_vs_w \ +helper_vredmax_vs_b \ +helper_vredmax_vs_d \ +helper_vredmax_vs_h \ +helper_vredmax_vs_w \ +helper_vredmaxu_vs_b \ +helper_vredmaxu_vs_d \ +helper_vredmaxu_vs_h \ +helper_vredmaxu_vs_w \ +helper_vredmin_vs_b \ +helper_vredmin_vs_d \ +helper_vredmin_vs_h \ +helper_vredmin_vs_w \ +helper_vredminu_vs_b \ +helper_vredminu_vs_d \ +helper_vredminu_vs_h \ +helper_vredminu_vs_w \ +helper_vredor_vs_b \ +helper_vredor_vs_d \ +helper_vredor_vs_h \ +helper_vredor_vs_w \ +helper_vredsum_vs_b \ +helper_vredsum_vs_d \ +helper_vredsum_vs_h \ +helper_vredsum_vs_w \ +helper_vredxor_vs_b \ +helper_vredxor_vs_d \ +helper_vredxor_vs_h \ +helper_vredxor_vs_w \ +helper_vrem_vv_b \ +helper_vrem_vv_d \ +helper_vrem_vv_h \ +helper_vrem_vv_w \ +helper_vrem_vx_b \ +helper_vrem_vx_d \ +helper_vrem_vx_h \ +helper_vrem_vx_w \ +helper_vremu_vv_b \ +helper_vremu_vv_d \ +helper_vremu_vv_h \ +helper_vremu_vv_w \ +helper_vremu_vx_b \ +helper_vremu_vx_d \ +helper_vremu_vx_h \ +helper_vremu_vx_w \ +helper_vrgather_vv_b \ +helper_vrgather_vv_d \ +helper_vrgather_vv_h \ +helper_vrgather_vv_w \ +helper_vrgather_vx_b \ +helper_vrgather_vx_d \ +helper_vrgather_vx_h \ +helper_vrgather_vx_w \ +helper_vrsub_vx_b \ +helper_vrsub_vx_d \ +helper_vrsub_vx_h \ +helper_vrsub_vx_w \ +helper_vsadd_vv_b \ +helper_vsadd_vv_d \ +helper_vsadd_vv_h \ +helper_vsadd_vv_w \ +helper_vsadd_vx_b \ +helper_vsadd_vx_d \ +helper_vsadd_vx_h \ +helper_vsadd_vx_w \ +helper_vsaddu_vv_b \ +helper_vsaddu_vv_d \ +helper_vsaddu_vv_h \ +helper_vsaddu_vv_w \ +helper_vsaddu_vx_b \ +helper_vsaddu_vx_d \ +helper_vsaddu_vx_h \ +helper_vsaddu_vx_w \ +helper_vsb_v_b \ +helper_vsb_v_b_mask \ +helper_vsb_v_d \ +helper_vsb_v_d_mask \ +helper_vsb_v_h \ +helper_vsb_v_h_mask \ +helper_vsb_v_w \ +helper_vsb_v_w_mask \ +helper_vsbc_vvm_b \ +helper_vsbc_vvm_d \ +helper_vsbc_vvm_h \ +helper_vsbc_vvm_w \ +helper_vsbc_vxm_b \ +helper_vsbc_vxm_d \ +helper_vsbc_vxm_h \ +helper_vsbc_vxm_w \ +helper_vse_v_b \ +helper_vse_v_b_mask \ +helper_vse_v_d \ +helper_vse_v_d_mask \ +helper_vse_v_h \ +helper_vse_v_h_mask \ +helper_vse_v_w \ +helper_vse_v_w_mask \ +helper_vsetvl \ +helper_vsh_v_d \ +helper_vsh_v_d_mask \ +helper_vsh_v_h \ +helper_vsh_v_h_mask \ +helper_vsh_v_w \ +helper_vsh_v_w_mask \ +helper_vslide1down_vx_b \ +helper_vslide1down_vx_d \ +helper_vslide1down_vx_h \ +helper_vslide1down_vx_w \ +helper_vslide1up_vx_b \ +helper_vslide1up_vx_d \ +helper_vslide1up_vx_h \ +helper_vslide1up_vx_w \ +helper_vslidedown_vx_b \ +helper_vslidedown_vx_d \ +helper_vslidedown_vx_h \ +helper_vslidedown_vx_w \ +helper_vslideup_vx_b \ +helper_vslideup_vx_d \ +helper_vslideup_vx_h \ +helper_vslideup_vx_w \ +helper_vsll_vv_b \ +helper_vsll_vv_d \ 
+helper_vsll_vv_h \ +helper_vsll_vv_w \ +helper_vsll_vx_b \ +helper_vsll_vx_d \ +helper_vsll_vx_h \ +helper_vsll_vx_w \ +helper_vsmul_vv_b \ +helper_vsmul_vv_d \ +helper_vsmul_vv_h \ +helper_vsmul_vv_w \ +helper_vsmul_vx_b \ +helper_vsmul_vx_d \ +helper_vsmul_vx_h \ +helper_vsmul_vx_w \ +helper_vsra_vv_b \ +helper_vsra_vv_d \ +helper_vsra_vv_h \ +helper_vsra_vv_w \ +helper_vsra_vx_b \ +helper_vsra_vx_d \ +helper_vsra_vx_h \ +helper_vsra_vx_w \ +helper_vsrl_vv_b \ +helper_vsrl_vv_d \ +helper_vsrl_vv_h \ +helper_vsrl_vv_w \ +helper_vsrl_vx_b \ +helper_vsrl_vx_d \ +helper_vsrl_vx_h \ +helper_vsrl_vx_w \ +helper_vssb_v_b \ +helper_vssb_v_d \ +helper_vssb_v_h \ +helper_vssb_v_w \ +helper_vsse_v_b \ +helper_vsse_v_d \ +helper_vsse_v_h \ +helper_vsse_v_w \ +helper_vssh_v_d \ +helper_vssh_v_h \ +helper_vssh_v_w \ +helper_vssra_vv_b \ +helper_vssra_vv_d \ +helper_vssra_vv_h \ +helper_vssra_vv_w \ +helper_vssra_vx_b \ +helper_vssra_vx_d \ +helper_vssra_vx_h \ +helper_vssra_vx_w \ +helper_vssrl_vv_b \ +helper_vssrl_vv_d \ +helper_vssrl_vv_h \ +helper_vssrl_vv_w \ +helper_vssrl_vx_b \ +helper_vssrl_vx_d \ +helper_vssrl_vx_h \ +helper_vssrl_vx_w \ +helper_vssub_vv_b \ +helper_vssub_vv_d \ +helper_vssub_vv_h \ +helper_vssub_vv_w \ +helper_vssub_vx_b \ +helper_vssub_vx_d \ +helper_vssub_vx_h \ +helper_vssub_vx_w \ +helper_vssubu_vv_b \ +helper_vssubu_vv_d \ +helper_vssubu_vv_h \ +helper_vssubu_vv_w \ +helper_vssubu_vx_b \ +helper_vssubu_vx_d \ +helper_vssubu_vx_h \ +helper_vssubu_vx_w \ +helper_vssw_v_d \ +helper_vssw_v_w \ +helper_vsub_vv_b \ +helper_vsub_vv_d \ +helper_vsub_vv_h \ +helper_vsub_vv_w \ +helper_vsub_vx_b \ +helper_vsub_vx_d \ +helper_vsub_vx_h \ +helper_vsub_vx_w \ +helper_vsw_v_d \ +helper_vsw_v_d_mask \ +helper_vsw_v_w \ +helper_vsw_v_w_mask \ +helper_vsxb_v_b \ +helper_vsxb_v_d \ +helper_vsxb_v_h \ +helper_vsxb_v_w \ +helper_vsxe_v_b \ +helper_vsxe_v_d \ +helper_vsxe_v_h \ +helper_vsxe_v_w \ +helper_vsxh_v_d \ +helper_vsxh_v_h \ +helper_vsxh_v_w \ +helper_vsxw_v_d \ +helper_vsxw_v_w \ +helper_vwadd_vv_b \ +helper_vwadd_vv_h \ +helper_vwadd_vv_w \ +helper_vwadd_vx_b \ +helper_vwadd_vx_h \ +helper_vwadd_vx_w \ +helper_vwadd_wv_b \ +helper_vwadd_wv_h \ +helper_vwadd_wv_w \ +helper_vwadd_wx_b \ +helper_vwadd_wx_h \ +helper_vwadd_wx_w \ +helper_vwaddu_vv_b \ +helper_vwaddu_vv_h \ +helper_vwaddu_vv_w \ +helper_vwaddu_vx_b \ +helper_vwaddu_vx_h \ +helper_vwaddu_vx_w \ +helper_vwaddu_wv_b \ +helper_vwaddu_wv_h \ +helper_vwaddu_wv_w \ +helper_vwaddu_wx_b \ +helper_vwaddu_wx_h \ +helper_vwaddu_wx_w \ +helper_vwmacc_vv_b \ +helper_vwmacc_vv_h \ +helper_vwmacc_vv_w \ +helper_vwmacc_vx_b \ +helper_vwmacc_vx_h \ +helper_vwmacc_vx_w \ +helper_vwmaccsu_vv_b \ +helper_vwmaccsu_vv_h \ +helper_vwmaccsu_vv_w \ +helper_vwmaccsu_vx_b \ +helper_vwmaccsu_vx_h \ +helper_vwmaccsu_vx_w \ +helper_vwmaccu_vv_b \ +helper_vwmaccu_vv_h \ +helper_vwmaccu_vv_w \ +helper_vwmaccu_vx_b \ +helper_vwmaccu_vx_h \ +helper_vwmaccu_vx_w \ +helper_vwmaccus_vx_b \ +helper_vwmaccus_vx_h \ +helper_vwmaccus_vx_w \ +helper_vwmul_vv_b \ +helper_vwmul_vv_h \ +helper_vwmul_vv_w \ +helper_vwmul_vx_b \ +helper_vwmul_vx_h \ +helper_vwmul_vx_w \ +helper_vwmulsu_vv_b \ +helper_vwmulsu_vv_h \ +helper_vwmulsu_vv_w \ +helper_vwmulsu_vx_b \ +helper_vwmulsu_vx_h \ +helper_vwmulsu_vx_w \ +helper_vwmulu_vv_b \ +helper_vwmulu_vv_h \ +helper_vwmulu_vv_w \ +helper_vwmulu_vx_b \ +helper_vwmulu_vx_h \ +helper_vwmulu_vx_w \ +helper_vwredsum_vs_b \ +helper_vwredsum_vs_h \ +helper_vwredsum_vs_w \ +helper_vwredsumu_vs_b \ +helper_vwredsumu_vs_h \ 
+helper_vwredsumu_vs_w \ +helper_vwsmacc_vv_b \ +helper_vwsmacc_vv_h \ +helper_vwsmacc_vv_w \ +helper_vwsmacc_vx_b \ +helper_vwsmacc_vx_h \ +helper_vwsmacc_vx_w \ +helper_vwsmaccsu_vv_b \ +helper_vwsmaccsu_vv_h \ +helper_vwsmaccsu_vv_w \ +helper_vwsmaccsu_vx_b \ +helper_vwsmaccsu_vx_h \ +helper_vwsmaccsu_vx_w \ +helper_vwsmaccu_vv_b \ +helper_vwsmaccu_vv_h \ +helper_vwsmaccu_vv_w \ +helper_vwsmaccu_vx_b \ +helper_vwsmaccu_vx_h \ +helper_vwsmaccu_vx_w \ +helper_vwsmaccus_vx_b \ +helper_vwsmaccus_vx_h \ +helper_vwsmaccus_vx_w \ +helper_vwsub_vv_b \ +helper_vwsub_vv_h \ +helper_vwsub_vv_w \ +helper_vwsub_vx_b \ +helper_vwsub_vx_h \ +helper_vwsub_vx_w \ +helper_vwsub_wv_b \ +helper_vwsub_wv_h \ +helper_vwsub_wv_w \ +helper_vwsub_wx_b \ +helper_vwsub_wx_h \ +helper_vwsub_wx_w \ +helper_vwsubu_vv_b \ +helper_vwsubu_vv_h \ +helper_vwsubu_vv_w \ +helper_vwsubu_vx_b \ +helper_vwsubu_vx_h \ +helper_vwsubu_vx_w \ +helper_vwsubu_wv_b \ +helper_vwsubu_wv_h \ +helper_vwsubu_wv_w \ +helper_vwsubu_wx_b \ +helper_vwsubu_wx_h \ +helper_vwsubu_wx_w \ +helper_vxor_vv_b \ +helper_vxor_vv_d \ +helper_vxor_vv_h \ +helper_vxor_vv_w \ +helper_vxor_vx_b \ +helper_vxor_vx_d \ +helper_vxor_vx_h \ +helper_vxor_vx_w \ " riscv64_SYMBOLS=${riscv32_SYMBOLS} @@ -4791,7 +5966,6 @@ cpu_rddsp \ helper_rddsp \ helper_cfc1 \ helper_ctc1 \ -ieee_ex_to_mips \ helper_float_sqrt_d \ helper_float_sqrt_s \ helper_float_cvtd_s \ @@ -5346,23 +6520,59 @@ helper_msa_srari_df \ helper_msa_srlri_df \ helper_msa_binsli_df \ helper_msa_binsri_df \ -helper_msa_subv_df \ -helper_msa_subs_s_df \ -helper_msa_subs_u_df \ -helper_msa_subsus_u_df \ -helper_msa_subsuu_s_df \ -helper_msa_mulv_df \ -helper_msa_dotp_s_df \ -helper_msa_dotp_u_df \ +helper_msa_subv_b \ +helper_msa_subv_h \ +helper_msa_subv_w \ +helper_msa_subv_d \ +helper_msa_subs_s_b \ +helper_msa_subs_s_h \ +helper_msa_subs_s_w \ +helper_msa_subs_s_d \ +helper_msa_subs_u_b \ +helper_msa_subs_u_h \ +helper_msa_subs_u_w \ +helper_msa_subs_u_d \ +helper_msa_subsus_u_b \ +helper_msa_subsus_u_h \ +helper_msa_subsus_u_w \ +helper_msa_subsus_u_d \ +helper_msa_subsuu_s_b \ +helper_msa_subsuu_s_h \ +helper_msa_subsuu_s_w \ +helper_msa_subsuu_s_d \ +helper_msa_mulv_b \ +helper_msa_mulv_h \ +helper_msa_mulv_w \ +helper_msa_mulv_d \ +helper_msa_dotp_s_h \ +helper_msa_dotp_s_w \ +helper_msa_dotp_s_d \ +helper_msa_dotp_u_h \ +helper_msa_dotp_u_w \ +helper_msa_dotp_u_d \ helper_msa_mul_q_df \ helper_msa_mulr_q_df \ helper_msa_sld_df \ -helper_msa_maddv_df \ -helper_msa_msubv_df \ -helper_msa_dpadd_s_df \ -helper_msa_dpadd_u_df \ -helper_msa_dpsub_s_df \ -helper_msa_dpsub_u_df \ +helper_msa_maddv_b \ +helper_msa_maddv_h \ +helper_msa_maddv_w \ +helper_msa_maddv_d \ +helper_msa_msubv_b \ +helper_msa_msubv_h \ +helper_msa_msubv_w \ +helper_msa_msubv_d \ +helper_msa_dpadd_s_h \ +helper_msa_dpadd_s_w \ +helper_msa_dpadd_s_d \ +helper_msa_dpadd_u_h \ +helper_msa_dpadd_u_w \ +helper_msa_dpadd_u_d \ +helper_msa_dpsub_s_h \ +helper_msa_dpsub_s_w \ +helper_msa_dpsub_s_d \ +helper_msa_dpsub_u_h \ +helper_msa_dpsub_u_w \ +helper_msa_dpsub_u_d \ helper_msa_binsl_df \ helper_msa_binsr_df \ helper_msa_madd_q_df \ @@ -5797,7 +7007,6 @@ helper_bfffo_reg \ helper_bfffo_mem \ helper_chk \ helper_chk2 \ -floatx80_mod \ floatx80_getman \ floatx80_getexp \ floatx80_scale \ @@ -6110,6 +7319,33 @@ helper_stvewx \ helper_tbegin \ helper_load_dump_spr \ helper_store_dump_spr \ +store_fpscr \ +helper_store_fpscr \ +helper_float_check_status \ +helper_reset_fpstatus \ +helper_fadd \ +helper_fsub \ +helper_fmul \ +helper_fdiv \ 
+helper_fctiw \ +helper_fctiwz \ +helper_fctiwuz \ +helper_fctid \ +helper_fctidz \ +helper_fctidu \ +helper_fctiduz \ +helper_fcfid \ +helper_fcfids \ +helper_fcfidu \ +helper_fcfidus \ +helper_frin \ +helper_friz \ +helper_frip \ +helper_frim \ +helper_fmadd \ +helper_fnmadd \ +helper_fmsub \ +helper_fnmsub \ helper_hfscr_facility_check \ helper_fscr_facility_check \ helper_msr_facility_check \ @@ -6261,8 +7497,245 @@ ppc_booke_timers_init \ ppc_hash32_handle_mmu_fault \ gen_helper_store_booke_tsr \ gen_helper_store_booke_tcr \ +gen_helper_store_fpscr \ store_booke_tcr \ ppc_hash32_get_phys_page_debug \ +helper_compute_fprf_float128 \ +helper_compute_fprf_float16 \ +helper_compute_fprf_float32 \ +helper_compute_fprf_float64 \ +helper_efdadd \ +helper_efdcfs \ +helper_efdcfsf \ +helper_efdcfsi \ +helper_efdcfsid \ +helper_efdcfuf \ +helper_efdcfui \ +helper_efdcfuid \ +helper_efdcmpeq \ +helper_efdcmpgt \ +helper_efdcmplt \ +helper_efdctsf \ +helper_efdctsi \ +helper_efdctsidz \ +helper_efdctsiz \ +helper_efdctuf \ +helper_efdctui \ +helper_efdctuidz \ +helper_efdctuiz \ +helper_efddiv \ +helper_efdmul \ +helper_efdsub \ +helper_efdtsteq \ +helper_efdtstgt \ +helper_efdtstlt \ +helper_efsadd \ +helper_efscfd \ +helper_efscfsf \ +helper_efscfsi \ +helper_efscfuf \ +helper_efscfui \ +helper_efscmpeq \ +helper_efscmpgt \ +helper_efscmplt \ +helper_efsctsf \ +helper_efsctsi \ +helper_efsctsiz \ +helper_efsctuf \ +helper_efsctui \ +helper_efsctuiz \ +helper_efsdiv \ +helper_efsmul \ +helper_efssub \ +helper_efststeq \ +helper_efststgt \ +helper_efststlt \ +helper_evfsadd \ +helper_evfscfsf \ +helper_evfscfsi \ +helper_evfscfuf \ +helper_evfscfui \ +helper_evfscmpeq \ +helper_evfscmpgt \ +helper_evfscmplt \ +helper_evfsctsf \ +helper_evfsctsi \ +helper_evfsctsiz \ +helper_evfsctuf \ +helper_evfsctui \ +helper_evfsctuiz \ +helper_evfsdiv \ +helper_evfsmul \ +helper_evfssub \ +helper_evfststeq \ +helper_evfststgt \ +helper_evfststlt \ +helper_fcmpo \ +helper_fcmpu \ +helper_fctiwu \ +helper_fpscr_clrbit \ +helper_fpscr_setbit \ +helper_fre \ +helper_fres \ +helper_frsp \ +helper_frsqrte \ +helper_fsel \ +helper_fsqrt \ +helper_ftdiv \ +helper_ftsqrt \ +helper_todouble \ +helper_tosingle \ +helper_xsadddp \ +helper_xsaddqp \ +helper_xsaddsp \ +helper_xscmpeqdp \ +helper_xscmpexpdp \ +helper_xscmpexpqp \ +helper_xscmpgedp \ +helper_xscmpgtdp \ +helper_xscmpnedp \ +helper_xscmpodp \ +helper_xscmpoqp \ +helper_xscmpudp \ +helper_xscmpuqp \ +helper_xscvdphp \ +helper_xscvdpqp \ +helper_xscvdpsp \ +helper_xscvdpspn \ +helper_xscvdpsxds \ +helper_xscvdpsxws \ +helper_xscvdpuxds \ +helper_xscvdpuxws \ +helper_xscvhpdp \ +helper_xscvqpdp \ +helper_xscvqpsdz \ +helper_xscvqpswz \ +helper_xscvqpudz \ +helper_xscvqpuwz \ +helper_xscvsdqp \ +helper_xscvspdp \ +helper_xscvspdpn \ +helper_xscvsxddp \ +helper_xscvsxdsp \ +helper_xscvudqp \ +helper_xscvuxddp \ +helper_xscvuxdsp \ +helper_xsdivdp \ +helper_xsdivqp \ +helper_xsdivsp \ +helper_xsmadddp \ +helper_xsmaddsp \ +helper_xsmaxcdp \ +helper_xsmaxdp \ +helper_xsmaxjdp \ +helper_xsmincdp \ +helper_xsmindp \ +helper_xsminjdp \ +helper_xsmsubdp \ +helper_xsmsubsp \ +helper_xsmuldp \ +helper_xsmulqp \ +helper_xsmulsp \ +helper_xsnmadddp \ +helper_xsnmaddsp \ +helper_xsnmsubdp \ +helper_xsnmsubsp \ +helper_xsrdpi \ +helper_xsrdpic \ +helper_xsrdpim \ +helper_xsrdpip \ +helper_xsrdpiz \ +helper_xsredp \ +helper_xsresp \ +helper_xsrqpi \ +helper_xsrqpxp \ +helper_xsrsp \ +helper_xsrsqrtedp \ +helper_xsrsqrtesp \ +helper_xssqrtdp \ +helper_xssqrtqp \ 
+helper_xssqrtsp \
+helper_xssubdp \
+helper_xssubqp \
+helper_xssubsp \
+helper_xstdivdp \
+helper_xstsqrtdp \
+helper_xststdcdp \
+helper_xststdcqp \
+helper_xststdcsp \
+helper_xvadddp \
+helper_xvaddsp \
+helper_xvcmpeqdp \
+helper_xvcmpeqsp \
+helper_xvcmpgedp \
+helper_xvcmpgesp \
+helper_xvcmpgtdp \
+helper_xvcmpgtsp \
+helper_xvcmpnedp \
+helper_xvcmpnesp \
+helper_xvcvdpsp \
+helper_xvcvdpsxds \
+helper_xvcvdpsxws \
+helper_xvcvdpuxds \
+helper_xvcvdpuxws \
+helper_xvcvhpsp \
+helper_xvcvspdp \
+helper_xvcvsphp \
+helper_xvcvspsxds \
+helper_xvcvspsxws \
+helper_xvcvspuxds \
+helper_xvcvspuxws \
+helper_xvcvsxddp \
+helper_xvcvsxdsp \
+helper_xvcvsxwdp \
+helper_xvcvsxwsp \
+helper_xvcvuxddp \
+helper_xvcvuxdsp \
+helper_xvcvuxwdp \
+helper_xvcvuxwsp \
+helper_xvdivdp \
+helper_xvdivsp \
+helper_xvmadddp \
+helper_xvmaddsp \
+helper_xvmaxdp \
+helper_xvmaxsp \
+helper_xvmindp \
+helper_xvminsp \
+helper_xvmsubdp \
+helper_xvmsubsp \
+helper_xvmuldp \
+helper_xvmulsp \
+helper_xvnmadddp \
+helper_xvnmaddsp \
+helper_xvnmsubdp \
+helper_xvnmsubsp \
+helper_xvrdpi \
+helper_xvrdpic \
+helper_xvrdpim \
+helper_xvrdpip \
+helper_xvrdpiz \
+helper_xvredp \
+helper_xvresp \
+helper_xvrspi \
+helper_xvrspic \
+helper_xvrspim \
+helper_xvrspip \
+helper_xvrspiz \
+helper_xvrsqrtedp \
+helper_xvrsqrtesp \
+helper_xvsqrtdp \
+helper_xvsqrtsp \
+helper_xvsubdp \
+helper_xvsubsp \
+helper_xvtdivdp \
+helper_xvtdivsp \
+helper_xvtsqrtdp \
+helper_xvtsqrtsp \
+helper_xvtstdcdp \
+helper_xvtstdcsp \
+helper_xvxsigsp \
+helper_xxperm \
+helper_xxpermr \
 "
 ppc64_SYMBOLS=${ppc_SYMBOLS}
@@ -6290,26 +7763,26 @@ ARCHS="x86_64 arm aarch64 riscv32 riscv64 mips mipsel mips64 mips64el sparc spar
 for arch in $ARCHS; do
-echo "Generating header for $arch"
-echo "/* Autogen header for Unicorn Engine - DONOT MODIFY */" > $SOURCE_DIR/qemu/$arch.h
-echo "#ifndef UNICORN_AUTOGEN_${arch}_H" >> $SOURCE_DIR/qemu/$arch.h
-echo "#define UNICORN_AUTOGEN_${arch}_H" >> $SOURCE_DIR/qemu/$arch.h
-echo "#ifndef UNICORN_ARCH_POSTFIX" >> $SOURCE_DIR/qemu/$arch.h
-echo "#define UNICORN_ARCH_POSTFIX _$arch" >> $SOURCE_DIR/qemu/$arch.h
-echo "#endif" >> $SOURCE_DIR/qemu/$arch.h
+ echo "Generating header for $arch"
+ echo "/* Autogen header for Unicorn Engine - DONOT MODIFY */" >$SOURCE_DIR/qemu/$arch.h
+ echo "#ifndef UNICORN_AUTOGEN_${arch}_H" >>$SOURCE_DIR/qemu/$arch.h
+ echo "#define UNICORN_AUTOGEN_${arch}_H" >>$SOURCE_DIR/qemu/$arch.h
+ echo "#ifndef UNICORN_ARCH_POSTFIX" >>$SOURCE_DIR/qemu/$arch.h
+ echo "#define UNICORN_ARCH_POSTFIX _$arch" >>$SOURCE_DIR/qemu/$arch.h
+ echo "#endif" >>$SOURCE_DIR/qemu/$arch.h
-for loop in $COMMON_SYMBOLS; do
- echo "#define $loop ${loop}_${arch}" >> $SOURCE_DIR/qemu/$arch.h
-done
+ for loop in $COMMON_SYMBOLS; do
+ echo "#define $loop ${loop}_${arch}" >>$SOURCE_DIR/qemu/$arch.h
+ done
-ARCH_SYMBOLS=$(eval echo '$'"${arch}_SYMBOLS")
+ ARCH_SYMBOLS=$(eval echo '$'"${arch}_SYMBOLS")
-#echo ${ARCH_SYMBOLS}
+ #echo ${ARCH_SYMBOLS}
-for loop in $ARCH_SYMBOLS; do
- echo "#define $loop ${loop}_${arch}" >> $SOURCE_DIR/qemu/$arch.h
-done
+ for loop in $ARCH_SYMBOLS; do
+ echo "#define $loop ${loop}_${arch}" >>$SOURCE_DIR/qemu/$arch.h
+ done
-echo "#endif" >> $SOURCE_DIR/qemu/$arch.h
+ echo "#endif" >>$SOURCE_DIR/qemu/$arch.h
 done
diff --git a/uc.c b/uc.c
index e39aadccb5..cfeb6bc1c2 100644
--- a/uc.c
+++ b/uc.c
@@ -11,7 +11,6 @@
 #include
 #endif
-#include // nanosleep
 #include
 #include "uc_priv.h"