diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 1b61dd0c1b..be85a2dd70 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -19,5 +19,5 @@ ENV CLANG_PATH="/llvm/bin/clang" ENV GCC_PATH=aarch64-linux-gnu-gcc ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ - CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \ + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max,sve512=on -L /usr/aarch64-linux-gnu" \ OBJDUMP=aarch64-linux-gnu-objdump diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index 8de7a4cfa5..3966aa88d8 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -45,21 +45,25 @@ case ${TARGET} in aarch64_be*) export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" ARCH=aarch64_be + RUNTIME_RUSTFLAGS= ;; aarch64*) export CFLAGS="-I/usr/aarch64-linux-gnu/include/" ARCH=aarch64 + RUNTIME_RUSTFLAGS=-Ctarget-feature=+sve,+sve2 ;; armv7*) export CFLAGS="-I/usr/arm-linux-gnueabihf/include/" ARCH=arm + RUNTIME_RUSTFLAGS= ;; x86_64*) export CFLAGS="-I/usr/include/x86_64-linux-gnu/" ARCH=x86 + RUNTIME_RUSTFLAGS= ;; *) ;; @@ -86,5 +90,5 @@ case "${TARGET}" in ;; esac -cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}" \ - --tests "$@" +RUSTFLAGS="${RUNTIME_RUSTFLAGS}" cargo test --manifest-path=rust_programs/Cargo.toml \ + --target "${TARGET}" --profile "${PROFILE}" --tests --no-fail-fast "$@" diff --git a/crates/core_arch/src/aarch64/sve/generated.rs b/crates/core_arch/src/aarch64/sve/generated.rs index ac3070918a..4e5547b1d0 100644 --- a/crates/core_arch/src/aarch64/sve/generated.rs +++ b/crates/core_arch/src/aarch64/sve/generated.rs @@ -42211,19 +42211,6 @@ pub fn svtmad_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloa unsafe { _svtmad_f64(op1, op2, IMM3) } } #[doc = "Interleave even elements from two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(trn1))] -pub fn svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv16i1")] - fn _svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svtrn1_b8(op1, op2) } -} -#[doc = "Interleave even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -42231,10 +42218,10 @@ pub fn svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn1))] pub fn svtrn1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv8i1")] - fn _svtrn1_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.b16")] + fn _svtrn1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn1_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn1_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b32)"] @@ -42244,10 +42231,10 @@ pub fn svtrn1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn1))] pub fn svtrn1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv4i1")] - fn _svtrn1_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.b32")] + fn _svtrn1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn1_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn1_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b64)"] @@ -42257,10 +42244,10 @@ pub fn svtrn1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn1))] pub fn svtrn1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv2i1")] - fn _svtrn1_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.b64")] + fn _svtrn1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn1_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn1_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_f32])"] @@ -42376,6 +42363,19 @@ pub fn svtrn1_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svtrn1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svtrn1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Interleave even elements from two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(trn1))] +pub fn svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv16i1")] + fn _svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svtrn1_b8(op1, op2) } +} #[doc = "Interleave even quadwords from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_f32])"] #[inline] @@ -42491,19 +42491,6 @@ pub fn svtrn1q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svtrn1q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } #[doc = "Interleave odd elements from two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(trn2))] -pub fn svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv16i1")] - fn _svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svtrn2_b8(op1, op2) } -} -#[doc = "Interleave odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -42511,10 +42498,10 @@ pub fn svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn2))] pub fn svtrn2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv8i1")] - fn _svtrn2_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.b16")] + fn _svtrn2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn2_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn2_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b32)"] @@ -42524,10 +42511,10 @@ pub fn svtrn2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn2))] pub fn svtrn2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv4i1")] - fn _svtrn2_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.b32")] + fn _svtrn2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn2_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn2_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b64)"] @@ -42537,10 +42524,10 @@ pub fn svtrn2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn2))] pub fn svtrn2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv2i1")] - fn _svtrn2_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.b64")] + fn _svtrn2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn2_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn2_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_f32])"] @@ -42656,6 +42643,19 @@ pub fn svtrn2_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svtrn2_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svtrn2_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Interleave odd elements from two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(trn2))] +pub fn svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv16i1")] + fn _svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svtrn2_b8(op1, op2) } +} #[doc = "Interleave odd quadwords from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_f32])"] #[inline] diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index 26f861ca64..013c20f2db 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -58,22 +58,14 @@ struct JsonIntrinsic { _instructions: Option>>, } -pub fn get_neon_intrinsics( - filename: &Path, -) -> Result>, Box> { +pub fn get_intrinsics(filename: &Path) -> Result>, Box> { let file = std::fs::File::open(filename)?; let reader = std::io::BufReader::new(file); let json: Vec = serde_json::from_reader(reader).expect("Couldn't parse JSON"); let parsed = json .into_iter() - .filter_map(|intr| { - if intr.simd_isa == "Neon" { - Some(json_to_intrinsic(intr).expect("Couldn't parse JSON")) - } else { - None - } - }) + .map(|intr| json_to_intrinsic(intr).expect("Couldn't parse JSON")) .collect(); Ok(parsed) } @@ -121,8 +113,14 @@ fn json_to_intrinsic( } }); - let mut arg = - Argument::::new(i, String::from(arg_name), ArmType(arg_ty), constraint); + let is_predicate = arg_name == "pg"; + let mut arg = Argument::::new( + i, + String::from(arg_name), + ArmType(arg_ty), + constraint, + is_predicate, + ); // The JSON doesn't list immediates as const let IntrinsicType { diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index a60e0ff155..689fe60957 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -2,12 +2,14 @@ mod intrinsic; mod json_parser; mod types; -use crate::common::SupportedArchitecture; +use crate::common::argument::Argument; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; -use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; +use crate::common::intrinsic_helpers::{SimdLen, TypeDefinition, TypeKind}; +use crate::common::values::test_values_array_name; +use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; use intrinsic::ArmType; -use json_parser::get_neon_intrinsics; +use json_parser::get_intrinsics; #[derive(PartialEq)] pub struct Arm(Vec>); @@ -29,12 +31,21 @@ impl SupportedArchitecture for Arm { #include #include #include +#ifdef __ARM_FEATURE_SVE +#include +#endif "#; const RUST_PRELUDE: &str = RUST_PRELUDE; fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { // GCC uses an extra `-` in the arch name + let big_endian = cli_options.target.starts_with("aarch64_be"); + let a32 = cli_options.target.starts_with("armv7"); match cli_options.cc_arg_style { + CcArgStyle::Clang if !a32 && !big_endian => vec![ + "-march=armv8.6a+crypto+crc+dotprod+fp16+sve2-aes+sve2-sm4+sve2-sha3+sve2-bitperm+\ + f32mm+f64mm+sve2p1", + ], CcArgStyle::Clang => vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"], // SVE tests aren't run under GCC so there are no target features added for SVE CcArgStyle::Gcc => vec!["-march=armv8.6-a+crypto+crc+dotprod+fp16+sha3+sm4"], @@ -45,7 +56,7 @@ impl SupportedArchitecture for Arm { let big_endian = cli_options.target.starts_with("aarch64_be"); let a32 = cli_options.target.starts_with("armv7"); let mut intrinsics = - get_neon_intrinsics(&cli_options.filename).expect("Error parsing input file"); + get_intrinsics(&cli_options.filename).expect("Error parsing input file"); intrinsics.sort_by(|a, b| a.name.cmp(&b.name)); intrinsics.dedup(); @@ -69,13 +80,13 @@ impl SupportedArchitecture for Arm { let has_sve_arg = i .arguments .iter() - .any(|a| a.ty.simd_len == Some(SimdLen::Scalable)); + .any(|a| a.ty.num_lanes() == SimdLen::Scalable); !(has_f16_arg && has_sve_arg) }) .filter(|i| { let has_f16_ret = i.results.kind() == TypeKind::Float && i.results.bit_len == Some(16); - let has_sve_ret = i.results.simd_len == Some(SimdLen::Scalable); + let has_sve_ret = i.results.num_lanes() == SimdLen::Scalable; !(has_f16_ret && has_sve_ret) }) // Skip `svqcvtn{u,}n*_x2` intrinsics - not yet implemented! @@ -129,6 +140,35 @@ impl SupportedArchitecture for Arm { Self(intrinsics) } + + fn predicate_function(size: u32) -> String { + format!("svptrue_b{size}()") + } + + fn load_call(arg: &Argument, idx: usize) -> String { + let name = arg.generate_name(); + let load = arg.ty.load_function(); + let ptr = format!( + "{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _", + vals_name = test_values_array_name(&arg.ty) + ); + + match arg.ty.num_lanes() { + // If the load is of a `svbool_t`, then we load a `svint8_t` and + SimdLen::Scalable if matches!(arg.ty.kind(), TypeKind::Bool) => { + format!( + r#" +let {name} = {load}({PREDICATE_LOCAL}, {ptr}); +let {name} = svcmpne_n_s8({PREDICATE_LOCAL}, {name}, 0); + "# + ) + } + // If this load is of a scalable vector, then prepend an additional argument + // containing the predicate for the load. + SimdLen::Scalable => format!("let {name} = {load}({PREDICATE_LOCAL}, {ptr});"), + SimdLen::Fixed(..) => format!("let {name} = {load}({ptr});"), + } + } } const RUST_PRELUDE: &str = r#" @@ -140,6 +180,7 @@ const RUST_PRELUDE: &str = r#" #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_aarch64_sve))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] #![feature(stdarch_neon_f16)] @@ -148,4 +189,116 @@ use core_arch::arch::aarch64::*; #[cfg(target_arch = "arm")] use core_arch::arch::arm::*; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +const fn svpattern_from_i32(value: i32) -> svpattern { + match value { + 0 => svpattern::SV_POW2, + 1 => svpattern::SV_VL1, + 2 => svpattern::SV_VL2, + 3 => svpattern::SV_VL3, + 4 => svpattern::SV_VL4, + 5 => svpattern::SV_VL5, + 6 => svpattern::SV_VL6, + 7 => svpattern::SV_VL7, + 8 => svpattern::SV_VL8, + 9 => svpattern::SV_VL16, + 10 => svpattern::SV_VL32, + 11 => svpattern::SV_VL64, + 12 => svpattern::SV_VL128, + 13 => svpattern::SV_VL256, + 29 => svpattern::SV_MUL4, + 30 => svpattern::SV_MUL3, + 31 => svpattern::SV_ALL, + _ => unreachable!(), + } +} + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +const fn svprfop_from_i32(value: i32) -> svprfop { + match value { + 0 => svprfop::SV_PLDL1KEEP, + 1 => svprfop::SV_PLDL1STRM, + 2 => svprfop::SV_PLDL2KEEP, + 3 => svprfop::SV_PLDL2STRM, + 4 => svprfop::SV_PLDL3KEEP, + 5 => svprfop::SV_PLDL3STRM, + 8 => svprfop::SV_PSTL1KEEP, + 9 => svprfop::SV_PSTL1STRM, + 10 => svprfop::SV_PSTL2KEEP, + 11 => svprfop::SV_PSTL2STRM, + 12 => svprfop::SV_PSTL3KEEP, + 13 => svprfop::SV_PSTL3STRM, + _ => unreachable!(), + } +} + +macro_rules! debug_print_integral { + ($($name:ident => ($ty:ty, $svptrue_fn:ident, $svcnt_fn:ident, $svst_fn:ident)),*) => { + $( + #[inline] + #[target_feature(enable = "sve")] + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + pub fn $name(v: $ty) -> String { + unsafe { + let __pred = $svptrue_fn(); + let __num_elems = $svcnt_fn() as usize; + let mut __buf = std::vec::Vec::with_capacity(__num_elems); + $svst_fn(__pred, __buf.as_mut_ptr(), v); + __buf.set_len(__num_elems); + format!( + "[{}]", + __buf.iter().map(|el| el.to_string()).collect::>().join(", ") + ) + } + } + )* + } +} + +debug_print_integral! { + debug_print_f32 => (svfloat32_t, svptrue_b32, svcntw, svst1_f32), + debug_print_f64 => (svfloat64_t, svptrue_b64, svcntd, svst1_f64), + debug_print_s8 => (svint8_t, svptrue_b8, svcntb, svst1_s8), + debug_print_s16 => (svint16_t, svptrue_b16, svcnth, svst1_s16), + debug_print_s32 => (svint32_t, svptrue_b32, svcntw, svst1_s32), + debug_print_s64 => (svint64_t, svptrue_b64, svcntd, svst1_s64), + debug_print_u8 => (svuint8_t, svptrue_b8, svcntb, svst1_u8), + debug_print_u16 => (svuint16_t, svptrue_b16, svcnth, svst1_u16), + debug_print_u32 => (svuint32_t, svptrue_b32, svcntw, svst1_u32), + debug_print_u64 => (svuint64_t, svptrue_b64, svcntd, svst1_u64) +} + +macro_rules! debug_print_bool { + ($($name:ident => ($ty:ty, $svst_fn:ident, $svdup_fn:ident)),*) => { + $( + #[inline] + #[target_feature(enable = "sve")] + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + pub fn $name(v: $ty) -> String { + unsafe { + let __num_elems = svcntb() as usize; + let mut __buf = std::vec::Vec::with_capacity(__num_elems); + $svst_fn(v, __buf.as_mut_ptr(), $svdup_fn(1)); + __buf.set_len(__num_elems); + format!( + "[{}]", + __buf.iter() + .map(|el| *el == 1) + .map(|el| el.to_string()) + .collect::>() + .join(", ") + ) + } + } + )* + } +} + +debug_print_bool! { + debug_print_b8 => (svbool_t, svst1_u8, svdup_n_u8), + debug_print_b16 => (svbool_t, svst1_u16, svdup_n_u16), + debug_print_b32 => (svbool_t, svst1_u32, svdup_n_u32), + debug_print_b64 => (svbool_t, svst1_u64, svdup_n_u64) +} "#; diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 7754e9ec2d..7481bb4aeb 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -1,5 +1,9 @@ use super::intrinsic::ArmType; -use crate::common::intrinsic_helpers::{IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind}; +use crate::common::PREDICATE_LOCAL; +use crate::common::intrinsic_helpers::{ + IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind, default_fixed_vector_comparison, +}; +use itertools::Itertools; impl TypeDefinition for ArmType { /// Gets a string containing the typename for this type in C format. @@ -70,32 +74,137 @@ impl TypeDefinition for ArmType { } } + fn rust_scalar_type_for_test_value_array(&self) -> String { + if self.kind() == TypeKind::Bool && self.num_lanes() == SimdLen::Scalable { + let mut ty = self.clone(); + ty.kind = TypeKind::Int(Sign::Signed); + ty.rust_scalar_type() + } else { + self.rust_scalar_type() + } + } + /// Determines the load function for this type. fn load_function(&self) -> String { - if let IntrinsicType { - kind: k, - bit_len: Some(bl), - vec_len, - .. - } = **self - { - let quad = if self.num_lanes() * bl > 64 { "q" } else { "" }; + if let Some(bl) = self.bit_len { + match self.num_lanes() { + SimdLen::Scalable => { + format!( + "svld{len}_{type}{bl}", + len = self.num_vectors(), + type = self.rust_intrinsic_name_prefix(), + ) + } + SimdLen::Fixed(num_lanes) => { + format!( + "vld{len}{quad}_{type}{bl}", + quad = if num_lanes * bl > 64 { "q" } else { "" }, + len = self.num_vectors(), + type = self.rust_intrinsic_name_prefix(), + ) + } + } + } else { + todo!("load_function IntrinsicType: {self:#?}") + } + } + + fn comparison_function(&self) -> String { + if let SimdLen::Fixed(num_lanes) = self.num_lanes() { + return default_fixed_vector_comparison(self, num_lanes); + } + + if self.kind() == TypeKind::Bool { + // There isn't a `svcmpeq` for `svbool_t` and there aren't `svboolxN_t` types, so just + // do an XOR and test it is empty. + return format!( + r#" +let __eq = sveor_b_z({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); +assert!(!svptest_any({PREDICATE_LOCAL}, __eq), "{{}}", id); + "# + ); + } + + // Returns `of` when `num_vectors == 1` otherwise returns the appropriate `svget` invocation + // for `of`. + let get = |num_vectors: u32, idx: u32, from: &'static str| -> String { + if num_vectors == 1 { + return from.to_string(); + } format!( - "vld{len}{quad}_{type}{size}", - type = match k { - TypeKind::Int(Sign::Unsigned) => "u", - TypeKind::Int(Sign::Signed) => "s", - TypeKind::Float => "f", - TypeKind::Poly => "p", - x => todo!("get_load_function TypeKind: {x:#?}"), - }, - size = bl, - quad = quad, - len = vec_len.unwrap_or(1), + "svget{num_vectors}_{ty}{bl}::<{idx}>({from})", + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), ) - } else { - todo!("load_function IntrinsicType: {self:#?}") + }; + + let n = self.num_vectors(); + (0..n) + .format_with("\n", |i, fmt| { + match self.kind() { + TypeKind::Float | TypeKind::BFloat => { + // Floats need special handling because `NaN != NaN` normally - this + // effectively does `(rust == c) || (isnan(rust) && isnan(c))` + fmt(&format_args!( + r#" +let __rust_eq_return_value = {rust_return_value}; +let __c_eq_return_value = {c_return_value}; +let __eq_sans_nan = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __c_eq_return_value); +let __rust_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __rust_eq_return_value); +let __c_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __c_eq_return_value, __c_eq_return_value); +let __both_nan = svand_b_z({PREDICATE_LOCAL}, __rust_nan, __c_nan); +let __eq = svorr_b_z({PREDICATE_LOCAL}, __eq_sans_nan, __both_nan); +if !svptest_any(__pred, __eq) {{ + let __rust_pretty = debug_print_{ty}{bl}(__rust_eq_return_value); + let __c_pretty = debug_print_{ty}{bl}(__c_eq_return_value); + panic!("{{}}-{i_plus_one}/{n}\nRust: {{__rust_pretty}}\nC: {{__c_pretty}}", id); +}} +"#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + rust_return_value = get(n, i, "__rust_return_value"), + c_return_value = get(n, i, "__c_return_value"), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) + } + _ => { + // Most types can just use `svcmpeq` + fmt(&format_args!( + r#" +let __rust_eq_return_value = {rust_return_value}; +let __c_eq_return_value = {c_return_value}; +let __eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __c_eq_return_value); +if !svptest_any(__pred, __eq) {{ + let __rust_pretty = debug_print_{ty}{bl}(__rust_eq_return_value); + let __c_pretty = debug_print_{ty}{bl}(__c_eq_return_value); + panic!("{{}}-{i_plus_one}/{n}\nRust: {{__rust_pretty}}\nC: {{__c_pretty}}", id); +}} +"#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + rust_return_value = get(n, i, "__rust_return_value"), + c_return_value = get(n, i, "__c_return_value"), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) + } + } + }) + .to_string() + } +} + +impl ArmType { + /// Returns the Rust prefix for the name of an intrinsic with this type kind (i.e. `s` for + /// `i16`, or `u` for `u16`). For type kinds without any bit length at the end (e.g. `bool`), + /// returns the whole type name. + pub fn rust_intrinsic_name_prefix(&self) -> &str { + match self.kind() { + TypeKind::Char(Sign::Signed) => "s", + TypeKind::Int(Sign::Signed) => "s", + TypeKind::Poly => "p", + TypeKind::Bool => "s", + _ => self.kind.rust_prefix(), } } } diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index 4d38bce327..10d9224183 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -4,9 +4,9 @@ use crate::common::SupportedArchitecture; use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::test_values_array_name; -use super::PASSES; use super::constraint::Constraint; use super::intrinsic_helpers::TypeDefinition; +use super::{PASSES, PREDICATE_LOCAL}; /// An argument for the intrinsic. #[derive(Debug, PartialEq, Clone)] @@ -19,18 +19,27 @@ pub struct Argument { pub ty: A::Type, /// Any constraints that are on this argument pub constraint: Option, + /// Is the argument a predicate for a scalable intrinsic? + pub is_predicate: bool, } impl Argument where A: SupportedArchitecture, { - pub fn new(pos: usize, name: String, ty: A::Type, constraint: Option) -> Self { + pub fn new( + pos: usize, + name: String, + ty: A::Type, + constraint: Option, + is_predicate: bool, + ) -> Self { Argument { pos, name, ty, constraint, + is_predicate, } } @@ -38,8 +47,14 @@ where self.ty.c_type() } + /// Generates local variable name for the value passed to this argument pub fn generate_name(&self) -> String { - format!("{}_val", self.name) + // The same predicate is used for scalable intrinsic invocations + if self.is_predicate { + format!("{PREDICATE_LOCAL}") + } else { + format!("{}_val", self.name) + } } pub fn is_simd(&self) -> bool { @@ -176,15 +191,11 @@ where pub fn load_values_rust(&self) -> String { self.iter() .filter(|&arg| !arg.has_constraint()) + .filter(|&arg| !arg.is_predicate) .enumerate() .map(|(idx, arg)| { if arg.is_simd() { - format!( - "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);", - name = arg.generate_name(), - vals_name = test_values_array_name(&arg.ty), - load = arg.ty.load_function(), - ) + A::load_call(arg, idx) } else { format!( "let {name} = {vals_name}[(i+{idx}) % {PASSES}];", diff --git a/crates/intrinsic-test/src/common/constraint.rs b/crates/intrinsic-test/src/common/constraint.rs index ab52d866ab..c7d37da2ad 100644 --- a/crates/intrinsic-test/src/common/constraint.rs +++ b/crates/intrinsic-test/src/common/constraint.rs @@ -1,6 +1,7 @@ -use serde::Deserialize; use std::ops::Range; +use serde::Deserialize; + /// Describes the values to test for a const generic parameter #[derive(Debug, PartialEq, Clone, Deserialize)] pub enum Constraint { @@ -23,21 +24,53 @@ pub enum Constraint { SvImmRotationAdd, } -impl Constraint { - /// Returns an iterator over the values of this constraint - pub fn iter(&self) -> Box + '_> { +/// Workaround to enable the `Constraint::into_iter` to return an iterator that implements `Clone`, +/// so that it can be used with `Itertools::multi_cartesian_product`. +/// +/// With the different iterator types, returning `Box + '_>` would the +/// idiomatic approach, but this can't be made to implement `Clone`. Given the limited number +/// of iterator types used and their relative lack of complexity, wrapping them all in an enum isn't +/// too bad. +#[derive(Clone)] +pub enum ConstraintIterator<'a> { + Once(std::iter::Once), + Range(std::ops::Range), + Copied(std::iter::Copied>), + Chain(std::iter::Chain, std::ops::RangeInclusive>), + StepBy(std::iter::StepBy>), +} + +impl<'a> Iterator for ConstraintIterator<'a> { + type Item = i64; + + fn next(&mut self) -> Option { + match self { + ConstraintIterator::Once(once) => once.next(), + ConstraintIterator::Range(range) => range.next(), + ConstraintIterator::Copied(copied) => copied.next(), + ConstraintIterator::Chain(chain) => chain.next(), + ConstraintIterator::StepBy(step_by) => step_by.next(), + } + } +} + +impl<'a> IntoIterator for &'a Constraint { + type Item = i64; + type IntoIter = ConstraintIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { match self { - Constraint::Equal(i) => Box::new(std::iter::once(*i)), - Constraint::Range(range) => Box::new(range.clone()), - Constraint::Set(items) => Box::new(items.iter().copied().chain(Range::default())), + Constraint::Equal(i) => ConstraintIterator::Once(std::iter::once(*i)), + Constraint::Range(range) => ConstraintIterator::Range(range.clone()), + Constraint::Set(items) => ConstraintIterator::Copied(items.iter().copied()), // These values are discriminants of the `svpattern` enum - Constraint::SvPattern => Box::new((0..=13).chain(29..=31)), + Constraint::SvPattern => ConstraintIterator::Chain((0..=13).chain(29..=31)), // These values are discriminants of the `svprfop` enum - Constraint::SvPrefetchOp => Box::new((0..=5).chain(8..=14)), + Constraint::SvPrefetchOp => ConstraintIterator::Chain((0..=5).chain(8..=14)), // Valid rotations for intrinsics operating on complex pairs: 0, 90, 180, 270 - Constraint::SvImmRotation => Box::new((0..=270).step_by(90)), + Constraint::SvImmRotation => ConstraintIterator::StepBy((0..=270).step_by(90)), // Valid rotations for `svcadd` and `svqcadd`: 0, 270 - Constraint::SvImmRotationAdd => Box::new((90..=270).step_by(180)), + Constraint::SvImmRotationAdd => ConstraintIterator::StepBy((90..=270).step_by(180)), } } } diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index 21cc5cfa2e..bbff7a91b6 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -18,30 +18,36 @@ pub fn write_wrapper_c( w: &mut impl std::io::Write, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { - write!(w, "{}", A::NOTICE)?; + write!( + w, + r#" +{notice} +#include +#include +{prelude} - writeln!(w, "#include ")?; - writeln!(w, "#include ")?; - writeln!(w, "{}", A::C_PRELUDE)?; - - for intrinsic in intrinsics { - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - " +{intrinsics} +"#, + notice = A::NOTICE, + prelude = A::C_PRELUDE, + intrinsics = intrinsics.iter().format_with("", |intrinsic, fmt| { + fmt(&intrinsic + .specializations() + .format_with("\n", |imm_values, fmt| { + fmt(&format_args!( + " void {name}_wrapper{imm_arglist}({return_ty}* __dst{arglist}) {{ *__dst = {name}({params}); }}", - return_ty = intrinsic.results.c_type(), - name = intrinsic.name, - imm_arglist = imm_values - .iter() - .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), - arglist = intrinsic.arguments.as_non_imm_arglist_c(), - params = intrinsic.arguments.as_call_params_c(&imm_values) - ) - })?; - } - - Ok(()) + return_ty = intrinsic.results.c_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_c(), + params = intrinsic.arguments.as_call_params_c(&imm_values) + )) + })) + }), + ) } diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index baa11511ec..44128d43b9 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -5,8 +5,9 @@ use itertools::Itertools; use super::intrinsic_helpers::TypeDefinition; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; +use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::{test_values_array_name, test_values_array_static}; -use crate::common::{PASSES, SupportedArchitecture}; +use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; /// Rust definitions that are included verbatim in the generated source. In particular, defines /// a wrapper around float types that defines `NaN`s to be equal reflexively to enable @@ -126,7 +127,10 @@ pub fn write_lib_rs( for intrinsic in intrinsics { for arg in &intrinsic.arguments.args { - if !arg.has_constraint() { + // Skip arguments with constraints as these correspond to generic instantiatons, and + // predicates for scalable intrinsics as the same predicate is used for all intrinsics + // under test. + if !arg.has_constraint() && !arg.is_predicate { let name = test_values_array_name(&arg.ty); if seen.insert(name) { @@ -168,38 +172,14 @@ fn generate_rust_test_loop( coerce += ") -> _"; c_coerce += ")"; - if intrinsic - .arguments - .iter() - .filter(|arg| arg.has_constraint()) - .count() - == 0 - { - writeln!( - w, - " let specializations = [(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)];" - )?; - } else { - writeln!(w, " let specializations = [")?; - - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - " (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {c_coerce}),", - const_args = imm_values.iter().join(","), - c_const_args = imm_values.iter().join("_"), - ) - })?; - - writeln!(w, " ];")?; - } - write!( w, r#" +let specializations = [{specializations}]; for (id, rust, c) in specializations {{ for i in 0..{PASSES} {{ unsafe {{ + {predicate} {loaded_args} let __rust_return_value = rust({rust_args}); @@ -212,9 +192,50 @@ for (id, rust, c) in specializations {{ }} }} "#, + specializations = intrinsic + .specializations() + .format_with(",", |imm_values, fmt| { + if imm_values.is_empty() { + fmt(&format_args!( + "(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)" + )) + } else { + let constraint_args = intrinsic.arguments.iter().filter(|a| a.has_constraint()); + fmt(&format_args!( + r#" + ( + "{const_args}", + {intrinsic_name}::<{const_args}> as unsafe {coerce}, + {intrinsic_name}_wrapper_{c_const_args} as unsafe extern "C" {c_coerce} + ) + "#, + const_args = imm_values + .iter() + .zip(constraint_args) + .map(|(imm_val, arg)| { + match arg.ty.kind() { + TypeKind::SvPattern | TypeKind::SvPrefetchOp => { + format!("{{ {}_from_i32({imm_val}) }}", arg.ty.kind()) + } + _ => imm_val.to_string(), + } + }) + .join(","), + c_const_args = imm_values.iter().join("_"), + )) + } + }), loaded_args = intrinsic.arguments.load_values_rust(), rust_args = intrinsic.arguments.as_call_param_rust(), c_args = intrinsic.arguments.as_c_call_param_rust(), + predicate = if intrinsic.has_scalable_argument_or_result() { + format!( + "let {PREDICATE_LOCAL} = {pred};", + pred = A::predicate_function(intrinsic.results.inner_size()), + ) + } else { + "".to_string() + }, comparison = intrinsic.results.comparison_function(), ) } @@ -256,25 +277,25 @@ pub fn write_bindings_rust( #[allow(improper_ctypes)] #[link(name = "wrapper_{i}")] unsafe extern "C" {{ + {definitions} +}} "#, - )?; - - for intrinsic in intrinsics { - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - "fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", - return_ty = intrinsic.results.rust_type(), - name = intrinsic.name, - imm_arglist = imm_values - .iter() - .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), - arglist = intrinsic.arguments.as_non_imm_arglist_rust(), - ) - })?; - } - - writeln!(w, "}}") + definitions = intrinsics.iter().format_with("", |intrinsic, fmt| { + fmt(&intrinsic + .specializations() + .format_with("\n", |imm_values, fmt| { + fmt(&format_args!( + "fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", + return_ty = intrinsic.results.rust_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_rust(), + )) + })) + }) + ) } /// Writes a `build.rs` into `w` for each test crate that compiles the corresponding C source code diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index 0c5bf43069..d5d903d941 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,5 +1,6 @@ use super::argument::ArgumentList; -use crate::common::{SupportedArchitecture, constraint::Constraint}; +use crate::common::{SupportedArchitecture, intrinsic_helpers::SimdLen}; +use itertools::Itertools; /// An intrinsic #[derive(Debug, PartialEq, Clone)] @@ -20,27 +21,6 @@ pub struct Intrinsic { pub extension: String, } -/// Invokes `f` for each combination of the values in the constraint ranges. -/// -/// For example, given `constraints=[Equal(0), Range(1..2), Set([3, 4])]` and `imm_values=[]`, this -/// produces the four calls to `f`: `f([0, 1, 3])`, `f([0, 1, 4])`, `f([0, 2, 3])`, `f([0, 2, 4])`. -fn recurse_specializations<'a, E>( - constraints: &mut (impl Iterator + Clone), - imm_values: &mut Vec, - f: &mut impl FnMut(&[i64]) -> Result<(), E>, -) -> Result<(), E> { - if let Some(current) = constraints.next() { - for i in current.iter() { - imm_values.push(i); - recurse_specializations(&mut constraints.clone(), imm_values, f)?; - imm_values.pop(); - } - Ok(()) - } else { - f(&imm_values) - } -} - impl Intrinsic { /// Invokes `f` for "specialisation" of the intrinsic - a specific instantiation of the /// constant generics of the intrinsic. `f` takes a slice where the `i`th element corresponds @@ -49,17 +29,20 @@ impl Intrinsic { /// For an intrinsic with three arguments with constraints `Equal(0)`, `Range(1..2)`, /// `Set([3, 4])` respectively, this would produce four calls to `f`: `f(0, 1, 3)`, /// `f(0, 1, 4)`, `f(0, 2, 3)`, `f(0, 2, 4)`. - pub fn iter_specializations( - &self, - mut f: impl FnMut(&[i64]) -> Result<(), E>, - ) -> Result<(), E> { - recurse_specializations( - &mut self + pub fn specializations(&self) -> impl Iterator> { + self.arguments + .iter() + .filter_map(|arg| arg.constraint.as_ref()) + .map(|constraint| constraint.into_iter()) + .multi_cartesian_product() + } + + /// Returns `true` if this intrinsic has any argument or result types that are scalable vectors + pub fn has_scalable_argument_or_result(&self) -> bool { + self.results.num_lanes() == SimdLen::Scalable + || self .arguments .iter() - .filter_map(|arg| arg.constraint.as_ref()), - &mut Vec::new(), - &mut f, - ) + .any(|a| a.ty.num_lanes() == SimdLen::Scalable) } } diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index b9f30af7df..bc47a90d3f 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -116,6 +116,15 @@ pub enum SimdLen { Fixed(u32), } +impl SimdLen { + pub fn expect_fixed(&self) -> u32 { + match self { + SimdLen::Fixed(lanes) => *lanes, + SimdLen::Scalable => panic!("`expect_fixed` with scalable length"), + } + } +} + impl std::fmt::Display for SimdLen { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -173,14 +182,8 @@ impl IntrinsicType { } /// Returns the number of lanes of the type - pub fn num_lanes(&self) -> u32 { - self.simd_len - .as_ref() - .map(|len| match len { - SimdLen::Scalable => unimplemented!(), - SimdLen::Fixed(len) => *len, - }) - .unwrap_or(1) + pub fn num_lanes(&self) -> SimdLen { + self.simd_len.unwrap_or(SimdLen::Fixed(1)) } /// Returns the number of vectors of the type @@ -208,7 +211,7 @@ pub trait TypeDefinition: Clone + DerefMut { match self.simd_len { Some(SimdLen::Scalable) => unimplemented!("architecture-specific"), Some(SimdLen::Fixed(_)) | None => { - default_fixed_vector_comparison(self, self.num_lanes()) + default_fixed_vector_comparison(self, self.num_lanes().expect_fixed()) } } } @@ -227,6 +230,12 @@ pub trait TypeDefinition: Clone + DerefMut { ty.vec_len = None; ty.rust_type() } + + /// Gets a string containing the name of the scalar type corresponding to this type that should + /// be used as the element type for the test value array. + fn rust_scalar_type_for_test_value_array(&self) -> String { + self.rust_scalar_type() + } } /// Returns the default comparison between results of an intrinsic - casting the vectors to arrays diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 73daabbd66..46b6bc4102 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -5,12 +5,14 @@ use rayon::prelude::*; use cli::ProcessedCli; use crate::common::{ + argument::Argument, gen_c::write_wrapper_c, gen_rust::{ run_rustfmt, write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs, }, intrinsic::Intrinsic, intrinsic_helpers::TypeDefinition, + values::test_values_array_name, }; pub mod argument; @@ -18,10 +20,16 @@ pub mod cli; pub mod constraint; pub mod intrinsic; pub mod intrinsic_helpers; +pub mod values; mod gen_c; mod gen_rust; -mod values; + +/// Many scalable intrinsics take a predicate argument and for the purposes of intrinsic testing, +/// a predicate that enables all lanes is used for all of these intrinsic calls (i.e. loading inputs, +/// result comparison, and the intrinsic under test). This constant defines the name of the local +/// variable that contains that predicate. +pub const PREDICATE_LOCAL: &'static str = "__pred"; // The number of times each intrinsic will be called - influences the generation of the // test arrays to minimise repeated testing of the same test values. @@ -100,6 +108,19 @@ pub trait SupportedArchitecture: Sized { .collect::>() .unwrap(); } + + /// Return a call to a intrinsic to generate a predicate, if reqd. + fn predicate_function(_: u32) -> String; + + /// Return a call loading `arg`. Can assume that `arg.is_simd()` holds. + fn load_call(arg: &Argument, idx: usize) -> String { + format!( + "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", + name = arg.generate_name(), + vals_name = test_values_array_name(&arg.ty), + load = arg.ty.load_function(), + ) + } } pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { diff --git a/crates/intrinsic-test/src/common/values.rs b/crates/intrinsic-test/src/common/values.rs index 8c549346ce..d1f333fd19 100644 --- a/crates/intrinsic-test/src/common/values.rs +++ b/crates/intrinsic-test/src/common/values.rs @@ -26,7 +26,7 @@ pub fn test_values_array_static( w, "static {name}: [{ty}; {load_size}] = {values};", name = test_values_array_name(ty), - ty = ty.rust_scalar_type(), + ty = ty.rust_scalar_type_for_test_value_array(), load_size = test_values_array_length(&ty), values = test_values_array(&ty) ) @@ -50,8 +50,16 @@ pub fn test_values_array_name(ty: &T) -> String { /// which is then printed as a hex value in the generated code (and if identified as a negative /// value, with the appropriate minus and corrected hex pattern). Calls to `fN::from_bits` are /// generated for floats. +/// +/// An exception to the above is when `ty` is a boolean, where this function returns +/// `[true, false]` - as there are only ever two values for a boolean. This only works because the +/// generated accesses to the test value array is always modulo the length of the test value array. pub fn test_values_array(ty: &IntrinsicType) -> String { let (bit_len, kind) = match ty { + IntrinsicType { + kind: TypeKind::Bool, + .. + } => (1, TypeKind::Bool), IntrinsicType { kind: TypeKind::Float, bit_len: Some(bit_len), @@ -75,6 +83,9 @@ pub fn test_values_array(ty: &IntrinsicType) -> String { let src = bit_pattern_for_test_values_array(bit_len, i); assert!(src == 0 || src.ilog2() < bit_len); match kind { + TypeKind::Bool if ty.num_lanes() != SimdLen::Scalable => { + fmt(&format_args!("{}", if src == 1 { true } else { false })) + } TypeKind::Float => fmt(&format_args!("f{bit_len}::from_bits({src:#x})")), TypeKind::Vector | TypeKind::Int(Sign::Signed) if (src >> (bit_len - 1)) != 0 => { // `src` is a two's complement representation of a negative value. @@ -134,7 +145,8 @@ pub fn test_values_array_length(ty: &IntrinsicType) -> u32 { pub fn bit_pattern_for_test_values_array(bits: u32, index: u32) -> u64 { let index = index as usize; match bits { - bits @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8) => BIT_PATTERNS_8[index % (1 << bits)].into(), + 1 => BIT_PATTERNS_1[index % BIT_PATTERNS_1.len()].into(), + bits @ (2 | 3 | 4 | 5 | 6 | 7 | 8) => BIT_PATTERNS_8[index % (1 << bits)].into(), 16 => BIT_PATTERNS_16[index % BIT_PATTERNS_16.len()].into(), 32 => BIT_PATTERNS_32[index % BIT_PATTERNS_32.len()].into(), 64 => BIT_PATTERNS_64[index % BIT_PATTERNS_64.len()], @@ -142,6 +154,9 @@ pub fn bit_pattern_for_test_values_array(bits: u32, index: u32) -> u64 { } } +// Contains every possible 1-bit value in order +pub const BIT_PATTERNS_1: &[u8] = &[0x0, 0x1]; + // Contains every possible 8-bit value in order pub const BIT_PATTERNS_8: &[u8] = &[ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index ce49550329..36f4fee437 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -99,6 +99,10 @@ impl SupportedArchitecture for X86 { Self { intrinsics } } + + fn predicate_function(_: u32) -> String { + unimplemented!("no scalable vectors on x86") + } } const RUST_PRELUDE: &str = r#" diff --git a/crates/intrinsic-test/src/x86/xml_parser.rs b/crates/intrinsic-test/src/x86/xml_parser.rs index 52f6da786e..2296dffb64 100644 --- a/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/crates/intrinsic-test/src/x86/xml_parser.rs @@ -99,8 +99,13 @@ fn xml_to_intrinsic(intr: XMLIntrinsic) -> Result, Box::new(i, param.var_name.clone(), ty.unwrap(), constraint); - Some(arg) + Some(Argument::::new( + i, + param.var_name.clone(), + ty.unwrap(), + constraint, + false, + )) } }); diff --git a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml index 138d5ba311..0b2d17d3b1 100644 --- a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml @@ -1157,7 +1157,7 @@ intrinsics: doc: Interleave even elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [trn1] compose: - LLVMLink: { name: "trn1.{sve_type}" } @@ -1167,10 +1167,13 @@ intrinsics: doc: Interleave even elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [trn1] compose: - - LLVMLink: { name: "trn1.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.trn1.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svtrn1q[_{type}] attr: [*sve-unstable] @@ -1188,7 +1191,7 @@ intrinsics: doc: Interleave odd elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [trn2] compose: - LLVMLink: { name: "trn2.{sve_type}" } @@ -1198,10 +1201,13 @@ intrinsics: doc: Interleave odd elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [trn2] compose: - - LLVMLink: { name: "trn2.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.trn2.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svtrn2q[_{type}] attr: [*sve-unstable]