From 708a4630b1fe3285f5a903285b11f115455b735e Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 22 Jun 2026 11:09:46 +0000 Subject: [PATCH 01/15] intrinsic-test: default to `--no-fail-fast` This is just more helpful for knowing what all needs to be fixed when CI fails. --- ci/intrinsic-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index 8de7a4cfa5..f4378b6b92 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -87,4 +87,4 @@ case "${TARGET}" in esac cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}" \ - --tests "$@" + --tests --no-fail-fast "$@" From bc3c777d12374158f260cafbeabf294ad393c4df Mon Sep 17 00:00:00 2001 From: David Wood Date: Wed, 20 May 2026 12:31:56 +0000 Subject: [PATCH 02/15] intrinsic-test: specializations as iterator Replacing `iter_specializations` (which repeatedly invokes a callback) with an iterator implementation allows `Itertools::format_with` to be used more broadly, which in turn allows disparate string interpolation to be combined and hopefully provide greater context to the reader. --- .../intrinsic-test/src/common/constraint.rs | 55 +++++++++--- crates/intrinsic-test/src/common/gen_c.rs | 50 ++++++----- crates/intrinsic-test/src/common/gen_rust.rs | 84 +++++++++---------- crates/intrinsic-test/src/common/intrinsic.rs | 42 ++-------- 4 files changed, 120 insertions(+), 111 deletions(-) diff --git a/crates/intrinsic-test/src/common/constraint.rs b/crates/intrinsic-test/src/common/constraint.rs index ab52d866ab..c7d37da2ad 100644 --- a/crates/intrinsic-test/src/common/constraint.rs +++ b/crates/intrinsic-test/src/common/constraint.rs @@ -1,6 +1,7 @@ -use serde::Deserialize; use std::ops::Range; +use serde::Deserialize; + /// Describes the values to test for a const generic parameter #[derive(Debug, PartialEq, Clone, Deserialize)] pub enum Constraint { @@ -23,21 +24,53 @@ pub enum Constraint { SvImmRotationAdd, } -impl Constraint { - /// Returns an iterator over the values of this constraint - pub fn iter(&self) -> Box + '_> { +/// Workaround to enable the `Constraint::into_iter` to return an iterator that implements `Clone`, +/// so that it can be used with `Itertools::multi_cartesian_product`. +/// +/// With the different iterator types, returning `Box + '_>` would the +/// idiomatic approach, but this can't be made to implement `Clone`. Given the limited number +/// of iterator types used and their relative lack of complexity, wrapping them all in an enum isn't +/// too bad. +#[derive(Clone)] +pub enum ConstraintIterator<'a> { + Once(std::iter::Once), + Range(std::ops::Range), + Copied(std::iter::Copied>), + Chain(std::iter::Chain, std::ops::RangeInclusive>), + StepBy(std::iter::StepBy>), +} + +impl<'a> Iterator for ConstraintIterator<'a> { + type Item = i64; + + fn next(&mut self) -> Option { + match self { + ConstraintIterator::Once(once) => once.next(), + ConstraintIterator::Range(range) => range.next(), + ConstraintIterator::Copied(copied) => copied.next(), + ConstraintIterator::Chain(chain) => chain.next(), + ConstraintIterator::StepBy(step_by) => step_by.next(), + } + } +} + +impl<'a> IntoIterator for &'a Constraint { + type Item = i64; + type IntoIter = ConstraintIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { match self { - Constraint::Equal(i) => Box::new(std::iter::once(*i)), - Constraint::Range(range) => Box::new(range.clone()), - Constraint::Set(items) => Box::new(items.iter().copied().chain(Range::default())), + Constraint::Equal(i) => ConstraintIterator::Once(std::iter::once(*i)), + Constraint::Range(range) => ConstraintIterator::Range(range.clone()), + Constraint::Set(items) => ConstraintIterator::Copied(items.iter().copied()), // These values are discriminants of the `svpattern` enum - Constraint::SvPattern => Box::new((0..=13).chain(29..=31)), + Constraint::SvPattern => ConstraintIterator::Chain((0..=13).chain(29..=31)), // These values are discriminants of the `svprfop` enum - Constraint::SvPrefetchOp => Box::new((0..=5).chain(8..=14)), + Constraint::SvPrefetchOp => ConstraintIterator::Chain((0..=5).chain(8..=14)), // Valid rotations for intrinsics operating on complex pairs: 0, 90, 180, 270 - Constraint::SvImmRotation => Box::new((0..=270).step_by(90)), + Constraint::SvImmRotation => ConstraintIterator::StepBy((0..=270).step_by(90)), // Valid rotations for `svcadd` and `svqcadd`: 0, 270 - Constraint::SvImmRotationAdd => Box::new((90..=270).step_by(180)), + Constraint::SvImmRotationAdd => ConstraintIterator::StepBy((90..=270).step_by(180)), } } } diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index 21cc5cfa2e..bbff7a91b6 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -18,30 +18,36 @@ pub fn write_wrapper_c( w: &mut impl std::io::Write, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { - write!(w, "{}", A::NOTICE)?; + write!( + w, + r#" +{notice} +#include +#include +{prelude} - writeln!(w, "#include ")?; - writeln!(w, "#include ")?; - writeln!(w, "{}", A::C_PRELUDE)?; - - for intrinsic in intrinsics { - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - " +{intrinsics} +"#, + notice = A::NOTICE, + prelude = A::C_PRELUDE, + intrinsics = intrinsics.iter().format_with("", |intrinsic, fmt| { + fmt(&intrinsic + .specializations() + .format_with("\n", |imm_values, fmt| { + fmt(&format_args!( + " void {name}_wrapper{imm_arglist}({return_ty}* __dst{arglist}) {{ *__dst = {name}({params}); }}", - return_ty = intrinsic.results.c_type(), - name = intrinsic.name, - imm_arglist = imm_values - .iter() - .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), - arglist = intrinsic.arguments.as_non_imm_arglist_c(), - params = intrinsic.arguments.as_call_params_c(&imm_values) - ) - })?; - } - - Ok(()) + return_ty = intrinsic.results.c_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_c(), + params = intrinsic.arguments.as_call_params_c(&imm_values) + )) + })) + }), + ) } diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index baa11511ec..72fabaaaf0 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -168,35 +168,10 @@ fn generate_rust_test_loop( coerce += ") -> _"; c_coerce += ")"; - if intrinsic - .arguments - .iter() - .filter(|arg| arg.has_constraint()) - .count() - == 0 - { - writeln!( - w, - " let specializations = [(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)];" - )?; - } else { - writeln!(w, " let specializations = [")?; - - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - " (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {c_coerce}),", - const_args = imm_values.iter().join(","), - c_const_args = imm_values.iter().join("_"), - ) - })?; - - writeln!(w, " ];")?; - } - write!( w, r#" +let specializations = [{specializations}]; for (id, rust, c) in specializations {{ for i in 0..{PASSES} {{ unsafe {{ @@ -212,6 +187,27 @@ for (id, rust, c) in specializations {{ }} }} "#, + specializations = intrinsic + .specializations() + .format_with(",", |imm_values, fmt| { + if imm_values.is_empty() { + fmt(&format_args!( + "(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)" + )) + } else { + fmt(&format_args!( + r#" + ( + "{const_args}", + {intrinsic_name}::<{const_args}> as unsafe {coerce}, + {intrinsic_name}_wrapper_{c_const_args} as unsafe extern "C" {c_coerce} + ) + "#, + const_args = imm_values.iter().join(","), + c_const_args = imm_values.iter().join("_"), + )) + } + }), loaded_args = intrinsic.arguments.load_values_rust(), rust_args = intrinsic.arguments.as_call_param_rust(), c_args = intrinsic.arguments.as_c_call_param_rust(), @@ -256,25 +252,25 @@ pub fn write_bindings_rust( #[allow(improper_ctypes)] #[link(name = "wrapper_{i}")] unsafe extern "C" {{ + {definitions} +}} "#, - )?; - - for intrinsic in intrinsics { - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - "fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", - return_ty = intrinsic.results.rust_type(), - name = intrinsic.name, - imm_arglist = imm_values - .iter() - .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), - arglist = intrinsic.arguments.as_non_imm_arglist_rust(), - ) - })?; - } - - writeln!(w, "}}") + definitions = intrinsics.iter().format_with("", |intrinsic, fmt| { + fmt(&intrinsic + .specializations() + .format_with("\n", |imm_values, fmt| { + fmt(&format_args!( + "fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", + return_ty = intrinsic.results.rust_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_rust(), + )) + })) + }) + ) } /// Writes a `build.rs` into `w` for each test crate that compiles the corresponding C source code diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index 0c5bf43069..f1e5e4dffa 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,5 +1,6 @@ use super::argument::ArgumentList; -use crate::common::{SupportedArchitecture, constraint::Constraint}; +use crate::common::SupportedArchitecture; +use itertools::Itertools; /// An intrinsic #[derive(Debug, PartialEq, Clone)] @@ -20,27 +21,6 @@ pub struct Intrinsic { pub extension: String, } -/// Invokes `f` for each combination of the values in the constraint ranges. -/// -/// For example, given `constraints=[Equal(0), Range(1..2), Set([3, 4])]` and `imm_values=[]`, this -/// produces the four calls to `f`: `f([0, 1, 3])`, `f([0, 1, 4])`, `f([0, 2, 3])`, `f([0, 2, 4])`. -fn recurse_specializations<'a, E>( - constraints: &mut (impl Iterator + Clone), - imm_values: &mut Vec, - f: &mut impl FnMut(&[i64]) -> Result<(), E>, -) -> Result<(), E> { - if let Some(current) = constraints.next() { - for i in current.iter() { - imm_values.push(i); - recurse_specializations(&mut constraints.clone(), imm_values, f)?; - imm_values.pop(); - } - Ok(()) - } else { - f(&imm_values) - } -} - impl Intrinsic { /// Invokes `f` for "specialisation" of the intrinsic - a specific instantiation of the /// constant generics of the intrinsic. `f` takes a slice where the `i`th element corresponds @@ -49,17 +29,11 @@ impl Intrinsic { /// For an intrinsic with three arguments with constraints `Equal(0)`, `Range(1..2)`, /// `Set([3, 4])` respectively, this would produce four calls to `f`: `f(0, 1, 3)`, /// `f(0, 1, 4)`, `f(0, 2, 3)`, `f(0, 2, 4)`. - pub fn iter_specializations( - &self, - mut f: impl FnMut(&[i64]) -> Result<(), E>, - ) -> Result<(), E> { - recurse_specializations( - &mut self - .arguments - .iter() - .filter_map(|arg| arg.constraint.as_ref()), - &mut Vec::new(), - &mut f, - ) + pub fn specializations(&self) -> impl Iterator> { + self.arguments + .iter() + .filter_map(|arg| arg.constraint.as_ref()) + .map(|constraint| constraint.into_iter()) + .multi_cartesian_product() } } From dbcba565a98d54bf84cb7dd1b7227f2917584bf0 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 03/15] intrinsic-test: impl `get_load_function` for SVE Updates `get_load_function` to return `svld{n}_{ty}` when loading a scalable vector type. Caller of `get_load_function` will still need updated to handle passing the predicate arguments to these load functions. --- crates/intrinsic-test/src/arm/mod.rs | 4 +- crates/intrinsic-test/src/arm/types.rs | 55 +++++++++++-------- .../src/common/intrinsic_helpers.rs | 21 ++++--- 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index a60e0ff155..e87a9cf703 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -69,13 +69,13 @@ impl SupportedArchitecture for Arm { let has_sve_arg = i .arguments .iter() - .any(|a| a.ty.simd_len == Some(SimdLen::Scalable)); + .any(|a| a.ty.num_lanes() == SimdLen::Scalable); !(has_f16_arg && has_sve_arg) }) .filter(|i| { let has_f16_ret = i.results.kind() == TypeKind::Float && i.results.bit_len == Some(16); - let has_sve_ret = i.results.simd_len == Some(SimdLen::Scalable); + let has_sve_ret = i.results.num_lanes() == SimdLen::Scalable; !(has_f16_ret && has_sve_ret) }) // Skip `svqcvtn{u,}n*_x2` intrinsics - not yet implemented! diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 7754e9ec2d..bce83b2dc3 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -72,34 +72,45 @@ impl TypeDefinition for ArmType { /// Determines the load function for this type. fn load_function(&self) -> String { - if let IntrinsicType { - kind: k, - bit_len: Some(bl), - vec_len, - .. - } = **self - { - let quad = if self.num_lanes() * bl > 64 { "q" } else { "" }; - - format!( - "vld{len}{quad}_{type}{size}", - type = match k { - TypeKind::Int(Sign::Unsigned) => "u", - TypeKind::Int(Sign::Signed) => "s", - TypeKind::Float => "f", - TypeKind::Poly => "p", - x => todo!("get_load_function TypeKind: {x:#?}"), - }, - size = bl, - quad = quad, - len = vec_len.unwrap_or(1), - ) + if let Some(bl) = self.bit_len { + match self.num_lanes() { + SimdLen::Scalable => { + format!( + "svld{len}_{type}{bl}", + len = self.num_vectors(), + type = self.rust_intrinsic_name_prefix(), + ) + } + SimdLen::Fixed(num_lanes) => { + format!( + "vld{len}{quad}_{type}{bl}", + quad = if num_lanes * bl > 64 { "q" } else { "" }, + len = self.num_vectors(), + type = self.rust_intrinsic_name_prefix(), + ) + } + } } else { todo!("load_function IntrinsicType: {self:#?}") } } } +impl ArmType { + /// Returns the Rust prefix for the name of an intrinsic with this type kind (i.e. `s` for + /// `i16`, or `u` for `u16`). For type kinds without any bit length at the end (e.g. `bool`), + /// returns the whole type name. + pub fn rust_intrinsic_name_prefix(&self) -> &str { + match self.kind() { + TypeKind::Char(Sign::Signed) => "s", + TypeKind::Int(Sign::Signed) => "s", + TypeKind::Poly => "p", + TypeKind::Bool => "s", + _ => self.kind.rust_prefix(), + } + } +} + pub fn parse_intrinsic_type(s: &str) -> Result { const CONST_STR: &str = "const"; const ENUM_STR: &str = "enum "; diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index b9f30af7df..7a7ad8f61f 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -116,6 +116,15 @@ pub enum SimdLen { Fixed(u32), } +impl SimdLen { + pub fn expect_fixed(&self) -> u32 { + match self { + SimdLen::Fixed(lanes) => *lanes, + SimdLen::Scalable => panic!("`expect_fixed` with scalable length"), + } + } +} + impl std::fmt::Display for SimdLen { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -173,14 +182,8 @@ impl IntrinsicType { } /// Returns the number of lanes of the type - pub fn num_lanes(&self) -> u32 { - self.simd_len - .as_ref() - .map(|len| match len { - SimdLen::Scalable => unimplemented!(), - SimdLen::Fixed(len) => *len, - }) - .unwrap_or(1) + pub fn num_lanes(&self) -> SimdLen { + self.simd_len.unwrap_or(SimdLen::Fixed(1)) } /// Returns the number of vectors of the type @@ -208,7 +211,7 @@ pub trait TypeDefinition: Clone + DerefMut { match self.simd_len { Some(SimdLen::Scalable) => unimplemented!("architecture-specific"), Some(SimdLen::Fixed(_)) | None => { - default_fixed_vector_comparison(self, self.num_lanes()) + default_fixed_vector_comparison(self, self.num_lanes().expect_fixed()) } } } From 27974a0c0ae239d05aae8cdb7a9ad8be34ee48f3 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 04/15] intrinsic-test: use `arm_sve.h` and target feats Updates the headers used by generated C code and the target feature flags passed to the C compiler to enable SVE. --- ci/docker/aarch64-unknown-linux-gnu/Dockerfile | 2 +- crates/intrinsic-test/src/arm/mod.rs | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 1b61dd0c1b..be85a2dd70 100644 --- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -19,5 +19,5 @@ ENV CLANG_PATH="/llvm/bin/clang" ENV GCC_PATH=aarch64-linux-gnu-gcc ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ - CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \ + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max,sve512=on -L /usr/aarch64-linux-gnu" \ OBJDUMP=aarch64-linux-gnu-objdump diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index e87a9cf703..c0a8130196 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -29,12 +29,21 @@ impl SupportedArchitecture for Arm { #include #include #include +#ifdef __ARM_FEATURE_SVE +#include +#endif "#; const RUST_PRELUDE: &str = RUST_PRELUDE; fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { // GCC uses an extra `-` in the arch name + let big_endian = cli_options.target.starts_with("aarch64_be"); + let a32 = cli_options.target.starts_with("armv7"); match cli_options.cc_arg_style { + CcArgStyle::Clang if !a32 && !big_endian => vec![ + "-march=armv8.6a+crypto+crc+dotprod+fp16+sve2-aes+sve2-sm4+sve2-sha3+sve2-bitperm+\ + f32mm+f64mm+sve2p1", + ], CcArgStyle::Clang => vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"], // SVE tests aren't run under GCC so there are no target features added for SVE CcArgStyle::Gcc => vec!["-march=armv8.6-a+crypto+crc+dotprod+fp16+sha3+sm4"], @@ -140,6 +149,7 @@ const RUST_PRELUDE: &str = r#" #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_aarch64_sve))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] #![feature(stdarch_neon_f16)] From e5edbe782d68d2e1e3a08d923dd21f68976c01ce Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 05/15] intrinsic-test: `bool` test values Some SVE intrinsics take booleans as arguments, so there is a need to support generating a test value array for booleans. --- crates/intrinsic-test/src/common/values.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crates/intrinsic-test/src/common/values.rs b/crates/intrinsic-test/src/common/values.rs index 8c549346ce..c61e2938a0 100644 --- a/crates/intrinsic-test/src/common/values.rs +++ b/crates/intrinsic-test/src/common/values.rs @@ -50,7 +50,15 @@ pub fn test_values_array_name(ty: &T) -> String { /// which is then printed as a hex value in the generated code (and if identified as a negative /// value, with the appropriate minus and corrected hex pattern). Calls to `fN::from_bits` are /// generated for floats. +/// +/// An exception to the above is when `ty` is a boolean, where this function returns +/// `[true, false]` - as there are only ever two values for a boolean. This only works because the +/// generated accesses to the test value array is always modulo the length of the test value array. pub fn test_values_array(ty: &IntrinsicType) -> String { + if ty.kind() == TypeKind::Bool { + return "[true, false]".to_string(); + } + let (bit_len, kind) = match ty { IntrinsicType { kind: TypeKind::Float, @@ -105,7 +113,15 @@ pub fn test_values_array(ty: &IntrinsicType) -> String { /// /// For scalable vectors (only SVE is currently supported), assume that the length of the vector is /// the maximum supported by the architecture. +/// +/// An exception to the above is when `ty` is a boolean, where this function returns two - as +/// there are only ever two values for a boolean. This only works because the generated accesses to +/// the test value array is always modulo this length. pub fn test_values_array_length(ty: &IntrinsicType) -> u32 { + if ty.kind() == TypeKind::Bool { + return 2; + } + let IntrinsicType { simd_len, vec_len, .. } = ty; From f9dff7de4928b6e660278a8e2817903966c1069c Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 06/15] intrinsic-test: values for enum-typed constraints Constraints that correspond to enum types - such as `svpattern` and `svprfop` - need to be converted to the enum type in order to be used in a generic instantiation - so introduce a const function for both types that provides this mapping. --- crates/intrinsic-test/src/arm/mod.rs | 43 ++++++++++++++++++++ crates/intrinsic-test/src/common/gen_rust.rs | 15 ++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index c0a8130196..49dd26010f 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -158,4 +158,47 @@ use core_arch::arch::aarch64::*; #[cfg(target_arch = "arm")] use core_arch::arch::arm::*; + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +const fn svpattern_from_i32(value: i32) -> svpattern { + match value { + 0 => svpattern::SV_POW2, + 1 => svpattern::SV_VL1, + 2 => svpattern::SV_VL2, + 3 => svpattern::SV_VL3, + 4 => svpattern::SV_VL4, + 5 => svpattern::SV_VL5, + 6 => svpattern::SV_VL6, + 7 => svpattern::SV_VL7, + 8 => svpattern::SV_VL8, + 9 => svpattern::SV_VL16, + 10 => svpattern::SV_VL32, + 11 => svpattern::SV_VL64, + 12 => svpattern::SV_VL128, + 13 => svpattern::SV_VL256, + 29 => svpattern::SV_MUL4, + 30 => svpattern::SV_MUL3, + 31 => svpattern::SV_ALL, + _ => unreachable!(), + } +} + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +const fn svprfop_from_i32(value: i32) -> svprfop { + match value { + 0 => svprfop::SV_PLDL1KEEP, + 1 => svprfop::SV_PLDL1STRM, + 2 => svprfop::SV_PLDL2KEEP, + 3 => svprfop::SV_PLDL2STRM, + 4 => svprfop::SV_PLDL3KEEP, + 5 => svprfop::SV_PLDL3STRM, + 8 => svprfop::SV_PSTL1KEEP, + 9 => svprfop::SV_PSTL1STRM, + 10 => svprfop::SV_PSTL2KEEP, + 11 => svprfop::SV_PSTL2STRM, + 12 => svprfop::SV_PSTL3KEEP, + 13 => svprfop::SV_PSTL3STRM, + _ => unreachable!(), + } +} "#; diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 72fabaaaf0..86fec4622b 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -5,6 +5,7 @@ use itertools::Itertools; use super::intrinsic_helpers::TypeDefinition; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; +use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::{test_values_array_name, test_values_array_static}; use crate::common::{PASSES, SupportedArchitecture}; @@ -195,6 +196,7 @@ for (id, rust, c) in specializations {{ "(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)" )) } else { + let constraint_args = intrinsic.arguments.iter().filter(|a| a.has_constraint()); fmt(&format_args!( r#" ( @@ -203,7 +205,18 @@ for (id, rust, c) in specializations {{ {intrinsic_name}_wrapper_{c_const_args} as unsafe extern "C" {c_coerce} ) "#, - const_args = imm_values.iter().join(","), + const_args = imm_values + .iter() + .zip(constraint_args) + .map(|(imm_val, arg)| { + match arg.ty.kind() { + TypeKind::SvPattern | TypeKind::SvPrefetchOp => { + format!("{{ {}_from_i32({imm_val}) }}", arg.ty.kind()) + } + _ => imm_val.to_string(), + } + }) + .join(","), c_const_args = imm_values.iter().join("_"), )) } From 94faefd59072c8765c1909419a782cb99cfaa693 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 07/15] intrinsic-test: no test values for `svbool_t` Predicate arguments of type `svbool_t` do not need test value arrays to be generated as the same enable-all-lanes predicate will be passed to all invocations of the intrinsic under test. There is no `svld1` equivalent for `svbool_t` that could be used even if there were test values to use. --- crates/intrinsic-test/src/common/argument.rs | 10 +++++++++- crates/intrinsic-test/src/common/gen_rust.rs | 7 ++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index 4d38bce327..e1651c48a1 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -1,7 +1,7 @@ use itertools::Itertools; use crate::common::SupportedArchitecture; -use crate::common::intrinsic_helpers::TypeKind; +use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; use crate::common::values::test_values_array_name; use super::PASSES; @@ -54,6 +54,11 @@ where self.constraint.is_some() } + /// Is this argument of type `svbool_t` (or otherwise a scalable bool)? + pub fn is_scalable_bool(&self) -> bool { + self.ty.kind == TypeKind::Bool && self.ty.num_lanes() == SimdLen::Scalable + } + /// Should this argument be passed by reference in C wrapper function declarations? /// /// SIMD types and `f16` are currently passed by reference. @@ -176,6 +181,9 @@ where pub fn load_values_rust(&self) -> String { self.iter() .filter(|&arg| !arg.has_constraint()) + // FIXME(davidtwco): Need test values for `svbool_t` when the argument is *not* a + // predicate. + .filter(|&arg| !arg.is_scalable_bool()) .enumerate() .map(|(idx, arg)| { if arg.is_simd() { diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 86fec4622b..127d3cea82 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -127,7 +127,12 @@ pub fn write_lib_rs( for intrinsic in intrinsics { for arg in &intrinsic.arguments.args { - if !arg.has_constraint() { + // Skip arguments with constraints as these correspond to generic instantiatons, and + // arguments of scalable bool types as the same predicate is used for all intrinsics + // under test. + // FIXME(davidtwco): Need test values for `svbool_t` when the argument is *not* a + // predicate. + if !arg.has_constraint() && !arg.is_scalable_bool() { let name = test_values_array_name(&arg.ty); if seen.insert(name) { From d6909a4f94d7306d801f34938c0df6fcf0f48d8d Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 08/15] intrinsic-test: introduce `predicate_function` Support defining a local variable containing the predicate that will be used with all subsequent scalable vector intrinsics. --- crates/intrinsic-test/src/arm/mod.rs | 4 ++++ crates/intrinsic-test/src/common/gen_rust.rs | 11 ++++++++++- crates/intrinsic-test/src/common/intrinsic.rs | 11 ++++++++++- crates/intrinsic-test/src/common/mod.rs | 9 +++++++++ crates/intrinsic-test/src/x86/mod.rs | 4 ++++ 5 files changed, 37 insertions(+), 2 deletions(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 49dd26010f..c2ae1ad0d2 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -138,6 +138,10 @@ impl SupportedArchitecture for Arm { Self(intrinsics) } + + fn predicate_function(_: u32) -> String { + todo!("implemented in a later commit") + } } const RUST_PRELUDE: &str = r#" diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 127d3cea82..ec784cca43 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -7,7 +7,7 @@ use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::{test_values_array_name, test_values_array_static}; -use crate::common::{PASSES, SupportedArchitecture}; +use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; /// Rust definitions that are included verbatim in the generated source. In particular, defines /// a wrapper around float types that defines `NaN`s to be equal reflexively to enable @@ -181,6 +181,7 @@ let specializations = [{specializations}]; for (id, rust, c) in specializations {{ for i in 0..{PASSES} {{ unsafe {{ + {predicate} {loaded_args} let __rust_return_value = rust({rust_args}); @@ -229,6 +230,14 @@ for (id, rust, c) in specializations {{ loaded_args = intrinsic.arguments.load_values_rust(), rust_args = intrinsic.arguments.as_call_param_rust(), c_args = intrinsic.arguments.as_c_call_param_rust(), + predicate = if intrinsic.has_scalable_argument_or_result() { + format!( + "let {PREDICATE_LOCAL} = {pred};", + pred = A::predicate_function(intrinsic.results.inner_size()), + ) + } else { + "".to_string() + }, comparison = intrinsic.results.comparison_function(), ) } diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index f1e5e4dffa..d5d903d941 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,5 +1,5 @@ use super::argument::ArgumentList; -use crate::common::SupportedArchitecture; +use crate::common::{SupportedArchitecture, intrinsic_helpers::SimdLen}; use itertools::Itertools; /// An intrinsic @@ -36,4 +36,13 @@ impl Intrinsic { .map(|constraint| constraint.into_iter()) .multi_cartesian_product() } + + /// Returns `true` if this intrinsic has any argument or result types that are scalable vectors + pub fn has_scalable_argument_or_result(&self) -> bool { + self.results.num_lanes() == SimdLen::Scalable + || self + .arguments + .iter() + .any(|a| a.ty.num_lanes() == SimdLen::Scalable) + } } diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 73daabbd66..018b3278c8 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -23,6 +23,12 @@ mod gen_c; mod gen_rust; mod values; +/// Many scalable intrinsics take a predicate argument and for the purposes of intrinsic testing, +/// a predicate that enables all lanes is used for all of these intrinsic calls (i.e. loading inputs, +/// result comparison, and the intrinsic under test). This constant defines the name of the local +/// variable that contains that predicate. +pub const PREDICATE_LOCAL: &'static str = "__pred"; + // The number of times each intrinsic will be called - influences the generation of the // test arrays to minimise repeated testing of the same test values. pub(crate) const PASSES: u32 = 20; @@ -100,6 +106,9 @@ pub trait SupportedArchitecture: Sized { .collect::>() .unwrap(); } + + /// Return a call to a intrinsic to generate a predicate, if reqd. + fn predicate_function(_: u32) -> String; } pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index ce49550329..36f4fee437 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -99,6 +99,10 @@ impl SupportedArchitecture for X86 { Self { intrinsics } } + + fn predicate_function(_: u32) -> String { + unimplemented!("no scalable vectors on x86") + } } const RUST_PRELUDE: &str = r#" From 0f5958504d5cb7a2a47a2c6cf62f83b4acb7a9b9 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 09/15] intrinsic-test: sve comparison and predicates Implementation of `get_comparison_function` and `get_predicate_function` for SVE which uses the relevant SVE intrinsics. --- crates/intrinsic-test/src/arm/mod.rs | 4 +- crates/intrinsic-test/src/arm/types.rs | 58 +++++++++++++++++++- crates/intrinsic-test/src/common/argument.rs | 19 ++++++- 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index c2ae1ad0d2..e8977db6ba 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -139,8 +139,8 @@ impl SupportedArchitecture for Arm { Self(intrinsics) } - fn predicate_function(_: u32) -> String { - todo!("implemented in a later commit") + fn predicate_function(size: u32) -> String { + format!("svptrue_b{size}()") } } diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index bce83b2dc3..d19560231b 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -1,5 +1,9 @@ use super::intrinsic::ArmType; -use crate::common::intrinsic_helpers::{IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind}; +use crate::common::PREDICATE_LOCAL; +use crate::common::intrinsic_helpers::{ + IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind, default_fixed_vector_comparison, +}; +use itertools::Itertools; impl TypeDefinition for ArmType { /// Gets a string containing the typename for this type in C format. @@ -94,6 +98,58 @@ impl TypeDefinition for ArmType { todo!("load_function IntrinsicType: {self:#?}") } } + + fn comparison_function(&self) -> String { + match self.num_lanes() { + SimdLen::Scalable => { + // There isn't a `svcmpeq` for `svbool_t`, so do an XOR instead and test it is + // empty.. + if self.kind() == TypeKind::Bool { + return format!( + r#" +let eq = sveor_b_z({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); +assert!(!svptest_any({PREDICATE_LOCAL}, eq), "{{}}", id); + "#, + ); + } + + // Use `svcmpeq` to compare the return values of Rust and C invocations + match self.num_vectors() { + 1 => { + format!( + r#" +let eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); +assert!(svptest_any(__pred, eq), "{{}}", id); + "#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + ) + } + // For tuples of vectors, do multiple comparisons, each with a `svget` to + // extract the Nth vector. + n @ (2 | 3 | 4) => (0..n) + .format_with("\n", |i, fmt| { + fmt(&format_args!( + r#" +let eq = svcmpeq_{ty}{bl}( + {PREDICATE_LOCAL}, + svget{n}_{ty}{bl}::<{i}>(__rust_return_value), + svget{n}_{ty}{bl}::<{i}>(__c_return_value) +); +assert!(svptest_any(__pred, eq), "{{}}-{i_plus_one}/{n}", id); + "#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) + }) + .to_string(), + _ => unreachable!(), + } + } + SimdLen::Fixed(num_lanes) => default_fixed_vector_comparison(self, num_lanes), + } + } } impl ArmType { diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index e1651c48a1..e21b2d96b5 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -4,9 +4,9 @@ use crate::common::SupportedArchitecture; use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; use crate::common::values::test_values_array_name; -use super::PASSES; use super::constraint::Constraint; use super::intrinsic_helpers::TypeDefinition; +use super::{PASSES, PREDICATE_LOCAL}; /// An argument for the intrinsic. #[derive(Debug, PartialEq, Clone)] @@ -38,8 +38,15 @@ where self.ty.c_type() } + /// Generates local variable name for the value passed to this argument pub fn generate_name(&self) -> String { - format!("{}_val", self.name) + // The same predicate is used for scalable intrinsic invocations + // FIXME(davidtwco): Only for predicate arguments, not all boolean arguments + if self.is_scalable_bool() { + format!("{PREDICATE_LOCAL}") + } else { + format!("{}_val", self.name) + } } pub fn is_simd(&self) -> bool { @@ -187,8 +194,14 @@ where .enumerate() .map(|(idx, arg)| { if arg.is_simd() { + // If this load is of a scalable vector, then prepend an additional argument + // containing the predicate for the load. + let pred_arg = match arg.ty.num_lanes() { + SimdLen::Scalable => format!("{PREDICATE_LOCAL},"), + SimdLen::Fixed(..) => "".to_string(), + }; format!( - "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);", + "let {name} = {load}({pred_arg}{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", name = arg.generate_name(), vals_name = test_values_array_name(&arg.ty), load = arg.ty.load_function(), From be5cbea35aee87551261bee7d5d33682564fd528 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 4 Jun 2026 15:08:50 +0000 Subject: [PATCH 10/15] intrinsic-test: test values for `svbool_t` Instead of assuming that any scalable boolean argument is a predicate, handle predicates specifically and generate test values for `svbool_t` values. --- crates/intrinsic-test/src/arm/json_parser.rs | 10 ++++- crates/intrinsic-test/src/arm/mod.rs | 31 ++++++++++++++- crates/intrinsic-test/src/arm/types.rs | 10 +++++ crates/intrinsic-test/src/common/argument.rs | 38 +++++++------------ crates/intrinsic-test/src/common/gen_rust.rs | 6 +-- .../src/common/intrinsic_helpers.rs | 6 +++ crates/intrinsic-test/src/common/mod.rs | 14 ++++++- crates/intrinsic-test/src/common/values.rs | 27 +++++++------ crates/intrinsic-test/src/x86/xml_parser.rs | 9 ++++- 9 files changed, 102 insertions(+), 49 deletions(-) diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index 26f861ca64..7d0cdf7b28 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -121,8 +121,14 @@ fn json_to_intrinsic( } }); - let mut arg = - Argument::::new(i, String::from(arg_name), ArmType(arg_ty), constraint); + let is_predicate = arg_name == "pg"; + let mut arg = Argument::::new( + i, + String::from(arg_name), + ArmType(arg_ty), + constraint, + is_predicate, + ); // The JSON doesn't list immediates as const let IntrinsicType { diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index e8977db6ba..4cd76ae647 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -2,10 +2,12 @@ mod intrinsic; mod json_parser; mod types; -use crate::common::SupportedArchitecture; +use crate::common::argument::Argument; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; -use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; +use crate::common::intrinsic_helpers::{SimdLen, TypeDefinition, TypeKind}; +use crate::common::values::test_values_array_name; +use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; use intrinsic::ArmType; use json_parser::get_neon_intrinsics; @@ -142,6 +144,31 @@ impl SupportedArchitecture for Arm { fn predicate_function(size: u32) -> String { format!("svptrue_b{size}()") } + + fn load_call(arg: &Argument, idx: usize) -> String { + let name = arg.generate_name(); + let load = arg.ty.load_function(); + let ptr = format!( + "{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _", + vals_name = test_values_array_name(&arg.ty) + ); + + match arg.ty.num_lanes() { + // If the load is of a `svbool_t`, then we load a `svint8_t` and + SimdLen::Scalable if matches!(arg.ty.kind(), TypeKind::Bool) => { + format!( + r#" +let {name} = {load}({PREDICATE_LOCAL}, {ptr}); +let {name} = svcmpne_n_s8({PREDICATE_LOCAL}, {name}, 0); + "# + ) + } + // If this load is of a scalable vector, then prepend an additional argument + // containing the predicate for the load. + SimdLen::Scalable => format!("let {name} = {load}({PREDICATE_LOCAL}, {ptr});"), + SimdLen::Fixed(..) => format!("let {name} = {load}({ptr});"), + } + } } const RUST_PRELUDE: &str = r#" diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index d19560231b..3453ac4c95 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -74,6 +74,16 @@ impl TypeDefinition for ArmType { } } + fn rust_scalar_type_for_test_value_array(&self) -> String { + if self.kind() == TypeKind::Bool && self.num_lanes() == SimdLen::Scalable { + let mut ty = self.clone(); + ty.kind = TypeKind::Int(Sign::Signed); + ty.rust_scalar_type() + } else { + self.rust_scalar_type() + } + } + /// Determines the load function for this type. fn load_function(&self) -> String { if let Some(bl) = self.bit_len { diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index e21b2d96b5..10d9224183 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -1,7 +1,7 @@ use itertools::Itertools; use crate::common::SupportedArchitecture; -use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; +use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::test_values_array_name; use super::constraint::Constraint; @@ -19,18 +19,27 @@ pub struct Argument { pub ty: A::Type, /// Any constraints that are on this argument pub constraint: Option, + /// Is the argument a predicate for a scalable intrinsic? + pub is_predicate: bool, } impl Argument where A: SupportedArchitecture, { - pub fn new(pos: usize, name: String, ty: A::Type, constraint: Option) -> Self { + pub fn new( + pos: usize, + name: String, + ty: A::Type, + constraint: Option, + is_predicate: bool, + ) -> Self { Argument { pos, name, ty, constraint, + is_predicate, } } @@ -41,8 +50,7 @@ where /// Generates local variable name for the value passed to this argument pub fn generate_name(&self) -> String { // The same predicate is used for scalable intrinsic invocations - // FIXME(davidtwco): Only for predicate arguments, not all boolean arguments - if self.is_scalable_bool() { + if self.is_predicate { format!("{PREDICATE_LOCAL}") } else { format!("{}_val", self.name) @@ -61,11 +69,6 @@ where self.constraint.is_some() } - /// Is this argument of type `svbool_t` (or otherwise a scalable bool)? - pub fn is_scalable_bool(&self) -> bool { - self.ty.kind == TypeKind::Bool && self.ty.num_lanes() == SimdLen::Scalable - } - /// Should this argument be passed by reference in C wrapper function declarations? /// /// SIMD types and `f16` are currently passed by reference. @@ -188,24 +191,11 @@ where pub fn load_values_rust(&self) -> String { self.iter() .filter(|&arg| !arg.has_constraint()) - // FIXME(davidtwco): Need test values for `svbool_t` when the argument is *not* a - // predicate. - .filter(|&arg| !arg.is_scalable_bool()) + .filter(|&arg| !arg.is_predicate) .enumerate() .map(|(idx, arg)| { if arg.is_simd() { - // If this load is of a scalable vector, then prepend an additional argument - // containing the predicate for the load. - let pred_arg = match arg.ty.num_lanes() { - SimdLen::Scalable => format!("{PREDICATE_LOCAL},"), - SimdLen::Fixed(..) => "".to_string(), - }; - format!( - "let {name} = {load}({pred_arg}{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", - name = arg.generate_name(), - vals_name = test_values_array_name(&arg.ty), - load = arg.ty.load_function(), - ) + A::load_call(arg, idx) } else { format!( "let {name} = {vals_name}[(i+{idx}) % {PASSES}];", diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index ec784cca43..44128d43b9 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -128,11 +128,9 @@ pub fn write_lib_rs( for intrinsic in intrinsics { for arg in &intrinsic.arguments.args { // Skip arguments with constraints as these correspond to generic instantiatons, and - // arguments of scalable bool types as the same predicate is used for all intrinsics + // predicates for scalable intrinsics as the same predicate is used for all intrinsics // under test. - // FIXME(davidtwco): Need test values for `svbool_t` when the argument is *not* a - // predicate. - if !arg.has_constraint() && !arg.is_scalable_bool() { + if !arg.has_constraint() && !arg.is_predicate { let name = test_values_array_name(&arg.ty); if seen.insert(name) { diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index 7a7ad8f61f..bc47a90d3f 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -230,6 +230,12 @@ pub trait TypeDefinition: Clone + DerefMut { ty.vec_len = None; ty.rust_type() } + + /// Gets a string containing the name of the scalar type corresponding to this type that should + /// be used as the element type for the test value array. + fn rust_scalar_type_for_test_value_array(&self) -> String { + self.rust_scalar_type() + } } /// Returns the default comparison between results of an intrinsic - casting the vectors to arrays diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 018b3278c8..46b6bc4102 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -5,12 +5,14 @@ use rayon::prelude::*; use cli::ProcessedCli; use crate::common::{ + argument::Argument, gen_c::write_wrapper_c, gen_rust::{ run_rustfmt, write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs, }, intrinsic::Intrinsic, intrinsic_helpers::TypeDefinition, + values::test_values_array_name, }; pub mod argument; @@ -18,10 +20,10 @@ pub mod cli; pub mod constraint; pub mod intrinsic; pub mod intrinsic_helpers; +pub mod values; mod gen_c; mod gen_rust; -mod values; /// Many scalable intrinsics take a predicate argument and for the purposes of intrinsic testing, /// a predicate that enables all lanes is used for all of these intrinsic calls (i.e. loading inputs, @@ -109,6 +111,16 @@ pub trait SupportedArchitecture: Sized { /// Return a call to a intrinsic to generate a predicate, if reqd. fn predicate_function(_: u32) -> String; + + /// Return a call loading `arg`. Can assume that `arg.is_simd()` holds. + fn load_call(arg: &Argument, idx: usize) -> String { + format!( + "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", + name = arg.generate_name(), + vals_name = test_values_array_name(&arg.ty), + load = arg.ty.load_function(), + ) + } } pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { diff --git a/crates/intrinsic-test/src/common/values.rs b/crates/intrinsic-test/src/common/values.rs index c61e2938a0..d1f333fd19 100644 --- a/crates/intrinsic-test/src/common/values.rs +++ b/crates/intrinsic-test/src/common/values.rs @@ -26,7 +26,7 @@ pub fn test_values_array_static( w, "static {name}: [{ty}; {load_size}] = {values};", name = test_values_array_name(ty), - ty = ty.rust_scalar_type(), + ty = ty.rust_scalar_type_for_test_value_array(), load_size = test_values_array_length(&ty), values = test_values_array(&ty) ) @@ -55,11 +55,11 @@ pub fn test_values_array_name(ty: &T) -> String { /// `[true, false]` - as there are only ever two values for a boolean. This only works because the /// generated accesses to the test value array is always modulo the length of the test value array. pub fn test_values_array(ty: &IntrinsicType) -> String { - if ty.kind() == TypeKind::Bool { - return "[true, false]".to_string(); - } - let (bit_len, kind) = match ty { + IntrinsicType { + kind: TypeKind::Bool, + .. + } => (1, TypeKind::Bool), IntrinsicType { kind: TypeKind::Float, bit_len: Some(bit_len), @@ -83,6 +83,9 @@ pub fn test_values_array(ty: &IntrinsicType) -> String { let src = bit_pattern_for_test_values_array(bit_len, i); assert!(src == 0 || src.ilog2() < bit_len); match kind { + TypeKind::Bool if ty.num_lanes() != SimdLen::Scalable => { + fmt(&format_args!("{}", if src == 1 { true } else { false })) + } TypeKind::Float => fmt(&format_args!("f{bit_len}::from_bits({src:#x})")), TypeKind::Vector | TypeKind::Int(Sign::Signed) if (src >> (bit_len - 1)) != 0 => { // `src` is a two's complement representation of a negative value. @@ -113,15 +116,7 @@ pub fn test_values_array(ty: &IntrinsicType) -> String { /// /// For scalable vectors (only SVE is currently supported), assume that the length of the vector is /// the maximum supported by the architecture. -/// -/// An exception to the above is when `ty` is a boolean, where this function returns two - as -/// there are only ever two values for a boolean. This only works because the generated accesses to -/// the test value array is always modulo this length. pub fn test_values_array_length(ty: &IntrinsicType) -> u32 { - if ty.kind() == TypeKind::Bool { - return 2; - } - let IntrinsicType { simd_len, vec_len, .. } = ty; @@ -150,7 +145,8 @@ pub fn test_values_array_length(ty: &IntrinsicType) -> u32 { pub fn bit_pattern_for_test_values_array(bits: u32, index: u32) -> u64 { let index = index as usize; match bits { - bits @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8) => BIT_PATTERNS_8[index % (1 << bits)].into(), + 1 => BIT_PATTERNS_1[index % BIT_PATTERNS_1.len()].into(), + bits @ (2 | 3 | 4 | 5 | 6 | 7 | 8) => BIT_PATTERNS_8[index % (1 << bits)].into(), 16 => BIT_PATTERNS_16[index % BIT_PATTERNS_16.len()].into(), 32 => BIT_PATTERNS_32[index % BIT_PATTERNS_32.len()].into(), 64 => BIT_PATTERNS_64[index % BIT_PATTERNS_64.len()], @@ -158,6 +154,9 @@ pub fn bit_pattern_for_test_values_array(bits: u32, index: u32) -> u64 { } } +// Contains every possible 1-bit value in order +pub const BIT_PATTERNS_1: &[u8] = &[0x0, 0x1]; + // Contains every possible 8-bit value in order pub const BIT_PATTERNS_8: &[u8] = &[ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, diff --git a/crates/intrinsic-test/src/x86/xml_parser.rs b/crates/intrinsic-test/src/x86/xml_parser.rs index 52f6da786e..2296dffb64 100644 --- a/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/crates/intrinsic-test/src/x86/xml_parser.rs @@ -99,8 +99,13 @@ fn xml_to_intrinsic(intr: XMLIntrinsic) -> Result, Box::new(i, param.var_name.clone(), ty.unwrap(), constraint); - Some(arg) + Some(Argument::::new( + i, + param.var_name.clone(), + ty.unwrap(), + constraint, + false, + )) } }); From 714507c45002df8ee283fd182c0fc29ecb5b42b1 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 11 Jun 2026 10:07:47 +0000 Subject: [PATCH 11/15] intrinsic-test: enable non-baseline target features SVE isn't a baseline target feature for `aarch64-unknown-linux-gnu` but should be enabled when running tests. --- ci/intrinsic-test.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index f4378b6b92..3966aa88d8 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -45,21 +45,25 @@ case ${TARGET} in aarch64_be*) export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" ARCH=aarch64_be + RUNTIME_RUSTFLAGS= ;; aarch64*) export CFLAGS="-I/usr/aarch64-linux-gnu/include/" ARCH=aarch64 + RUNTIME_RUSTFLAGS=-Ctarget-feature=+sve,+sve2 ;; armv7*) export CFLAGS="-I/usr/arm-linux-gnueabihf/include/" ARCH=arm + RUNTIME_RUSTFLAGS= ;; x86_64*) export CFLAGS="-I/usr/include/x86_64-linux-gnu/" ARCH=x86 + RUNTIME_RUSTFLAGS= ;; *) ;; @@ -86,5 +90,5 @@ case "${TARGET}" in ;; esac -cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}" \ - --tests --no-fail-fast "$@" +RUSTFLAGS="${RUNTIME_RUSTFLAGS}" cargo test --manifest-path=rust_programs/Cargo.toml \ + --target "${TARGET}" --profile "${PROFILE}" --tests --no-fail-fast "$@" From 4038db1e2a6dc99a3b9cf47ef24535c84ba30549 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 11 Jun 2026 13:31:11 +0000 Subject: [PATCH 12/15] intrinsic-test: SVE float comparison Like with non-SVE test generation, comparison of float results in scalable vectors need special-handling of comparisons. --- crates/intrinsic-test/src/arm/types.rs | 108 +++++++++++++++---------- 1 file changed, 65 insertions(+), 43 deletions(-) diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 3453ac4c95..2e628aff92 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -110,55 +110,77 @@ impl TypeDefinition for ArmType { } fn comparison_function(&self) -> String { - match self.num_lanes() { - SimdLen::Scalable => { - // There isn't a `svcmpeq` for `svbool_t`, so do an XOR instead and test it is - // empty.. - if self.kind() == TypeKind::Bool { - return format!( - r#" -let eq = sveor_b_z({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); -assert!(!svptest_any({PREDICATE_LOCAL}, eq), "{{}}", id); - "#, - ); - } + if let SimdLen::Fixed(num_lanes) = self.num_lanes() { + return default_fixed_vector_comparison(self, num_lanes); + } + + if self.kind() == TypeKind::Bool { + // There isn't a `svcmpeq` for `svbool_t` and there aren't `svboolxN_t` types, so just + // do an XOR and test it is empty. + return format!( + r#" +let __eq = sveor_b_z({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); +assert!(!svptest_any({PREDICATE_LOCAL}, __eq), "{{}}", id); + "# + ); + } - // Use `svcmpeq` to compare the return values of Rust and C invocations - match self.num_vectors() { - 1 => { - format!( + // Returns `of` when `num_vectors == 1` otherwise returns the appropriate `svget` invocation + // for `of`. + let get = |num_vectors: u32, idx: u32, from: &'static str| -> String { + if num_vectors == 1 { + return from.to_string(); + } + + format!( + "svget{num_vectors}_{ty}{bl}::<{idx}>({from})", + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + ) + }; + + let n = self.num_vectors(); + (0..n) + .format_with("\n", |i, fmt| { + match self.kind() { + TypeKind::Float | TypeKind::BFloat => { + // Floats need special handling because `NaN != NaN` normally - this + // effectively does `(rust == c) || (isnan(rust) && isnan(c))` + fmt(&format_args!( r#" -let eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); -assert!(svptest_any(__pred, eq), "{{}}", id); - "#, +let __rust_eq_return_value = {rust_return_value}; +let __c_eq_return_value = {c_return_value}; +let __eq_sans_nan = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __c_eq_return_value); +let __rust_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __rust_eq_return_value); +let __c_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __c_eq_return_value, __c_eq_return_value); +let __both_nan = svand_b_z({PREDICATE_LOCAL}, __rust_nan, __c_nan); +let __eq = svorr_b_z({PREDICATE_LOCAL}, __eq_sans_nan, __both_nan); +assert!(svptest_any(__pred, __eq), "{{}}-{i_plus_one}/{n}", id); +"#, ty = self.rust_intrinsic_name_prefix(), bl = self.inner_size(), - ) + rust_return_value = get(n, i, "__rust_return_value"), + c_return_value = get(n, i, "__c_return_value"), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) + } + _ => { + // Most types can just use `svcmpeq` + fmt(&format_args!( + r#" +let __eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, {rust_return_value}, {c_return_value}); +assert!(svptest_any(__pred, __eq), "{{}}-{i_plus_one}/{n}", id); +"#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + rust_return_value = get(n, i, "__rust_return_value"), + c_return_value = get(n, i, "__c_return_value"), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) } - // For tuples of vectors, do multiple comparisons, each with a `svget` to - // extract the Nth vector. - n @ (2 | 3 | 4) => (0..n) - .format_with("\n", |i, fmt| { - fmt(&format_args!( - r#" -let eq = svcmpeq_{ty}{bl}( - {PREDICATE_LOCAL}, - svget{n}_{ty}{bl}::<{i}>(__rust_return_value), - svget{n}_{ty}{bl}::<{i}>(__c_return_value) -); -assert!(svptest_any(__pred, eq), "{{}}-{i_plus_one}/{n}", id); - "#, - ty = self.rust_intrinsic_name_prefix(), - bl = self.inner_size(), - i_plus_one = i + 1, // so that the output is "1/2" and "2/2" - )) - }) - .to_string(), - _ => unreachable!(), } - } - SimdLen::Fixed(num_lanes) => default_fixed_vector_comparison(self, num_lanes), - } + }) + .to_string() } } From 7023f3599f6521a5d836de6785bfa1f5304de58f Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 13/15] intrinsic-test: stop skipping SVE intrinsics Enables generation of tests for SVE intrinsics leveraging the changes from the previous commits. --- crates/intrinsic-test/src/arm/json_parser.rs | 12 ++---------- crates/intrinsic-test/src/arm/mod.rs | 4 ++-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index 7d0cdf7b28..013c20f2db 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -58,22 +58,14 @@ struct JsonIntrinsic { _instructions: Option>>, } -pub fn get_neon_intrinsics( - filename: &Path, -) -> Result>, Box> { +pub fn get_intrinsics(filename: &Path) -> Result>, Box> { let file = std::fs::File::open(filename)?; let reader = std::io::BufReader::new(file); let json: Vec = serde_json::from_reader(reader).expect("Couldn't parse JSON"); let parsed = json .into_iter() - .filter_map(|intr| { - if intr.simd_isa == "Neon" { - Some(json_to_intrinsic(intr).expect("Couldn't parse JSON")) - } else { - None - } - }) + .map(|intr| json_to_intrinsic(intr).expect("Couldn't parse JSON")) .collect(); Ok(parsed) } diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 4cd76ae647..d035684ac2 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -9,7 +9,7 @@ use crate::common::intrinsic_helpers::{SimdLen, TypeDefinition, TypeKind}; use crate::common::values::test_values_array_name; use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; use intrinsic::ArmType; -use json_parser::get_neon_intrinsics; +use json_parser::get_intrinsics; #[derive(PartialEq)] pub struct Arm(Vec>); @@ -56,7 +56,7 @@ impl SupportedArchitecture for Arm { let big_endian = cli_options.target.starts_with("aarch64_be"); let a32 = cli_options.target.starts_with("armv7"); let mut intrinsics = - get_neon_intrinsics(&cli_options.filename).expect("Error parsing input file"); + get_intrinsics(&cli_options.filename).expect("Error parsing input file"); intrinsics.sort_by(|a, b| a.name.cmp(&b.name)); intrinsics.dedup(); From c04381b64cd4c7aeb62716a18d41d5f7f689f3c9 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 15 Jun 2026 10:20:39 +0000 Subject: [PATCH 14/15] intrinsic-test: print scalable vectors on test fail This makes it far easier to debug what's potentially gone wrong with an intrinsic test. --- crates/intrinsic-test/src/arm/mod.rs | 69 ++++++++++++++++++++++++++ crates/intrinsic-test/src/arm/types.rs | 16 ++++-- 2 files changed, 82 insertions(+), 3 deletions(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index d035684ac2..689fe60957 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -232,4 +232,73 @@ const fn svprfop_from_i32(value: i32) -> svprfop { _ => unreachable!(), } } + +macro_rules! debug_print_integral { + ($($name:ident => ($ty:ty, $svptrue_fn:ident, $svcnt_fn:ident, $svst_fn:ident)),*) => { + $( + #[inline] + #[target_feature(enable = "sve")] + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + pub fn $name(v: $ty) -> String { + unsafe { + let __pred = $svptrue_fn(); + let __num_elems = $svcnt_fn() as usize; + let mut __buf = std::vec::Vec::with_capacity(__num_elems); + $svst_fn(__pred, __buf.as_mut_ptr(), v); + __buf.set_len(__num_elems); + format!( + "[{}]", + __buf.iter().map(|el| el.to_string()).collect::>().join(", ") + ) + } + } + )* + } +} + +debug_print_integral! { + debug_print_f32 => (svfloat32_t, svptrue_b32, svcntw, svst1_f32), + debug_print_f64 => (svfloat64_t, svptrue_b64, svcntd, svst1_f64), + debug_print_s8 => (svint8_t, svptrue_b8, svcntb, svst1_s8), + debug_print_s16 => (svint16_t, svptrue_b16, svcnth, svst1_s16), + debug_print_s32 => (svint32_t, svptrue_b32, svcntw, svst1_s32), + debug_print_s64 => (svint64_t, svptrue_b64, svcntd, svst1_s64), + debug_print_u8 => (svuint8_t, svptrue_b8, svcntb, svst1_u8), + debug_print_u16 => (svuint16_t, svptrue_b16, svcnth, svst1_u16), + debug_print_u32 => (svuint32_t, svptrue_b32, svcntw, svst1_u32), + debug_print_u64 => (svuint64_t, svptrue_b64, svcntd, svst1_u64) +} + +macro_rules! debug_print_bool { + ($($name:ident => ($ty:ty, $svst_fn:ident, $svdup_fn:ident)),*) => { + $( + #[inline] + #[target_feature(enable = "sve")] + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] + pub fn $name(v: $ty) -> String { + unsafe { + let __num_elems = svcntb() as usize; + let mut __buf = std::vec::Vec::with_capacity(__num_elems); + $svst_fn(v, __buf.as_mut_ptr(), $svdup_fn(1)); + __buf.set_len(__num_elems); + format!( + "[{}]", + __buf.iter() + .map(|el| *el == 1) + .map(|el| el.to_string()) + .collect::>() + .join(", ") + ) + } + } + )* + } +} + +debug_print_bool! { + debug_print_b8 => (svbool_t, svst1_u8, svdup_n_u8), + debug_print_b16 => (svbool_t, svst1_u16, svdup_n_u16), + debug_print_b32 => (svbool_t, svst1_u32, svdup_n_u32), + debug_print_b64 => (svbool_t, svst1_u64, svdup_n_u64) +} "#; diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 2e628aff92..7481bb4aeb 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -155,7 +155,11 @@ let __rust_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __r let __c_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __c_eq_return_value, __c_eq_return_value); let __both_nan = svand_b_z({PREDICATE_LOCAL}, __rust_nan, __c_nan); let __eq = svorr_b_z({PREDICATE_LOCAL}, __eq_sans_nan, __both_nan); -assert!(svptest_any(__pred, __eq), "{{}}-{i_plus_one}/{n}", id); +if !svptest_any(__pred, __eq) {{ + let __rust_pretty = debug_print_{ty}{bl}(__rust_eq_return_value); + let __c_pretty = debug_print_{ty}{bl}(__c_eq_return_value); + panic!("{{}}-{i_plus_one}/{n}\nRust: {{__rust_pretty}}\nC: {{__c_pretty}}", id); +}} "#, ty = self.rust_intrinsic_name_prefix(), bl = self.inner_size(), @@ -168,8 +172,14 @@ assert!(svptest_any(__pred, __eq), "{{}}-{i_plus_one}/{n}", id); // Most types can just use `svcmpeq` fmt(&format_args!( r#" -let __eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, {rust_return_value}, {c_return_value}); -assert!(svptest_any(__pred, __eq), "{{}}-{i_plus_one}/{n}", id); +let __rust_eq_return_value = {rust_return_value}; +let __c_eq_return_value = {c_return_value}; +let __eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __c_eq_return_value); +if !svptest_any(__pred, __eq) {{ + let __rust_pretty = debug_print_{ty}{bl}(__rust_eq_return_value); + let __c_pretty = debug_print_{ty}{bl}(__c_eq_return_value); + panic!("{{}}-{i_plus_one}/{n}\nRust: {{__rust_pretty}}\nC: {{__c_pretty}}", id); +}} "#, ty = self.rust_intrinsic_name_prefix(), bl = self.inner_size(), From c89cacc759190c313eb90fcc9b890747343801aa Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 22 Jun 2026 12:46:05 +0000 Subject: [PATCH 15/15] core_arch: redefine `svtrn{1,2}` Same as previous redefinitions in stdarch#2163 - these were missed in that PR because the hardware being tested on was missing the hardware feature required for the instructions these use. --- crates/core_arch/src/aarch64/sve/generated.rs | 88 +++++++++---------- .../stdarch-gen-arm/spec/sve/aarch64.spec.yml | 18 ++-- 2 files changed, 56 insertions(+), 50 deletions(-) diff --git a/crates/core_arch/src/aarch64/sve/generated.rs b/crates/core_arch/src/aarch64/sve/generated.rs index ac3070918a..4e5547b1d0 100644 --- a/crates/core_arch/src/aarch64/sve/generated.rs +++ b/crates/core_arch/src/aarch64/sve/generated.rs @@ -42211,19 +42211,6 @@ pub fn svtmad_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloa unsafe { _svtmad_f64(op1, op2, IMM3) } } #[doc = "Interleave even elements from two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(trn1))] -pub fn svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv16i1")] - fn _svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svtrn1_b8(op1, op2) } -} -#[doc = "Interleave even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -42231,10 +42218,10 @@ pub fn svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn1))] pub fn svtrn1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv8i1")] - fn _svtrn1_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.b16")] + fn _svtrn1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn1_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn1_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b32)"] @@ -42244,10 +42231,10 @@ pub fn svtrn1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn1))] pub fn svtrn1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv4i1")] - fn _svtrn1_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.b32")] + fn _svtrn1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn1_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn1_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b64)"] @@ -42257,10 +42244,10 @@ pub fn svtrn1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn1))] pub fn svtrn1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv2i1")] - fn _svtrn1_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.b64")] + fn _svtrn1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn1_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn1_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_f32])"] @@ -42376,6 +42363,19 @@ pub fn svtrn1_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svtrn1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svtrn1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Interleave even elements from two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(trn1))] +pub fn svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv16i1")] + fn _svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svtrn1_b8(op1, op2) } +} #[doc = "Interleave even quadwords from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_f32])"] #[inline] @@ -42491,19 +42491,6 @@ pub fn svtrn1q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svtrn1q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } #[doc = "Interleave odd elements from two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(trn2))] -pub fn svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv16i1")] - fn _svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svtrn2_b8(op1, op2) } -} -#[doc = "Interleave odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -42511,10 +42498,10 @@ pub fn svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn2))] pub fn svtrn2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv8i1")] - fn _svtrn2_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.b16")] + fn _svtrn2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn2_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn2_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b32)"] @@ -42524,10 +42511,10 @@ pub fn svtrn2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn2))] pub fn svtrn2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv4i1")] - fn _svtrn2_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.b32")] + fn _svtrn2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn2_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn2_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b64)"] @@ -42537,10 +42524,10 @@ pub fn svtrn2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(trn2))] pub fn svtrn2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv2i1")] - fn _svtrn2_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.b64")] + fn _svtrn2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svtrn2_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svtrn2_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_f32])"] @@ -42656,6 +42643,19 @@ pub fn svtrn2_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svtrn2_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svtrn2_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Interleave odd elements from two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(trn2))] +pub fn svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv16i1")] + fn _svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svtrn2_b8(op1, op2) } +} #[doc = "Interleave odd quadwords from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_f32])"] #[inline] diff --git a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml index 138d5ba311..0b2d17d3b1 100644 --- a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml @@ -1157,7 +1157,7 @@ intrinsics: doc: Interleave even elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [trn1] compose: - LLVMLink: { name: "trn1.{sve_type}" } @@ -1167,10 +1167,13 @@ intrinsics: doc: Interleave even elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [trn1] compose: - - LLVMLink: { name: "trn1.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.trn1.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svtrn1q[_{type}] attr: [*sve-unstable] @@ -1188,7 +1191,7 @@ intrinsics: doc: Interleave odd elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [trn2] compose: - LLVMLink: { name: "trn2.{sve_type}" } @@ -1198,10 +1201,13 @@ intrinsics: doc: Interleave odd elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [trn2] compose: - - LLVMLink: { name: "trn2.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.trn2.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svtrn2q[_{type}] attr: [*sve-unstable]