Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/docker/aarch64-unknown-linux-gnu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ ENV CLANG_PATH="/llvm/bin/clang"
ENV GCC_PATH=aarch64-linux-gnu-gcc

ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max,sve512=on -L /usr/aarch64-linux-gnu" \
OBJDUMP=aarch64-linux-gnu-objdump
2 changes: 1 addition & 1 deletion ci/intrinsic-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,4 @@ case "${TARGET}" in
esac

cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}" \
--tests "$@"
--tests --no-fail-fast "$@"
61 changes: 59 additions & 2 deletions crates/intrinsic-test/src/arm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,21 @@ impl SupportedArchitecture for Arm {
#include <arm_acle.h>
#include <arm_fp16.h>
#include <arm_neon.h>
#ifdef __ARM_FEATURE_SVE
#include <arm_sve.h>
#endif
"#;
const RUST_PRELUDE: &str = RUST_PRELUDE;

fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> {
// GCC uses an extra `-` in the arch name
let big_endian = cli_options.target.starts_with("aarch64_be");
let a32 = cli_options.target.starts_with("armv7");
match cli_options.cc_arg_style {
CcArgStyle::Clang if !a32 && !big_endian => vec![
"-march=armv8.6a+crypto+crc+dotprod+fp16+sve2-aes+sve2-sm4+sve2-sha3+sve2-bitperm+\
f32mm+f64mm+sve2p1",
],
CcArgStyle::Clang => vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"],
// SVE tests aren't run under GCC so there are no target features added for SVE
CcArgStyle::Gcc => vec!["-march=armv8.6-a+crypto+crc+dotprod+fp16+sha3+sm4"],
Expand Down Expand Up @@ -69,13 +78,13 @@ impl SupportedArchitecture for Arm {
let has_sve_arg = i
.arguments
.iter()
.any(|a| a.ty.simd_len == Some(SimdLen::Scalable));
.any(|a| a.ty.num_lanes() == SimdLen::Scalable);
!(has_f16_arg && has_sve_arg)
})
.filter(|i| {
let has_f16_ret =
i.results.kind() == TypeKind::Float && i.results.bit_len == Some(16);
let has_sve_ret = i.results.simd_len == Some(SimdLen::Scalable);
let has_sve_ret = i.results.num_lanes() == SimdLen::Scalable;
!(has_f16_ret && has_sve_ret)
})
// Skip `svqcvtn{u,}n*_x2` intrinsics - not yet implemented!
Expand Down Expand Up @@ -129,6 +138,10 @@ impl SupportedArchitecture for Arm {

Self(intrinsics)
}

fn predicate_function(size: u32) -> String {
format!("svptrue_b{size}()")
}
}

const RUST_PRELUDE: &str = r#"
Expand All @@ -140,6 +153,7 @@ const RUST_PRELUDE: &str = r#"
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_aarch64_sve))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))]
#![feature(stdarch_neon_f16)]

Expand All @@ -148,4 +162,47 @@ use core_arch::arch::aarch64::*;

#[cfg(target_arch = "arm")]
use core_arch::arch::arm::*;

#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
const fn svpattern_from_i32(value: i32) -> svpattern {
match value {
0 => svpattern::SV_POW2,
1 => svpattern::SV_VL1,
2 => svpattern::SV_VL2,
3 => svpattern::SV_VL3,
4 => svpattern::SV_VL4,
5 => svpattern::SV_VL5,
6 => svpattern::SV_VL6,
7 => svpattern::SV_VL7,
8 => svpattern::SV_VL8,
9 => svpattern::SV_VL16,
10 => svpattern::SV_VL32,
11 => svpattern::SV_VL64,
12 => svpattern::SV_VL128,
13 => svpattern::SV_VL256,
29 => svpattern::SV_MUL4,
30 => svpattern::SV_MUL3,
31 => svpattern::SV_ALL,
_ => unreachable!(),
}
}

#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
const fn svprfop_from_i32(value: i32) -> svprfop {
match value {
0 => svprfop::SV_PLDL1KEEP,
1 => svprfop::SV_PLDL1STRM,
2 => svprfop::SV_PLDL2KEEP,
3 => svprfop::SV_PLDL2STRM,
4 => svprfop::SV_PLDL3KEEP,
5 => svprfop::SV_PLDL3STRM,
8 => svprfop::SV_PSTL1KEEP,
9 => svprfop::SV_PSTL1STRM,
10 => svprfop::SV_PSTL2KEEP,
11 => svprfop::SV_PSTL2STRM,
12 => svprfop::SV_PSTL3KEEP,
13 => svprfop::SV_PSTL3STRM,
_ => unreachable!(),
}
}
"#;
113 changes: 90 additions & 23 deletions crates/intrinsic-test/src/arm/types.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
use super::intrinsic::ArmType;
use crate::common::intrinsic_helpers::{IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind};
use crate::common::PREDICATE_LOCAL;
use crate::common::intrinsic_helpers::{
IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind, default_fixed_vector_comparison,
};
use itertools::Itertools;

impl TypeDefinition for ArmType {
/// Gets a string containing the typename for this type in C format.
Expand Down Expand Up @@ -72,32 +76,95 @@ impl TypeDefinition for ArmType {

/// Determines the load function for this type.
fn load_function(&self) -> String {
if let IntrinsicType {
kind: k,
bit_len: Some(bl),
vec_len,
..
} = **self
{
let quad = if self.num_lanes() * bl > 64 { "q" } else { "" };

format!(
"vld{len}{quad}_{type}{size}",
type = match k {
TypeKind::Int(Sign::Unsigned) => "u",
TypeKind::Int(Sign::Signed) => "s",
TypeKind::Float => "f",
TypeKind::Poly => "p",
x => todo!("get_load_function TypeKind: {x:#?}"),
},
size = bl,
quad = quad,
len = vec_len.unwrap_or(1),
)
if let Some(bl) = self.bit_len {
match self.num_lanes() {
SimdLen::Scalable => {
format!(
"svld{len}_{type}{bl}",
len = self.num_vectors(),
type = self.rust_intrinsic_name_prefix(),
)
}
SimdLen::Fixed(num_lanes) => {
format!(
"vld{len}{quad}_{type}{bl}",
quad = if num_lanes * bl > 64 { "q" } else { "" },
len = self.num_vectors(),
type = self.rust_intrinsic_name_prefix(),
)
}
}
} else {
todo!("load_function IntrinsicType: {self:#?}")
}
}

fn comparison_function(&self) -> String {

@sayantn sayantn Jun 17, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there some way to print the diff for SVE? I would imagine it is difficult due to SVE being non-const sized, but is there some way? That would massively improve debuggability. One approach I can suggest is have some small functions that convert from SVE vectors to &[T], e.g.

#[target_feature(enable = "sve")]
pub fn svfloat32_to_slice(a: &svfloat32_t) -> &[NanEqF32] {
    unsafe {
        core::slice::from_raw_parts(core::ptr::from_ref(a).cast(), svcntw() as usize)
    }
}

this might work, with significantly less complexity

View changes since the review

@davidtwco davidtwco Jun 18, 2026

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I've needed to debug them, I've disabled the invocation of intrinsic-test in intrinsic-test.sh and added these snippets to the generated tests that I wanted to debug.

For non-bool vectors, just replace the x0_val and x1_val with the vectors that you want to inspect, e.g. __rust_return_value or __c_return_value:

{
    let num_elems = svcnth() as usize;
    let mut x0_buf = Vec::with_capacity(num_elems);
    let mut x1_buf = Vec::with_capacity(num_elems);
    svst1_u16(__pred, x0_buf.as_mut_ptr(), x0_val);
    x0_buf.set_len(num_elems);
    svst1_u16(__pred, x1_buf.as_mut_ptr(), x1_val);
    x1_buf.set_len(num_elems);
    for i in 0..num_elems {
        let x0_val = x0_buf[i];
        dbg!(i, x0_val);
    }
    for i in 0..num_elems {
        let x1_val = x1_buf[i];
        dbg!(i, x1_val);
    }
}

Similarly for bool vectors:

{
    let num_elems = svcntb() as usize;
    let mut _op_val = Vec::with_capacity(num_elems);
    svst1_u8(op_val, _op_val.as_mut_ptr(), svdup_n_u8(1));
    _op_val.set_len(num_elems);
    for i in 0..num_elems {
        let _op_val_el = if _op_val[i] == 1 { true } else { false };
        dbg!(i, _op_val_el);
    }
}

I'm not sure if what you've suggested will work, happy to try. I'm more than happy to make some improvements here for debuggability of the tests when they do fail, but could that be left to a follow-up?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ended up doing this in the current patch because it was useful for working out what was going on with CI failures

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, didn't understand what you meant by this and the current patch, the GH version shows the sveor version. Also, https://godbolt.org/z/a481YdW1P seems to work.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah - by "this", I meant making it easier to debug failures, and "this current patch" being in this PR rather than a follow-up. I did it using the snippets that I shared rather than yours, while that patch does compile, I'm not sure it actually works in practice, see https://godbolt.org/z/ozev8Krqr - once you actually try and use those functions, you get an error from LLVM (that's something we'll need to fix on the rustc side so that we emit an error instead if you do something you shouldn't, but it is still a work-in-progress afterall).

match self.num_lanes() {
SimdLen::Scalable => {
// There isn't a `svcmpeq` for `svbool_t`, so do an XOR instead and test it is
// empty..
if self.kind() == TypeKind::Bool {
return format!(
r#"
let eq = sveor_b_z({PREDICATE_LOCAL}, __rust_return_value, __c_return_value);
assert!(!svptest_any({PREDICATE_LOCAL}, eq), "{{}}", id);
"#,
);
}

// Use `svcmpeq` to compare the return values of Rust and C invocations
match self.num_vectors() {
1 => {
format!(
r#"
let eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_return_value, __c_return_value);
assert!(svptest_any(__pred, eq), "{{}}", id);
"#,
ty = self.rust_intrinsic_name_prefix(),
bl = self.inner_size(),
)
}
// For tuples of vectors, do multiple comparisons, each with a `svget` to
// extract the Nth vector.
n @ (2 | 3 | 4) => (0..n)
.format_with("\n", |i, fmt| {
fmt(&format_args!(
r#"
let eq = svcmpeq_{ty}{bl}(
{PREDICATE_LOCAL},
svget{n}_{ty}{bl}::<{i}>(__rust_return_value),
svget{n}_{ty}{bl}::<{i}>(__c_return_value)
);
assert!(svptest_any(__pred, eq), "{{}}-{i_plus_one}/{n}", id);
"#,
ty = self.rust_intrinsic_name_prefix(),
bl = self.inner_size(),
i_plus_one = i + 1, // so that the output is "1/2" and "2/2"
))
})
.to_string(),
_ => unreachable!(),
}
}
SimdLen::Fixed(num_lanes) => default_fixed_vector_comparison(self, num_lanes),
}
}
}

impl ArmType {
/// Returns the Rust prefix for the name of an intrinsic with this type kind (i.e. `s` for
/// `i16`, or `u` for `u16`). For type kinds without any bit length at the end (e.g. `bool`),
/// returns the whole type name.
pub fn rust_intrinsic_name_prefix(&self) -> &str {
match self.kind() {
TypeKind::Char(Sign::Signed) => "s",
TypeKind::Int(Sign::Signed) => "s",
TypeKind::Poly => "p",
TypeKind::Bool => "s",
_ => self.kind.rust_prefix(),
}
}
}

pub fn parse_intrinsic_type(s: &str) -> Result<IntrinsicType, String> {
Expand Down
29 changes: 25 additions & 4 deletions crates/intrinsic-test/src/common/argument.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use itertools::Itertools;

use crate::common::SupportedArchitecture;
use crate::common::intrinsic_helpers::TypeKind;
use crate::common::intrinsic_helpers::{SimdLen, TypeKind};
use crate::common::values::test_values_array_name;

use super::PASSES;
use super::constraint::Constraint;
use super::intrinsic_helpers::TypeDefinition;
use super::{PASSES, PREDICATE_LOCAL};

/// An argument for the intrinsic.
#[derive(Debug, PartialEq, Clone)]
Expand Down Expand Up @@ -38,8 +38,15 @@ where
self.ty.c_type()
}

/// Generates local variable name for the value passed to this argument
pub fn generate_name(&self) -> String {
format!("{}_val", self.name)
// The same predicate is used for scalable intrinsic invocations
// FIXME(davidtwco): Only for predicate arguments, not all boolean arguments
if self.is_scalable_bool() {
format!("{PREDICATE_LOCAL}")
} else {
format!("{}_val", self.name)
}
}

pub fn is_simd(&self) -> bool {
Expand All @@ -54,6 +61,11 @@ where
self.constraint.is_some()
}

/// Is this argument of type `svbool_t` (or otherwise a scalable bool)?
pub fn is_scalable_bool(&self) -> bool {
self.ty.kind == TypeKind::Bool && self.ty.num_lanes() == SimdLen::Scalable
}

/// Should this argument be passed by reference in C wrapper function declarations?
///
/// SIMD types and `f16` are currently passed by reference.
Expand Down Expand Up @@ -176,11 +188,20 @@ where
pub fn load_values_rust(&self) -> String {
self.iter()
.filter(|&arg| !arg.has_constraint())
// FIXME(davidtwco): Need test values for `svbool_t` when the argument is *not* a
// predicate.
.filter(|&arg| !arg.is_scalable_bool())
.enumerate()
.map(|(idx, arg)| {
if arg.is_simd() {
// If this load is of a scalable vector, then prepend an additional argument
// containing the predicate for the load.
let pred_arg = match arg.ty.num_lanes() {
SimdLen::Scalable => format!("{PREDICATE_LOCAL},"),
SimdLen::Fixed(..) => "".to_string(),
};
format!(
"let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);",
"let {name} = {load}({pred_arg}{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n",
name = arg.generate_name(),
vals_name = test_values_array_name(&arg.ty),
load = arg.ty.load_function(),
Expand Down
55 changes: 44 additions & 11 deletions crates/intrinsic-test/src/common/constraint.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use serde::Deserialize;
use std::ops::Range;

use serde::Deserialize;

/// Describes the values to test for a const generic parameter
#[derive(Debug, PartialEq, Clone, Deserialize)]
pub enum Constraint {
Expand All @@ -23,21 +24,53 @@ pub enum Constraint {
SvImmRotationAdd,
}

impl Constraint {
/// Returns an iterator over the values of this constraint
pub fn iter(&self) -> Box<dyn Iterator<Item = i64> + '_> {
/// Workaround to enable the `Constraint::into_iter` to return an iterator that implements `Clone`,
/// so that it can be used with `Itertools::multi_cartesian_product`.
///
/// With the different iterator types, returning `Box<dyn Iterator<Item = i64> + '_>` would the
/// idiomatic approach, but this can't be made to implement `Clone`. Given the limited number
/// of iterator types used and their relative lack of complexity, wrapping them all in an enum isn't
/// too bad.
#[derive(Clone)]
pub enum ConstraintIterator<'a> {
Once(std::iter::Once<i64>),
Range(std::ops::Range<i64>),
Copied(std::iter::Copied<std::slice::Iter<'a, i64>>),
Chain(std::iter::Chain<std::ops::RangeInclusive<i64>, std::ops::RangeInclusive<i64>>),
StepBy(std::iter::StepBy<std::ops::RangeInclusive<i64>>),
}

impl<'a> Iterator for ConstraintIterator<'a> {
type Item = i64;

fn next(&mut self) -> Option<Self::Item> {
match self {
ConstraintIterator::Once(once) => once.next(),
ConstraintIterator::Range(range) => range.next(),
ConstraintIterator::Copied(copied) => copied.next(),
ConstraintIterator::Chain(chain) => chain.next(),
ConstraintIterator::StepBy(step_by) => step_by.next(),
}
}
}

impl<'a> IntoIterator for &'a Constraint {
type Item = i64;
type IntoIter = ConstraintIterator<'a>;

fn into_iter(self) -> Self::IntoIter {
match self {
Constraint::Equal(i) => Box::new(std::iter::once(*i)),
Constraint::Range(range) => Box::new(range.clone()),
Constraint::Set(items) => Box::new(items.iter().copied().chain(Range::default())),
Constraint::Equal(i) => ConstraintIterator::Once(std::iter::once(*i)),
Constraint::Range(range) => ConstraintIterator::Range(range.clone()),
Constraint::Set(items) => ConstraintIterator::Copied(items.iter().copied()),
// These values are discriminants of the `svpattern` enum
Constraint::SvPattern => Box::new((0..=13).chain(29..=31)),
Constraint::SvPattern => ConstraintIterator::Chain((0..=13).chain(29..=31)),
// These values are discriminants of the `svprfop` enum
Constraint::SvPrefetchOp => Box::new((0..=5).chain(8..=14)),
Constraint::SvPrefetchOp => ConstraintIterator::Chain((0..=5).chain(8..=14)),
// Valid rotations for intrinsics operating on complex pairs: 0, 90, 180, 270
Constraint::SvImmRotation => Box::new((0..=270).step_by(90)),
Constraint::SvImmRotation => ConstraintIterator::StepBy((0..=270).step_by(90)),
// Valid rotations for `svcadd` and `svqcadd`: 0, 270
Constraint::SvImmRotationAdd => Box::new((90..=270).step_by(180)),
Constraint::SvImmRotationAdd => ConstraintIterator::StepBy((90..=270).step_by(180)),
}
}
}
Loading