Skip to content

Commit 6252590

Browse files
authored
Add vmull_p64 and vmull_high_p64 for aarch64 (#1157)
1 parent c957acd commit 6252590

File tree

2 files changed

+45
-5
lines changed

2 files changed

+45
-5
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4646,6 +4646,19 @@ pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
46464646
vmull_u32(a, b)
46474647
}
46484648

4649+
/// Polynomial multiply long
///
/// Multiplies the two 64-bit polynomial operands `a` and `b` and returns the
/// 128-bit polynomial product (for example, `15 * 3` yields `17`, i.e. the
/// partial products are combined without carries).
///
/// # Safety
///
/// This function is compiled with the `neon` and `crypto` target features
/// enabled; the caller must make sure the executing CPU supports both.
4650+
#[inline]
4651+
#[target_feature(enable = "neon,crypto")]
4652+
#[cfg_attr(test, assert_instr(pmull))]
4653+
pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
4654+
// NOTE(review): presumably needed because `p64` / `int8x16_t` are not
// considered FFI-safe by the lint — confirm.
#[allow(improper_ctypes)]
4655+
extern "C" {
4656+
// Binds directly to the LLVM intrinsic on aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull64")]
4657+
fn vmull_p64_(a: p64, b: p64) -> int8x16_t;
4658+
}
4659+
// The intrinsic is declared as returning `int8x16_t`; reinterpret those
// bits as the `p128` result.
transmute(vmull_p64_(a, b))
4660+
}
4661+
46494662
/// Polynomial multiply long
46504663
#[inline]
46514664
#[target_feature(enable = "neon")]
@@ -4656,6 +4669,14 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
46564669
vmull_p8(a, b)
46574670
}
46584671

4672+
/// Polynomial multiply long
///
/// "High" variant of `vmull_p64`: takes lane 1 of each input vector and
/// forwards that pair of `p64` values to `vmull_p64`, returning its `p128`
/// result. Lane 0 of both inputs is ignored.
///
/// # Safety
///
/// This function is compiled with the `neon` and `crypto` target features
/// enabled; the caller must make sure the executing CPU supports both.
4673+
#[inline]
4674+
#[target_feature(enable = "neon,crypto")]
4675+
#[cfg_attr(test, assert_instr(pmull))]
4676+
pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
4677+
// Index 1 selects the second element of each `poly64x2_t`.
vmull_p64(simd_extract(a, 1), simd_extract(b, 1))
4678+
}
4679+
46594680
/// Multiply long
46604681
#[inline]
46614682
#[target_feature(enable = "neon")]
@@ -12612,6 +12633,15 @@ mod test {
1261212633
assert_eq!(r, e);
1261312634
}
1261412635

12636+
// Checks `vmull_p64` on a small input: 15 * 3 as polynomials is 17.
//
// The test must be gated on the same features as the function under test
// (`neon,crypto`). Gating on `neon` alone would run it on CPUs that lack the
// crypto extension, where executing the multiply would fault instead of the
// test being skipped.
// NOTE(review): this file appears to be generated; the generator should emit
// the same feature list — confirm.
#[simd_test(enable = "neon,crypto")]
12637+
unsafe fn test_vmull_p64() {
12638+
let a: p64 = 15;
12639+
let b: p64 = 3;
12640+
let e: p128 = 17;
12641+
let r: p128 = transmute(vmull_p64(transmute(a), transmute(b)));
12642+
assert_eq!(r, e);
12643+
}
12644+
1261512645
#[simd_test(enable = "neon")]
1261612646
unsafe fn test_vmull_high_p8() {
1261712647
let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
@@ -12621,6 +12651,15 @@ mod test {
1262112651
assert_eq!(r, e);
1262212652
}
1262312653

12654+
// Checks `vmull_high_p64`: only lane 1 of each input (15 and 3) takes part in
// the multiplication, so the expected result is 17.
//
// The test must be gated on the same features as the function under test
// (`neon,crypto`). Gating on `neon` alone would run it on CPUs that lack the
// crypto extension, where executing the multiply would fault instead of the
// test being skipped.
// NOTE(review): this file appears to be generated; the generator should emit
// the same feature list — confirm.
#[simd_test(enable = "neon,crypto")]
12655+
unsafe fn test_vmull_high_p64() {
12656+
let a: i64x2 = i64x2::new(1, 15);
12657+
let b: i64x2 = i64x2::new(1, 3);
12658+
let e: p128 = 17;
12659+
let r: p128 = transmute(vmull_high_p64(transmute(a), transmute(b)));
12660+
assert_eq!(r, e);
12661+
}
12662+
1262412663
#[simd_test(enable = "neon")]
1262512664
unsafe fn test_vmull_high_n_s16() {
1262612665
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);

crates/stdarch-gen/neon.spec

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2214,9 +2214,10 @@ target = crypto
22142214

22152215
aarch64 = pmull
22162216
link-aarch64 = pmull64:p64:p64:p64:int8x16_t
2217-
arm = vmull
2218-
link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
2219-
//generate p64:p64:p128
2217+
// LLVM support for this intrinsic on arm is currently lacking, so vmull_p64 is only available on aarch64 for now
2218+
// arm = vmull
2219+
// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
2220+
generate p64:p64:p128
22202221

22212222

22222223
/// Polynomial multiply long
@@ -2242,8 +2243,8 @@ b = 1, 3
22422243
validate 17
22432244
target = crypto
22442245

2245-
aarch64 = pmull2
2246-
//generate poly64x2_t:poly64x2_t:p128
2246+
aarch64 = pmull
2247+
generate poly64x2_t:poly64x2_t:p128
22472248

22482249
/// Vector long multiply with scalar
22492250
name = vmull

0 commit comments

Comments
 (0)