Skip to content

Commit 9a8c044

Browse files
tmiasko authored and Amanieu committed
Convert _mm{256,_mask,}_permute_p{d,s} to const generics
* _mm256_permute_pd
* _mm256_permute_ps
* _mm_mask_permute_pd
* _mm_maskz_permute_pd
* _mm_permute_pd
* _mm_permute_ps
1 parent 4b0591c commit 9a8c044

File tree

3 files changed, with 85 additions and 190 deletions.

crates/core_arch/src/x86/avx.rs

Lines changed: 59 additions & 160 deletions
Original file line number | Diff line number | Diff line change
@@ -1087,57 +1087,25 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
10871087
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_ps)
10881088
#[inline]
10891089
#[target_feature(enable = "avx")]
1090-
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
1091-
#[rustc_args_required_const(1)]
1090+
#[cfg_attr(test, assert_instr(vpermilps, IMM8 = 9))]
1091+
#[rustc_legacy_const_generics(1)]
10921092
#[stable(feature = "simd_x86", since = "1.27.0")]
1093-
pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
1094-
let imm8 = (imm8 & 0xFF) as u8;
1095-
let undefined = _mm256_undefined_ps();
1096-
macro_rules! shuffle4 {
1097-
($a:expr, $b:expr, $c:expr, $d:expr) => {
1098-
simd_shuffle8(
1099-
a,
1100-
undefined,
1101-
[$a, $b, $c, $d, $a + 4, $b + 4, $c + 4, $d + 4],
1102-
)
1103-
};
1104-
}
1105-
macro_rules! shuffle3 {
1106-
($a:expr, $b:expr, $c:expr) => {
1107-
match (imm8 >> 6) & 0b11 {
1108-
0b00 => shuffle4!($a, $b, $c, 0),
1109-
0b01 => shuffle4!($a, $b, $c, 1),
1110-
0b10 => shuffle4!($a, $b, $c, 2),
1111-
_ => shuffle4!($a, $b, $c, 3),
1112-
}
1113-
};
1114-
}
1115-
macro_rules! shuffle2 {
1116-
($a:expr, $b:expr) => {
1117-
match (imm8 >> 4) & 0b11 {
1118-
0b00 => shuffle3!($a, $b, 0),
1119-
0b01 => shuffle3!($a, $b, 1),
1120-
0b10 => shuffle3!($a, $b, 2),
1121-
_ => shuffle3!($a, $b, 3),
1122-
}
1123-
};
1124-
}
1125-
macro_rules! shuffle1 {
1126-
($a:expr) => {
1127-
match (imm8 >> 2) & 0b11 {
1128-
0b00 => shuffle2!($a, 0),
1129-
0b01 => shuffle2!($a, 1),
1130-
0b10 => shuffle2!($a, 2),
1131-
_ => shuffle2!($a, 3),
1132-
}
1133-
};
1134-
}
1135-
match imm8 & 0b11 {
1136-
0b00 => shuffle1!(0),
1137-
0b01 => shuffle1!(1),
1138-
0b10 => shuffle1!(2),
1139-
_ => shuffle1!(3),
1140-
}
1093+
pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
1094+
static_assert_imm8!(IMM8);
1095+
simd_shuffle8(
1096+
a,
1097+
_mm256_undefined_ps(),
1098+
[
1099+
(IMM8 as u32 >> 0) & 0b11,
1100+
(IMM8 as u32 >> 2) & 0b11,
1101+
(IMM8 as u32 >> 4) & 0b11,
1102+
(IMM8 as u32 >> 6) & 0b11,
1103+
((IMM8 as u32 >> 0) & 0b11) + 4,
1104+
((IMM8 as u32 >> 2) & 0b11) + 4,
1105+
((IMM8 as u32 >> 4) & 0b11) + 4,
1106+
((IMM8 as u32 >> 6) & 0b11) + 4,
1107+
],
1108+
)
11411109
}
11421110

11431111
/// Shuffles single-precision (32-bit) floating-point elements in `a`
@@ -1146,53 +1114,21 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
11461114
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_ps)
11471115
#[inline]
11481116
#[target_feature(enable = "avx,sse")]
1149-
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
1150-
#[rustc_args_required_const(1)]
1117+
#[cfg_attr(test, assert_instr(vpermilps, IMM8 = 9))]
1118+
#[rustc_legacy_const_generics(1)]
11511119
#[stable(feature = "simd_x86", since = "1.27.0")]
1152-
pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 {
1153-
let imm8 = (imm8 & 0xFF) as u8;
1154-
let undefined = _mm_undefined_ps();
1155-
macro_rules! shuffle4 {
1156-
($a:expr, $b:expr, $c:expr, $d:expr) => {
1157-
simd_shuffle4(a, undefined, [$a, $b, $c, $d])
1158-
};
1159-
}
1160-
macro_rules! shuffle3 {
1161-
($a:expr, $b:expr, $c:expr) => {
1162-
match (imm8 >> 6) & 0b11 {
1163-
0b00 => shuffle4!($a, $b, $c, 0),
1164-
0b01 => shuffle4!($a, $b, $c, 1),
1165-
0b10 => shuffle4!($a, $b, $c, 2),
1166-
_ => shuffle4!($a, $b, $c, 3),
1167-
}
1168-
};
1169-
}
1170-
macro_rules! shuffle2 {
1171-
($a:expr, $b:expr) => {
1172-
match (imm8 >> 4) & 0b11 {
1173-
0b00 => shuffle3!($a, $b, 0),
1174-
0b01 => shuffle3!($a, $b, 1),
1175-
0b10 => shuffle3!($a, $b, 2),
1176-
_ => shuffle3!($a, $b, 3),
1177-
}
1178-
};
1179-
}
1180-
macro_rules! shuffle1 {
1181-
($a:expr) => {
1182-
match (imm8 >> 2) & 0b11 {
1183-
0b00 => shuffle2!($a, 0),
1184-
0b01 => shuffle2!($a, 1),
1185-
0b10 => shuffle2!($a, 2),
1186-
_ => shuffle2!($a, 3),
1187-
}
1188-
};
1189-
}
1190-
match imm8 & 0b11 {
1191-
0b00 => shuffle1!(0),
1192-
0b01 => shuffle1!(1),
1193-
0b10 => shuffle1!(2),
1194-
_ => shuffle1!(3),
1195-
}
1120+
pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
1121+
static_assert_imm8!(IMM8);
1122+
simd_shuffle4(
1123+
a,
1124+
_mm_undefined_ps(),
1125+
[
1126+
(IMM8 as u32 >> 0) & 0b11,
1127+
(IMM8 as u32 >> 2) & 0b11,
1128+
(IMM8 as u32 >> 4) & 0b11,
1129+
(IMM8 as u32 >> 6) & 0b11,
1130+
],
1131+
)
11961132
}
11971133

11981134
/// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1225,45 +1161,21 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
12251161
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_pd)
12261162
#[inline]
12271163
#[target_feature(enable = "avx")]
1228-
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
1229-
#[rustc_args_required_const(1)]
1164+
#[cfg_attr(test, assert_instr(vpermilpd, IMM4 = 0x1))]
1165+
#[rustc_legacy_const_generics(1)]
12301166
#[stable(feature = "simd_x86", since = "1.27.0")]
1231-
pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
1232-
let imm8 = (imm8 & 0xFF) as u8;
1233-
let undefined = _mm256_undefined_pd();
1234-
macro_rules! shuffle4 {
1235-
($a:expr, $b:expr, $c:expr, $d:expr) => {
1236-
simd_shuffle4(a, undefined, [$a, $b, $c, $d])
1237-
};
1238-
}
1239-
macro_rules! shuffle3 {
1240-
($a:expr, $b:expr, $c:expr) => {
1241-
match (imm8 >> 3) & 0x1 {
1242-
0 => shuffle4!($a, $b, $c, 2),
1243-
_ => shuffle4!($a, $b, $c, 3),
1244-
}
1245-
};
1246-
}
1247-
macro_rules! shuffle2 {
1248-
($a:expr, $b:expr) => {
1249-
match (imm8 >> 2) & 0x1 {
1250-
0 => shuffle3!($a, $b, 2),
1251-
_ => shuffle3!($a, $b, 3),
1252-
}
1253-
};
1254-
}
1255-
macro_rules! shuffle1 {
1256-
($a:expr) => {
1257-
match (imm8 >> 1) & 0x1 {
1258-
0 => shuffle2!($a, 0),
1259-
_ => shuffle2!($a, 1),
1260-
}
1261-
};
1262-
}
1263-
match imm8 & 0x1 {
1264-
0 => shuffle1!(0),
1265-
_ => shuffle1!(1),
1266-
}
1167+
pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
1168+
static_assert_imm4!(IMM4);
1169+
simd_shuffle4(
1170+
a,
1171+
_mm256_undefined_pd(),
1172+
[
1173+
((IMM4 as u32 >> 0) & 1),
1174+
((IMM4 as u32 >> 1) & 1),
1175+
((IMM4 as u32 >> 2) & 1) + 2,
1176+
((IMM4 as u32 >> 3) & 1) + 2,
1177+
],
1178+
)
12671179
}
12681180

12691181
/// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1272,29 +1184,16 @@ pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
12721184
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_pd)
12731185
#[inline]
12741186
#[target_feature(enable = "avx,sse2")]
1275-
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
1276-
#[rustc_args_required_const(1)]
1187+
#[cfg_attr(test, assert_instr(vpermilpd, IMM2 = 0x1))]
1188+
#[rustc_legacy_const_generics(1)]
12771189
#[stable(feature = "simd_x86", since = "1.27.0")]
1278-
pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d {
1279-
let imm8 = (imm8 & 0xFF) as u8;
1280-
let undefined = _mm_undefined_pd();
1281-
macro_rules! shuffle2 {
1282-
($a:expr, $b:expr) => {
1283-
simd_shuffle2(a, undefined, [$a, $b])
1284-
};
1285-
}
1286-
macro_rules! shuffle1 {
1287-
($a:expr) => {
1288-
match (imm8 >> 1) & 0x1 {
1289-
0 => shuffle2!($a, 0),
1290-
_ => shuffle2!($a, 1),
1291-
}
1292-
};
1293-
}
1294-
match imm8 & 0x1 {
1295-
0 => shuffle1!(0),
1296-
_ => shuffle1!(1),
1297-
}
1190+
pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
1191+
static_assert_imm2!(IMM2);
1192+
simd_shuffle2(
1193+
a,
1194+
_mm_undefined_pd(),
1195+
[(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
1196+
)
12981197
}
12991198

13001199
/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit)
@@ -3784,15 +3683,15 @@ mod tests {
37843683
#[simd_test(enable = "avx")]
37853684
unsafe fn test_mm256_permute_ps() {
37863685
let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3787-
let r = _mm256_permute_ps(a, 0x1b);
3686+
let r = _mm256_permute_ps::<0x1b>(a);
37883687
let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
37893688
assert_eq_m256(r, e);
37903689
}
37913690

37923691
#[simd_test(enable = "avx")]
37933692
unsafe fn test_mm_permute_ps() {
37943693
let a = _mm_setr_ps(4., 3., 2., 5.);
3795-
let r = _mm_permute_ps(a, 0x1b);
3694+
let r = _mm_permute_ps::<0x1b>(a);
37963695
let e = _mm_setr_ps(5., 2., 3., 4.);
37973696
assert_eq_m128(r, e);
37983697
}
@@ -3818,15 +3717,15 @@ mod tests {
38183717
#[simd_test(enable = "avx")]
38193718
unsafe fn test_mm256_permute_pd() {
38203719
let a = _mm256_setr_pd(4., 3., 2., 5.);
3821-
let r = _mm256_permute_pd(a, 5);
3720+
let r = _mm256_permute_pd::<5>(a);
38223721
let e = _mm256_setr_pd(3., 4., 5., 2.);
38233722
assert_eq_m256d(r, e);
38243723
}
38253724

38263725
#[simd_test(enable = "avx")]
38273726
unsafe fn test_mm_permute_pd() {
38283727
let a = _mm_setr_pd(4., 3.);
3829-
let r = _mm_permute_pd(a, 1);
3728+
let r = _mm_permute_pd::<1>(a);
38303729
let e = _mm_setr_pd(3., 4.);
38313730
assert_eq_m128d(r, e);
38323731
}

crates/core_arch/src/x86/avx512f.rs

Lines changed: 22 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -19904,7 +19904,7 @@ pub unsafe fn _mm512_maskz_permute_ps(k: __mmask16, a: __m512, imm8: i32) -> __m
1990419904
pub unsafe fn _mm256_mask_permute_ps(src: __m256, k: __mmask8, a: __m256, imm8: i32) -> __m256 {
1990519905
macro_rules! call {
1990619906
($imm8:expr) => {
19907-
_mm256_permute_ps(a, $imm8)
19907+
_mm256_permute_ps::<$imm8>(a)
1990819908
};
1990919909
}
1991019910
let r = constify_imm8_sae!(imm8, call);
@@ -19921,7 +19921,7 @@ pub unsafe fn _mm256_mask_permute_ps(src: __m256, k: __mmask8, a: __m256, imm8:
1992119921
pub unsafe fn _mm256_maskz_permute_ps(k: __mmask8, a: __m256, imm8: i32) -> __m256 {
1992219922
macro_rules! call {
1992319923
($imm8:expr) => {
19924-
_mm256_permute_ps(a, $imm8)
19924+
_mm256_permute_ps::<$imm8>(a)
1992519925
};
1992619926
}
1992719927
let r = constify_imm8_sae!(imm8, call);
@@ -19939,7 +19939,7 @@ pub unsafe fn _mm256_maskz_permute_ps(k: __mmask8, a: __m256, imm8: i32) -> __m2
1993919939
pub unsafe fn _mm_mask_permute_ps(src: __m128, k: __mmask8, a: __m128, imm8: i32) -> __m128 {
1994019940
macro_rules! call {
1994119941
($imm8:expr) => {
19942-
_mm_permute_ps(a, $imm8)
19942+
_mm_permute_ps::<$imm8>(a)
1994319943
};
1994419944
}
1994519945
let r = constify_imm8_sae!(imm8, call);
@@ -19956,7 +19956,7 @@ pub unsafe fn _mm_mask_permute_ps(src: __m128, k: __mmask8, a: __m128, imm8: i32
1995619956
pub unsafe fn _mm_maskz_permute_ps(k: __mmask8, a: __m128, imm8: i32) -> __m128 {
1995719957
macro_rules! call {
1995819958
($imm8:expr) => {
19959-
_mm_permute_ps(a, $imm8)
19959+
_mm_permute_ps::<$imm8>(a)
1996019960
};
1996119961
}
1996219962
let r = constify_imm8_sae!(imm8, call);
@@ -20058,10 +20058,10 @@ pub unsafe fn _mm512_maskz_permute_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m
2005820058
pub unsafe fn _mm256_mask_permute_pd(src: __m256d, k: __mmask8, a: __m256d, imm8: i32) -> __m256d {
2005920059
macro_rules! call {
2006020060
($imm8:expr) => {
20061-
_mm256_permute_pd(a, $imm8)
20061+
_mm256_permute_pd::<$imm8>(a)
2006220062
};
2006320063
}
20064-
let r = constify_imm8_sae!(imm8, call);
20064+
let r = constify_imm4!(imm8, call);
2006520065
transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
2006620066
}
2006720067

@@ -20075,10 +20075,10 @@ pub unsafe fn _mm256_mask_permute_pd(src: __m256d, k: __mmask8, a: __m256d, imm8
2007520075
pub unsafe fn _mm256_maskz_permute_pd(k: __mmask8, a: __m256d, imm8: i32) -> __m256d {
2007620076
macro_rules! call {
2007720077
($imm8:expr) => {
20078-
_mm256_permute_pd(a, $imm8)
20078+
_mm256_permute_pd::<$imm8>(a)
2007920079
};
2008020080
}
20081-
let r = constify_imm8_sae!(imm8, call);
20081+
let r = constify_imm4!(imm8, call);
2008220082
let zero = _mm256_setzero_pd().as_f64x4();
2008320083
transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
2008420084
}
@@ -20088,15 +20088,15 @@ pub unsafe fn _mm256_maskz_permute_pd(k: __mmask8, a: __m256d, imm8: i32) -> __m
2008820088
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_permute_pd&expand=4153)
2008920089
#[inline]
2009020090
#[target_feature(enable = "avx512f,avx512vl")]
20091-
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0b01))]
20092-
#[rustc_args_required_const(3)]
20093-
pub unsafe fn _mm_mask_permute_pd(src: __m128d, k: __mmask8, a: __m128d, imm8: i32) -> __m128d {
20094-
macro_rules! call {
20095-
($imm8:expr) => {
20096-
_mm_permute_pd(a, $imm8)
20097-
};
20098-
}
20099-
let r = constify_imm8_sae!(imm8, call);
20091+
#[cfg_attr(test, assert_instr(vpermilpd, IMM2 = 0b01))]
20092+
#[rustc_legacy_const_generics(3)]
20093+
pub unsafe fn _mm_mask_permute_pd<const IMM2: i32>(
20094+
src: __m128d,
20095+
k: __mmask8,
20096+
a: __m128d,
20097+
) -> __m128d {
20098+
static_assert_imm2!(IMM2);
20099+
let r = _mm_permute_pd::<IMM2>(a);
2010020100
transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
2010120101
}
2010220102

@@ -20105,15 +20105,11 @@ pub unsafe fn _mm_mask_permute_pd(src: __m128d, k: __mmask8, a: __m128d, imm8: i
2010520105
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_permute_pd&expand=4154)
2010620106
#[inline]
2010720107
#[target_feature(enable = "avx512f,avx512vl")]
20108-
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0b01))]
20109-
#[rustc_args_required_const(2)]
20110-
pub unsafe fn _mm_maskz_permute_pd(k: __mmask8, a: __m128d, imm8: i32) -> __m128d {
20111-
macro_rules! call {
20112-
($imm8:expr) => {
20113-
_mm_permute_pd(a, $imm8)
20114-
};
20115-
}
20116-
let r = constify_imm8_sae!(imm8, call);
20108+
#[cfg_attr(test, assert_instr(vpermilpd, IMM2 = 0b01))]
20109+
#[rustc_legacy_const_generics(2)]
20110+
pub unsafe fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
20111+
static_assert_imm2!(IMM2);
20112+
let r = _mm_permute_pd::<IMM2>(a);
2011720113
let zero = _mm_setzero_pd().as_f64x2();
2011820114
transmute(simd_select_bitmask(k, r.as_f64x2(), zero))
2011920115
}

0 commit comments

Comments
 (0)