@@ -1087,57 +1087,25 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
1087
1087
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_ps)
1088
1088
#[ inline]
1089
1089
#[ target_feature( enable = "avx" ) ]
1090
- #[ cfg_attr( test, assert_instr( vpermilps, imm8 = 9 ) ) ]
1091
- #[ rustc_args_required_const ( 1 ) ]
1090
+ #[ cfg_attr( test, assert_instr( vpermilps, IMM8 = 9 ) ) ]
1091
+ #[ rustc_legacy_const_generics ( 1 ) ]
1092
1092
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1093
- pub unsafe fn _mm256_permute_ps ( a : __m256 , imm8 : i32 ) -> __m256 {
1094
- let imm8 = ( imm8 & 0xFF ) as u8 ;
1095
- let undefined = _mm256_undefined_ps ( ) ;
1096
- macro_rules! shuffle4 {
1097
- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
1098
- simd_shuffle8(
1099
- a,
1100
- undefined,
1101
- [ $a, $b, $c, $d, $a + 4 , $b + 4 , $c + 4 , $d + 4 ] ,
1102
- )
1103
- } ;
1104
- }
1105
- macro_rules! shuffle3 {
1106
- ( $a: expr, $b: expr, $c: expr) => {
1107
- match ( imm8 >> 6 ) & 0b11 {
1108
- 0b00 => shuffle4!( $a, $b, $c, 0 ) ,
1109
- 0b01 => shuffle4!( $a, $b, $c, 1 ) ,
1110
- 0b10 => shuffle4!( $a, $b, $c, 2 ) ,
1111
- _ => shuffle4!( $a, $b, $c, 3 ) ,
1112
- }
1113
- } ;
1114
- }
1115
- macro_rules! shuffle2 {
1116
- ( $a: expr, $b: expr) => {
1117
- match ( imm8 >> 4 ) & 0b11 {
1118
- 0b00 => shuffle3!( $a, $b, 0 ) ,
1119
- 0b01 => shuffle3!( $a, $b, 1 ) ,
1120
- 0b10 => shuffle3!( $a, $b, 2 ) ,
1121
- _ => shuffle3!( $a, $b, 3 ) ,
1122
- }
1123
- } ;
1124
- }
1125
- macro_rules! shuffle1 {
1126
- ( $a: expr) => {
1127
- match ( imm8 >> 2 ) & 0b11 {
1128
- 0b00 => shuffle2!( $a, 0 ) ,
1129
- 0b01 => shuffle2!( $a, 1 ) ,
1130
- 0b10 => shuffle2!( $a, 2 ) ,
1131
- _ => shuffle2!( $a, 3 ) ,
1132
- }
1133
- } ;
1134
- }
1135
- match imm8 & 0b11 {
1136
- 0b00 => shuffle1 ! ( 0 ) ,
1137
- 0b01 => shuffle1 ! ( 1 ) ,
1138
- 0b10 => shuffle1 ! ( 2 ) ,
1139
- _ => shuffle1 ! ( 3 ) ,
1140
- }
1093
+ pub unsafe fn _mm256_permute_ps < const IMM8 : i32 > ( a : __m256 ) -> __m256 {
// Builds eight shuffle indices from the compile-time immediate `IMM8` and
// applies them to `a` with `simd_shuffle8`. `IMM8` is read as four 2-bit
// fields (bits 1:0, 3:2, 5:4, 7:6); the same four fields are used twice,
// once as-is and once offset by 4.
1094
// `static_assert_imm8!` is defined outside this view; presumably it rejects,
// at compile time, any `IMM8` that does not fit in 8 bits — TODO confirm.
+ static_assert_imm8 ! ( IMM8 ) ;
1095
+ simd_shuffle8 (
1096
+ a,
1097
// Second shuffle operand. Every index below is in 0..=7, so presumably no
// element of this vector is ever selected — confirm against the index
// convention of `simd_shuffle8`.
+ _mm256_undefined_ps ( ) ,
1098
+ [
1099
// Indices 0..=3: one 2-bit field of `IMM8` per output position 0..3.
+ ( IMM8 as u32 >> 0 ) & 0b11 ,
1100
+ ( IMM8 as u32 >> 2 ) & 0b11 ,
1101
+ ( IMM8 as u32 >> 4 ) & 0b11 ,
1102
+ ( IMM8 as u32 >> 6 ) & 0b11 ,
1103
// The same four fields again, offset by 4, giving indices 4..=7 for
// output positions 4..7.
+ ( ( IMM8 as u32 >> 0 ) & 0b11 ) + 4 ,
1104
+ ( ( IMM8 as u32 >> 2 ) & 0b11 ) + 4 ,
1105
+ ( ( IMM8 as u32 >> 4 ) & 0b11 ) + 4 ,
1106
+ ( ( IMM8 as u32 >> 6 ) & 0b11 ) + 4 ,
1107
+ ] ,
1108
+ )
1141
1109
}
1142
1110
1143
1111
/// Shuffles single-precision (32-bit) floating-point elements in `a`
@@ -1146,53 +1114,21 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
1146
1114
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_ps)
1147
1115
#[ inline]
1148
1116
#[ target_feature( enable = "avx,sse" ) ]
1149
- #[ cfg_attr( test, assert_instr( vpermilps, imm8 = 9 ) ) ]
1150
- #[ rustc_args_required_const ( 1 ) ]
1117
+ #[ cfg_attr( test, assert_instr( vpermilps, IMM8 = 9 ) ) ]
1118
+ #[ rustc_legacy_const_generics ( 1 ) ]
1151
1119
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1152
- pub unsafe fn _mm_permute_ps ( a : __m128 , imm8 : i32 ) -> __m128 {
1153
- let imm8 = ( imm8 & 0xFF ) as u8 ;
1154
- let undefined = _mm_undefined_ps ( ) ;
1155
- macro_rules! shuffle4 {
1156
- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
1157
- simd_shuffle4( a, undefined, [ $a, $b, $c, $d] )
1158
- } ;
1159
- }
1160
- macro_rules! shuffle3 {
1161
- ( $a: expr, $b: expr, $c: expr) => {
1162
- match ( imm8 >> 6 ) & 0b11 {
1163
- 0b00 => shuffle4!( $a, $b, $c, 0 ) ,
1164
- 0b01 => shuffle4!( $a, $b, $c, 1 ) ,
1165
- 0b10 => shuffle4!( $a, $b, $c, 2 ) ,
1166
- _ => shuffle4!( $a, $b, $c, 3 ) ,
1167
- }
1168
- } ;
1169
- }
1170
- macro_rules! shuffle2 {
1171
- ( $a: expr, $b: expr) => {
1172
- match ( imm8 >> 4 ) & 0b11 {
1173
- 0b00 => shuffle3!( $a, $b, 0 ) ,
1174
- 0b01 => shuffle3!( $a, $b, 1 ) ,
1175
- 0b10 => shuffle3!( $a, $b, 2 ) ,
1176
- _ => shuffle3!( $a, $b, 3 ) ,
1177
- }
1178
- } ;
1179
- }
1180
- macro_rules! shuffle1 {
1181
- ( $a: expr) => {
1182
- match ( imm8 >> 2 ) & 0b11 {
1183
- 0b00 => shuffle2!( $a, 0 ) ,
1184
- 0b01 => shuffle2!( $a, 1 ) ,
1185
- 0b10 => shuffle2!( $a, 2 ) ,
1186
- _ => shuffle2!( $a, 3 ) ,
1187
- }
1188
- } ;
1189
- }
1190
- match imm8 & 0b11 {
1191
- 0b00 => shuffle1 ! ( 0 ) ,
1192
- 0b01 => shuffle1 ! ( 1 ) ,
1193
- 0b10 => shuffle1 ! ( 2 ) ,
1194
- _ => shuffle1 ! ( 3 ) ,
1195
- }
1120
+ pub unsafe fn _mm_permute_ps < const IMM8 : i32 > ( a : __m128 ) -> __m128 {
// Builds four shuffle indices from the compile-time immediate `IMM8` and
// applies them to `a` with `simd_shuffle4`. Each 2-bit field of `IMM8`
// (bits 1:0, 3:2, 5:4, 7:6) supplies the index for one output position.
1121
// `static_assert_imm8!` is defined outside this view; presumably it rejects,
// at compile time, any `IMM8` that does not fit in 8 bits — TODO confirm.
+ static_assert_imm8 ! ( IMM8 ) ;
1122
+ simd_shuffle4 (
1123
+ a,
1124
// Second shuffle operand. Every index below is in 0..=3, so presumably no
// element of this vector is ever selected — confirm against the index
// convention of `simd_shuffle4`.
+ _mm_undefined_ps ( ) ,
1125
+ [
1126
// One index in 0..=3 per output position, taken from successive 2-bit
// fields of `IMM8`, lowest field first.
+ ( IMM8 as u32 >> 0 ) & 0b11 ,
1127
+ ( IMM8 as u32 >> 2 ) & 0b11 ,
1128
+ ( IMM8 as u32 >> 4 ) & 0b11 ,
1129
+ ( IMM8 as u32 >> 6 ) & 0b11 ,
1130
+ ] ,
1131
+ )
1196
1132
}
1197
1133
1198
1134
/// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1225,45 +1161,21 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
1225
1161
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_pd)
1226
1162
#[ inline]
1227
1163
#[ target_feature( enable = "avx" ) ]
1228
- #[ cfg_attr( test, assert_instr( vpermilpd, imm8 = 0x1 ) ) ]
1229
- #[ rustc_args_required_const ( 1 ) ]
1164
+ #[ cfg_attr( test, assert_instr( vpermilpd, IMM4 = 0x1 ) ) ]
1165
+ #[ rustc_legacy_const_generics ( 1 ) ]
1230
1166
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1231
- pub unsafe fn _mm256_permute_pd ( a : __m256d , imm8 : i32 ) -> __m256d {
1232
- let imm8 = ( imm8 & 0xFF ) as u8 ;
1233
- let undefined = _mm256_undefined_pd ( ) ;
1234
- macro_rules! shuffle4 {
1235
- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
1236
- simd_shuffle4( a, undefined, [ $a, $b, $c, $d] )
1237
- } ;
1238
- }
1239
- macro_rules! shuffle3 {
1240
- ( $a: expr, $b: expr, $c: expr) => {
1241
- match ( imm8 >> 3 ) & 0x1 {
1242
- 0 => shuffle4!( $a, $b, $c, 2 ) ,
1243
- _ => shuffle4!( $a, $b, $c, 3 ) ,
1244
- }
1245
- } ;
1246
- }
1247
- macro_rules! shuffle2 {
1248
- ( $a: expr, $b: expr) => {
1249
- match ( imm8 >> 2 ) & 0x1 {
1250
- 0 => shuffle3!( $a, $b, 2 ) ,
1251
- _ => shuffle3!( $a, $b, 3 ) ,
1252
- }
1253
- } ;
1254
- }
1255
- macro_rules! shuffle1 {
1256
- ( $a: expr) => {
1257
- match ( imm8 >> 1 ) & 0x1 {
1258
- 0 => shuffle2!( $a, 0 ) ,
1259
- _ => shuffle2!( $a, 1 ) ,
1260
- }
1261
- } ;
1262
- }
1263
- match imm8 & 0x1 {
1264
- 0 => shuffle1 ! ( 0 ) ,
1265
- _ => shuffle1 ! ( 1 ) ,
1266
- }
1167
+ pub unsafe fn _mm256_permute_pd < const IMM4 : i32 > ( a : __m256d ) -> __m256d {
// Builds four shuffle indices from the compile-time immediate `IMM4` and
// applies them to `a` with `simd_shuffle4`. Each of the low four bits of
// `IMM4` supplies the index for one output position.
1168
// `static_assert_imm4!` is defined outside this view; presumably it rejects,
// at compile time, any `IMM4` that does not fit in 4 bits — TODO confirm.
+ static_assert_imm4 ! ( IMM4 ) ;
1169
+ simd_shuffle4 (
1170
+ a,
1171
// Second shuffle operand. Every index below is in 0..=3, so presumably no
// element of this vector is ever selected — confirm against the index
// convention of `simd_shuffle4`.
+ _mm256_undefined_pd ( ) ,
1172
+ [
1173
// Bits 0 and 1 give an index of 0 or 1 for output positions 0 and 1.
+ ( ( IMM4 as u32 >> 0 ) & 1 ) ,
1174
+ ( ( IMM4 as u32 >> 1 ) & 1 ) ,
1175
// Bits 2 and 3 give an index of 2 or 3 (bit value plus 2) for output
// positions 2 and 3.
+ ( ( IMM4 as u32 >> 2 ) & 1 ) + 2 ,
1176
+ ( ( IMM4 as u32 >> 3 ) & 1 ) + 2 ,
1177
+ ] ,
1178
+ )
1267
1179
}
1268
1180
1269
1181
/// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1272,29 +1184,16 @@ pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
1272
1184
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_pd)
1273
1185
#[ inline]
1274
1186
#[ target_feature( enable = "avx,sse2" ) ]
1275
- #[ cfg_attr( test, assert_instr( vpermilpd, imm8 = 0x1 ) ) ]
1276
- #[ rustc_args_required_const ( 1 ) ]
1187
+ #[ cfg_attr( test, assert_instr( vpermilpd, IMM2 = 0x1 ) ) ]
1188
+ #[ rustc_legacy_const_generics ( 1 ) ]
1277
1189
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1278
- pub unsafe fn _mm_permute_pd ( a : __m128d , imm8 : i32 ) -> __m128d {
1279
- let imm8 = ( imm8 & 0xFF ) as u8 ;
1280
- let undefined = _mm_undefined_pd ( ) ;
1281
- macro_rules! shuffle2 {
1282
- ( $a: expr, $b: expr) => {
1283
- simd_shuffle2( a, undefined, [ $a, $b] )
1284
- } ;
1285
- }
1286
- macro_rules! shuffle1 {
1287
- ( $a: expr) => {
1288
- match ( imm8 >> 1 ) & 0x1 {
1289
- 0 => shuffle2!( $a, 0 ) ,
1290
- _ => shuffle2!( $a, 1 ) ,
1291
- }
1292
- } ;
1293
- }
1294
- match imm8 & 0x1 {
1295
- 0 => shuffle1 ! ( 0 ) ,
1296
- _ => shuffle1 ! ( 1 ) ,
1297
- }
1190
+ pub unsafe fn _mm_permute_pd < const IMM2 : i32 > ( a : __m128d ) -> __m128d {
// Builds two shuffle indices from the compile-time immediate `IMM2` and
// applies them to `a` with `simd_shuffle2`.
1191
// `static_assert_imm2!` is defined outside this view; presumably it rejects,
// at compile time, any `IMM2` that does not fit in 2 bits — TODO confirm.
+ static_assert_imm2 ! ( IMM2 ) ;
1192
+ simd_shuffle2 (
1193
+ a,
1194
// Second shuffle operand. Both indices below are 0 or 1, so presumably no
// element of this vector is ever selected — confirm against the index
// convention of `simd_shuffle2`.
+ _mm_undefined_pd ( ) ,
1195
// Bit 0 of `IMM2` is the index for output position 0, bit 1 the index for
// output position 1.
+ [ ( IMM2 as u32 ) & 1 , ( IMM2 as u32 >> 1 ) & 1 ] ,
1196
+ )
1298
1197
}
1299
1198
1300
1199
/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit)
@@ -3784,15 +3683,15 @@ mod tests {
3784
3683
#[ simd_test( enable = "avx" ) ]
3785
3684
unsafe fn test_mm256_permute_ps ( ) {
// Checks `_mm256_permute_ps` with the immediate 0x1b.
3786
3685
let a = _mm256_setr_ps ( 4. , 3. , 2. , 5. , 8. , 9. , 64. , 50. ) ;
3787
- let r = _mm256_permute_ps ( a , 0x1b ) ;
3686
// The immediate is passed as a const generic argument.
// 0x1b == 0b00_01_10_11, so the 2-bit fields from low to high are 3, 2, 1, 0.
+ let r = _mm256_permute_ps :: < 0x1b > ( a ) ;
3788
3687
// Expected: the first four and the last four elements of `a`, each group
// in reverse order.
let e = _mm256_setr_ps ( 5. , 2. , 3. , 4. , 50. , 64. , 9. , 8. ) ;
3789
3688
assert_eq_m256 ( r, e) ;
3790
3689
}
3791
3690
3792
3691
#[ simd_test( enable = "avx" ) ]
3793
3692
unsafe fn test_mm_permute_ps ( ) {
// Checks `_mm_permute_ps` with the immediate 0x1b.
3794
3693
let a = _mm_setr_ps ( 4. , 3. , 2. , 5. ) ;
3795
- let r = _mm_permute_ps ( a , 0x1b ) ;
3694
// The immediate is passed as a const generic argument.
// 0x1b == 0b00_01_10_11, so the 2-bit fields from low to high are 3, 2, 1, 0.
+ let r = _mm_permute_ps :: < 0x1b > ( a ) ;
3796
3695
// Expected: the four elements of `a` in reverse order.
let e = _mm_setr_ps ( 5. , 2. , 3. , 4. ) ;
3797
3696
assert_eq_m128 ( r, e) ;
3798
3697
}
@@ -3818,15 +3717,15 @@ mod tests {
3818
3717
#[ simd_test( enable = "avx" ) ]
3819
3718
unsafe fn test_mm256_permute_pd ( ) {
// Checks `_mm256_permute_pd` with the immediate 5.
3820
3719
let a = _mm256_setr_pd ( 4. , 3. , 2. , 5. ) ;
3821
- let r = _mm256_permute_pd ( a , 5 ) ;
3720
// The immediate is passed as a const generic argument.
// 5 == 0b0101, so bits 0..3 are 1, 0, 1, 0.
+ let r = _mm256_permute_pd :: < 5 > ( a ) ;
3822
3721
// Expected: the two elements of each pair of `a` swapped.
let e = _mm256_setr_pd ( 3. , 4. , 5. , 2. ) ;
3823
3722
assert_eq_m256d ( r, e) ;
3824
3723
}
3825
3724
3826
3725
#[ simd_test( enable = "avx" ) ]
3827
3726
unsafe fn test_mm_permute_pd ( ) {
// Checks `_mm_permute_pd` with the immediate 1.
3828
3727
let a = _mm_setr_pd ( 4. , 3. ) ;
3829
- let r = _mm_permute_pd ( a , 1 ) ;
3728
// The immediate is passed as a const generic argument.
// 1 == 0b01, so bit 0 is 1 and bit 1 is 0.
+ let r = _mm_permute_pd :: < 1 > ( a ) ;
3830
3729
// Expected: the two elements of `a` swapped.
let e = _mm_setr_pd ( 3. , 4. ) ;
3831
3730
assert_eq_m128d ( r, e) ;
3832
3731
}
0 commit comments