Skip to content

Commit

Permalink
Fix the implementation of _mm256_alignr_epi8
Browse files Browse the repository at this point in the history
The bug seems to have been mostly a copy/paste error, so this commit
re-reviews the intrinsic and aligns its implementation with the one in
clang.

Closes rust-lang#328
  • Loading branch information
alexcrichton committed Feb 25, 2018
1 parent 5bea452 commit 37f44b2
Showing 1 changed file with 114 additions and 46 deletions.
160 changes: 114 additions & 46 deletions coresimd/x86/i586/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
/// result, shift the result right by `n` bytes, and return the low 16 bytes.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, n = 15))]
#[cfg_attr(test, assert_instr(vpalignr, n = 7))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i {
let n = n as u32;
Expand All @@ -141,46 +141,104 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i {
let a = a.as_i8x32();
let b = b.as_i8x32();

macro_rules! shuffle {
($shift:expr) => {
let r: i8x32 = match n {
0 => {
simd_shuffle32(b, a, [
0 + $shift, 1 + $shift,
2 + $shift, 3 + $shift,
4 + $shift, 5 + $shift,
6 + $shift, 7 + $shift,
8 + $shift, 9 + $shift,
10 + $shift, 11 + $shift,
12 + $shift, 13 + $shift,
14 + $shift, 15 + $shift,
16 + $shift, 17 + $shift,
18 + $shift, 19 + $shift,
20 + $shift, 21 + $shift,
22 + $shift, 23 + $shift,
24 + $shift, 25 + $shift,
26 + $shift, 27 + $shift,
28 + $shift, 29 + $shift,
30 + $shift, 31 + $shift,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
])
}
}
let r: i8x32 = match n {
0 => shuffle!(0),
1 => shuffle!(1),
2 => shuffle!(2),
3 => shuffle!(3),
4 => shuffle!(4),
5 => shuffle!(5),
6 => shuffle!(6),
7 => shuffle!(7),
8 => shuffle!(8),
9 => shuffle!(9),
10 => shuffle!(10),
11 => shuffle!(11),
12 => shuffle!(12),
13 => shuffle!(13),
14 => shuffle!(14),
15 => shuffle!(15),
_ => shuffle!(16),
1 => {
simd_shuffle32(b, a, [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48,
])
}
2 => {
simd_shuffle32(b, a, [
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49,
])
}
3 => {
simd_shuffle32(b, a, [
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
])
}
4 => {
simd_shuffle32(b, a, [
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
])
}
5 => {
simd_shuffle32(b, a, [
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
])
}
6 => {
simd_shuffle32(b, a, [
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
])
}
7 => {
simd_shuffle32(b, a, [
7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38,
23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
])
}
8 => {
simd_shuffle32(b, a, [
8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39,
24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
])
}
9 => {
simd_shuffle32(b, a, [
9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40,
25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
])
}
10 => {
simd_shuffle32(b, a, [
10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
])
}
11 => {
simd_shuffle32(b, a, [
11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
])
}
12 => {
simd_shuffle32(b, a, [
12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
])
}
13 => {
simd_shuffle32(b, a, [
13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
])
}
14 => {
simd_shuffle32(b, a, [
14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
])
}
15 => {
simd_shuffle32(b, a, [
15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
])
}
_ => b,
};
mem::transmute(r)
}
Expand Down Expand Up @@ -4747,29 +4805,39 @@ mod tests {
#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = _mm256_setr_epi8(
2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17,
10, 11, 12, 13, 14, 15, 16, 0,
18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 0,
);
assert_eq_m256i(r, expected);

let r = _mm256_alignr_epi8(a, b, 4);
#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = _mm256_setr_epi8(
-17, -18, -19, -20, -21, -22, -23, -24,
-25, -26, -27, -28, -29, -30, -31, -32,
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
-5, -6, -7, -8, -9, -10, -11, -12,
-13, -14, -15, -16, 1, 2, 3, 4,
-21, -22, -23, -24, -25, -26, -27, -28,
-29, -30, -31, -32, 17, 18, 19, 20,
);
assert_eq_m256i(r, expected);

#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = _mm256_setr_epi8(
-1, -2, -3, -4, -5, -6, -7, -8,
-9, -10, -11, -12, -13, -14, -15, -16, -17,
-18, -19, -20, -21, -22, -23, -24, -25,
-26, -27, -28, -29, -30, -31, -32,
);
let r = _mm256_alignr_epi8(a, b, 16);
assert_eq_m256i(r, expected);

let r = _mm256_alignr_epi8(a, b, 15);
#[cfg_attr(rustfmt, rustfmt_skip)]
let expected = _mm256_setr_epi8(
-16, -17, -18, -19, -20, -21, -22, -23,
-24, -25, -26, -27, -28, -29, -30, -31,
-32, 1, 2, 3, 4, 5, 6, 7,
-16, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
-32, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
);
assert_eq_m256i(r, expected);

Expand Down

0 comments on commit 37f44b2

Please sign in to comment.