-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathAVX2.cpp
54 lines (44 loc) · 2.07 KB
/
AVX2.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#include <libhat/Defines.hpp>
#ifdef LIBHAT_X86
#include <libhat/Scanner.hpp>
#include <immintrin.h>
namespace hat::detail {

    // AVX2 implementation of the pattern scan.
    //
    // Strategy: broadcast the first signature byte across a 256 bit vector and compare it against
    // 32 bytes of input at a time. Every position whose byte equals the first signature byte is a
    // candidate; for each candidate the 32 bytes that follow it are compared against the remaining
    // signature bytes, ignoring the positions that are wildcards.
    //
    // Parameters:
    //   begin, end - the half-open byte range [begin, end) to search
    //   signature  - the pattern; elements without a value are treated as wildcards
    //
    // Returns the address of the first match found by the vectorized loop, otherwise whatever
    // find_pattern<scan_mode::Single> returns for the bytes the vectorized loop did not cover.
    template<>
    scan_result find_pattern<scan_mode::AVX2>(std::byte* begin, std::byte* end, signature_view signature) {
        constexpr size_t vectorSize = sizeof(__m256i);
        // One byte is matched by the broadcast compare and at most 32 more by the follow-up compare
        constexpr size_t maxSignatureSize = vectorSize + 1;

        const size_t signatureSize = signature.size();

        // Inputs this implementation cannot handle are delegated to the scalar implementation:
        //  - an empty signature or a wildcard first element (there is no byte to broadcast)
        //  - a signature that does not fit in the 32 byte comparison buffers below
        //  - an empty or inverted range
        if (signatureSize == 0 || signatureSize > maxSignatureSize || !signature[0].has_value() || end <= begin) {
            return find_pattern<scan_mode::Single>(begin, end, signature);
        }

        // 256 bit vector containing first signature byte repeated
        const auto firstByte = _mm256_set1_epi8(static_cast<int8_t>(*signature[0]));

        std::byte byteBuffer[vectorSize]{}; // The remaining signature bytes
        std::byte maskBuffer[vectorSize]{}; // A bitmask for the signature bytes we care about
        for (size_t i = 1; i < signatureSize; i++) {
            const auto element = signature[i];
            if (element.has_value()) {
                byteBuffer[i - 1] = *element;
                maskBuffer[i - 1] = std::byte{0xFFu};
            }
        }

        const auto signatureBytes = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(byteBuffer));
        const auto signatureMask = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(maskBuffer));

        // A candidate at the last byte of a block (block + 31) is verified by loading the 32 bytes
        // starting at block + 32, so a block may only be processed when block + 64 <= end. That
        // leaves one fewer block than fits in the range, and none at all for ranges under 64 bytes.
        // Computing the count this way also cannot wrap around for ranges shorter than the signature.
        const auto rangeSize = static_cast<size_t>(end - begin);
        const size_t blockCount = rangeSize >= 2 * vectorSize ? rangeSize / vectorSize - 1 : 0;

        auto vec = reinterpret_cast<__m256i*>(begin);
        const auto vecEnd = vec + blockCount;
        for (; vec != vecEnd; vec++) {
            // begin has no alignment guarantee, so the block must be read with an unaligned load
            const auto block = _mm256_loadu_si256(vec);
            const auto cmp = _mm256_cmpeq_epi8(firstByte, block);
            // One bit per byte of the block; set where the byte equals the first signature byte
            auto mask = static_cast<uint32_t>(_mm256_movemask_epi8(cmp));
            while (mask) {
                // Lowest set bit first, so candidates are visited in address order
                const auto offset = _tzcnt_u32(mask);
                const auto i = reinterpret_cast<std::byte*>(vec) + offset;
                const auto data = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(i + 1));
                const auto cmpToSig = _mm256_cmpeq_epi8(signatureBytes, data);
                // testc yields 1 when every bit set in signatureMask is also set in cmpToSig,
                // i.e. every non-wildcard signature byte compared equal
                const auto matched = _mm256_testc_si256(cmpToSig, signatureMask);
                if (matched) {
                    return i;
                }
                // Clear the lowest set bit and move on to the next candidate
                mask = _blsr_u32(mask);
            }
        }

        // Look in remaining bytes that couldn't be grouped into 256 bits
        begin = reinterpret_cast<std::byte*>(vec);
        return find_pattern<scan_mode::Single>(begin, end, signature);
    }
}
#endif