Skip to content

Commit

Permalink
Aligned scanning optimizations
Browse files (browse the repository at this point in the history)
  • Loading branch information
ZeroMemes committed Jan 24, 2024
1 parent ba4a942 commit feef00d
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 9 deletions.
8 changes: 5 additions & 3 deletions src/arch/x86/AVX2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,17 @@ namespace hat::detail {
const auto cmp = _mm256_cmpeq_epi8(firstByte, _mm256_loadu_si256(vec));
auto mask = static_cast<uint32_t>(_mm256_movemask_epi8(cmp));

if constexpr (cmpeq2) {
if constexpr (alignment != scan_alignment::X1) {
mask &= create_alignment_mask<uint32_t, alignment>();
if (!mask) continue;
} else if constexpr (cmpeq2) {
const auto cmp2 = _mm256_cmpeq_epi8(secondByte, _mm256_loadu_si256(vec));
auto mask2 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp2));
// avoid loading unaligned memory by letting a match of the first signature byte in the last
// position imply that the second byte also matched
mask &= (mask2 >> 1) | (0b1u << 31);
}

mask &= create_alignment_mask<uint32_t, alignment>();
while (mask) {
const auto offset = _tzcnt_u32(mask);
const auto i = reinterpret_cast<const std::byte*>(vec) + offset;
Expand Down Expand Up @@ -91,7 +93,7 @@ namespace hat::detail {

template<scan_alignment alignment>
scan_result find_pattern_avx2(const std::byte* begin, const std::byte* end, signature_view signature) {
const bool cmpeq2 = signature.size() > 1 && signature[1].has_value();
const bool cmpeq2 = alignment == scan_alignment::X1 && signature.size() > 1 && signature[1].has_value();
const bool veccmp = signature.size() <= 33;

if (cmpeq2 && veccmp) {
Expand Down
8 changes: 5 additions & 3 deletions src/arch/x86/AVX512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ namespace hat::detail {
for (; vec != e; vec++) {
auto mask = _mm512_cmpeq_epi8_mask(firstByte, _mm512_loadu_si512(vec));

if constexpr (cmpeq2) {
if constexpr (alignment != scan_alignment::X1) {
mask &= create_alignment_mask<uint64_t, alignment>();
if (!mask) continue;
} else if constexpr (cmpeq2) {
const auto mask2 = _mm512_cmpeq_epi8_mask(secondByte, _mm512_loadu_si512(vec));
mask &= (mask2 >> 1) | (0b1ull << 63);
}

mask &= create_alignment_mask<uint64_t, alignment>();
while (mask) {
const auto offset = LIBHAT_TZCNT64(mask);
const auto i = reinterpret_cast<const std::byte*>(vec) + offset;
Expand Down Expand Up @@ -87,7 +89,7 @@ namespace hat::detail {

template<scan_alignment alignment>
scan_result find_pattern_avx512(const std::byte* begin, const std::byte* end, signature_view signature) {
const bool cmpeq2 = signature.size() > 1 && signature[1].has_value();
const bool cmpeq2 = alignment == scan_alignment::X1 && signature.size() > 1 && signature[1].has_value();
const bool veccmp = signature.size() <= 65;

if (cmpeq2 && veccmp) {
Expand Down
8 changes: 5 additions & 3 deletions src/arch/x86/SSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,15 @@ namespace hat::detail {
const auto cmp = _mm_cmpeq_epi8(firstByte, _mm_loadu_si128(vec));
auto mask = static_cast<uint16_t>(_mm_movemask_epi8(cmp));

if constexpr (cmpeq2) {
if constexpr (alignment != scan_alignment::X1) {
mask &= create_alignment_mask<uint16_t, alignment>();
if (!mask) continue;
} else if constexpr (cmpeq2) {
const auto cmp2 = _mm_cmpeq_epi8(secondByte, _mm_loadu_si128(vec));
auto mask2 = static_cast<uint16_t>(_mm_movemask_epi8(cmp2));
mask &= (mask2 >> 1) | (0b1u << 15);
}

mask &= create_alignment_mask<uint16_t, alignment>();
while (mask) {
const auto offset = LIBHAT_BSF32(mask);
const auto i = reinterpret_cast<const std::byte*>(vec) + offset;
Expand Down Expand Up @@ -89,7 +91,7 @@ namespace hat::detail {

template<scan_alignment alignment>
scan_result find_pattern_sse(const std::byte* begin, const std::byte* end, signature_view signature) {
const bool cmpeq2 = signature.size() > 1 && signature[1].has_value();
const bool cmpeq2 = alignment == scan_alignment::X1 && signature.size() > 1 && signature[1].has_value();
const bool veccmp = signature.size() <= 17;

if (cmpeq2 && veccmp) {
Expand Down

0 comments on commit feef00d

Please sign in to comment.