diff --git a/include/libhat/Scanner.hpp b/include/libhat/Scanner.hpp index 8fcdf45..3ede14c 100644 --- a/include/libhat/Scanner.hpp +++ b/include/libhat/Scanner.hpp @@ -160,7 +160,7 @@ namespace hat { return std::assume_aligned(ptr); } - template + template LIBHAT_FORCEINLINE auto segment_scan( const std::byte* begin, const std::byte* end, @@ -181,7 +181,8 @@ namespace hat { } const size_t vecAvailable = end - reinterpret_cast(vecBegin); - const auto vecEnd = vecBegin + (vecAvailable >= signatureSize ? (vecAvailable - signatureSize) / sizeof(Vector) : 0); + const size_t requiredAfter = veccmp ? sizeof(Vector) : signatureSize; + const auto vecEnd = vecBegin + (vecAvailable >= requiredAfter ? (vecAvailable - requiredAfter) / sizeof(Vector) : 0); // If the scan can't be vectorized, just do the single byte scanner "pre" part if (vecBegin == vecEnd) LIBHAT_UNLIKELY { diff --git a/src/arch/x86/AVX2.cpp b/src/arch/x86/AVX2.cpp index 71f2cc8..95ce663 100644 --- a/src/arch/x86/AVX2.cpp +++ b/src/arch/x86/AVX2.cpp @@ -47,7 +47,7 @@ namespace hat::detail { return {}; } - auto [pre, vec, post] = segment_scan<__m256i>(begin, end, signature.size(), cmpIndex); + auto [pre, vec, post] = segment_scan<__m256i, veccmp>(begin, end, signature.size(), cmpIndex); if (!pre.empty()) { const auto result = find_pattern_single(pre.data(), pre.data() + pre.size(), context); diff --git a/src/arch/x86/AVX512.cpp b/src/arch/x86/AVX512.cpp index 582bd5a..202fc67 100644 --- a/src/arch/x86/AVX512.cpp +++ b/src/arch/x86/AVX512.cpp @@ -48,7 +48,7 @@ namespace hat::detail { return {}; } - auto [pre, vec, post] = segment_scan<__m512i>(begin, end, signature.size(), cmpIndex); + auto [pre, vec, post] = segment_scan<__m512i, veccmp>(begin, end, signature.size(), cmpIndex); if (!pre.empty()) { const auto result = find_pattern_single(pre.data(), pre.data() + pre.size(), context); diff --git a/src/arch/x86/SSE.cpp b/src/arch/x86/SSE.cpp index a57f89d..49b7ed7 100644 --- a/src/arch/x86/SSE.cpp +++ b/src/arch/x86/SSE.cpp @@ -47,7 +47,7 @@ namespace hat::detail { return {}; } - auto [pre, vec, post] = segment_scan<__m128i>(begin, end, signature.size(), cmpIndex); + auto [pre, vec, post] = segment_scan<__m128i, veccmp>(begin, end, signature.size(), cmpIndex); if (!pre.empty()) { const auto result = find_pattern_single(pre.data(), pre.data() + pre.size(), context);