From c8477369098b1d2c575b23f8bff80c4ff7fe67e4 Mon Sep 17 00:00:00 2001 From: Brady Date: Wed, 8 Feb 2023 15:19:07 -0600 Subject: [PATCH] Signature register load dedicated functions --- src/arch/x86/AVX2.cpp | 19 ++++++++++++------- src/arch/x86/AVX512.cpp | 19 ++++++++++++------- src/arch/x86/SSE.cpp | 19 ++++++++++++------- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/src/arch/x86/AVX2.cpp b/src/arch/x86/AVX2.cpp index cd10e21..9db11f0 100644 --- a/src/arch/x86/AVX2.cpp +++ b/src/arch/x86/AVX2.cpp @@ -5,14 +5,11 @@ #include #include +#include namespace hat::detail { - template<> - scan_result find_pattern(const std::byte* begin, const std::byte* end, signature_view signature) { - // 256 bit vector containing first signature byte repeated - const auto firstByte = _mm256_set1_epi8(static_cast(*signature[0])); - + inline auto load_signature_256(signature_view signature) { std::byte byteBuffer[32]{}; // The remaining signature bytes std::byte maskBuffer[32]{}; // A bitmask for the signature bytes we care about for (size_t i = 1; i < signature.size(); i++) { @@ -22,9 +19,17 @@ namespace hat::detail { maskBuffer[i - 1] = std::byte{0xFFu}; } } + return std::make_tuple( + _mm256_loadu_si256(reinterpret_cast<__m256i*>(&byteBuffer)), + _mm256_loadu_si256(reinterpret_cast<__m256i*>(&maskBuffer)) + ); + } - const auto signatureBytes = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&byteBuffer)); - const auto signatureMask = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&maskBuffer)); + template<> + scan_result find_pattern(const std::byte* begin, const std::byte* end, signature_view signature) { + // 256 bit vector containing first signature byte repeated + const auto firstByte = _mm256_set1_epi8(static_cast(*signature[0])); + const auto [signatureBytes, signatureMask] = load_signature_256(signature); auto vec = reinterpret_cast(begin); const auto n = static_cast(end - signature.size() - begin) / sizeof(__m256i); diff --git a/src/arch/x86/AVX512.cpp 
b/src/arch/x86/AVX512.cpp index ad1e2f6..40cc100 100644 --- a/src/arch/x86/AVX512.cpp +++ b/src/arch/x86/AVX512.cpp @@ -5,14 +5,11 @@ #include #include +#include namespace hat::detail { - template<> - scan_result find_pattern(const std::byte* begin, const std::byte* end, signature_view signature) { - // 512 bit vector containing first signature byte repeated - const auto firstByte = _mm512_set1_epi8(static_cast(*signature[0])); - + inline auto load_signature_512(signature_view signature) { std::byte byteBuffer[64]{}; // The remaining signature bytes uint64_t maskBuffer{}; // A bitmask for the signature bytes we care about for (size_t i = 1; i < signature.size(); i++) { @@ -22,9 +19,17 @@ namespace hat::detail { maskBuffer |= (1ull << (i - 1)); } } + return std::make_tuple( + _mm512_loadu_si512(&byteBuffer), + _cvtu64_mask64(maskBuffer) + ); + } - const auto signatureBytes = _mm512_loadu_si512(&byteBuffer); - const auto signatureMask = _cvtu64_mask64(maskBuffer); + template<> + scan_result find_pattern(const std::byte* begin, const std::byte* end, signature_view signature) { + // 512 bit vector containing first signature byte repeated + const auto firstByte = _mm512_set1_epi8(static_cast(*signature[0])); + const auto [signatureBytes, signatureMask] = load_signature_512(signature); auto vec = reinterpret_cast(begin); const auto n = static_cast(end - signature.size() - begin) / sizeof(__m512i); diff --git a/src/arch/x86/SSE.cpp b/src/arch/x86/SSE.cpp index 0500b29..89f32e9 100644 --- a/src/arch/x86/SSE.cpp +++ b/src/arch/x86/SSE.cpp @@ -5,14 +5,11 @@ #include #include +#include namespace hat::detail { - template<> - scan_result find_pattern(const std::byte* begin, const std::byte* end, signature_view signature) { - // 256 bit vector containing first signature byte repeated - const auto firstByte = _mm_set1_epi8(static_cast(*signature[0])); - + inline auto load_signature_128(signature_view signature) { std::byte byteBuffer[16]{}; // The remaining signature bytes 
std::byte maskBuffer[16]{}; // A bitmask for the signature bytes we care about for (size_t i = 1; i < signature.size(); i++) { @@ -22,9 +19,17 @@ namespace hat::detail { maskBuffer[i - 1] = std::byte{0xFFu}; } } + return std::make_tuple( + _mm_loadu_si128(reinterpret_cast<__m128i*>(&byteBuffer)), + _mm_loadu_si128(reinterpret_cast<__m128i*>(&maskBuffer)) + ); + } - const auto signatureBytes = _mm_loadu_si128(reinterpret_cast<__m128i*>(&byteBuffer)); - const auto signatureMask = _mm_loadu_si128(reinterpret_cast<__m128i*>(&maskBuffer)); + template<> + scan_result find_pattern(const std::byte* begin, const std::byte* end, signature_view signature) { + // 128 bit vector containing first signature byte repeated + const auto firstByte = _mm_set1_epi8(static_cast(*signature[0])); + const auto [signatureBytes, signatureMask] = load_signature_128(signature); auto vec = reinterpret_cast(begin); const auto n = static_cast(end - signature.size() - begin) / sizeof(__m128i);