Skip to content

Commit

Permalink
Fix vector alignment issues with tuple
Browse files — browse the repository at this point in the history
  • Loading branch information
ZeroMemes committed Sep 21, 2024
1 parent b7ceb3b commit 7630273
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 18 deletions.
10 changes: 4 additions & 6 deletions src/arch/x86/AVX2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

namespace hat::detail {

inline auto load_signature_256(const signature_view signature) {
inline void load_signature_256(const signature_view signature, __m256i& bytes, __m256i& mask) {
std::byte byteBuffer[32]{}; // The remaining signature bytes
std::byte maskBuffer[32]{}; // A bitmask for the signature bytes we care about
for (size_t i = 0; i < signature.size(); i++) {
Expand All @@ -19,10 +19,8 @@ namespace hat::detail {
maskBuffer[i] = std::byte{0xFFu};
}
}
return std::make_tuple(
_mm256_loadu_si256(reinterpret_cast<__m256i*>(&byteBuffer)),
_mm256_loadu_si256(reinterpret_cast<__m256i*>(&maskBuffer))
);
bytes = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&byteBuffer));
mask = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&maskBuffer));
}

template<scan_alignment alignment, bool cmpeq2, bool veccmp>
Expand All @@ -41,7 +39,7 @@ namespace hat::detail {

__m256i signatureBytes, signatureMask;
if constexpr (veccmp) {
std::tie(signatureBytes, signatureMask) = load_signature_256(signature);
load_signature_256(signature, signatureBytes, signatureMask);
}

begin = next_boundary_align<alignment>(begin);
Expand Down
10 changes: 4 additions & 6 deletions src/arch/x86/AVX512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

namespace hat::detail {

inline auto load_signature_512(const signature_view signature) {
inline void load_signature_512(const signature_view signature, __m512i& bytes, uint64_t& mask) {
std::byte byteBuffer[64]{}; // The remaining signature bytes
uint64_t maskBuffer{}; // A bitmask for the signature bytes we care about
for (size_t i = 0; i < signature.size(); i++) {
Expand All @@ -19,10 +19,8 @@ namespace hat::detail {
maskBuffer |= (1ull << i);
}
}
return std::make_tuple(
_mm512_loadu_si512(&byteBuffer),
_cvtu64_mask64(maskBuffer)
);
bytes = _mm512_loadu_si512(&byteBuffer);
mask = maskBuffer;
}

template<scan_alignment alignment, bool cmpeq2, bool veccmp>
Expand All @@ -42,7 +40,7 @@ namespace hat::detail {
__m512i signatureBytes;
uint64_t signatureMask;
if constexpr (veccmp) {
std::tie(signatureBytes, signatureMask) = load_signature_512(signature);
load_signature_512(signature, signatureBytes, signatureMask);
}

begin = next_boundary_align<alignment>(begin);
Expand Down
10 changes: 4 additions & 6 deletions src/arch/x86/SSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

namespace hat::detail {

inline auto load_signature_128(const signature_view signature) {
inline void load_signature_128(const signature_view signature, __m128i& bytes, __m128i& mask) {
std::byte byteBuffer[16]{}; // The remaining signature bytes
std::byte maskBuffer[16]{}; // A bitmask for the signature bytes we care about
for (size_t i = 0; i < signature.size(); i++) {
Expand All @@ -19,10 +19,8 @@ namespace hat::detail {
maskBuffer[i] = std::byte{0xFFu};
}
}
return std::make_tuple(
_mm_loadu_si128(reinterpret_cast<__m128i*>(&byteBuffer)),
_mm_loadu_si128(reinterpret_cast<__m128i*>(&maskBuffer))
);
bytes = _mm_loadu_si128(reinterpret_cast<__m128i*>(&byteBuffer));
mask = _mm_loadu_si128(reinterpret_cast<__m128i*>(&maskBuffer));
}

template<scan_alignment alignment, bool cmpeq2, bool veccmp>
Expand All @@ -41,7 +39,7 @@ namespace hat::detail {

__m128i signatureBytes, signatureMask;
if constexpr (veccmp) {
std::tie(signatureBytes, signatureMask) = load_signature_128(signature);
load_signature_128(signature, signatureBytes, signatureMask);
}

begin = next_boundary_align<alignment>(begin);
Expand Down

0 comments on commit 7630273

Please sign in to comment.