Skip to content

Commit

Permalink
Fix bad alignment crash on SSE
Browse files Browse the repository at this point in the history
Applied same change to AVX2 and AVX512 impls
  • Loading branch information
ZeroMemes committed Feb 8, 2023
1 parent 444fc44 commit db0d28e
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/arch/x86/AVX2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace hat::detail {
const auto e = vec + n;

for (; vec != e; vec++) {
- const auto cmp = _mm256_cmpeq_epi8(firstByte, *vec);
+ const auto cmp = _mm256_cmpeq_epi8(firstByte, _mm256_loadu_si256(vec));
auto mask = static_cast<uint32_t>(_mm256_movemask_epi8(cmp));
while (mask) {
const auto offset = _tzcnt_u32(mask);
Expand Down
2 changes: 1 addition & 1 deletion src/arch/x86/AVX512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace hat::detail {
const auto e = vec + n;

for (; vec != e; vec++) {
- auto mask = _mm512_cmpeq_epi8_mask(firstByte, *vec);
+ auto mask = _mm512_cmpeq_epi8_mask(firstByte, _mm512_loadu_si512(vec));
while (mask) {
const auto offset = LIBHAT_TZCNT64(mask);
const auto i = reinterpret_cast<const std::byte*>(vec) + offset;
Expand Down
2 changes: 1 addition & 1 deletion src/arch/x86/SSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace hat::detail {
const auto e = vec + n;

for (; vec != e; vec++) {
- const auto cmp = _mm_cmpeq_epi8(firstByte, *vec);
+ const auto cmp = _mm_cmpeq_epi8(firstByte, _mm_loadu_si128(vec));
auto mask = static_cast<uint32_t>(_mm_movemask_epi8(cmp));
while (mask) {
const auto offset = LIBHAT_BSF32(mask);
Expand Down

0 comments on commit db0d28e

Please sign in to comment.