From db0d28e9abdb77546d22dcd82b0eee27a15f95c8 Mon Sep 17 00:00:00 2001
From: Brady
Date: Wed, 8 Feb 2023 14:55:56 -0600
Subject: [PATCH] Fix bad alignment crash on SSE

Applied same change to AVX2 and AVX512 impls
---
 src/arch/x86/AVX2.cpp | 2 +-
 src/arch/x86/AVX512.cpp | 2 +-
 src/arch/x86/SSE.cpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/arch/x86/AVX2.cpp b/src/arch/x86/AVX2.cpp
index ab7f03e..cd10e21 100644
--- a/src/arch/x86/AVX2.cpp
+++ b/src/arch/x86/AVX2.cpp
@@ -31,7 +31,7 @@ namespace hat::detail {
         const auto e = vec + n;
         for (; vec != e; vec++) {
-            const auto cmp = _mm256_cmpeq_epi8(firstByte, *vec);
+            const auto cmp = _mm256_cmpeq_epi8(firstByte, _mm256_loadu_si256(vec));
             auto mask = static_cast(_mm256_movemask_epi8(cmp));
             while (mask) {
                 const auto offset = _tzcnt_u32(mask);
diff --git a/src/arch/x86/AVX512.cpp b/src/arch/x86/AVX512.cpp
index 47e559e..ad1e2f6 100644
--- a/src/arch/x86/AVX512.cpp
+++ b/src/arch/x86/AVX512.cpp
@@ -31,7 +31,7 @@ namespace hat::detail {
         const auto e = vec + n;
         for (; vec != e; vec++) {
-            auto mask = _mm512_cmpeq_epi8_mask(firstByte, *vec);
+            auto mask = _mm512_cmpeq_epi8_mask(firstByte, _mm512_loadu_si512(vec));
             while (mask) {
                 const auto offset = LIBHAT_TZCNT64(mask);
                 const auto i = reinterpret_cast(vec) + offset;
diff --git a/src/arch/x86/SSE.cpp b/src/arch/x86/SSE.cpp
index 4c9717c..0500b29 100644
--- a/src/arch/x86/SSE.cpp
+++ b/src/arch/x86/SSE.cpp
@@ -31,7 +31,7 @@ namespace hat::detail {
         const auto e = vec + n;
         for (; vec != e; vec++) {
-            const auto cmp = _mm_cmpeq_epi8(firstByte, *vec);
+            const auto cmp = _mm_cmpeq_epi8(firstByte, _mm_loadu_si128(vec));
             auto mask = static_cast(_mm_movemask_epi8(cmp));
             while (mask) {
                 const auto offset = LIBHAT_BSF32(mask);