Skip to content

Commit c67c202

Browse files
Kraionix authored and ocornut committed
Merge feature/simd-strlen into master as a single commit
1 parent 707ba8c commit c67c202

File tree

2 files changed

+149
-1
lines changed

2 files changed

+149
-1
lines changed

imgui.cpp

+146
Original file line numberDiff line numberDiff line change
@@ -1960,6 +1960,152 @@ ImVec2 ImTriangleClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c,
19601960
// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions)
19611961
//-----------------------------------------------------------------------------
19621962

1963+
#if defined IMGUI_ENABLE_AVX2_IMSTRLEN
1964+
size_t ImStrlen(const char* str)
1965+
{
1966+
const size_t SIMD_LENGTH = 32;
1967+
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
1968+
1969+
const unsigned char* begin = (unsigned char*)str;
1970+
const unsigned char* ptr = begin;
1971+
1972+
// first page
1973+
{
1974+
const size_t PAGE_LENGTH = 4096;
1975+
const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;
1976+
1977+
const unsigned char* page_end = (const unsigned char*)_andn_u64(PAGE_LENGTH_MASK, (uintptr_t)ptr + PAGE_LENGTH_MASK);
1978+
const unsigned char* align_page_end = (const unsigned char*)(page_end - SIMD_LENGTH);
1979+
1980+
// if ptr is far the end of page
1981+
if (ptr <= align_page_end)
1982+
{
1983+
__m256i target = _mm256_setzero_si256();
1984+
1985+
// if ptr not aligned, align ptr to SIMD_LENGTH
1986+
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
1987+
{
1988+
__m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
1989+
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
1990+
1991+
if (mask)
1992+
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
1993+
1994+
ptr = (const unsigned char*)_andn_u64(SIMD_LENGTH_MASK, (uintptr_t)ptr + SIMD_LENGTH_MASK);
1995+
}
1996+
1997+
// main loop of first page
1998+
for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
1999+
{
2000+
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
2001+
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
2002+
2003+
if (mask)
2004+
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
2005+
2006+
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
2007+
}
2008+
}
2009+
2010+
// if ptr is near the end of page
2011+
for (; ptr < page_end; ptr++)
2012+
{
2013+
if (!(*ptr))
2014+
return (uintptr_t)(ptr - begin);
2015+
}
2016+
}
2017+
2018+
__m256i target = _mm256_setzero_si256();
2019+
2020+
// main loop
2021+
for (; ; ptr += SIMD_LENGTH)
2022+
{
2023+
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
2024+
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
2025+
2026+
if (mask)
2027+
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
2028+
2029+
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
2030+
}
2031+
}
2032+
#elif defined IMGUI_ENABLE_SSE_IMSTRLEN
2033+
// Index of the lowest set bit of 'mask'. Callers only pass a non-zero mask.
// _tzcnt_u32 is a BMI1 intrinsic that GCC only exposes with -mbmi, which an SSE-only build
// does not enable, so the compiler builtin is used on GCC-compatible compilers.
static inline unsigned int ImStrlenCountTrailingZeros(unsigned int mask)
{
#if defined(__GNUC__) || defined(__clang__)
    return (unsigned int)__builtin_ctz(mask);
#else
    return _tzcnt_u32(mask);
#endif
}

// SSE implementation of ImStrlen(): returns the number of bytes preceding the first '\0' in 'str'.
// The string is scanned 16 bytes at a time. Vector loads may read bytes located after the
// terminator, so every load is arranged to stay inside a single 4096-byte aligned region:
// - the one unaligned load is only issued when 16 bytes fit before the next 4096-byte boundary,
// - all other vector loads are 16-byte aligned, so they cannot straddle such a boundary,
// - the last few bytes before the boundary are checked one at a time.
size_t ImStrlen(const char* str)
{
    const size_t SIMD_LENGTH = 16;
    const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
    const size_t PAGE_LENGTH = 4096;
    const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;

    const unsigned char* begin = (const unsigned char*)str;
    const unsigned char* ptr = begin;
    const __m128i target = _mm_setzero_si128();

    // First 4096-byte boundary at or after 'str' (equal to 'str' when it is already aligned).
    const unsigned char* page_end = (const unsigned char*)(((uintptr_t)ptr + PAGE_LENGTH_MASK) & ~(uintptr_t)PAGE_LENGTH_MASK);
    // Last address from which a 16-byte load still ends at or before 'page_end'.
    const unsigned char* align_page_end = page_end - SIMD_LENGTH;

    // ptr is far enough from the end of the page for at least one full vector load
    if (ptr <= align_page_end)
    {
        // if ptr is not aligned, test one unaligned chunk and then round ptr up to SIMD_LENGTH.
        // The bytes skipped by the rounding were all covered by this chunk.
        // _mm_loadu_si128 (SSE2) is used rather than _mm_lddqu_si128, which requires SSE3.
        if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
        {
            __m128i chunk = _mm_loadu_si128((const __m128i*)ptr);
            int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));

            if (mask)
                return (size_t)(ptr - begin) + ImStrlenCountTrailingZeros((unsigned int)mask);

            ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~(uintptr_t)SIMD_LENGTH_MASK);
        }

        // main loop of the first page
        for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
        {
            __m128i chunk = _mm_load_si128((const __m128i*)ptr);
            int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));

            if (mask)
                return (size_t)(ptr - begin) + ImStrlenCountTrailingZeros((unsigned int)mask);

            _mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
        }
    }

    // ptr is near the end of the page: finish the page byte by byte
    for (; ptr < page_end; ptr++)
    {
        if (*ptr == '\0')
            return (size_t)(ptr - begin);
    }

    // main loop: ptr is now 4096-byte aligned, hence also aligned to SIMD_LENGTH
    for (; ; ptr += SIMD_LENGTH)
    {
        __m128i chunk = _mm_load_si128((const __m128i*)ptr);
        int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));

        if (mask)
            return (size_t)(ptr - begin) + ImStrlenCountTrailingZeros((unsigned int)mask);

        _mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
    }
}
2102+
#else
2103+
// Fallback used when neither SIMD variant is enabled: defer to the C runtime's strlen().
// Returns the number of bytes preceding the first '\0' in 'str'.
size_t ImStrlen(const char* str)
{
    const size_t length = strlen(str);
    return length;
}
2107+
#endif
2108+
19632109
#if defined IMGUI_ENABLE_AVX2_IMMEMCHR
19642110
const void* ImMemchr(const void* buf, int val, size_t count)
19652111
{

imgui_internal.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,10 @@ Index of this file:
9090

9191
// Only AVX2 supports integer and byte instructions for 256-bit registers. Implementing this on AVX1 is not possible.
9292
#if defined(IMGUI_ENABLE_AVX2)
93+
#define IMGUI_ENABLE_AVX2_IMSTRLEN
9394
#define IMGUI_ENABLE_AVX2_IMMEMCHR
9495
#elif defined(IMGUI_ENABLE_AVX) || defined(IMGUI_ENABLE_SSE)
96+
#define IMGUI_ENABLE_SSE_IMSTRLEN
9597
#define IMGUI_ENABLE_SSE_IMMEMCHR
9698
#endif
9799

@@ -395,7 +397,7 @@ static inline bool ImIsPowerOfTwo(ImU64 v) { return v != 0 && (v &
395397
// Round 'v' up to the nearest power of two; a value that already is a power of two is returned as-is.
// An input of 0 yields 0.
static inline int ImUpperPowerOfTwo(int v)
{
    v--;
    // Copy the highest set bit into every lower bit position (shifts of 1, 2, 4, 8, 16).
    for (int shift = 1; shift <= 16; shift <<= 1)
        v |= v >> shift;
    v++;
    return v;
}
396398

397399
// Helpers: String
398-
#define ImStrlen strlen
400+
IMGUI_API size_t ImStrlen(const char* str); // Compute the length of a null-terminated string.
399401
IMGUI_API const void* ImMemchr(const void* buf, int val, size_t count); // Find first occurrence of 'val' in buffer given length.
400402
IMGUI_API int ImStricmp(const char* str1, const char* str2); // Case insensitive compare.
401403
IMGUI_API int ImStrnicmp(const char* str1, const char* str2, size_t count); // Case insensitive compare to a certain count.

0 commit comments

Comments
 (0)