@@ -1960,6 +1960,152 @@ ImVec2 ImTriangleClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c,
1960
1960
// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions)
1961
1961
//-----------------------------------------------------------------------------
1962
1962
1963
+ #if defined IMGUI_ENABLE_AVX2_IMSTRLEN
1964
// Index of the lowest set bit of a non-zero 32-bit movemask result.
// GCC/Clang only provide _tzcnt_u32() when BMI1 code generation is enabled, which AVX2 does not imply,
// so the always-available builtin is used there. Other compilers keep using _tzcnt_u32().
static inline unsigned int ImStrlenAvx2FirstSetBit(int mask)
{
#if defined(__GNUC__) || defined(__clang__)
    return (unsigned int)__builtin_ctz((unsigned int)mask);
#else
    return _tzcnt_u32((unsigned int)mask);
#endif
}

// AVX2 implementation of strlen(): returns the number of bytes before the first '\0' in 'str'.
// 'str' must point to a zero-terminated string (a null pointer is not handled, same as strlen()).
//
// The string is scanned 32 bytes at a time, so bytes located after the terminator may be read.
// To keep those reads from touching a page the string does not live in, every vector load stays
// inside one PAGE_LENGTH-aligned block:
//  - up to the first page boundary: one optional unaligned load, then aligned loads, then a
//    byte-wise tail when fewer than 32 bytes remain before the boundary;
//  - after the boundary 'ptr' is 32-byte aligned, so an aligned load can never straddle two pages.
// NOTE(review): this assumes memory pages are aligned to (a multiple of) 4096 bytes.
size_t ImStrlen(const char* str)
{
    const size_t SIMD_LENGTH = 32;
    const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
    const size_t PAGE_LENGTH = 4096;
    const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;

    const unsigned char* begin = (const unsigned char*)str;
    const unsigned char* ptr = begin;
    const __m256i target = _mm256_setzero_si256();

    // Round up with a plain AND-NOT expression rather than _andn_u64(): that intrinsic is BMI1 and
    // 64-bit only, whereas this form works for any pointer width and matches the SSE variant.
    const unsigned char* page_end = (const unsigned char*)(((uintptr_t)ptr + PAGE_LENGTH_MASK) & ~(uintptr_t)PAGE_LENGTH_MASK);
    const unsigned char* align_page_end = page_end - SIMD_LENGTH;

    // At least one full vector fits before the end of the first page.
    if (ptr <= align_page_end)
    {
        // Unaligned start: test the first 32 bytes with an unaligned load, then round 'ptr' up to
        // the next 32-byte boundary (the bytes skipped by the rounding were just tested).
        if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
        {
            const __m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
            const int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
            if (mask)
                return (size_t)(ptr - begin) + ImStrlenAvx2FirstSetBit(mask);

            ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~(uintptr_t)SIMD_LENGTH_MASK);
        }

        // Aligned loads up to the end of the first page.
        for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
        {
            const __m256i chunk = _mm256_load_si256((const __m256i*)ptr);
            const int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
            if (mask)
                return (size_t)(ptr - begin) + ImStrlenAvx2FirstSetBit(mask);

            _mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
        }
    }

    // Fewer than 32 bytes left before the page boundary: finish the page one byte at a time.
    for (; ptr < page_end; ptr++)
    {
        if (!(*ptr))
            return (size_t)(ptr - begin);
    }

    // Main loop: 'ptr' now sits on a page boundary, hence is 32-byte aligned.
    // The loop only exits by finding the terminator.
    for (; ; ptr += SIMD_LENGTH)
    {
        const __m256i chunk = _mm256_load_si256((const __m256i*)ptr);
        const int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
        if (mask)
            return (size_t)(ptr - begin) + ImStrlenAvx2FirstSetBit(mask);

        _mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
    }
}
2032
+ #elif defined IMGUI_ENABLE_SSE_IMSTRLEN
2033
// Index of the lowest set bit of a non-zero 16-bit movemask result.
// _tzcnt_u32() is a BMI1 intrinsic that GCC/Clang only provide when BMI1 code generation is
// enabled, so the always-available builtin is used there. Other compilers keep using _tzcnt_u32().
static inline unsigned int ImStrlenSseFirstSetBit(int mask)
{
#if defined(__GNUC__) || defined(__clang__)
    return (unsigned int)__builtin_ctz((unsigned int)mask);
#else
    return _tzcnt_u32((unsigned int)mask);
#endif
}

// SSE2 implementation of strlen(): returns the number of bytes before the first '\0' in 'str'.
// 'str' must point to a zero-terminated string (a null pointer is not handled, same as strlen()).
//
// The string is scanned 16 bytes at a time, so bytes located after the terminator may be read.
// To keep those reads from touching a page the string does not live in, every vector load stays
// inside one PAGE_LENGTH-aligned block:
//  - up to the first page boundary: one optional unaligned load, then aligned loads, then a
//    byte-wise tail when fewer than 16 bytes remain before the boundary;
//  - after the boundary 'ptr' is 16-byte aligned, so an aligned load can never straddle two pages.
// NOTE(review): this assumes memory pages are aligned to (a multiple of) 4096 bytes.
size_t ImStrlen(const char* str)
{
    const size_t SIMD_LENGTH = 16;
    const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
    const size_t PAGE_LENGTH = 4096;
    const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;

    const unsigned char* begin = (const unsigned char*)str;
    const unsigned char* ptr = begin;
    const __m128i target = _mm_setzero_si128();

    const unsigned char* page_end = (const unsigned char*)(((uintptr_t)ptr + PAGE_LENGTH_MASK) & ~(uintptr_t)PAGE_LENGTH_MASK);
    const unsigned char* align_page_end = page_end - SIMD_LENGTH;

    // At least one full vector fits before the end of the first page.
    if (ptr <= align_page_end)
    {
        // Unaligned start: test the first 16 bytes with an unaligned load, then round 'ptr' up to
        // the next 16-byte boundary (the bytes skipped by the rounding were just tested).
        // _mm_loadu_si128() is used instead of _mm_lddqu_si128() because the latter requires SSE3.
        if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
        {
            const __m128i chunk = _mm_loadu_si128((const __m128i*)ptr);
            const int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
            if (mask)
                return (size_t)(ptr - begin) + ImStrlenSseFirstSetBit(mask);

            ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~(uintptr_t)SIMD_LENGTH_MASK);
        }

        // Aligned loads up to the end of the first page.
        for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
        {
            const __m128i chunk = _mm_load_si128((const __m128i*)ptr);
            const int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
            if (mask)
                return (size_t)(ptr - begin) + ImStrlenSseFirstSetBit(mask);

            _mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
        }
    }

    // Fewer than 16 bytes left before the page boundary: finish the page one byte at a time.
    for (; ptr < page_end; ptr++)
    {
        if (*ptr == '\0')
            return (size_t)(ptr - begin);
    }

    // Main loop: 'ptr' now sits on a page boundary, hence is 16-byte aligned.
    // The loop only exits by finding the terminator.
    for (; ; ptr += SIMD_LENGTH)
    {
        const __m128i chunk = _mm_load_si128((const __m128i*)ptr);
        const int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
        if (mask)
            return (size_t)(ptr - begin) + ImStrlenSseFirstSetBit(mask);

        _mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
    }
}
2102
+ #else
2103
// Portable fallback used when no SIMD variant is enabled: forwards to the C runtime's strlen().
// Returns the number of bytes before the terminating '\0' of 'str'.
size_t ImStrlen(const char* str)
{
    const size_t length = strlen(str);
    return length;
}
2107
+ #endif
2108
+
1963
2109
#if defined IMGUI_ENABLE_AVX2_IMMEMCHR
1964
2110
const void* ImMemchr(const void* buf, int val, size_t count)
1965
2111
{
0 commit comments