Skip to content

Commit c5496ee

Browse files
committed
Merge feature/simd-strlen into master
2 parents 08a3e51 + 909cfdc commit c5496ee

5 files changed

+191
-42
lines changed

imgui.cpp

+167-21
Original file line numberDiff line numberDiff line change
@@ -1959,6 +1959,152 @@ ImVec2 ImTriangleClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c,
19591959
// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions)
19601960
//-----------------------------------------------------------------------------
19611961

1962+
#if defined IMGUI_ENABLE_AVX2_IMSTRLEN
1963+
size_t ImStrlen(const char* str)
1964+
{
1965+
const size_t SIMD_LENGTH = 32;
1966+
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
1967+
1968+
const unsigned char* begin = (unsigned char*)str;
1969+
const unsigned char* ptr = begin;
1970+
1971+
// first page
1972+
{
1973+
const size_t PAGE_LENGTH = 4096;
1974+
const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;
1975+
1976+
const unsigned char* page_end = (const unsigned char*)_andn_u64(PAGE_LENGTH_MASK, (uintptr_t)ptr + PAGE_LENGTH_MASK);
1977+
const unsigned char* align_page_end = (const unsigned char*)(page_end - SIMD_LENGTH);
1978+
1979+
// if ptr is far the end of page
1980+
if (ptr <= align_page_end)
1981+
{
1982+
__m256i target = _mm256_setzero_si256();
1983+
1984+
// if ptr not aligned, align ptr to SIMD_LENGTH
1985+
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
1986+
{
1987+
__m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
1988+
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
1989+
1990+
if (mask)
1991+
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
1992+
1993+
ptr = (const unsigned char*)_andn_u64(SIMD_LENGTH_MASK, (uintptr_t)ptr + SIMD_LENGTH_MASK);
1994+
}
1995+
1996+
// main loop of first page
1997+
for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
1998+
{
1999+
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
2000+
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
2001+
2002+
if (mask)
2003+
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
2004+
2005+
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
2006+
}
2007+
}
2008+
2009+
// if ptr is near the end of page
2010+
for (; ptr < page_end; ptr++)
2011+
{
2012+
if (!(*ptr))
2013+
return (uintptr_t)(ptr - begin);
2014+
}
2015+
}
2016+
2017+
__m256i target = _mm256_setzero_si256();
2018+
2019+
// main loop
2020+
for (; ; ptr += SIMD_LENGTH)
2021+
{
2022+
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
2023+
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
2024+
2025+
if (mask)
2026+
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
2027+
2028+
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
2029+
}
2030+
}
2031+
#elif defined IMGUI_ENABLE_SSE_IMSTRLEN
2032+
// Index of the lowest set bit of 'mask'. Precondition: mask != 0.
// GCC only provides _tzcnt_u32() when BMI1 code generation is enabled, which a plain SSE build does not imply,
// so use the compiler builtin there. Other compilers keep the original intrinsic.
static inline unsigned int ImStrlenLowestSetBit(unsigned int mask)
{
#if defined(__GNUC__) || defined(__clang__)
    return (unsigned int)__builtin_ctz(mask);
#else
    return _tzcnt_u32(mask);
#endif
}

// Compute the length of a zero-terminated string, scanning 16 bytes at a time with SSE2 byte compares.
// - str: zero-terminated string, must not be NULL.
// - Returns the number of bytes preceding the first zero byte.
// NOTE: aligned 16-byte loads may read bytes located after the terminator. They never cross a page boundary,
// so they cannot touch an unmapped page, but memory checkers may report them as out-of-bounds reads.
size_t ImStrlen(const char* str)
{
    const size_t SIMD_LENGTH = 16;
    const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
    const size_t PAGE_LENGTH = 4096;
    const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;

    const unsigned char* begin = (const unsigned char*)str;
    const unsigned char* ptr = begin;
    const __m128i zero = _mm_setzero_si128();

    // Bring ptr up to a SIMD_LENGTH boundary.
    const size_t misalignment = (size_t)((uintptr_t)ptr & SIMD_LENGTH_MASK);
    if (misalignment != 0)
    {
        const size_t bytes_to_page_end = PAGE_LENGTH - (size_t)((uintptr_t)ptr & PAGE_LENGTH_MASK);
        if (bytes_to_page_end >= SIMD_LENGTH)
        {
            // Far from the end of the page: one unaligned load is guaranteed to stay inside the current page.
            // _mm_loadu_si128() is SSE2, whereas _mm_lddqu_si128() would require SSE3.
            const __m128i chunk = _mm_loadu_si128((const __m128i*)ptr);
            const int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, zero));
            if (mask)
                return (size_t)(ptr - begin) + ImStrlenLowestSetBit((unsigned int)mask);
            ptr += SIMD_LENGTH - misalignment;
        }
        else
        {
            // Near the end of the page: an unaligned load could spill into the next page, so scan byte by byte.
            // The page end is also a SIMD_LENGTH boundary, so ptr is aligned once this loop completes.
            for (const unsigned char* page_end = ptr + bytes_to_page_end; ptr < page_end; ptr++)
                if (*ptr == 0)
                    return (size_t)(ptr - begin);
        }
    }

    // Main loop: ptr is aligned from here on. The terminator ends the loop.
    for (; ; ptr += SIMD_LENGTH)
    {
        const __m128i chunk = _mm_load_si128((const __m128i*)ptr);
        const int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, zero));
        if (mask)
            return (size_t)(ptr - begin) + ImStrlenLowestSetBit((unsigned int)mask);

        _mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
    }
}
2101+
#else
2102+
// Fallback used when neither SIMD implementation is selected: defer to the C runtime's strlen().
// - str: zero-terminated string.
// - Returns the number of bytes preceding the terminator.
size_t ImStrlen(const char* str)
{
    const size_t length = strlen(str);
    return length;
}
2106+
#endif
2107+
19622108
#if defined IMGUI_ENABLE_AVX2_IMMEMCHR
19632109
const void* ImMemchr(const void* buf, int val, size_t count)
19642110
{
@@ -2086,15 +2232,15 @@ void ImStrncpy(char* dst, const char* src, size_t count)
20862232

20872233
char* ImStrdup(const char* str)
20882234
{
2089-
size_t len = strlen(str);
2235+
size_t len = ImStrlen(str);
20902236
void* buf = IM_ALLOC(len + 1);
20912237
return (char*)memcpy(buf, (const void*)str, len + 1);
20922238
}
20932239

20942240
char* ImStrdupcpy(char* dst, size_t* p_dst_size, const char* src)
20952241
{
2096-
size_t dst_buf_size = p_dst_size ? *p_dst_size : strlen(dst) + 1;
2097-
size_t src_size = strlen(src) + 1;
2242+
size_t dst_buf_size = p_dst_size ? *p_dst_size : ImStrlen(dst) + 1;
2243+
size_t src_size = ImStrlen(src) + 1;
20982244
if (dst_buf_size < src_size)
20992245
{
21002246
IM_FREE(dst);
@@ -2128,7 +2274,7 @@ const char* ImStreolRange(const char* str, const char* str_end)
21282274

21292275
const char* ImStrbol(const char* buf_mid_line, const char* buf_begin) // find beginning-of-line
21302276
{
2131-
IM_ASSERT_PARANOID(buf_mid_line >= buf_begin && buf_mid_line <= buf_begin + strlen(buf_begin));
2277+
IM_ASSERT_PARANOID(buf_mid_line >= buf_begin && buf_mid_line <= buf_begin + ImStrlen(buf_begin));
21322278
while (buf_mid_line > buf_begin && buf_mid_line[-1] != '\n')
21332279
buf_mid_line--;
21342280
return buf_mid_line;
@@ -2137,7 +2283,7 @@ const char* ImStrbol(const char* buf_mid_line, const char* buf_begin) // find be
21372283
const char* ImStristr(const char* haystack, const char* haystack_end, const char* needle, const char* needle_end)
21382284
{
21392285
if (!needle_end)
2140-
needle_end = needle + strlen(needle);
2286+
needle_end = needle + ImStrlen(needle);
21412287

21422288
const char un0 = (char)ImToUpper(*needle);
21432289
while ((!haystack_end && *haystack) || (haystack_end && haystack < haystack_end))
@@ -2258,7 +2404,7 @@ void ImFormatStringToTempBufferV(const char** out_buf, const char** out_buf_end,
22582404
if (buf == NULL)
22592405
buf = "(null)";
22602406
*out_buf = buf;
2261-
if (out_buf_end) { *out_buf_end = buf + strlen(buf); }
2407+
if (out_buf_end) { *out_buf_end = buf + ImStrlen(buf); }
22622408
}
22632409
else if (fmt[0] == '%' && fmt[1] == '.' && fmt[2] == '*' && fmt[3] == 's' && fmt[4] == 0)
22642410
{
@@ -2667,7 +2813,7 @@ const char* ImTextFindPreviousUtf8Codepoint(const char* in_text_start, const cha
26672813
int ImTextCountLines(const char* in_text, const char* in_text_end)
26682814
{
26692815
if (in_text_end == NULL)
2670-
in_text_end = in_text + strlen(in_text); // FIXME-OPT: Not optimal approach, discourage use for now.
2816+
in_text_end = in_text + ImStrlen(in_text); // FIXME-OPT: Not optimal approach, discourage use for now.
26712817
int count = 0;
26722818
while (in_text < in_text_end)
26732819
{
@@ -2952,7 +3098,7 @@ void ImGuiTextFilter::ImGuiTextRange::split(char separator, ImVector<ImGuiTextRa
29523098
void ImGuiTextFilter::Build()
29533099
{
29543100
Filters.resize(0);
2955-
ImGuiTextRange input_range(InputBuf, InputBuf + strlen(InputBuf));
3101+
ImGuiTextRange input_range(InputBuf, InputBuf + ImStrlen(InputBuf));
29563102
input_range.split(',', &Filters);
29573103

29583104
CountGrep = 0;
@@ -3020,7 +3166,7 @@ char ImGuiTextBuffer::EmptyString[1] = { 0 };
30203166

30213167
void ImGuiTextBuffer::append(const char* str, const char* str_end)
30223168
{
3023-
int len = str_end ? (int)(str_end - str) : (int)strlen(str);
3169+
int len = str_end ? (int)(str_end - str) : (int)ImStrlen(str);
30243170

30253171
// Add zero-terminator the first time
30263172
const int write_off = (Buf.Size != 0) ? Buf.Size : 1;
@@ -3703,7 +3849,7 @@ void ImGui::RenderText(ImVec2 pos, const char* text, const char* text_end, bool
37033849
else
37043850
{
37053851
if (!text_end)
3706-
text_end = text + strlen(text); // FIXME-OPT
3852+
text_end = text + ImStrlen(text); // FIXME-OPT
37073853
text_display_end = text_end;
37083854
}
37093855

@@ -3721,7 +3867,7 @@ void ImGui::RenderTextWrapped(ImVec2 pos, const char* text, const char* text_end
37213867
ImGuiWindow* window = g.CurrentWindow;
37223868

37233869
if (!text_end)
3724-
text_end = text + strlen(text); // FIXME-OPT
3870+
text_end = text + ImStrlen(text); // FIXME-OPT
37253871

37263872
if (text != text_end)
37273873
{
@@ -4394,7 +4540,7 @@ ImGuiWindow::ImGuiWindow(ImGuiContext* ctx, const char* name) : DrawListInst(NUL
43944540
memset(this, 0, sizeof(*this));
43954541
Ctx = ctx;
43964542
Name = ImStrdup(name);
4397-
NameBufLen = (int)strlen(name) + 1;
4543+
NameBufLen = (int)ImStrlen(name) + 1;
43984544
ID = ImHashStr(name);
43994545
IDStack.push_back(ID);
44004546
MoveId = GetID("#MOVE");
@@ -8925,7 +9071,7 @@ const char* ImGui::GetKeyChordName(ImGuiKeyChord key_chord)
89259071
(key != ImGuiKey_None || key_chord == ImGuiKey_None) ? GetKeyName(key) : "");
89269072
size_t len;
89279073
if (key == ImGuiKey_None && key_chord != 0)
8928-
if ((len = strlen(g.TempKeychordName)) != 0) // Remove trailing '+'
9074+
if ((len = ImStrlen(g.TempKeychordName)) != 0) // Remove trailing '+'
89299075
g.TempKeychordName[len - 1] = 0;
89309076
return g.TempKeychordName;
89319077
}
@@ -14215,7 +14361,7 @@ bool ImGui::SetDragDropPayload(const char* type, const void* data, size_t data_s
1421514361
cond = ImGuiCond_Always;
1421614362

1421714363
IM_ASSERT(type != NULL);
14218-
IM_ASSERT(strlen(type) < IM_ARRAYSIZE(payload.DataType) && "Payload type can be at most 32 characters long");
14364+
IM_ASSERT(ImStrlen(type) < IM_ARRAYSIZE(payload.DataType) && "Payload type can be at most 32 characters long");
1421914365
IM_ASSERT((data != NULL && data_size > 0) || (data == NULL && data_size == 0));
1422014366
IM_ASSERT(cond == ImGuiCond_Always || cond == ImGuiCond_Once);
1422114367
IM_ASSERT(payload.SourceId != 0); // Not called between BeginDragDropSource() and EndDragDropSource()
@@ -14459,7 +14605,7 @@ void ImGui::LogRenderedText(const ImVec2* ref_pos, const char* text, const char*
1445914605
}
1446014606

1446114607
if (prefix)
14462-
LogRenderedText(ref_pos, prefix, prefix + strlen(prefix)); // Calculate end ourself to ensure "##" are included here.
14608+
LogRenderedText(ref_pos, prefix, prefix + ImStrlen(prefix)); // Calculate end ourself to ensure "##" are included here.
1446314609

1446414610
// Re-adjust padding if we have popped out of our starting depth
1446514611
if (g.LogDepthRef > window->DC.TreeDepth)
@@ -14492,7 +14638,7 @@ void ImGui::LogRenderedText(const ImVec2* ref_pos, const char* text, const char*
1449214638
}
1449314639

1449414640
if (suffix)
14495-
LogRenderedText(ref_pos, suffix, suffix + strlen(suffix));
14641+
LogRenderedText(ref_pos, suffix, suffix + ImStrlen(suffix));
1449614642
}
1449714643

1449814644
// Start logging/capturing text output
@@ -14758,7 +14904,7 @@ void ImGui::LoadIniSettingsFromMemory(const char* ini_data, size_t ini_size)
1475814904
// For user convenience, we allow passing a non zero-terminated string (hence the ini_size parameter).
1475914905
// For our convenience and to make the code simpler, we'll also write zero-terminators within the buffer. So let's create a writable copy..
1476014906
if (ini_size == 0)
14761-
ini_size = strlen(ini_data);
14907+
ini_size = ImStrlen(ini_data);
1476214908
g.SettingsIniData.Buf.resize((int)ini_size + 1);
1476314909
char* const buf = g.SettingsIniData.Buf.Data;
1476414910
char* const buf_end = buf + ini_size;
@@ -14859,7 +15005,7 @@ ImGuiWindowSettings* ImGui::CreateNewWindowSettings(const char* name)
1485915005
if (const char* p = strstr(name, "###"))
1486015006
name = p;
1486115007
}
14862-
const size_t name_len = strlen(name);
15008+
const size_t name_len = ImStrlen(name);
1486315009

1486415010
// Allocate chunk
1486515011
const size_t chunk_size = sizeof(ImGuiWindowSettings) + name_len + 1;
@@ -15151,7 +15297,7 @@ static void Platform_SetClipboardTextFn_DefaultImpl(ImGuiContext*, const char* t
1515115297
if (!main_clipboard)
1515215298
PasteboardCreate(kPasteboardClipboard, &main_clipboard);
1515315299
PasteboardClear(main_clipboard);
15154-
CFDataRef cf_data = CFDataCreate(kCFAllocatorDefault, (const UInt8*)text, strlen(text));
15300+
CFDataRef cf_data = CFDataCreate(kCFAllocatorDefault, (const UInt8*)text, ImStrlen(text));
1515515301
if (cf_data)
1515615302
{
1515715303
PasteboardPutItemFlavor(main_clipboard, (PasteboardItemID)1, CFSTR("public.utf8-plain-text"), cf_data, 0);
@@ -15205,7 +15351,7 @@ static void Platform_SetClipboardTextFn_DefaultImpl(ImGuiContext* ctx, const cha
1520515351
{
1520615352
ImGuiContext& g = *ctx;
1520715353
g.ClipboardHandlerData.clear();
15208-
const char* text_end = text + strlen(text);
15354+
const char* text_end = text + ImStrlen(text);
1520915355
g.ClipboardHandlerData.resize((int)(text_end - text) + 1);
1521015356
memcpy(&g.ClipboardHandlerData[0], text, (size_t)(text_end - text));
1521115357
g.ClipboardHandlerData[(int)(text_end - text)] = 0;
@@ -16975,7 +17121,7 @@ void ImGui::DebugHookIdInfo(ImGuiID id, ImGuiDataType data_type, const void* dat
1697517121
ImFormatString(info->Desc, IM_ARRAYSIZE(info->Desc), "%d", (int)(intptr_t)data_id);
1697617122
break;
1697717123
case ImGuiDataType_String:
16978-
ImFormatString(info->Desc, IM_ARRAYSIZE(info->Desc), "%.*s", data_id_end ? (int)((const char*)data_id_end - (const char*)data_id) : (int)strlen((const char*)data_id), (const char*)data_id);
17124+
ImFormatString(info->Desc, IM_ARRAYSIZE(info->Desc), "%.*s", data_id_end ? (int)((const char*)data_id_end - (const char*)data_id) : (int)ImStrlen((const char*)data_id), (const char*)data_id);
1697917125
break;
1698017126
case ImGuiDataType_Pointer:
1698117127
ImFormatString(info->Desc, IM_ARRAYSIZE(info->Desc), "(void*)0x%p", data_id);

imgui_draw.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -2672,7 +2672,7 @@ ImFont* ImFontAtlas::AddFontFromFileTTF(const char* filename, float size_pixels,
26722672
{
26732673
// Store a short copy of filename into the font name for convenience
26742674
const char* p;
2675-
for (p = filename + strlen(filename); p > filename && p[-1] != '/' && p[-1] != '\\'; p--) {}
2675+
for (p = filename + ImStrlen(filename); p > filename && p[-1] != '/' && p[-1] != '\\'; p--) {}
26762676
ImFormatString(font_cfg.Name, IM_ARRAYSIZE(font_cfg.Name), "%s, %.0fpx", p, size_pixels);
26772677
}
26782678
return AddFontFromMemoryTTF(data, (int)data_size, size_pixels, &font_cfg, glyph_ranges);
@@ -2707,7 +2707,7 @@ ImFont* ImFontAtlas::AddFontFromMemoryCompressedTTF(const void* compressed_ttf_d
27072707

27082708
ImFont* ImFontAtlas::AddFontFromMemoryCompressedBase85TTF(const char* compressed_ttf_data_base85, float size_pixels, const ImFontConfig* font_cfg, const ImWchar* glyph_ranges)
27092709
{
2710-
int compressed_ttf_size = (((int)strlen(compressed_ttf_data_base85) + 4) / 5) * 4;
2710+
int compressed_ttf_size = (((int)ImStrlen(compressed_ttf_data_base85) + 4) / 5) * 4;
27112711
void* compressed_ttf = IM_ALLOC((size_t)compressed_ttf_size);
27122712
Decode85((const unsigned char*)compressed_ttf_data_base85, (unsigned char*)compressed_ttf);
27132713
ImFont* font = AddFontFromMemoryCompressedTTF(compressed_ttf, compressed_ttf_size, size_pixels, font_cfg, glyph_ranges);
@@ -4029,7 +4029,7 @@ const char* ImFont::CalcWordWrapPositionA(float scale, const char* text, const c
40294029
ImVec2 ImFont::CalcTextSizeA(float size, float max_width, float wrap_width, const char* text_begin, const char* text_end, const char** remaining)
40304030
{
40314031
if (!text_end)
4032-
text_end = text_begin + strlen(text_begin); // FIXME-OPT: Need to avoid this.
4032+
text_end = text_begin + ImStrlen(text_begin); // FIXME-OPT: Need to avoid this.
40334033

40344034
const float line_height = size;
40354035
const float scale = size / FontSize;
@@ -4129,7 +4129,7 @@ void ImFont::RenderText(ImDrawList* draw_list, float size, const ImVec2& pos, Im
41294129
return;
41304130

41314131
if (!text_end)
4132-
text_end = text_begin + strlen(text_begin); // ImGui:: functions generally already provides a valid text_end, so this is merely to handle direct calls.
4132+
text_end = text_begin + ImStrlen(text_begin); // ImGui:: functions generally already provides a valid text_end, so this is merely to handle direct calls.
41334133

41344134
const float scale = size / FontSize;
41354135
const float line_height = FontSize * scale;

imgui_internal.h

+3
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,10 @@ Index of this file:
9090

9191
// Only AVX2 supports integer and byte instructions for 256-bit registers. Implementing this on AVX1 is not possible.
9292
#if defined(IMGUI_ENABLE_AVX2)
93+
#define IMGUI_ENABLE_AVX2_IMSTRLEN
9394
#define IMGUI_ENABLE_AVX2_IMMEMCHR
9495
#elif defined(IMGUI_ENABLE_AVX) || defined(IMGUI_ENABLE_SSE)
96+
#define IMGUI_ENABLE_SSE_IMSTRLEN
9597
#define IMGUI_ENABLE_SSE_IMMEMCHR
9698
#endif
9799

@@ -394,6 +396,7 @@ static inline bool ImIsPowerOfTwo(int v) { return v != 0 && (v &
394396
static inline bool ImIsPowerOfTwo(ImU64 v) { return v != 0 && (v & (v - 1)) == 0; }
395397
static inline int ImUpperPowerOfTwo(int v) { v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return v; }
396398

399+
IMGUI_API size_t ImStrlen(const char* str); // Compute the length of a null-terminated string.
397400
IMGUI_API const void* ImMemchr(const void* buf, int val, size_t count); // Find first occurrence of 'val' in buffer given length.
398401
IMGUI_API int ImStricmp(const char* str1, const char* str2); // Case insensitive compare.
399402
IMGUI_API int ImStrnicmp(const char* str1, const char* str2, size_t count); // Case insensitive compare to a certain count.

imgui_tables.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1648,7 +1648,7 @@ void ImGui::TableSetupColumn(const char* label, ImGuiTableColumnFlags flags, flo
16481648
if (label != NULL && label[0] != 0)
16491649
{
16501650
column->NameOffset = (ImS16)table->ColumnsNames.size();
1651-
table->ColumnsNames.append(label, label + strlen(label) + 1);
1651+
table->ColumnsNames.append(label, label + ImStrlen(label) + 1);
16521652
}
16531653
}
16541654

0 commit comments

Comments
 (0)