@@ -41,7 +41,8 @@ class graphemedText
41
41
{
42
42
private:
43
43
bool graphemeIndexDirty = true ;
44
- vector<string> graphemeIndexMap;
44
+ vector<size_t > graphemeIndexMap;
45
+ vector<size_t > graphemeSizeMap;
45
46
string stringRep;
46
47
void createFromString (const string &cstr);
47
48
void createFromChar (const char *cstr);
@@ -89,7 +90,6 @@ class graphemedText
89
90
bool isNumber () const ;
90
91
double getNumber () const ;
91
92
graphemedText substr (size_t from, size_t count);
92
- bool substr_comp (size_t from, size_t count, graphemedText &value);
93
93
graphemedText &erase (size_t from, size_t count);
94
94
graphemedText substr (size_t from);
95
95
int compare (size_t from, size_t count, const graphemedText &other);
@@ -122,8 +122,9 @@ void graphemedText::regenerateGraphemeIndex()
122
122
graphemeIndexDirty = false ;
123
123
124
124
graphemeIndexMap.clear ();
125
+ graphemeSizeMap.clear ();
125
126
size_t i = 0 ;
126
- string currentGrapheme ;
127
+ size_t currentGraphemeSize = 0 ;
127
128
128
129
while (i < stringRep.length ())
129
130
{
@@ -147,7 +148,7 @@ void graphemedText::regenerateGraphemeIndex()
147
148
charLen = 4 ;
148
149
}
149
150
150
- string character = stringRep. substr (i, charLen) ;
151
+ currentGraphemeSize += charLen;
151
152
152
153
// Check for combining characters (this is a simplified check)
153
154
bool isCombiningCharacter = false ;
@@ -174,23 +175,25 @@ void graphemedText::regenerateGraphemeIndex()
174
175
175
176
if (!isCombiningCharacter)
176
177
{
177
- if (!currentGrapheme. empty () )
178
+ if (currentGraphemeSize > 0 )
178
179
{
179
- graphemeIndexMap.push_back (currentGrapheme);
180
+ graphemeIndexMap.push_back (i);
181
+ graphemeSizeMap.push_back (currentGraphemeSize);
180
182
}
181
- currentGrapheme = character ;
183
+ currentGraphemeSize = 0 ;
182
184
}
183
185
else
184
186
{
185
- currentGrapheme += character ;
187
+ currentGraphemeSize += charLen ;
186
188
}
187
189
188
190
i += charLen;
189
191
}
190
192
191
- if (!currentGrapheme. empty () )
193
+ if (currentGraphemeSize > 0 )
192
194
{
193
- graphemeIndexMap.push_back (currentGrapheme);
195
+ graphemeIndexMap.push_back (i);
196
+ graphemeSizeMap.push_back (currentGraphemeSize);
194
197
}
195
198
}
196
199
}
@@ -215,7 +218,8 @@ void graphemedText::createFromMem(const char *cstr, size_t cstrlen)
215
218
}
216
219
size_t graphemedText::size ()
217
220
{
218
- if (stringRep.length () <= 1 ) return stringRep.length ();
221
+ if (stringRep.length () <= 1 )
222
+ return stringRep.length ();
219
223
regenerateGraphemeIndex ();
220
224
return graphemeIndexMap.size ();
221
225
}
@@ -292,7 +296,7 @@ string graphemedText::operator[](size_t i)
292
296
cout << " Out-of-bounds index access." << endl;
293
297
exit (1 );
294
298
}
295
- return graphemeIndexMap[i];
299
+ return stringRep. substr ( graphemeIndexMap[i], graphemeSizeMap[i]) ;
296
300
}
297
301
// [] for setting
298
302
/* string graphemedText::operator[](int i)
@@ -352,24 +356,18 @@ graphemedText &graphemedText::operator+=(const char *txt)
352
356
353
357
bool graphemedText::isAlphanumeric ()
354
358
{
355
- regenerateGraphemeIndex ();
356
- for (const string &s : graphemeIndexMap)
357
- {
358
- for (const char &c : s)
359
- if (!isalnum (c))
360
- return false ;
361
- }
359
+ for (const char &c : stringRep)
360
+ if (!isalnum (c))
361
+ return false ;
362
362
return true ;
363
363
}
364
364
365
365
bool graphemedText::isAlphanumeric (size_t from)
366
366
{
367
- regenerateGraphemeIndex ();
368
- for (size_t i = from; i < size (); ++i)
367
+ for (size_t i = from; i < stringRep.length (); ++i)
369
368
{
370
- for (const char &c : graphemeIndexMap[i])
371
- if (!isalnum (c))
372
- return false ;
369
+ if (!isalnum (stringRep[i]))
370
+ return false ;
373
371
}
374
372
return true ;
375
373
}
@@ -429,26 +427,7 @@ graphemedText graphemedText::substr(size_t from, size_t count)
429
427
{
430
428
regenerateGraphemeIndex ();
431
429
count = from + count > graphemeIndexMap.size () ? graphemeIndexMap.size () - from : count;
432
- string new_text = " " ;
433
- for (size_t i = from; i < from + count; ++i)
434
- {
435
- new_text += graphemeIndexMap[i];
436
- }
437
- return new_text;
438
- }
439
-
440
- bool graphemedText::substr_comp (size_t from, size_t count, graphemedText &value)
441
- {
442
- value.regenerateGraphemeIndex ();
443
- count = from + count > graphemeIndexMap.size () ? graphemeIndexMap.size () - from : count;
444
- for (size_t i = from; i < from + count; ++i)
445
- {
446
- if (graphemeIndexMap[i] != value.graphemeIndexMap [i - from])
447
- {
448
- return false ;
449
- }
450
- }
451
- return true ;
430
+ return stringRep.substr (graphemeIndexMap[from], graphemeIndexMap[from + count] - graphemeIndexMap[from]);
452
431
}
453
432
454
433
graphemedText &graphemedText::erase (size_t from, size_t count)
@@ -1055,16 +1034,27 @@ graphemedText trimCopy(graphemedText _line)
1055
1034
void utf8_split_list (ldpl_list<graphemedText> &result, graphemedText haystack, graphemedText needle)
1056
1035
{
1057
1036
result.inner_collection .clear ();
1058
- int lenHaystack = haystack.size ();
1059
- int lenNeedle = needle.size ();
1037
+ const int lenHaystack = haystack.size ();
1038
+ const int lenNeedle = needle.size ();
1060
1039
if (lenNeedle > 0 )
1061
1040
{
1062
1041
int i = 0 ;
1063
1042
int last_start = 0 ;
1043
+ bool success = false ;
1064
1044
while (i + lenNeedle <= lenHaystack)
1065
1045
{
1066
- if (haystack.substr_comp (i, lenNeedle, needle))
1046
+ success = true ;
1047
+ for (size_t x = 0 ; x < lenNeedle; ++x)
1048
+ {
1049
+ if (haystack[i + x] != needle[x])
1050
+ {
1051
+ success = false ;
1052
+ break ;
1053
+ }
1054
+ }
1055
+ if (success)
1067
1056
{
1057
+
1068
1058
graphemedText token = haystack.substr (last_start, i - last_start);
1069
1059
if (token.length () > 0 )
1070
1060
{
0 commit comments