Skip to content

Commit cb31326

Browse files
committed
Heavily optimized strings
1 parent 9e87581 commit cb31326

File tree

2 files changed

+38
-48
lines changed

2 files changed

+38
-48
lines changed

src/ldpl.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ int main(int argc, const char *argv[])
347347

348348
// Generate the C++ compilation command
349349
string compile_line =
350-
"c++ ldpl-temp.cpp -std=gnu++11 -w -o " + final_filename;
350+
"c++ ldpl-temp.cpp -std=gnu++11 -w -O3 -o " + final_filename;
351351
#ifdef STATIC_BUILDS
352352
if (!no_static)
353353
compile_line += " -static-libgcc -static-libstdc++ ";

src/ldpl_lib/ldpl_lib.cpp

+37-47
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class graphemedText
4141
{
4242
private:
4343
bool graphemeIndexDirty = true;
44-
vector<string> graphemeIndexMap;
44+
vector<size_t> graphemeIndexMap;
45+
vector<size_t> graphemeSizeMap;
4546
string stringRep;
4647
void createFromString(const string &cstr);
4748
void createFromChar(const char *cstr);
@@ -89,7 +90,6 @@ class graphemedText
8990
bool isNumber() const;
9091
double getNumber() const;
9192
graphemedText substr(size_t from, size_t count);
92-
bool substr_comp(size_t from, size_t count, graphemedText &value);
9393
graphemedText &erase(size_t from, size_t count);
9494
graphemedText substr(size_t from);
9595
int compare(size_t from, size_t count, const graphemedText &other);
@@ -122,8 +122,9 @@ void graphemedText::regenerateGraphemeIndex()
122122
graphemeIndexDirty = false;
123123

124124
graphemeIndexMap.clear();
125+
graphemeSizeMap.clear();
125126
size_t i = 0;
126-
string currentGrapheme;
127+
size_t currentGraphemeSize = 0;
127128

128129
while (i < stringRep.length())
129130
{
@@ -147,7 +148,7 @@ void graphemedText::regenerateGraphemeIndex()
147148
charLen = 4;
148149
}
149150

150-
string character = stringRep.substr(i, charLen);
151+
currentGraphemeSize += charLen;
151152

152153
// Check for combining characters (this is a simplified check)
153154
bool isCombiningCharacter = false;
@@ -174,23 +175,25 @@ void graphemedText::regenerateGraphemeIndex()
174175

175176
if (!isCombiningCharacter)
176177
{
177-
if (!currentGrapheme.empty())
178+
if (currentGraphemeSize > 0)
178179
{
179-
graphemeIndexMap.push_back(currentGrapheme);
180+
graphemeIndexMap.push_back(i);
181+
graphemeSizeMap.push_back(currentGraphemeSize);
180182
}
181-
currentGrapheme = character;
183+
currentGraphemeSize = 0;
182184
}
183185
else
184186
{
185-
currentGrapheme += character;
187+
currentGraphemeSize += charLen;
186188
}
187189

188190
i += charLen;
189191
}
190192

191-
if (!currentGrapheme.empty())
193+
if (currentGraphemeSize > 0)
192194
{
193-
graphemeIndexMap.push_back(currentGrapheme);
195+
graphemeIndexMap.push_back(i);
196+
graphemeSizeMap.push_back(currentGraphemeSize);
194197
}
195198
}
196199
}
@@ -215,7 +218,8 @@ void graphemedText::createFromMem(const char *cstr, size_t cstrlen)
215218
}
216219
size_t graphemedText::size()
217220
{
218-
if(stringRep.length() <= 1) return stringRep.length();
221+
if (stringRep.length() <= 1)
222+
return stringRep.length();
219223
regenerateGraphemeIndex();
220224
return graphemeIndexMap.size();
221225
}
@@ -292,7 +296,7 @@ string graphemedText::operator[](size_t i)
292296
cout << "Out-of-bounds index access." << endl;
293297
exit(1);
294298
}
295-
return graphemeIndexMap[i];
299+
return stringRep.substr(graphemeIndexMap[i], graphemeSizeMap[i]);
296300
}
297301
// [] for setting
298302
/*string graphemedText::operator[](int i)
@@ -352,24 +356,18 @@ graphemedText &graphemedText::operator+=(const char *txt)
352356

353357
bool graphemedText::isAlphanumeric()
354358
{
355-
regenerateGraphemeIndex();
356-
for (const string &s : graphemeIndexMap)
357-
{
358-
for (const char &c : s)
359-
if (!isalnum(c))
360-
return false;
361-
}
359+
for (const char &c : stringRep)
360+
if (!isalnum(c))
361+
return false;
362362
return true;
363363
}
364364

365365
bool graphemedText::isAlphanumeric(size_t from)
366366
{
367-
regenerateGraphemeIndex();
368-
for (size_t i = from; i < size(); ++i)
367+
for (size_t i = from; i < stringRep.length(); ++i)
369368
{
370-
for (const char &c : graphemeIndexMap[i])
371-
if (!isalnum(c))
372-
return false;
369+
if (!isalnum(stringRep[i]))
370+
return false;
373371
}
374372
return true;
375373
}
@@ -429,26 +427,7 @@ graphemedText graphemedText::substr(size_t from, size_t count)
429427
{
430428
regenerateGraphemeIndex();
431429
count = from + count > graphemeIndexMap.size() ? graphemeIndexMap.size() - from : count;
432-
string new_text = "";
433-
for (size_t i = from; i < from + count; ++i)
434-
{
435-
new_text += graphemeIndexMap[i];
436-
}
437-
return new_text;
438-
}
439-
440-
bool graphemedText::substr_comp(size_t from, size_t count, graphemedText &value)
441-
{
442-
value.regenerateGraphemeIndex();
443-
count = from + count > graphemeIndexMap.size() ? graphemeIndexMap.size() - from : count;
444-
for (size_t i = from; i < from + count; ++i)
445-
{
446-
if (graphemeIndexMap[i] != value.graphemeIndexMap[i - from])
447-
{
448-
return false;
449-
}
450-
}
451-
return true;
430+
return stringRep.substr(graphemeIndexMap[from], graphemeIndexMap[from + count] - graphemeIndexMap[from]);
452431
}
453432

454433
graphemedText &graphemedText::erase(size_t from, size_t count)
@@ -1055,16 +1034,27 @@ graphemedText trimCopy(graphemedText _line)
10551034
void utf8_split_list(ldpl_list<graphemedText> &result, graphemedText haystack, graphemedText needle)
10561035
{
10571036
result.inner_collection.clear();
1058-
int lenHaystack = haystack.size();
1059-
int lenNeedle = needle.size();
1037+
const int lenHaystack = haystack.size();
1038+
const int lenNeedle = needle.size();
10601039
if (lenNeedle > 0)
10611040
{
10621041
int i = 0;
10631042
int last_start = 0;
1043+
bool success = false;
10641044
while (i + lenNeedle <= lenHaystack)
10651045
{
1066-
if (haystack.substr_comp(i, lenNeedle, needle))
1046+
success = true;
1047+
for (size_t x = 0; x < lenNeedle; ++x)
1048+
{
1049+
if (haystack[i + x] != needle[x])
1050+
{
1051+
success = false;
1052+
break;
1053+
}
1054+
}
1055+
if (success)
10671056
{
1057+
10681058
graphemedText token = haystack.substr(last_start, i - last_start);
10691059
if (token.length() > 0)
10701060
{

0 commit comments

Comments
 (0)