This is a C library for purifying text by filtering out the words on a given list. It supports UTF-8 strings (wchar_t*, char*).
git clone https://github.com/lakoo/text_purifier_c.git
cd text_purifier_c
./autogen.sh
./configure
make
make install
##############
# example.c
##############
#include <locale.h>
#include <stdio.h>
#include <wchar.h>
#include <text_purifier_c.h>
/*
 * Marks a function parameter as intentionally unused: the parameter is renamed
 * to UNUSED_<name> and tagged with the GCC/Clang `unused` attribute so that no
 * unused-parameter warning is emitted for it.
 */
#define UNUSED(x) UNUSED_##x __attribute__((__unused__))
/*
 * Narrow-string (char*) walkthrough of the purifier API: registers a small
 * word list one entry at a time, purifies two sample sentences in three
 * different ways, prints each "input -> output" pair, and releases every
 * returned string as well as the purifier handle.
 *
 * NOTE(review): the non-ASCII literals below look mis-encoded in this copy
 * (one of them even contains a raw line break); restore them from the
 * upstream UTF-8 source before compiling.
 */
void charExample()
{
    const char* words[] = { "ζ§θ¦θ¨θ«", "η²ε£", "θ²ζ
εη" };
    const size_t wordCount = sizeof(words) / sizeof(words[0]);
    const char strA[] = "ιζ―ζ§θ¦θ¨θ«εοΌ";
    const char strB[] = "ιθ£ζζ§θ¦θ¨θ«γη²ε£η¨θͺγθ²ζ
εηηηγ";

    void* handle = text_purifier_init();

    /* Register the words one by one. */
    size_t i = 0;
    while (i < wordCount)
    {
        text_purifier_add(handle, words[i]);
        ++i;
    }

    /* 1. Purify with a replacement string. */
    const char* purified = text_purifier_purify2str(handle, strA, "ζ²³θΉ");
    printf("1. %s -> %s\n", strA, purified);
    text_purifier_free_str(handle, purified);

    /* 2. Purify with the mask character '*' and a final argument of 1. */
    purified = text_purifier_purify2char(handle, strB, '*', 1);
    printf("2. %s -> %s\n", strB, purified);
    text_purifier_free_str(handle, purified);

    /* 3. Purify with the mask character '#' and a final argument of 0. */
    purified = text_purifier_purify2char(handle, strB, '#', 0);
    printf("3. %s -> %s\n", strB, purified);
    text_purifier_free_str(handle, purified);
    purified = NULL;

    text_purifier_destroy(handle);
    handle = NULL;
}
/*
 * Wide-string (wchar_t*) walkthrough of the purifier API: registers the whole
 * word list with a single call, purifies two sample sentences in three
 * different ways, prints each "input -> output" pair, and releases every
 * returned string as well as the purifier handle.
 *
 * NOTE(review): the non-ASCII literals below look mis-encoded in this copy
 * (one of them even contains a raw line break); restore them from the
 * upstream UTF-8 source before compiling.
 */
void wcharExample()
{
    void* handle = text_purifier_init();
    const wchar_t* list[] = { L"ζ§θ¦θ¨θ«", L"η²ε£", L"θ²ζ
εη" };
    const size_t size = sizeof(list) / sizeof(wchar_t*);
    text_purifier_add_wlist(handle, list, size);

    /*
     * The results are printed with printf("%ls") instead of wprintf():
     * main() and charExample() have already written to stdout with printf(),
     * which makes the stream byte-oriented, and wide-character output
     * functions must not be applied to a byte-oriented stream. "%ls" converts
     * the wide strings to multibyte text using the current locale.
     */

    /* 4. Purify with a replacement string. */
    const wchar_t strA[] = L"ιζ―ζ§θ¦θ¨θ«εοΌ";
    const wchar_t* str = text_purifier_wpurify2str(handle, strA, L"ζ²³θΉ");
    printf("4. %ls -> %ls\n", strA, str);
    text_purifier_free_wstr(handle, str);
    str = NULL;

    /* 5. Purify with a wide mask character and a final argument of 1. */
    const wchar_t strB[] = L"ιθ£ζζ§θ¦θ¨θ«γη²ε£η¨θͺγθ²ζ
εηηηγ";
    str = text_purifier_wpurify2char(handle, strB, L'η’', 1);
    printf("5. %ls -> %ls\n", strB, str);
    text_purifier_free_wstr(handle, str);
    str = NULL;

    /* 6. Purify with a wide mask character and a final argument of 0. */
    str = text_purifier_wpurify2char(handle, strB, L'η¦', 0);
    printf("6. %ls -> %ls\n", strB, str);
    text_purifier_free_wstr(handle, str);
    str = NULL;

    text_purifier_destroy(handle);
    handle = NULL;
}
/*
 * Entry point: selects a locale, then runs the char* example followed by the
 * wchar_t* example, printing a heading before each. Always returns 0.
 */
int main(int UNUSED(argc), char* UNUSED(argv[]))
{
    /*
     * Printing wide strings depends on the active locale. "C.UTF-8" is not
     * available on every system, so fall back to the locale configured in the
     * environment, and warn if neither can be selected.
     */
    if (setlocale(LC_ALL, "C.UTF-8") == NULL && setlocale(LC_ALL, "") == NULL)
    {
        fprintf(stderr, "warning: could not select a locale; non-ASCII output may fail\n");
    }

    printf("char\n");
    charExample();
    printf("\n");
    printf("wchar_t\n");
    wcharExample();
    return 0;
}
$ gcc -std=c11 -c -o example.o example.c
$ gcc -o example example.o -ltextpurifier -Wl,-rpath,/usr/local/lib
$ ./example
char
1. ιζ―ζ§θ¦θ¨θ«εοΌ -> ιζ―ζ²³θΉεοΌ
2. ιθ£ζζ§θ¦θ¨θ«γη²ε£η¨θͺγθ²ζ
εηηηγ -> ιθ£ζ****γ**η¨θͺγ****ηηγ
3. ιθ£ζζ§θ¦θ¨θ«γη²ε£η¨θͺγθ²ζ
εηηηγ -> ιθ£ζ#γ#η¨θͺγ#ηηγ
wchar_t
4. ιζ―ζ§θ¦θ¨θ«εοΌ -> ιζ―ζ²³θΉεοΌ
5. ιθ£ζζ§θ¦θ¨θ«γη²ε£η¨θͺγθ²ζ
εηηηγ -> ιθ£ζη’η’η’η’γη’η’η¨θͺγη’η’η’η’ηηγ
6. ιθ£ζζ§θ¦θ¨θ«γη²ε£η¨θͺγθ²ζ
εηηηγ -> ιθ£ζη¦γη¦η¨θͺγη¦ηηγ
This software is licensed under the GNU Lesser General Public License v3 (LGPL v3). © 2017 Lakoo Games Limited