Skip to content

A C library that purifies text using a given list of strings.

License

Notifications You must be signed in to change notification settings

lakoo/text_purifier_c

Folders and files

NameName
Last commit message
Last commit date

Latest commit

f4f9255 · Aug 4, 2017

History

7 Commits
Aug 4, 2017
Aug 2, 2017
Aug 2, 2017
Aug 2, 2017
Aug 2, 2017
Aug 4, 2017
Aug 4, 2017
Aug 4, 2017
Aug 2, 2017
Aug 1, 2017
Aug 4, 2017
Aug 4, 2017
Aug 2, 2017
Aug 4, 2017
Aug 4, 2017

Repository files navigation

Text Purifier (0.9.3)

Introduction

This is a C library for purifying text against a list of words. It supports UTF-8 strings (char*) as well as wide-character strings (wchar_t*).

Installation

git clone https://github.com/lakoo/text_purifier_c.git
cd text_purifier_c
./autogen.sh
./configure
make
make install

Example

##############
# example.c
##############

#include <locale.h>
#include <stdio.h>
#include <wchar.h>

#include <text_purifier_c.h>


/* Marks a parameter as intentionally unused: UNUSED(x) renames it to
 * UNUSED_x and attaches the GCC/Clang `unused` attribute to it. */
#define UNUSED(x) UNUSED_##x __attribute__((__unused__))


/*
 * Demonstrates the narrow-string (char*) API.
 *
 * Registers three filter words one at a time with text_purifier_add(), then
 * purifies two sample sentences and prints "<input> -> <output>" for each:
 *   1. text_purifier_purify2str() with the replacement string "河蟹";
 *   2. text_purifier_purify2char() with '*' and a last argument of 1;
 *   3. text_purifier_purify2char() with '#' and a last argument of 0.
 * Judging by the sample output in the README, a last argument of 1 masks a
 * matched word character by character, while 0 collapses each match to a
 * single replacement character.
 *
 * Every returned string is handed back through text_purifier_free_str(), and
 * the handle is released with text_purifier_destroy() before returning.
 *
 * The string literals are UTF-8; the source file must be saved as UTF-8.
 */
void charExample(void)
{
    void* handle = text_purifier_init();

    /* Words to filter out: "discriminatory speech", "profanity", "pornographic images". */
    const char* list[] = { "歧視言論", "粗口", "色情圖片" };
    const size_t size = sizeof list / sizeof list[0];

    for(size_t index = 0; index < size; ++index)
    {
        text_purifier_add(handle, list[index]);
    }

    /* Case 1: replace every listed word with a whole replacement string. */
    const char strA[] = "這是歧視言論啊!";
    const char* str = text_purifier_purify2str(handle, strA, "河蟹");
    printf("1. %s -> %s\n", strA, str);
    text_purifier_free_str(handle, str);
    str = NULL;

    const char strB[] = "這裏有歧視言論、粗口用語、色情圖片等等。";

    /* Case 2: replacement character '*', last argument 1. */
    str = text_purifier_purify2char(handle, strB, '*', 1);
    printf("2. %s -> %s\n", strB, str);
    text_purifier_free_str(handle, str);
    str = NULL;

    /* Case 3: replacement character '#', last argument 0. */
    str = text_purifier_purify2char(handle, strB, '#', 0);
    printf("3. %s -> %s\n", strB, str);
    text_purifier_free_str(handle, str);
    str = NULL;

    text_purifier_destroy(handle);
    handle = NULL;
}

/*
 * Demonstrates the wide-string (wchar_t*) API.
 *
 * Registers three filter words in one call with text_purifier_add_wlist(),
 * then purifies two sample sentences and prints "<input> -> <output>":
 *   4. text_purifier_wpurify2str() with the replacement string L"河蟹";
 *   5. text_purifier_wpurify2char() with L'畒' and a last argument of 1;
 *   6. text_purifier_wpurify2char() with L'禁' and a last argument of 0.
 *
 * Output goes through printf() with the %ls conversion rather than wprintf().
 * A stream that has already been used for byte output (main() calls printf()
 * on stdout first) must not receive wide-character output, so using wprintf()
 * here is not portable and produces nothing on some C libraries. %ls converts
 * the wide string to multibyte form according to the current locale, so a
 * UTF-8 locale must be active for the non-ASCII text to print.
 *
 * Every returned string is handed back through text_purifier_free_wstr(), and
 * the handle is released with text_purifier_destroy() before returning.
 */
void wcharExample(void)
{
    void* handle = text_purifier_init();

    /* Words to filter out: "discriminatory speech", "profanity", "pornographic images". */
    const wchar_t* list[] = { L"歧視言論", L"粗口", L"色情圖片" };
    const size_t size = sizeof list / sizeof list[0];

    text_purifier_add_wlist(handle, list, size);

    /* Case 4: replace every listed word with a whole replacement string. */
    const wchar_t strA[] = L"這是歧視言論啊!";
    const wchar_t* str = text_purifier_wpurify2str(handle, strA, L"河蟹");
    printf("4. %ls -> %ls\n", strA, str);
    text_purifier_free_wstr(handle, str);
    str = NULL;

    const wchar_t strB[] = L"這裏有歧視言論、粗口用語、色情圖片等等。";

    /* Case 5: replacement character L'畒', last argument 1. */
    str = text_purifier_wpurify2char(handle, strB, L'畒', 1);
    printf("5. %ls -> %ls\n", strB, str);
    text_purifier_free_wstr(handle, str);
    str = NULL;

    /* Case 6: replacement character L'禁', last argument 0. */
    str = text_purifier_wpurify2char(handle, strB, L'禁', 0);
    printf("6. %ls -> %ls\n", strB, str);
    text_purifier_free_wstr(handle, str);
    str = NULL;

    text_purifier_destroy(handle);
    handle = NULL;
}

/*
 * Entry point: selects a locale, then runs the char example followed by the
 * wchar_t example, each under its own heading.
 *
 * Command-line arguments are ignored. Always returns 0.
 */
int main(int UNUSED(argc), char* UNUSED(argv[]))
{
    /* "C.UTF-8" is not available on every platform, so fall back to the
     * locale configured in the environment when it cannot be selected.
     * NOTE(review): the environment locale is not guaranteed to be UTF-8. */
    if(setlocale(LC_ALL, "C.UTF-8") == NULL && setlocale(LC_ALL, "") == NULL)
    {
        fprintf(stderr, "warning: unable to set a locale; non-ASCII output may be incorrect\n");
    }

    printf("char\n");
    charExample();
    printf("\n");

    printf("wchar_t\n");
    wcharExample();

    return 0;
}
$ gcc -std=c11 -o example.o -c example.c
$ gcc -o example example.o -ltextpurifier -Wl,-rpath,/usr/local/lib
$ ./example
char
1. 這是歧視言論啊! -> 這是河蟹啊!
2. 這裏有歧視言論、粗口用語、色情圖片等等。 -> 這裏有****、**用語、****等等。
3. 這裏有歧視言論、粗口用語、色情圖片等等。 -> 這裏有#、#用語、#等等。

wchar_t
4. 這是歧視言論啊! -> 這是河蟹啊!
5. 這裏有歧視言論、粗口用語、色情圖片等等。 -> 這裏有畒畒畒畒、畒畒用語、畒畒畒畒等等。
6. 這裏有歧視言論、粗口用語、色情圖片等等。 -> 這裏有禁、禁用語、禁等等。

License

This software is licensed under the GNU Lesser General Public License v3 (LGPLv3). © 2017 Lakoo Games Limited

About

A C library that purifies text using a given list of strings.

Topics

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published