Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into unit_test
Browse files Browse the repository at this point in the history
  • Loading branch information
Ami11111 committed Aug 26, 2024
2 parents 9e32adf + b5183d5 commit e46365c
Show file tree
Hide file tree
Showing 48 changed files with 2,478 additions and 1,013 deletions.
11 changes: 6 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ endif()

set(CMAKE_CXX_STANDARD 20)

message(STATUS "CXX: ${CMAKE_CXX_COMPILER}")
execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE clang_full_version_string)
string(REGEX REPLACE ".*clang version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${clang_full_version_string})
if (CLANG_VERSION_STRING VERSION_GREATER 16)
Expand Down Expand Up @@ -48,7 +49,7 @@ if (CLANG_VERSION_STRING VERSION_GREATER 16)

else ()

message(FATAL_ERROR "Please use clang version 17.0 and above, current version: ${CLANG_VERSION_STRING}")
message(FATAL_ERROR "Please use clang version 17.0 and above, current version: ${CLANG_VERSION_STRING} ${CMAKE_CXX_COMPILER}")

endif ()

Expand Down Expand Up @@ -192,10 +193,10 @@ find_package(Lz4 REQUIRED)


set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")

find_package(Python 3.8
REQUIRED COMPONENTS Interpreter Development.Module
OPTIONAL_COMPONENTS Development.SABIModule)
find_package(Python COMPONENTS Interpreter Development REQUIRED)
#find_package(Python 3.8
# REQUIRED COMPONENTS Interpreter Development.Module
# OPTIONAL_COMPONENTS Development.SABIModule)

if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
Expand Down
75 changes: 75 additions & 0 deletions cmake/FindPython.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Seed Python3_EXECUTABLE from the first `python3` on PATH unless the caller
# already defined it (for example with -DPython3_EXECUTABLE=...). The variable
# is consumed later in this file by find_package(Python3).
if(NOT DEFINED Python3_EXECUTABLE)
  execute_process(
    COMMAND which python3
    RESULT_VARIABLE STATUS
    OUTPUT_VARIABLE OUTPUT
    ERROR_QUIET)
  if(STATUS EQUAL 0)
    # Quote the expansion: an unquoted empty value would drop the argument and
    # make string(STRIP) fail with a wrong-number-of-arguments error.
    string(STRIP "${OUTPUT}" STRIPPED)
    # Only accept a non-empty result; otherwise leave Python3_EXECUTABLE unset
    # so that find_package can perform its own search.
    if(NOT "${STRIPPED}" STREQUAL "")
      message(STATUS "Using Python3 from 'which python3': ${STRIPPED}")
      set(Python3_EXECUTABLE "${STRIPPED}")
    endif()
  endif()
endif()

# Request the static flavour of libpython, then require both the interpreter
# and the development artifacts from FindPython3.
set(Python3_USE_STATIC_LIBS ON)
find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
message(STATUS "Python3 specified. Version found: ${Python3_VERSION}")

# Mirror the interpreter path under the unversioned Python_ prefix, which the
# sysconfig fallbacks further down in this file use.
set(Python_EXECUTABLE ${Python3_EXECUTABLE})
message(STATUS "Using Python executable: ${Python_EXECUTABLE}")

# Probe the development component once more (non-fatal, no REQUIRED) and, when
# it is available, publish its header directories under the Python_ prefix.
# NOTE(review): `Module` is passed here as its own component name; FindPython3
# documents `Development.Module`, not `Module` - confirm which was intended.
find_package(Python3 COMPONENTS Development Module)
if(Python3_INCLUDE_DIRS AND Python3_Development_FOUND)
  set(Python_INCLUDE_DIRS ${Python3_INCLUDE_DIRS})
endif()

# Fallback: when no header directory is known yet, ask the interpreter itself
# (via sysconfig) where its headers live.
if(NOT Python_INCLUDE_DIRS)
  message(STATUS "Getting python include directory from sysconfig..")
  execute_process(
    COMMAND ${Python_EXECUTABLE} -c "import sysconfig; print(sysconfig.get_paths()['include'])"
    OUTPUT_VARIABLE Python_INCLUDE_DIRS
    RESULT_VARIABLE ret_code)
  string(STRIP "${Python_INCLUDE_DIRS}" Python_INCLUDE_DIRS)
  # Accept the result only if the command succeeded and the directory really
  # contains Python.h. The paths are quoted so that an empty result evaluates
  # to "not a directory" instead of producing a malformed if() expression.
  if((NOT (ret_code EQUAL "0"))
     OR (NOT IS_DIRECTORY "${Python_INCLUDE_DIRS}")
     OR (NOT EXISTS "${Python_INCLUDE_DIRS}/Python.h"))
    set(Python_INCLUDE_DIRS "")
  endif()
endif()

# Without headers nothing that includes Python.h can be compiled: stop here.
if(NOT Python_INCLUDE_DIRS)
  message(FATAL_ERROR "Cannot find python include directory")
endif()

message(STATUS "Found python include directory ${Python_INCLUDE_DIRS}")

# Fallback: when find_package(Python3) did not report a library, derive the
# expected libpython path from the interpreter's own configuration.
if(NOT Python3_LIBRARIES)
  message(STATUS "Getting python library from sysconfig..")
  # The script below is passed verbatim to `python -c`, so its top-level
  # statements must start at column 0 regardless of the CMake indentation.
  # It prints <base>/lib/pythonX.Y/config-X.Y-<machine>-linux-gnu/libpythonX.Y.<suffix>.
  execute_process(
    COMMAND ${Python_EXECUTABLE} -c
"
import sysconfig
import os
import platform
import sys
if platform.system() == 'Windows':
    libpython_suffix = 'dll'
else:
    libpython_suffix = 'a'
python_version = f'python{sys.version_info.major}.{sys.version_info.minor}'
libpython_name = f'lib{python_version}.{libpython_suffix}'
base_dir = sysconfig.get_config_var('base')
platform_dir = f'config-{sys.version_info.major}.{sys.version_info.minor}-{platform.machine()}-linux-gnu'
libpython_path = os.path.join(base_dir, 'lib', python_version, platform_dir, libpython_name)
print(libpython_path)
"
    OUTPUT_VARIABLE Python3_LIBRARIES
    RESULT_VARIABLE ret_code)
  string(STRIP "${Python3_LIBRARIES}" Python3_LIBRARIES)
  # Keep the computed path only if the script succeeded and the file is really
  # there; otherwise clear Python3_LIBRARIES (not the include directory) so the
  # check below reports the failure.
  if((NOT (ret_code EQUAL "0")) OR (NOT EXISTS "${Python3_LIBRARIES}"))
    set(Python3_LIBRARIES "")
  endif()
endif()

# The library is added to a link line elsewhere in the project, so a missing
# value is fatal.
if(NOT Python3_LIBRARIES)
  message(FATAL_ERROR "Cannot find python static library")
endif()

message(STATUS "Found python static library ${Python3_LIBRARIES}")
9 changes: 8 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_SCAN_FOR_MODULES ON)

#find_package(Python)

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/bin/compilation_config.cppm.in ${CMAKE_CURRENT_SOURCE_DIR}/bin/compilation_config.cppm)

### Parser
Expand Down Expand Up @@ -218,7 +220,7 @@ target_sources(infinity_core
)

add_dependencies(infinity_core thrift thriftnb parquet_static snappy)

target_include_directories(infinity_core PUBLIC ${Python3_INCLUDE_DIRS})
target_include_directories(infinity_core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
target_include_directories(infinity_core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/parser")
target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party/spdlog/include")
Expand Down Expand Up @@ -492,9 +494,14 @@ target_link_libraries(unit_test
thrift.a
thriftnb.a
snappy.a
${Python3_LIBRARIES}
z.a
expat.a
${JEMALLOC_STATIC_LIB}
util
)

target_link_options(unit_test PRIVATE -no-pie)
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/lib")
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
Expand Down
3 changes: 3 additions & 0 deletions src/common/analyzer/analyzer_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
return {MakeUnique<NGramAnalyzer>(ngram), Status::OK()};
}
default: {
if(std::filesystem::is_regular_file(name)) {
// Suppose it is a customized Python script analyzer
}
return {nullptr, Status::AnalyzerNotFound(name.data())};
}
}
Expand Down
134 changes: 134 additions & 0 deletions src/common/analyzer/user_defined_analyzer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

module;

#include "Python.h"

module user_defined_analyzer;

import stl;
import term;
import stemmer;
import analyzer;
import tokenizer;
import status;
import third_party;

namespace infinity {

// Starts the embedded Python interpreter if necessary, imports the script at
// analyzer_path_ as a module and looks up its callable `analyze` attribute.
//
// Returns Status::OK() on success, or Status::FailToRunPython(...) when the
// script does not exist, its directory cannot be added to the module search
// path, the module cannot be imported, or `analyze` is missing / not callable.
//
// NOTE(review): locker_ is acquired here and released only by UnInit(), also
// on the error returns below - callers presumably have to pair every Init()
// with an UnInit(); confirm against the call sites.
// NOTE(review): no GIL state is acquired here and gil_state_ is left
// untouched - confirm the intended threading model.
Status UserDefinedAnalyzer::Init() {
    locker_.lock();

    if (!Py_IsInitialized()) {
        Py_Initialize();
    }

    const std::filesystem::path path = analyzer_path_;

    if (!std::filesystem::exists(path)) {
        return Status::FailToRunPython(fmt::format("{} doesn't exist!", analyzer_path_));
    }

    const String file_dir = path.parent_path().string();
    // The module name is the script file name without its extension.
    const String main_filename = path.stem().string();

    // Append the script directory to sys.path through the C API instead of
    // formatting it into Python source, so that quotes or backslashes in the
    // path can neither break nor alter the executed statement.
    PyObject *sys_path = PySys_GetObject("path"); // borrowed reference
    PyObject *dir_object = PyUnicode_DecodeFSDefault(file_dir.c_str());
    const bool appended = sys_path != nullptr && dir_object != nullptr && PyList_Append(sys_path, dir_object) == 0;
    Py_XDECREF(dir_object);
    if (!appended) {
        // Do not leave a pending Python exception behind for later API calls.
        PyErr_Clear();
        return Status::FailToRunPython(fmt::format("Fail to add {} to python module search path", file_dir));
    }

    // Import the module
    module_ = PyImport_ImportModule(main_filename.c_str());
    if (module_ == nullptr) {
        PyErr_Clear();
        return Status::FailToRunPython(fmt::format("Fail to load python module: {}", main_filename));
    }

    // Load function: analyze. A non-callable attribute stays in function_ and
    // is released by UnInit(), exactly like module_.
    function_ = PyObject_GetAttrString(module_, "analyze");
    if (function_ == nullptr || !PyCallable_Check(function_)) {
        PyErr_Clear();
        return Status::FailToRunPython(fmt::format("Can't load function: analyze"));
    }

    return Status::OK();
}

void UserDefinedAnalyzer::UnInit() {
if (function_ != nullptr) {
Py_DECREF(function_);
function_ = nullptr;
}

if (module_ != nullptr) {
Py_DECREF(module_);
module_ = nullptr;
}

// if (Py_IsInitialized()) {
// Py_FinalizeEx();
// }

// if (PyGILState_Check()) {
// PyGILState_Release(gil_state_);
// }
locker_.unlock();
}

// Calls the Python function loaded by Init() with `text` as its only argument
// and converts the returned list into a vector of strings.
//
// Returns {tokens, Status::OK()} on success. Returns an empty vector together
// with Status::FailToRunPython(...) when no function is loaded, the call
// raises, the result is not a list, or a list element cannot be converted to
// a C string. Every Python reference created here is released on all paths
// and a pending Python exception is cleared before returning an error.
Tuple<Vector<String>, Status> UserDefinedAnalyzer::Analyze(const String &text) {
    Vector<String> return_list;

    const auto parse_error = [&]() {
        return Status::FailToRunPython(fmt::format("Failed to use {} to parse: {}", analyzer_path_, text));
    };

    // Init() was not called or did not succeed: there is nothing to invoke.
    if (function_ == nullptr) {
        return {return_list, parse_error()};
    }

    PyObject *args = Py_BuildValue("(s)", text.c_str());
    if (args == nullptr) {
        PyErr_Clear();
        return {return_list, parse_error()};
    }

    PyObject *result = PyObject_CallObject(function_, args);
    // The argument tuple is no longer needed once the call has returned.
    Py_DECREF(args);

    if (result == nullptr || !PyList_Check(result)) {
        // result may be a non-list object that still has to be released.
        Py_XDECREF(result);
        PyErr_Clear();
        return {return_list, parse_error()};
    }

    const Py_ssize_t len = PyList_GET_SIZE(result);
    return_list.reserve(static_cast<SizeT>(len));
    for (Py_ssize_t i = 0; i < len; ++i) {
        // Borrowed reference: owned by `result`, must not be decref'ed here.
        PyObject *item = PyList_GetItem(result, i);

        // The pointer refers to storage owned by `item`; it is copied into the
        // vector before `result` is released.
        char *result_ptr{nullptr};
        if (item == nullptr || !PyArg_Parse(item, "s", &result_ptr) || result_ptr == nullptr) {
            // Non-string element: fail instead of building a String from null.
            Py_DECREF(result);
            PyErr_Clear();
            return_list.clear();
            return {return_list, parse_error()};
        }
        return_list.emplace_back(result_ptr);
    }

    Py_DECREF(result);

    return {return_list, Status::OK()};
}

} // namespace infinity
51 changes: 51 additions & 0 deletions src/common/analyzer/user_defined_analyzer.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

module;

#include "Python.h"

export module user_defined_analyzer;

import stl;
import term;
import stemmer;
import analyzer;
import tokenizer;
import third_party;
import status;

namespace infinity {
// Analyzer that delegates to a Python script: Init() imports the script at
// `analyzer_path` and looks up its `analyze` attribute, Analyze() calls it,
// and UnInit() releases what Init() acquired.
export class UserDefinedAnalyzer : public Analyzer {
public:
    // Only stores the script path; no Python work happens until Init().
    UserDefinedAnalyzer(const String &analyzer_path) : analyzer_path_(analyzer_path) {}
    ~UserDefinedAnalyzer() = default;

    // Locks locker_, starts the interpreter if needed and loads the script.
    Status Init();

    // Releases module_ / function_ and unlocks locker_.
    void UnInit();

    // Runs the script's `analyze` function on `text` and returns its tokens.
    Tuple<Vector<String>, Status> Analyze(const String &text);

private:
    // Path of the Python script given to the constructor.
    const String analyzer_path_;

    // NOTE(review): not referenced by the visible implementation - confirm
    // whether GIL handling is still planned.
    PyGILState_STATE gil_state_;

    // Locked by Init() and unlocked by UnInit().
    std::mutex locker_;

    // References obtained by Init(); null while nothing is loaded.
    PyObject *module_{nullptr};
    PyObject *function_{nullptr};
};
} // namespace infinity
Loading

0 comments on commit e46365c

Please sign in to comment.