From 021db2289c6b9fa1f0a3d83c6e756cdaa8510bcb Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Sun, 9 Sep 2018 00:17:37 +0000 Subject: [PATCH 01/42] Setup basic swig wrapper --- CMakeLists.txt | 20 +++++++++++++++----- CMakeLists.txt.faiss | 4 ++-- hnswlib/CMakeLists.txt | 4 ++-- ivfhnsw.i | 13 +++++++++++++ 4 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 ivfhnsw.i diff --git a/CMakeLists.txt b/CMakeLists.txt index e8b895e0..6b64f54a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,14 +13,24 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +set(CMAKE_SWIG_OUTDIR ${CMAKE_BINARY_DIR}/lib) + # specify header and cpp files file(GLOB ivf-hnsw_cpu_headers ${CMAKE_CURRENT_SOURCE_DIR}/*.h) file(GLOB ivf-hnsw_cpu_cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) -add_library(ivf-hnsw STATIC ${ivf-hnsw_cpu_headers} ${ivf-hnsw_cpu_cpp}) +FIND_PACKAGE(SWIG REQUIRED) +INCLUDE(${SWIG_USE_FILE}) + +FIND_PACKAGE(PythonLibs) +INCLUDE_DIRECTORIES(${PROJECT_BINARY_DIR} ${PYTHON_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) + +file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i ./numpy.i) + +SET_SOURCE_FILES_PROPERTIES(ivfhnsw.i PROPERTIES CPLUSPLUS ON) -SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0" ) -target_link_libraries(ivf-hnsw faiss hnswlib) +SET(CMAKE_SWIG_FLAGS "") +SET(CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0") -# build tests -add_subdirectory(tests) \ No newline at end of file +swig_add_module(ivfhnsw python ivfhnsw.i IndexIVF_HNSW.cpp) +swig_link_libraries(ivfhnsw faiss hnswlib ${PYTHON_LIBRARIES}) diff --git a/CMakeLists.txt.faiss b/CMakeLists.txt.faiss index 90880f9e..786c8c1f 
100644 --- a/CMakeLists.txt.faiss +++ b/CMakeLists.txt.faiss @@ -34,5 +34,5 @@ file(GLOB faiss_cpu_headers ${CMAKE_CURRENT_SOURCE_DIR}/*.h) file(GLOB faiss_cpu_cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) set(faiss_lib faiss) -add_library(${faiss_lib} STATIC ${faiss_cpu_headers} ${faiss_cpu_cpp}) -target_link_libraries(${faiss_lib} ${OpenMP_CXX_FLAGS} ${BLAS_LIB}) \ No newline at end of file +add_library(${faiss_lib} SHARED ${faiss_cpu_headers} ${faiss_cpu_cpp}) +target_link_libraries(${faiss_lib} ${OpenMP_CXX_FLAGS} ${BLAS_LIB}) diff --git a/hnswlib/CMakeLists.txt b/hnswlib/CMakeLists.txt index 59330055..78ea0f4f 100644 --- a/hnswlib/CMakeLists.txt +++ b/hnswlib/CMakeLists.txt @@ -13,6 +13,6 @@ file(GLOB sources ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) # Build each source file independently include_directories(../../) # ivf-hnsw root directory -add_library(hnswlib STATIC ${headers} ${sources}) +add_library(hnswlib SHARED ${headers} ${sources}) SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0" ) -target_link_libraries(hnswlib) \ No newline at end of file +target_link_libraries(hnswlib) diff --git a/ivfhnsw.i b/ivfhnsw.i new file mode 100644 index 00000000..c80539e8 --- /dev/null +++ b/ivfhnsw.i @@ -0,0 +1,13 @@ +%module ivfhnsw +%{ +#define SWIG_FILE_WITH_INIT +#include "IndexIVF_HNSW.h" +%} + +%include "numpy.i" + +%init %{ +import_array(); +%} + +%include "IndexIVF_HNSW.h" From 8efa4ba2f4c1d47b15be00e24cf28d93f8896f47 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Sun, 9 Sep 2018 00:19:30 +0000 Subject: [PATCH 02/42] Update .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 9f0ebe10..5463f21d 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ CMakeCache.txt main *.swp +__pycache__/ +lib/ +numpy.i From 494bbc08eae50deb86f147c77de00c466acfa772 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Sun, 9 Sep 
2018 17:45:33 +0000 Subject: [PATCH 03/42] Apply numpy array typemap --- ivfhnsw.i | 1 + 1 file changed, 1 insertion(+) diff --git a/ivfhnsw.i b/ivfhnsw.i index c80539e8..287bdd6f 100644 --- a/ivfhnsw.i +++ b/ivfhnsw.i @@ -10,4 +10,5 @@ import_array(); %} +%apply (int DIM1, float* IN_ARRAY1) {(size_t n, const float* x)}; %include "IndexIVF_HNSW.h" From 1fa8c920183e0e1fdb669b852c6f6f6fb0168822 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Wed, 12 Sep 2018 10:36:03 +0000 Subject: [PATCH 04/42] Ignore default arguments --- CMakeLists.txt | 3 +++ ivfhnsw.i | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b64f54a..51049956 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ FIND_PACKAGE(PythonLibs) INCLUDE_DIRECTORIES(${PROJECT_BINARY_DIR} ${PYTHON_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i ./numpy.i) +file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/pyfragments.swg ./pyfragments.swg) SET_SOURCE_FILES_PROPERTIES(ivfhnsw.i PROPERTIES CPLUSPLUS ON) @@ -34,3 +35,5 @@ SET(CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march swig_add_module(ivfhnsw python ivfhnsw.i IndexIVF_HNSW.cpp) swig_link_libraries(ivfhnsw faiss hnswlib ${PYTHON_LIBRARIES}) + + diff --git a/ivfhnsw.i b/ivfhnsw.i index 287bdd6f..319c8f60 100644 --- a/ivfhnsw.i +++ b/ivfhnsw.i @@ -11,4 +11,8 @@ import_array(); %} %apply (int DIM1, float* IN_ARRAY1) {(size_t n, const float* x)}; +%apply (unsigned int* ARGOUT_ARRAY1, int DIM1) {(ivfhnsw::IndexIVF_HNSW::idx_t *labels, size_t k)}; + +%ignore assign(size_t, const float*, idx_t*); + %include "IndexIVF_HNSW.h" From f35d34b624f381027daa80934c323233d78f7bfd Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Wed, 12 Sep 2018 15:53:13 +0000 Subject: [PATCH 05/42] Implement setup.py --- setup.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file 
changed, 57 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..c403be47 --- /dev/null +++ b/setup.py @@ -0,0 +1,57 @@ +import os +import pathlib + +from setuptools import setup, Extension +from setuptools.command.build_ext import build_ext as build_ext_orig + + +class CMakeExtension(Extension): + + def __init__(self, name): + # don't invoke the original build_ext for this special extension + super().__init__(name, sources=[]) + + +class build_ext(build_ext_orig): + + def run(self): + for ext in self.extensions: + self.build_cmake(ext) + super().run() + + def build_cmake(self, ext): + cwd = pathlib.Path().absolute() + + # these dirs will be created in build_py, so if you don't have + # any python sources to bundle, the dirs will be missing + build_temp = pathlib.Path(self.build_temp) + build_temp.mkdir(parents=True, exist_ok=True) + extdir = pathlib.Path(self.get_ext_fullpath(ext.name)) + extdir.mkdir(parents=True, exist_ok=True) + + # example of cmake args + config = 'Debug' if self.debug else 'Release' + cmake_args = [ + '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + str(extdir.parent.absolute()), + '-DCMAKE_BUILD_TYPE=' + config + ] + + # example of build args + build_args = [] + + os.chdir(str(build_temp)) + self.spawn(['cmake', str(cwd)] + cmake_args) + if not self.dry_run: + self.spawn(['cmake', '--build', '.'] + build_args) + os.chdir(str(cwd)) + + +setup( + name='ivfhnsw', + version='0.1', + packages=['ivfhnsw'], + ext_modules=[CMakeExtension('IndexIVF_HNSW')], + cmdclass={ + 'build_ext': build_ext, + } +) From e08267ea60d04a36307e83b6893bdee1f31ca557 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Wed, 12 Sep 2018 15:59:47 +0000 Subject: [PATCH 06/42] Write a wrapper for assign method to work correctly --- ivfhnsw.i | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/ivfhnsw.i b/ivfhnsw.i index 319c8f60..5ad1be58 100644 --- a/ivfhnsw.i +++ b/ivfhnsw.i @@ -10,9 
+10,26 @@ import_array(); %} -%apply (int DIM1, float* IN_ARRAY1) {(size_t n, const float* x)}; +%apply (float* IN_ARRAY2, int DIM1, int DIM2) {(const float *x, size_t n, size_t d)}; %apply (unsigned int* ARGOUT_ARRAY1, int DIM1) {(ivfhnsw::IndexIVF_HNSW::idx_t *labels, size_t k)}; -%ignore assign(size_t, const float*, idx_t*); +%rename (assign) assign_numpy; +%exception my_dot { + $action + if (PyErr_Occurred()) SWIG_fail; +} +%extend ivfhnsw::IndexIVF_HNSW { +void assign_numpy(const float *x, size_t n, size_t d, idx_t *labels, size_t k) { + if (d != $self->d) { + PyErr_Format(PyExc_ValueError, + "Vector length must be equal d=%d, got %d", + $self->d, d); + return; + } + return $self->assign(n, x, labels, k); +} +} +%ignore assign; %include "IndexIVF_HNSW.h" + From ad99216b28fab3208038b94a6eb5d894ef5b7e1c Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Wed, 12 Sep 2018 17:39:09 +0000 Subject: [PATCH 07/42] Link static dependency links --- CMakeLists.txt.faiss | 2 +- hnswlib/CMakeLists.txt | 4 ++-- setup.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt.faiss b/CMakeLists.txt.faiss index 786c8c1f..1250323c 100644 --- a/CMakeLists.txt.faiss +++ b/CMakeLists.txt.faiss @@ -34,5 +34,5 @@ file(GLOB faiss_cpu_headers ${CMAKE_CURRENT_SOURCE_DIR}/*.h) file(GLOB faiss_cpu_cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) set(faiss_lib faiss) -add_library(${faiss_lib} SHARED ${faiss_cpu_headers} ${faiss_cpu_cpp}) +add_library(${faiss_lib} STATIC ${faiss_cpu_headers} ${faiss_cpu_cpp}) target_link_libraries(${faiss_lib} ${OpenMP_CXX_FLAGS} ${BLAS_LIB}) diff --git a/hnswlib/CMakeLists.txt b/hnswlib/CMakeLists.txt index 78ea0f4f..4e0766d4 100644 --- a/hnswlib/CMakeLists.txt +++ b/hnswlib/CMakeLists.txt @@ -11,8 +11,8 @@ file(GLOB headers ${CMAKE_CURRENT_SOURCE_DIR}/*.h) file(GLOB sources ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) # Build each source file independently -include_directories(../../) # ivf-hnsw root directory 
+include_directories(${CMAKE_SOURCE_DIR}) # ivf-hnsw root directory -add_library(hnswlib SHARED ${headers} ${sources}) +add_library(hnswlib STATIC ${headers} ${sources}) SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0" ) target_link_libraries(hnswlib) diff --git a/setup.py b/setup.py index c403be47..d8f594fc 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,8 @@ def build_cmake(self, ext): setup( name='ivfhnsw', version='0.1', - packages=['ivfhnsw'], +# packages=['ivfhnsw'], +# package_dir={'': 'src'}, ext_modules=[CMakeExtension('IndexIVF_HNSW')], cmdclass={ 'build_ext': build_ext, From e79771bec0327554e07c837b7812eb946f9b1f3d Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Thu, 13 Sep 2018 09:52:57 +0000 Subject: [PATCH 08/42] Move source to src/ --- IndexIVF_HNSW.cpp => src/IndexIVF_HNSW.cpp | 0 IndexIVF_HNSW.h => src/IndexIVF_HNSW.h | 0 IndexIVF_HNSW_Grouping.cpp => src/IndexIVF_HNSW_Grouping.cpp | 0 IndexIVF_HNSW_Grouping.h => src/IndexIVF_HNSW_Grouping.h | 0 Parser.h => src/Parser.h | 0 utils.cpp => src/utils.cpp | 0 utils.h => src/utils.h | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename IndexIVF_HNSW.cpp => src/IndexIVF_HNSW.cpp (100%) rename IndexIVF_HNSW.h => src/IndexIVF_HNSW.h (100%) rename IndexIVF_HNSW_Grouping.cpp => src/IndexIVF_HNSW_Grouping.cpp (100%) rename IndexIVF_HNSW_Grouping.h => src/IndexIVF_HNSW_Grouping.h (100%) rename Parser.h => src/Parser.h (100%) rename utils.cpp => src/utils.cpp (100%) rename utils.h => src/utils.h (100%) diff --git a/IndexIVF_HNSW.cpp b/src/IndexIVF_HNSW.cpp similarity index 100% rename from IndexIVF_HNSW.cpp rename to src/IndexIVF_HNSW.cpp diff --git a/IndexIVF_HNSW.h b/src/IndexIVF_HNSW.h similarity index 100% rename from IndexIVF_HNSW.h rename to src/IndexIVF_HNSW.h diff --git a/IndexIVF_HNSW_Grouping.cpp b/src/IndexIVF_HNSW_Grouping.cpp similarity index 100% rename from 
IndexIVF_HNSW_Grouping.cpp rename to src/IndexIVF_HNSW_Grouping.cpp diff --git a/IndexIVF_HNSW_Grouping.h b/src/IndexIVF_HNSW_Grouping.h similarity index 100% rename from IndexIVF_HNSW_Grouping.h rename to src/IndexIVF_HNSW_Grouping.h diff --git a/Parser.h b/src/Parser.h similarity index 100% rename from Parser.h rename to src/Parser.h diff --git a/utils.cpp b/src/utils.cpp similarity index 100% rename from utils.cpp rename to src/utils.cpp diff --git a/utils.h b/src/utils.h similarity index 100% rename from utils.h rename to src/utils.h From 4042d21b4d062525e540090d5215052b20890d05 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Thu, 13 Sep 2018 09:53:33 +0000 Subject: [PATCH 09/42] Move interface file to src/ --- ivfhnsw.i => src/ivfhnsw.i | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ivfhnsw.i => src/ivfhnsw.i (100%) diff --git a/ivfhnsw.i b/src/ivfhnsw.i similarity index 100% rename from ivfhnsw.i rename to src/ivfhnsw.i From 5b7c7b6238521a88dc4d566d6d287be66d7e2beb Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Thu, 13 Sep 2018 11:17:37 +0000 Subject: [PATCH 10/42] Fix cmake config for new project structure with src/ --- .gitignore | 4 ++++ CMakeLists.txt | 27 +++++++++++---------------- hnswlib/CMakeLists.txt | 2 +- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 5463f21d..a7149d59 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,7 @@ main __pycache__/ lib/ numpy.i +build/ +dist/ +ivfhnsw.egg-info/ +venv/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 51049956..19dbb11c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,37 +3,32 @@ cmake_minimum_required (VERSION 2.8) # ivf-hnsw project project(ivf-hnsw C CXX) -include_directories("${PROJECT_BINARY_DIR}") - add_subdirectory(faiss) add_subdirectory(hnswlib) # specify output bin_path and lib_path -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) - -set(CMAKE_SWIG_OUTDIR ${CMAKE_BINARY_DIR}/lib) - -# specify header and cpp files -file(GLOB ivf-hnsw_cpu_headers ${CMAKE_CURRENT_SOURCE_DIR}/*.h) -file(GLOB ivf-hnsw_cpu_cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_SWIG_OUTDIR ${CMAKE_BINARY_DIR}) FIND_PACKAGE(SWIG REQUIRED) INCLUDE(${SWIG_USE_FILE}) FIND_PACKAGE(PythonLibs) -INCLUDE_DIRECTORIES(${PROJECT_BINARY_DIR} ${PYTHON_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PYTHON_INCLUDE_PATH}) + +file(GLOB SOURCES ${PROJECT_SOURCE_DIR}/src/*) file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i ./numpy.i) file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/pyfragments.swg ./pyfragments.swg) -SET_SOURCE_FILES_PROPERTIES(ivfhnsw.i PROPERTIES CPLUSPLUS ON) - -SET(CMAKE_SWIG_FLAGS "") SET(CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0") -swig_add_module(ivfhnsw python ivfhnsw.i IndexIVF_HNSW.cpp) +SET_SOURCE_FILES_PROPERTIES(src/ivfhnsw.i PROPERTIES CPLUSPLUS ON) +SET_SOURCE_FILES_PROPERTIES(${swig_generated_file_fullname} PROPERTIES COMPILE_FLAGS -Iwtf) + +swig_add_module(ivfhnsw python src/ivfhnsw.i src/IndexIVF_HNSW.cpp) swig_link_libraries(ivfhnsw faiss hnswlib ${PYTHON_LIBRARIES}) diff --git a/hnswlib/CMakeLists.txt b/hnswlib/CMakeLists.txt index 4e0766d4..8797a784 100644 --- a/hnswlib/CMakeLists.txt +++ b/hnswlib/CMakeLists.txt @@ -11,7 +11,7 @@ file(GLOB headers ${CMAKE_CURRENT_SOURCE_DIR}/*.h) file(GLOB sources ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) # Build each source file independently -include_directories(${CMAKE_SOURCE_DIR}) # ivf-hnsw root directory 
+include_directories(${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/src) # ivf-hnsw root directory add_library(hnswlib STATIC ${headers} ${sources}) SET( CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0" ) From 56dd88b915d0168fb213aaca68e36e4e8d37742f Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Thu, 13 Sep 2018 15:33:41 +0000 Subject: [PATCH 11/42] Create wrapper code for IndexIVF_HNSW::search method --- src/ivfhnsw.i | 57 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/src/ivfhnsw.i b/src/ivfhnsw.i index 5ad1be58..b0d5cc4f 100644 --- a/src/ivfhnsw.i +++ b/src/ivfhnsw.i @@ -12,9 +12,15 @@ import_array(); %apply (float* IN_ARRAY2, int DIM1, int DIM2) {(const float *x, size_t n, size_t d)}; %apply (unsigned int* ARGOUT_ARRAY1, int DIM1) {(ivfhnsw::IndexIVF_HNSW::idx_t *labels, size_t k)}; +%apply (long* ARGOUT_ARRAY1, int DIM1) {(long *labels, size_t k)}; +%apply (float* ARGOUT_ARRAY1, int DIM1) {(float* distances, size_t k_)}; + +/* +Wrapper for IndexIVF_HNSW::assign +*/ %rename (assign) assign_numpy; -%exception my_dot { +%exception assign_numpy { $action if (PyErr_Occurred()) SWIG_fail; } @@ -22,14 +28,59 @@ import_array(); void assign_numpy(const float *x, size_t n, size_t d, idx_t *labels, size_t k) { if (d != $self->d) { PyErr_Format(PyExc_ValueError, - "Vector length must be equal d=%d, got %d", + "Query vectors must be of length d=%d, got %d", $self->d, d); return; } return $self->assign(n, x, labels, k); } } - %ignore assign; + + +/* +Wrapper for IndexIVF_HNSW::search +*/ +%exception _search { + $action + if (PyErr_Occurred()) SWIG_fail; +} +%extend ivfhnsw::IndexIVF_HNSW { +void _search(const float *x, size_t n, size_t d, float* distances, size_t k_, long *labels, size_t k) { + if (d != $self->d) { + PyErr_Format(PyExc_ValueError, + "Query vectors must be of length d=%d, got %d", + $self->d, 
d); + return; + } + if (k != k_) { + PyErr_Format(PyExc_ValueError, + "Output sizes must be the same, got %d and %d", + k_, k); + return; + } + $self->search(k, x, distances, labels); +} +} +%ignore search; + %include "IndexIVF_HNSW.h" +%pythoncode %{ +import functools + +cls = IndexIVF_HNSW + +@functools.wraps(cls._search) +def search_wrapper(self, x, k): + """ + Query n vectors of dimension d to the index. + + Return at most k vectors. If there are not enough results for the query, + the result array is padded with -1s. + """ + return self._search(x, k, k) + +cls.search = search_wrapper +%} + From 1e5f7277b28c3dce8011b599ae097ffc7e7375e0 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Thu, 13 Sep 2018 15:36:46 +0000 Subject: [PATCH 12/42] Fix build_ext in setup.py --- setup.py | 76 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/setup.py b/setup.py index d8f594fc..73f1fa1e 100644 --- a/setup.py +++ b/setup.py @@ -1,58 +1,68 @@ +import sys +import platform +import subprocess import os import pathlib from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext as build_ext_orig +from setuptools.command.build_ext import build_ext +from distutils.version import LooseVersion class CMakeExtension(Extension): + def __init__(self, name, sourcedir=''): + Extension.__init__(self, name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) - def __init__(self, name): - # don't invoke the original build_ext for this special extension - super().__init__(name, sources=[]) +class CMakeBuild(build_ext): + def run(self): + try: + out = subprocess.check_output(['cmake', '--version']) + except OSError: + raise RuntimeError("CMake must be installed to build the following extensions: " + + ", ".join(e.name for e in self.extensions)) -class build_ext(build_ext_orig): + if platform.system() == "Windows": + cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', 
out.decode()).group(1)) + if cmake_version < '3.1.0': + raise RuntimeError("CMake >= 3.1.0 is required on Windows") - def run(self): for ext in self.extensions: - self.build_cmake(ext) - super().run() - - def build_cmake(self, ext): - cwd = pathlib.Path().absolute() + self.build_extension(ext) - # these dirs will be created in build_py, so if you don't have - # any python sources to bundle, the dirs will be missing - build_temp = pathlib.Path(self.build_temp) - build_temp.mkdir(parents=True, exist_ok=True) - extdir = pathlib.Path(self.get_ext_fullpath(ext.name)) - extdir.mkdir(parents=True, exist_ok=True) + def build_extension(self, ext): + extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, + '-DPYTHON_EXECUTABLE=' + sys.executable] - # example of cmake args - config = 'Debug' if self.debug else 'Release' - cmake_args = [ - '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + str(extdir.parent.absolute()), - '-DCMAKE_BUILD_TYPE=' + config - ] + cfg = 'Debug' if self.debug else 'Release' + build_args = ['--config', cfg] - # example of build args - build_args = [] + if platform.system() == "Windows": + cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] + if sys.maxsize > 2**32: + cmake_args += ['-A', 'x64'] + build_args += ['--', '/m'] + else: + cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] + build_args += ['--', '-j2'] - os.chdir(str(build_temp)) - self.spawn(['cmake', str(cwd)] + cmake_args) - if not self.dry_run: - self.spawn(['cmake', '--build', '.'] + build_args) - os.chdir(str(cwd)) + env = os.environ.copy() + env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), + self.distribution.get_version()) + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) + subprocess.check_call(['cmake', '--build', '.'] + build_args, 
cwd=self.build_temp) setup( name='ivfhnsw', version='0.1', -# packages=['ivfhnsw'], -# package_dir={'': 'src'}, ext_modules=[CMakeExtension('IndexIVF_HNSW')], + packages=['ivfhnsw'], cmdclass={ - 'build_ext': build_ext, + 'build_ext': CMakeBuild, } ) From a7f9009d3018e3b53fa2f696e610088ef550aa78 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Fri, 14 Sep 2018 09:37:31 +0000 Subject: [PATCH 13/42] Reorganize directory structure --- {src => include}/IndexIVF_HNSW.h | 0 {src => include}/IndexIVF_HNSW_Grouping.h | 0 {src => include}/Parser.h | 0 {src => include}/utils.h | 0 {src => interface}/ivfhnsw.i | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename {src => include}/IndexIVF_HNSW.h (100%) rename {src => include}/IndexIVF_HNSW_Grouping.h (100%) rename {src => include}/Parser.h (100%) rename {src => include}/utils.h (100%) rename {src => interface}/ivfhnsw.i (100%) diff --git a/src/IndexIVF_HNSW.h b/include/IndexIVF_HNSW.h similarity index 100% rename from src/IndexIVF_HNSW.h rename to include/IndexIVF_HNSW.h diff --git a/src/IndexIVF_HNSW_Grouping.h b/include/IndexIVF_HNSW_Grouping.h similarity index 100% rename from src/IndexIVF_HNSW_Grouping.h rename to include/IndexIVF_HNSW_Grouping.h diff --git a/src/Parser.h b/include/Parser.h similarity index 100% rename from src/Parser.h rename to include/Parser.h diff --git a/src/utils.h b/include/utils.h similarity index 100% rename from src/utils.h rename to include/utils.h diff --git a/src/ivfhnsw.i b/interface/ivfhnsw.i similarity index 100% rename from src/ivfhnsw.i rename to interface/ivfhnsw.i From 1c21976900afbd14499c00a11cddbae9ada055d9 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Fri, 14 Sep 2018 09:45:52 +0000 Subject: [PATCH 14/42] Fix cmake build --- CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 19dbb11c..e150cea2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,19 +16,20 @@ FIND_PACKAGE(SWIG 
REQUIRED) INCLUDE(${SWIG_USE_FILE}) FIND_PACKAGE(PythonLibs) -INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src ${PYTHON_INCLUDE_PATH}) +INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/include ${PYTHON_INCLUDE_PATH}) file(GLOB SOURCES ${PROJECT_SOURCE_DIR}/src/*) +file(GLOB SOURCES ${PROJECT_SOURCE_DIR}/include/*) +file(GLOB SOURCES ${PROJECT_SOURCE_DIR}/interface/*) file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i ./numpy.i) file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/pyfragments.swg ./pyfragments.swg) SET(CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0") +SET(SWIG_FEATURES "-Iinclude") +SET_SOURCE_FILES_PROPERTIES(interface/ivfhnsw.i PROPERTIES CPLUSPLUS ON) -SET_SOURCE_FILES_PROPERTIES(src/ivfhnsw.i PROPERTIES CPLUSPLUS ON) -SET_SOURCE_FILES_PROPERTIES(${swig_generated_file_fullname} PROPERTIES COMPILE_FLAGS -Iwtf) - -swig_add_module(ivfhnsw python src/ivfhnsw.i src/IndexIVF_HNSW.cpp) +swig_add_module(ivfhnsw python interface/ivfhnsw.i src/IndexIVF_HNSW.cpp) swig_link_libraries(ivfhnsw faiss hnswlib ${PYTHON_LIBRARIES}) From f8ea6fb6576af1a924eb39c10613874cbb34d284 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Fri, 14 Sep 2018 10:09:07 +0000 Subject: [PATCH 15/42] Build swig extension in setup.py --- setup.py | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 73f1fa1e..184af1f3 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +from pprint import pprint +from urllib.request import urlretrieve import sys import platform import subprocess @@ -33,7 +35,7 @@ def run(self): def build_extension(self, ext): extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) - cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, + cmake_args = ['-DCMAKE_SWIG_OUTDIR=' 
+ extdir, '-DPYTHON_EXECUTABLE=' + sys.executable] cfg = 'Debug' if self.debug else 'Release' @@ -57,12 +59,39 @@ def build_extension(self, ext): subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) +class custom_build_ext(build_ext): + def run(self): + super().run() + + def build_extension(self, ext): + env = os.environ.copy() + cmake_args = [] + build_args = [] + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + subprocess.check_call(['cmake', os.path.abspath(os.curdir)] + cmake_args, cwd=self.build_temp, env=env) + subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) + ext.library_dirs.append(os.path.join(self.build_temp, 'lib')) + return super().build_extension(ext) + +paths = ['interface/ivfhnsw.i'] + +ext = [Extension(name='_' + os.path.splitext(os.path.basename(path))[0], + sources=[str(path)], + swig_opts=['-Iinclude', '-c++'], + include_dirs=['include', 'faiss', 'hnswlib', os.curdir], + libraries=['faiss', 'hnswlib'], + library_dirs=['lib'], + extra_compile_args=['-std=c++11'],) + for path in paths] + setup( name='ivfhnsw', version='0.1', - ext_modules=[CMakeExtension('IndexIVF_HNSW')], - packages=['ivfhnsw'], + ext_modules=ext, + packages=[], + include_package_data=True, cmdclass={ - 'build_ext': CMakeBuild, + 'build_ext': custom_build_ext, } ) From 01a7d17582326daf0a7a80c4e6d80b0284df5a0c Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Fri, 14 Sep 2018 13:12:32 +0000 Subject: [PATCH 16/42] `python setup.py install` completely works! 
--- CMakeLists.txt | 27 ++++++++------------------- setup.py | 23 +++++++++++++++++------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e150cea2..9092be0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,29 +7,18 @@ add_subdirectory(faiss) add_subdirectory(hnswlib) # specify output bin_path and lib_path -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) -set(CMAKE_SWIG_OUTDIR ${CMAKE_BINARY_DIR}) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) -FIND_PACKAGE(SWIG REQUIRED) -INCLUDE(${SWIG_USE_FILE}) +INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/include) -FIND_PACKAGE(PythonLibs) -INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/include ${PYTHON_INCLUDE_PATH}) - -file(GLOB SOURCES ${PROJECT_SOURCE_DIR}/src/*) -file(GLOB SOURCES ${PROJECT_SOURCE_DIR}/include/*) -file(GLOB SOURCES ${PROJECT_SOURCE_DIR}/interface/*) - -file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i ./numpy.i) -file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/pyfragments.swg ./pyfragments.swg) +file(GLOB ivfhnsw_src ${PROJECT_SOURCE_DIR}/src/*.cpp) +file(GLOB ivfhnsw_include ${PROJECT_SOURCE_DIR}/include/*.h) SET(CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0") -SET(SWIG_FEATURES "-Iinclude") -SET_SOURCE_FILES_PROPERTIES(interface/ivfhnsw.i PROPERTIES CPLUSPLUS ON) -swig_add_module(ivfhnsw python interface/ivfhnsw.i src/IndexIVF_HNSW.cpp) -swig_link_libraries(ivfhnsw faiss hnswlib ${PYTHON_LIBRARIES}) +add_library(ivfhnsw STATIC ${ivfhnsw_src}) +link_libraries(ivfhnsw faiss hnswlib) diff --git 
a/setup.py b/setup.py index 184af1f3..da623d0d 100644 --- a/setup.py +++ b/setup.py @@ -69,27 +69,38 @@ def build_extension(self, ext): build_args = [] if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) + interface_temp = os.path.join(self.build_temp, 'interface') + os.makedirs(interface_temp, exist_ok=True) + + urlretrieve('https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i', + os.path.join(interface_temp, 'numpy.i'),) subprocess.check_call(['cmake', os.path.abspath(os.curdir)] + cmake_args, cwd=self.build_temp, env=env) subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) ext.library_dirs.append(os.path.join(self.build_temp, 'lib')) - return super().build_extension(ext) + ext.swig_opts.append('-I' + os.path.join(self.build_temp, 'interface')) + r = super().build_extension(ext) + + self.distribution.py_modules.append('ivfhnsw') + self.run_command('build_py') + return r + paths = ['interface/ivfhnsw.i'] ext = [Extension(name='_' + os.path.splitext(os.path.basename(path))[0], sources=[str(path)], swig_opts=['-Iinclude', '-c++'], - include_dirs=['include', 'faiss', 'hnswlib', os.curdir], - libraries=['faiss', 'hnswlib'], - library_dirs=['lib'], - extra_compile_args=['-std=c++11'],) + include_dirs=['include', os.curdir], + libraries=['ivfhnsw', 'hnswlib', 'faiss', 'gomp', 'lapack',], + extra_compile_args=['-std=c++11', '-static'],) for path in paths] setup( name='ivfhnsw', version='0.1', ext_modules=ext, - packages=[], + package_dir={'': 'interface'}, + py_modules=[], include_package_data=True, cmdclass={ 'build_ext': custom_build_ext, From 03a956cc14284bf6052ff0d682909d316168d443 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Fri, 14 Sep 2018 14:01:29 +0000 Subject: [PATCH 17/42] Clean setup.py --- setup.py | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) diff --git a/setup.py b/setup.py index da623d0d..756f9475 100644 --- a/setup.py +++ b/setup.py @@ 
-11,54 +11,6 @@ from distutils.version import LooseVersion -class CMakeExtension(Extension): - def __init__(self, name, sourcedir=''): - Extension.__init__(self, name, sources=[]) - self.sourcedir = os.path.abspath(sourcedir) - - -class CMakeBuild(build_ext): - def run(self): - try: - out = subprocess.check_output(['cmake', '--version']) - except OSError: - raise RuntimeError("CMake must be installed to build the following extensions: " + - ", ".join(e.name for e in self.extensions)) - - if platform.system() == "Windows": - cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1)) - if cmake_version < '3.1.0': - raise RuntimeError("CMake >= 3.1.0 is required on Windows") - - for ext in self.extensions: - self.build_extension(ext) - - def build_extension(self, ext): - extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) - cmake_args = ['-DCMAKE_SWIG_OUTDIR=' + extdir, - '-DPYTHON_EXECUTABLE=' + sys.executable] - - cfg = 'Debug' if self.debug else 'Release' - build_args = ['--config', cfg] - - if platform.system() == "Windows": - cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] - if sys.maxsize > 2**32: - cmake_args += ['-A', 'x64'] - build_args += ['--', '/m'] - else: - cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] - build_args += ['--', '-j2'] - - env = os.environ.copy() - env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), - self.distribution.get_version()) - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) - subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) - subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) - - class custom_build_ext(build_ext): def run(self): super().run() From 507293e7aeb6ca7254cffccf6cc0593507f4cfb6 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Fri, 14 Sep 2018 16:03:48 +0000 Subject: [PATCH 18/42] Add numpy, pytest dependencies --- 
setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index 756f9475..fb8f46db 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,11 @@ def build_extension(self, ext): ext_modules=ext, package_dir={'': 'interface'}, py_modules=[], + setup_requires=['pytest-runner'], + install_requires=[ + 'numpy', + ], + tests_require=['pytest>2.8'], include_package_data=True, cmdclass={ 'build_ext': custom_build_ext, From 4537e0ef612335d2778dbe11deb852f38b5ebd41 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Fri, 14 Sep 2018 16:07:17 +0000 Subject: [PATCH 19/42] Add python tests to run with pytest --- python-tests/test_wrapper.py | 3 +++ setup.cfg | 5 +++++ 2 files changed, 8 insertions(+) create mode 100644 python-tests/test_wrapper.py create mode 100644 setup.cfg diff --git a/python-tests/test_wrapper.py b/python-tests/test_wrapper.py new file mode 100644 index 00000000..4d446ef4 --- /dev/null +++ b/python-tests/test_wrapper.py @@ -0,0 +1,3 @@ +def test_wrapper(): + import ivfhnsw + ivfhnsw.IndexIVF_HNSW(4,4,4,4) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..5ec4ad2d --- /dev/null +++ b/setup.cfg @@ -0,0 +1,5 @@ +[aliases] +test=pytest + +[tool:pytest] +testpaths=python-tests From e935e173c26a076b35b8823e746ffbade9e1a2a3 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Sat, 15 Sep 2018 20:11:18 +0000 Subject: [PATCH 20/42] Build tests, add dummy test --- CMakeLists.txt | 4 ++-- tests/CMakeLists.txt | 4 ++-- tests/test_dummy.cpp | 10 ++++++++++ 3 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 tests/test_dummy.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9092be0e..a7c05fae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,6 @@ file(GLOB ivfhnsw_include ${PROJECT_SOURCE_DIR}/include/*.h) SET(CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march=native -fpic -w -fopenmp -ftree-vectorize -ftree-vectorizer-verbose=0") add_library(ivfhnsw STATIC 
${ivfhnsw_src}) -link_libraries(ivfhnsw faiss hnswlib) - +target_link_libraries(ivfhnsw faiss hnswlib) +add_subdirectory(tests) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 28b64d87..e405aebd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,14 +3,14 @@ cmake_minimum_required (VERSION 2.8) file(GLOB srcs ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) # Build each source file independently -include_directories(../../) # ivf-hnsw root directory +include_directories(${PROJECT_SOURCE_DIR}) # ivf-hnsw root directory foreach(source ${srcs}) get_filename_component(name ${source} NAME_WE) # target add_executable(${name} ${source}) - target_link_libraries(${name} ivf-hnsw faiss) + target_link_libraries(${name} ivfhnsw faiss) # Install install(TARGETS ${name} DESTINATION test) diff --git a/tests/test_dummy.cpp b/tests/test_dummy.cpp new file mode 100644 index 00000000..748e719a --- /dev/null +++ b/tests/test_dummy.cpp @@ -0,0 +1,10 @@ +#include +#include "IndexIVF_HNSW.h" + + +int main(int argc, char **argv) { + ivfhnsw::IndexIVF_HNSW* index = new ivfhnsw::IndexIVF_HNSW(4,4,4,4); + delete index; + std::cout << "OK" << std::endl; + return 0; +} From 97fd0bad6a9203670b8da580cf9a0704e2731cc6 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Sat, 15 Sep 2018 21:21:32 +0000 Subject: [PATCH 21/42] Build only ivfhnsw library in setup.py --- .gitignore | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a7149d59..6efdecb0 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ build/ dist/ ivfhnsw.egg-info/ venv/ +.eggs/ diff --git a/setup.py b/setup.py index fb8f46db..0eec1475 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ def run(self): def build_extension(self, ext): env = os.environ.copy() cmake_args = [] - build_args = [] + build_args = ['--target', 'ivfhnsw'] if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) interface_temp = os.path.join(self.build_temp, 
'interface') From a5a19551810b18429863db6cc9d010c179afb64c Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Sun, 16 Sep 2018 18:32:36 +0000 Subject: [PATCH 22/42] Fix includes in tests --- tests/CMakeLists.txt | 2 +- tests/test_ivfhnsw_deep1b.cpp | 6 +++--- tests/test_ivfhnsw_grouping_deep1b.cpp | 6 +++--- tests/test_ivfhnsw_grouping_sift1b.cpp | 6 +++--- tests/test_ivfhnsw_sift1b.cpp | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e405aebd..d96ea8f8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required (VERSION 2.8) file(GLOB srcs ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) # Build each source file independently -include_directories(${PROJECT_SOURCE_DIR}) # ivf-hnsw root directory +include_directories(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/include) # ivf-hnsw root directory foreach(source ${srcs}) get_filename_component(name ${source} NAME_WE) diff --git a/tests/test_ivfhnsw_deep1b.cpp b/tests/test_ivfhnsw_deep1b.cpp index 6f4abf6b..1740d82e 100644 --- a/tests/test_ivfhnsw_deep1b.cpp +++ b/tests/test_ivfhnsw_deep1b.cpp @@ -5,8 +5,8 @@ #include #include -#include -#include +#include "IndexIVF_HNSW.h" +#include "Parser.h" using namespace hnswlib; using namespace ivfhnsw; @@ -212,4 +212,4 @@ int main(int argc, char **argv) delete index; return 0; -} \ No newline at end of file +} diff --git a/tests/test_ivfhnsw_grouping_deep1b.cpp b/tests/test_ivfhnsw_grouping_deep1b.cpp index 1b11bd76..94d1bfd0 100644 --- a/tests/test_ivfhnsw_grouping_deep1b.cpp +++ b/tests/test_ivfhnsw_grouping_deep1b.cpp @@ -5,8 +5,8 @@ #include #include -#include -#include +#include "IndexIVF_HNSW_Grouping.h" +#include "Parser.h" using namespace hnswlib; using namespace ivfhnsw; @@ -259,4 +259,4 @@ int main(int argc, char **argv) delete index; return 0; -} \ No newline at end of file +} diff --git a/tests/test_ivfhnsw_grouping_sift1b.cpp 
b/tests/test_ivfhnsw_grouping_sift1b.cpp index f19cdc11..cb34be85 100644 --- a/tests/test_ivfhnsw_grouping_sift1b.cpp +++ b/tests/test_ivfhnsw_grouping_sift1b.cpp @@ -5,8 +5,8 @@ #include #include -#include -#include +#include "IndexIVF_HNSW_Grouping.h" +#include "Parser.h" using namespace hnswlib; using namespace ivfhnsw; @@ -257,4 +257,4 @@ int main(int argc, char **argv) { delete index; return 0; -} \ No newline at end of file +} diff --git a/tests/test_ivfhnsw_sift1b.cpp b/tests/test_ivfhnsw_sift1b.cpp index 26449fd2..0d70b112 100644 --- a/tests/test_ivfhnsw_sift1b.cpp +++ b/tests/test_ivfhnsw_sift1b.cpp @@ -5,8 +5,8 @@ #include #include -#include -#include +#include "IndexIVF_HNSW.h" +#include "Parser.h" using namespace hnswlib; using namespace ivfhnsw; @@ -215,4 +215,4 @@ int main(int argc, char **argv) delete index; return 0; -} \ No newline at end of file +} From 21c667383272b3cbae25f78fe4fa224bf324eedb Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Sun, 16 Sep 2018 19:54:25 +0000 Subject: [PATCH 23/42] Rename interface file, return cmake swig instructions back --- CMakeLists.txt | 18 ++++++++++++++++++ interface/{ivfhnsw.i => index.i} | 2 +- setup.py | 15 +++++++++------ 3 files changed, 28 insertions(+), 7 deletions(-) rename interface/{ivfhnsw.i => index.i} (99%) diff --git a/CMakeLists.txt b/CMakeLists.txt index a7c05fae..8ff45970 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,4 +21,22 @@ SET(CMAKE_CXX_FLAGS "-Ofast -lrt -DNDEBUG -std=c++11 -DHAVE_CXX0X -openmp -march add_library(ivfhnsw STATIC ${ivfhnsw_src}) target_link_libraries(ivfhnsw faiss hnswlib) +FIND_PACKAGE(SWIG) +FIND_PACKAGE(PythonLibs) +if(SWIG_FOUND AND PythonLibs_FOUND) + INCLUDE(${SWIG_USE_FILE}) + INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_PATH}) + + file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i ./numpy.i) + file(DOWNLOAD https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/pyfragments.swg ./pyfragments.swg) + + 
set(CMAKE_SWIG_OUTDIR ${CMAKE_BINARY_DIR}/lib) + set(SWIG_FEATURES "-Iinclude") + SET_SOURCE_FILES_PROPERTIES(interface/index.i PROPERTIES CPLUSPLUS ON) + + swig_add_module(index python interface/index.i src/IndexIVF_HNSW.cpp) + swig_link_libraries(index faiss hnswlib ${PYTHON_LIBRARIES}) + +endif() + add_subdirectory(tests) diff --git a/interface/ivfhnsw.i b/interface/index.i similarity index 99% rename from interface/ivfhnsw.i rename to interface/index.i index b0d5cc4f..cf741e36 100644 --- a/interface/ivfhnsw.i +++ b/interface/index.i @@ -1,4 +1,4 @@ -%module ivfhnsw +%module index %{ #define SWIG_FILE_WITH_INIT #include "IndexIVF_HNSW.h" diff --git a/setup.py b/setup.py index 0eec1475..d43c41c2 100644 --- a/setup.py +++ b/setup.py @@ -12,8 +12,14 @@ class custom_build_ext(build_ext): + def __init__(self, *args, **kwargs): + self._swig_generated_modules = [] + super().__init__(*args, **kwargs) + def run(self): super().run() + self.distribution.py_modules.extend(self._swig_generated_modules) + self.run_command('build_py') def build_extension(self, ext): env = os.environ.copy() @@ -30,14 +36,11 @@ def build_extension(self, ext): subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) ext.library_dirs.append(os.path.join(self.build_temp, 'lib')) ext.swig_opts.append('-I' + os.path.join(self.build_temp, 'interface')) - r = super().build_extension(ext) - - self.distribution.py_modules.append('ivfhnsw') - self.run_command('build_py') - return r + self._swig_generated_modules.append(ext.name) + return super().build_extension(ext) -paths = ['interface/ivfhnsw.i'] +paths = ['interface/index.i'] ext = [Extension(name='_' + os.path.splitext(os.path.basename(path))[0], sources=[str(path)], From d9f04ef14d71afb8358d4cb5c063318792216712 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Sun, 16 Sep 2018 21:58:00 +0000 Subject: [PATCH 24/42] Add tests target to build all tests --- .gitignore | 1 + tests/CMakeLists.txt | 3 +++ 2 files changed, 4 
insertions(+) diff --git a/.gitignore b/.gitignore index 6efdecb0..da2cb796 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ dist/ ivfhnsw.egg-info/ venv/ .eggs/ +.pytest_cache/ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d96ea8f8..ff9e7cee 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -5,12 +5,15 @@ file(GLOB srcs ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) # Build each source file independently include_directories(${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/include) # ivf-hnsw root directory +add_custom_target(tests) + foreach(source ${srcs}) get_filename_component(name ${source} NAME_WE) # target add_executable(${name} ${source}) target_link_libraries(${name} ivfhnsw faiss) + add_dependencies(tests ${name}) # Install install(TARGETS ${name} DESTINATION test) From b60fab67e03d9e1594635f797b475b5788da3f00 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Sun, 16 Sep 2018 22:21:55 +0000 Subject: [PATCH 25/42] Rename interface for the last time --- CMakeLists.txt | 8 +++++--- interface/{index.i => wrapper.i} | 4 +++- setup.py | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) rename interface/{index.i => wrapper.i} (95%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ff45970..b102ba7f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,10 +32,12 @@ if(SWIG_FOUND AND PythonLibs_FOUND) set(CMAKE_SWIG_OUTDIR ${CMAKE_BINARY_DIR}/lib) set(SWIG_FEATURES "-Iinclude") - SET_SOURCE_FILES_PROPERTIES(interface/index.i PROPERTIES CPLUSPLUS ON) + file(GLOB swig_interface interface/wrapper.i) - swig_add_module(index python interface/index.i src/IndexIVF_HNSW.cpp) - swig_link_libraries(index faiss hnswlib ${PYTHON_LIBRARIES}) + SET_SOURCE_FILES_PROPERTIES(${swig_interface} PROPERTIES CPLUSPLUS ON) + + swig_add_module(wrapper python ${swig_interface} ${ivfhnsw_src}) + swig_link_libraries(wrapper faiss hnswlib ${PYTHON_LIBRARIES}) endif() diff --git a/interface/index.i b/interface/wrapper.i similarity index 95% rename from 
interface/index.i rename to interface/wrapper.i index cf741e36..5ffa82e2 100644 --- a/interface/index.i +++ b/interface/wrapper.i @@ -1,7 +1,8 @@ -%module index +%module wrapper %{ #define SWIG_FILE_WITH_INIT #include "IndexIVF_HNSW.h" +#include "IndexIVF_HNSW_Grouping.h" %} %include "numpy.i" @@ -65,6 +66,7 @@ void _search(const float *x, size_t n, size_t d, float* distances, size_t k_, lo %ignore search; %include "IndexIVF_HNSW.h" +%include "IndexIVF_HNSW_Grouping.h" %pythoncode %{ import functools diff --git a/setup.py b/setup.py index d43c41c2..fc1b5ddb 100644 --- a/setup.py +++ b/setup.py @@ -36,11 +36,11 @@ def build_extension(self, ext): subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) ext.library_dirs.append(os.path.join(self.build_temp, 'lib')) ext.swig_opts.append('-I' + os.path.join(self.build_temp, 'interface')) - self._swig_generated_modules.append(ext.name) + self._swig_generated_modules.append(ext.name.lstrip('_')) return super().build_extension(ext) -paths = ['interface/index.i'] +paths = ['interface/wrapper.i'] ext = [Extension(name='_' + os.path.splitext(os.path.basename(path))[0], sources=[str(path)], From 4d1482473bce9770d17a558aebde6e37231c1228 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Mon, 17 Sep 2018 09:47:30 +0000 Subject: [PATCH 26/42] Move wrapper module into ivfhnsw package --- interface/.gitignore | 4 ++++ python-src/ivfhnsw/.gitignore | 1 + python-src/ivfhnsw/__init__.py | 0 setup.py | 25 ++++++++++--------------- 4 files changed, 15 insertions(+), 15 deletions(-) create mode 100644 interface/.gitignore create mode 100644 python-src/ivfhnsw/.gitignore create mode 100644 python-src/ivfhnsw/__init__.py diff --git a/interface/.gitignore b/interface/.gitignore new file mode 100644 index 00000000..57a16aaa --- /dev/null +++ b/interface/.gitignore @@ -0,0 +1,4 @@ +* + +!.gitignore +!wrapper.i diff --git a/python-src/ivfhnsw/.gitignore b/python-src/ivfhnsw/.gitignore new file mode 100644 index 
00000000..5a59a147 --- /dev/null +++ b/python-src/ivfhnsw/.gitignore @@ -0,0 +1 @@ +wrapper.py diff --git a/python-src/ivfhnsw/__init__.py b/python-src/ivfhnsw/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/setup.py b/setup.py index fc1b5ddb..3920d1fe 100644 --- a/setup.py +++ b/setup.py @@ -6,19 +6,14 @@ import os import pathlib -from setuptools import setup, Extension +from setuptools import setup, Extension, find_packages from setuptools.command.build_ext import build_ext from distutils.version import LooseVersion class custom_build_ext(build_ext): - def __init__(self, *args, **kwargs): - self._swig_generated_modules = [] - super().__init__(*args, **kwargs) - def run(self): super().run() - self.distribution.py_modules.extend(self._swig_generated_modules) self.run_command('build_py') def build_extension(self, ext): @@ -36,26 +31,26 @@ def build_extension(self, ext): subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) ext.library_dirs.append(os.path.join(self.build_temp, 'lib')) ext.swig_opts.append('-I' + os.path.join(self.build_temp, 'interface')) - self._swig_generated_modules.append(ext.name.lstrip('_')) return super().build_extension(ext) -paths = ['interface/wrapper.i'] - -ext = [Extension(name='_' + os.path.splitext(os.path.basename(path))[0], - sources=[str(path)], - swig_opts=['-Iinclude', '-c++'], +names = ['wrapper'] +python_src = 'python-src' +ext = [Extension(name='.'.join(['ivfhnsw', '_' + name]), + sources=[os.path.join('interface', '.'.join([name, 'i']))], + swig_opts=['-Iinclude', '-c++', '-outdir', os.path.join(python_src, 'ivfhnsw')], include_dirs=['include', os.curdir], libraries=['ivfhnsw', 'hnswlib', 'faiss', 'gomp', 'lapack',], extra_compile_args=['-std=c++11', '-static'],) - for path in paths] + for name in names] + setup( name='ivfhnsw', version='0.1', ext_modules=ext, - package_dir={'': 'interface'}, - py_modules=[], + package_dir={'': python_src}, + 
packages=find_packages(python_src), setup_requires=['pytest-runner'], install_requires=[ 'numpy', From 1c715cc9e03eeba4d06014c1e39083aaa462d21d Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Mon, 17 Sep 2018 10:14:12 +0000 Subject: [PATCH 27/42] Move search wrapper code to separate python module --- interface/wrapper.i | 22 ++-------------------- python-src/ivfhnsw/__init__.py | 5 +++++ python-src/ivfhnsw/index.py | 12 ++++++++++++ python-tests/test_wrapper.py | 5 +++-- setup.py | 10 ++++++++-- 5 files changed, 30 insertions(+), 24 deletions(-) create mode 100644 python-src/ivfhnsw/index.py diff --git a/interface/wrapper.i b/interface/wrapper.i index 5ffa82e2..55cf2dbf 100644 --- a/interface/wrapper.i +++ b/interface/wrapper.i @@ -42,12 +42,12 @@ void assign_numpy(const float *x, size_t n, size_t d, idx_t *labels, size_t k) { /* Wrapper for IndexIVF_HNSW::search */ -%exception _search { +%exception search { $action if (PyErr_Occurred()) SWIG_fail; } %extend ivfhnsw::IndexIVF_HNSW { -void _search(const float *x, size_t n, size_t d, float* distances, size_t k_, long *labels, size_t k) { +void search(const float *x, size_t n, size_t d, float* distances, size_t k_, long *labels, size_t k) { if (d != $self->d) { PyErr_Format(PyExc_ValueError, "Query vectors must be of length d=%d, got %d", @@ -68,21 +68,3 @@ void _search(const float *x, size_t n, size_t d, float* distances, size_t k_, lo %include "IndexIVF_HNSW.h" %include "IndexIVF_HNSW_Grouping.h" -%pythoncode %{ -import functools - -cls = IndexIVF_HNSW - -@functools.wraps(cls._search) -def search_wrapper(self, x, k): - """ - Query n vectors of dimension d to the index. - - Return at most k vectors. If there are not enough results for the query, - the result array is padded with -1s. 
- """ - return self._search(x, k, k) - -cls.search = search_wrapper -%} - diff --git a/python-src/ivfhnsw/__init__.py b/python-src/ivfhnsw/__init__.py index e69de29b..560bac9a 100644 --- a/python-src/ivfhnsw/__init__.py +++ b/python-src/ivfhnsw/__init__.py @@ -0,0 +1,5 @@ +from .index import Index + +__all__ = ( + 'Index', +) diff --git a/python-src/ivfhnsw/index.py b/python-src/ivfhnsw/index.py new file mode 100644 index 00000000..0e3067e1 --- /dev/null +++ b/python-src/ivfhnsw/index.py @@ -0,0 +1,12 @@ +from .wrapper import IndexIVF_HNSW + +class Index(IndexIVF_HNSW): + def search(self, x, k): + """ + Query n vectors of dimension d to the index. + + Return at most k vectors. If there are not enough results for the query, + the result array is padded with -1s. + """ + return super().search(x, k, k) + diff --git a/python-tests/test_wrapper.py b/python-tests/test_wrapper.py index 4d446ef4..057c561a 100644 --- a/python-tests/test_wrapper.py +++ b/python-tests/test_wrapper.py @@ -1,3 +1,4 @@ def test_wrapper(): - import ivfhnsw - ivfhnsw.IndexIVF_HNSW(4,4,4,4) + from ivfhnsw import _wrapper + i = _wrapper.new_IndexIVF_HNSW(4,4,4,4) + _wrapper.delete_IndexIVF_HNSW(i) diff --git a/setup.py b/setup.py index 3920d1fe..87cdef6f 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,9 @@ from distutils.version import LooseVersion +python_src = 'python-src' + + class custom_build_ext(build_ext): def run(self): super().run() @@ -31,14 +34,17 @@ def build_extension(self, ext): subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) ext.library_dirs.append(os.path.join(self.build_temp, 'lib')) ext.swig_opts.append('-I' + os.path.join(self.build_temp, 'interface')) + + ivfhnsw_package_path = os.path.join(self.build_temp, python_src, 'ivfhnsw') + os.makedirs(ivfhnsw_package_path, exist_ok=True) + ext.swig_opts.extend(['-outdir', ivfhnsw_package_path]) return super().build_extension(ext) names = ['wrapper'] -python_src = 'python-src' ext = 
[Extension(name='.'.join(['ivfhnsw', '_' + name]), sources=[os.path.join('interface', '.'.join([name, 'i']))], - swig_opts=['-Iinclude', '-c++', '-outdir', os.path.join(python_src, 'ivfhnsw')], + swig_opts=['-Iinclude', '-c++'], include_dirs=['include', os.curdir], libraries=['ivfhnsw', 'hnswlib', 'faiss', 'gomp', 'lapack',], extra_compile_args=['-std=c++11', '-static'],) From f84925da4b00d2bde57334e6e3365805c15bbf22 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Tue, 18 Sep 2018 09:13:38 +0000 Subject: [PATCH 28/42] Add basic python test --- python-tests/test_wrapper.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/python-tests/test_wrapper.py b/python-tests/test_wrapper.py index 057c561a..cc819390 100644 --- a/python-tests/test_wrapper.py +++ b/python-tests/test_wrapper.py @@ -1,4 +1,15 @@ -def test_wrapper(): +def test_lowlevel_constructor_and_destructor_wrappers(): from ivfhnsw import _wrapper i = _wrapper.new_IndexIVF_HNSW(4,4,4,4) _wrapper.delete_IndexIVF_HNSW(i) + + +def test_pipeline(): + from ivfhnsw import Index + index = Index(4,4,4,4) + index.build_quantizer('', '', '') + index.assign([[5,5,5,5]], 2) + distances, labels = index.search([[1,2,3,4], + [6,2,3,2]], 3) + assert distances.shape[0] == 3 + assert labels.shape[0] == 3 From 5251711fa626cde9968696d4372f83e7ff7b7863 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Tue, 18 Sep 2018 09:14:31 +0000 Subject: [PATCH 29/42] Split the Parser into .h and .cpp files --- include/Parser.h | 128 +---------------------------------------------- src/Parser.cpp | 127 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 126 deletions(-) create mode 100644 src/Parser.cpp diff --git a/include/Parser.h b/include/Parser.h index 9b85a630..fc75f589 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -1,9 +1,6 @@ #ifndef IVF_HNSW_LIB_PARSER_H #define IVF_HNSW_LIB_PARSER_H -#include -#include - //============== // Parser Class //============== @@ 
-63,129 +60,8 @@ struct Parser const char *path_norm_pq; ///< Path to the product quantizer for norms of reconstructed base points const char *path_index; ///< Path to the constructed index - Parser(int argc, char **argv) - { - cmd = argv[0]; - if (argc == 1) - usage(); - - for (size_t i = 1 ; i < argc; i++) { - char *a = argv[i]; - - if (!strcmp (a, "-h") || !strcmp (a, "--help")) - usage(); - - if (i == argc-1) - break; - - //================= - // HNSW parameters - //================= - if (!strcmp (a, "-M")) sscanf(argv[++i], "%zu", &M); - else if (!strcmp (a, "-efConstruction")) sscanf(argv[++i], "%zu", &efConstruction); - - //================= - // Data parameters - //================= - else if (!strcmp (a, "-nb")) sscanf(argv[++i], "%zu", &nb); - else if (!strcmp (a, "-nc")) sscanf(argv[++i], "%zu", &nc); - else if (!strcmp (a, "-nsubc")) sscanf(argv[++i], "%zu", &nsubc); - else if (!strcmp (a, "-nt")) sscanf(argv[++i], "%zu", &nt); - else if (!strcmp (a, "-nsubt")) sscanf(argv[++i], "%zu", &nsubt); - else if (!strcmp (a, "-nq")) sscanf(argv[++i], "%zu", &nq); - else if (!strcmp (a, "-ngt")) sscanf(argv[++i], "%zu", &ngt); - else if (!strcmp (a, "-d")) sscanf(argv[++i], "%zu", &d); - - //=============== - // PQ parameters - //=============== - else if (!strcmp (a, "-code_size"))sscanf(argv[++i], "%zu", &code_size); - else if (!strcmp (a, "-opq")) do_opq = !strcmp(argv[++i], "on"); - - //=================== - // Search parameters - //=================== - else if (!strcmp (a, "-k")) sscanf(argv[++i], "%zu", &k); - else if (!strcmp (a, "-nprobe")) sscanf(argv[++i], "%zu", &nprobe); - else if (!strcmp (a, "-max_codes")) sscanf(argv[++i], "%zu", &max_codes); - else if (!strcmp (a, "-efSearch")) sscanf(argv[++i], "%zu", &efSearch); - else if (!strcmp (a, "-pruning")) do_pruning = !strcmp(argv[++i], "on"); - - //======= - // Paths - //======= - else if (!strcmp (a, "-path_base")) path_base = argv[++i]; - else if (!strcmp (a, "-path_learn")) path_learn = 
argv[++i]; - else if (!strcmp (a, "-path_q")) path_q = argv[++i]; - else if (!strcmp (a, "-path_gt")) path_gt = argv[++i]; - else if (!strcmp (a, "-path_centroids")) path_centroids = argv[++i]; - - else if (!strcmp (a, "-path_precomputed_idx")) path_precomputed_idxs = argv[++i]; - - else if (!strcmp (a, "-path_info")) path_info = argv[++i]; - else if (!strcmp (a, "-path_edges")) path_edges = argv[++i]; - - else if (!strcmp (a, "-path_pq")) path_pq = argv[++i]; - else if (!strcmp (a, "-path_opq_matrix")) path_opq_matrix = argv[++i]; - else if (!strcmp (a, "-path_norm_pq")) path_norm_pq = argv[++i]; - else if (!strcmp (a, "-path_index")) path_index = argv[++i]; - } - } - - void usage() - { - printf ("Usage: %s [options]\n", cmd); - printf ("###################\n" - "# HNSW Parameters #\n" - "###################\n" - " -M # Min number of edges per point\n" - " -efConstruction # Max number of candidate vertices in priority queue to observe during construction\n" - "###################\n" - "# Data Parameters #\n" - "###################\n" - " -nb # Number of base vectors\n" - " -nt # Number of learn vectors\n" - " -nsubt # Number of learn vectors to train (random subset of the learn set)\n" - " -nc # Number of centroids for HNSW quantizer\n" - " -nsubc # Number of subcentroids per group\n" - " -nq # Number of queries\n" - " -ngt # Number of groundtruth neighbours per query\n" - " -d # Vector dimension\n" - "#################\n" - "# PQ Parameters #\n" - "#################\n" - " -code_size # Code size per vector in bytes\n" - " -opq on/off Turn on/off OPQ compression\n" - "####################\n" - "# Search Parameters #\n" - "#####################\n" - " -k # Number of the closest vertices to search\n" - " -nprobe # Number of probes at query time\n" - " -max_codes # Max number of codes to visit to do a query\n" - " -efSearch # Max number of candidate vertices in priority queue to observe during searching\n" - " -pruning on/off Turn on/off pruning in the grouping 
scheme\n" - "#########\n" - "# Paths #\n" - "#########\n" - " -path_base filename Path to a base set\n" - " -path_learn filename Path to a learn set\n" - " -path_q filename Path to queries\n" - " -path_gt filename Path to groundtruth\n" - " -path_centroids filename Path to coarse centroids\n" - " \n" - " -path_precomputed_idxs filename Path to coarse centroid indices for base points\n" - " \n" - " -path_info filename Path to parameters of HNSW graph\n" - " -path_edges filename Path to edges of HNSW graph\n" - " \n" - " -path_pq filename Path to the product quantizer for residuals\n" - " -path_opq_matrix filename Path to the rotation matrix for OPQ compression\n" - " -path_norm_pq filename Path to the product quantizer for norms of reconstructed base points\n" - " " - " -path_index filename Path to the constructed index\n" - ); - exit(0); - } + Parser(int argc, char **argv); + void usage(); }; #endif //IVF_HNSW_LIB_PARSER_H diff --git a/src/Parser.cpp b/src/Parser.cpp new file mode 100644 index 00000000..c678168b --- /dev/null +++ b/src/Parser.cpp @@ -0,0 +1,127 @@ +#include +#include +#include "Parser.h" + + Parser::Parser(int argc, char **argv) + { + cmd = argv[0]; + if (argc == 1) + usage(); + + for (size_t i = 1 ; i < argc; i++) { + char *a = argv[i]; + + if (!strcmp (a, "-h") || !strcmp (a, "--help")) + usage(); + + if (i == argc-1) + break; + + //================= + // HNSW parameters + //================= + if (!strcmp (a, "-M")) sscanf(argv[++i], "%zu", &M); + else if (!strcmp (a, "-efConstruction")) sscanf(argv[++i], "%zu", &efConstruction); + + //================= + // Data parameters + //================= + else if (!strcmp (a, "-nb")) sscanf(argv[++i], "%zu", &nb); + else if (!strcmp (a, "-nc")) sscanf(argv[++i], "%zu", &nc); + else if (!strcmp (a, "-nsubc")) sscanf(argv[++i], "%zu", &nsubc); + else if (!strcmp (a, "-nt")) sscanf(argv[++i], "%zu", &nt); + else if (!strcmp (a, "-nsubt")) sscanf(argv[++i], "%zu", &nsubt); + else if (!strcmp (a, "-nq")) 
sscanf(argv[++i], "%zu", &nq); + else if (!strcmp (a, "-ngt")) sscanf(argv[++i], "%zu", &ngt); + else if (!strcmp (a, "-d")) sscanf(argv[++i], "%zu", &d); + + //=============== + // PQ parameters + //=============== + else if (!strcmp (a, "-code_size"))sscanf(argv[++i], "%zu", &code_size); + else if (!strcmp (a, "-opq")) do_opq = !strcmp(argv[++i], "on"); + + //=================== + // Search parameters + //=================== + else if (!strcmp (a, "-k")) sscanf(argv[++i], "%zu", &k); + else if (!strcmp (a, "-nprobe")) sscanf(argv[++i], "%zu", &nprobe); + else if (!strcmp (a, "-max_codes")) sscanf(argv[++i], "%zu", &max_codes); + else if (!strcmp (a, "-efSearch")) sscanf(argv[++i], "%zu", &efSearch); + else if (!strcmp (a, "-pruning")) do_pruning = !strcmp(argv[++i], "on"); + + //======= + // Paths + //======= + else if (!strcmp (a, "-path_base")) path_base = argv[++i]; + else if (!strcmp (a, "-path_learn")) path_learn = argv[++i]; + else if (!strcmp (a, "-path_q")) path_q = argv[++i]; + else if (!strcmp (a, "-path_gt")) path_gt = argv[++i]; + else if (!strcmp (a, "-path_centroids")) path_centroids = argv[++i]; + + else if (!strcmp (a, "-path_precomputed_idx")) path_precomputed_idxs = argv[++i]; + + else if (!strcmp (a, "-path_info")) path_info = argv[++i]; + else if (!strcmp (a, "-path_edges")) path_edges = argv[++i]; + + else if (!strcmp (a, "-path_pq")) path_pq = argv[++i]; + else if (!strcmp (a, "-path_opq_matrix")) path_opq_matrix = argv[++i]; + else if (!strcmp (a, "-path_norm_pq")) path_norm_pq = argv[++i]; + else if (!strcmp (a, "-path_index")) path_index = argv[++i]; + } + } + + void Parser::usage() + { + printf ("Usage: %s [options]\n", cmd); + printf ("###################\n" + "# HNSW Parameters #\n" + "###################\n" + " -M # Min number of edges per point\n" + " -efConstruction # Max number of candidate vertices in priority queue to observe during construction\n" + "###################\n" + "# Data Parameters #\n" + "###################\n" + " 
-nb # Number of base vectors\n" + " -nt # Number of learn vectors\n" + " -nsubt # Number of learn vectors to train (random subset of the learn set)\n" + " -nc # Number of centroids for HNSW quantizer\n" + " -nsubc # Number of subcentroids per group\n" + " -nq # Number of queries\n" + " -ngt # Number of groundtruth neighbours per query\n" + " -d # Vector dimension\n" + "#################\n" + "# PQ Parameters #\n" + "#################\n" + " -code_size # Code size per vector in bytes\n" + " -opq on/off Turn on/off OPQ compression\n" + "####################\n" + "# Search Parameters #\n" + "#####################\n" + " -k # Number of the closest vertices to search\n" + " -nprobe # Number of probes at query time\n" + " -max_codes # Max number of codes to visit to do a query\n" + " -efSearch # Max number of candidate vertices in priority queue to observe during searching\n" + " -pruning on/off Turn on/off pruning in the grouping scheme\n" + "#########\n" + "# Paths #\n" + "#########\n" + " -path_base filename Path to a base set\n" + " -path_learn filename Path to a learn set\n" + " -path_q filename Path to queries\n" + " -path_gt filename Path to groundtruth\n" + " -path_centroids filename Path to coarse centroids\n" + " \n" + " -path_precomputed_idxs filename Path to coarse centroid indices for base points\n" + " \n" + " -path_info filename Path to parameters of HNSW graph\n" + " -path_edges filename Path to edges of HNSW graph\n" + " \n" + " -path_pq filename Path to the product quantizer for residuals\n" + " -path_opq_matrix filename Path to the rotation matrix for OPQ compression\n" + " -path_norm_pq filename Path to the product quantizer for norms of reconstructed base points\n" + " " + " -path_index filename Path to the constructed index\n" + ); + exit(0); + } From 9b784829755a1131c375f7b9268f9c488bd3224a Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Tue, 18 Sep 2018 10:00:01 +0000 Subject: [PATCH 30/42] Add Proxy class for IndexIVF_HNSW_Grouping --- 
python-src/ivfhnsw/__init__.py | 2 ++ python-src/ivfhnsw/index_grouping.py | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 python-src/ivfhnsw/index_grouping.py diff --git a/python-src/ivfhnsw/__init__.py b/python-src/ivfhnsw/__init__.py index 560bac9a..99d06cb4 100644 --- a/python-src/ivfhnsw/__init__.py +++ b/python-src/ivfhnsw/__init__.py @@ -1,5 +1,7 @@ from .index import Index +from .index_grouping import IndexGrouping __all__ = ( 'Index', + 'IndexGrouping', ) diff --git a/python-src/ivfhnsw/index_grouping.py b/python-src/ivfhnsw/index_grouping.py new file mode 100644 index 00000000..90d9ec65 --- /dev/null +++ b/python-src/ivfhnsw/index_grouping.py @@ -0,0 +1,6 @@ +from .wrapper import IndexIVF_HNSW_Grouping +from .index import Index + + +class IndexGrouping(IndexIVF_HNSW_Grouping, Index): + pass From e07d460765e25d2b847ba27c4e3c8f3fd4d972c4 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Tue, 18 Sep 2018 10:32:30 +0000 Subject: [PATCH 31/42] Remove unused pathlib imports --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 87cdef6f..9b1252e7 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,6 @@ import platform import subprocess import os -import pathlib from setuptools import setup, Extension, find_packages from setuptools.command.build_ext import build_ext From f520cb0b743c9ee4636a23d4661306fa634df1c1 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Tue, 18 Sep 2018 12:07:09 +0000 Subject: [PATCH 32/42] Fix interface for IndexIVF_HNSW::search --- interface/wrapper.i | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/interface/wrapper.i b/interface/wrapper.i index 55cf2dbf..b92791ab 100644 --- a/interface/wrapper.i +++ b/interface/wrapper.i @@ -11,6 +11,7 @@ import_array(); %} +%apply (float* IN_ARRAY1, int DIM1) {(const float *x, size_t d)}; %apply (float* IN_ARRAY2, int DIM1, int DIM2) {(const float *x, size_t n, size_t d)}; %apply (unsigned int* ARGOUT_ARRAY1, int DIM1) 
{(ivfhnsw::IndexIVF_HNSW::idx_t *labels, size_t k)}; %apply (long* ARGOUT_ARRAY1, int DIM1) {(long *labels, size_t k)}; @@ -47,7 +48,7 @@ Wrapper for IndexIVF_HNSW::search if (PyErr_Occurred()) SWIG_fail; } %extend ivfhnsw::IndexIVF_HNSW { -void search(const float *x, size_t n, size_t d, float* distances, size_t k_, long *labels, size_t k) { +void search(const float *x, size_t d, float* distances, size_t k_, long *labels, size_t k) { if (d != $self->d) { PyErr_Format(PyExc_ValueError, "Query vectors must be of length d=%d, got %d", From cd6a20147f0f0465b7408d4df9ec3a41b87706d2 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Tue, 18 Sep 2018 12:48:06 +0000 Subject: [PATCH 33/42] Add wrapper for IndexIVF_HNSW::add_batch --- interface/wrapper.i | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/interface/wrapper.i b/interface/wrapper.i index b92791ab..0ad7cc2b 100644 --- a/interface/wrapper.i +++ b/interface/wrapper.i @@ -14,6 +14,8 @@ import_array(); %apply (float* IN_ARRAY1, int DIM1) {(const float *x, size_t d)}; %apply (float* IN_ARRAY2, int DIM1, int DIM2) {(const float *x, size_t n, size_t d)}; %apply (unsigned int* ARGOUT_ARRAY1, int DIM1) {(ivfhnsw::IndexIVF_HNSW::idx_t *labels, size_t k)}; +%apply (unsigned int* IN_ARRAY1, int DIM1) {(const ivfhnsw::IndexIVF_HNSW::idx_t *xids, size_t n1)}; +%apply (unsigned int* IN_ARRAY1, int DIM1) {(const ivfhnsw::IndexIVF_HNSW::idx_t *precomputed_idx, size_t n2)}; %apply (long* ARGOUT_ARRAY1, int DIM1) {(long *labels, size_t k)}; %apply (float* ARGOUT_ARRAY1, int DIM1) {(float* distances, size_t k_)}; @@ -66,6 +68,33 @@ void search(const float *x, size_t d, float* distances, size_t k_, long *labels, } %ignore search; + +/* +Wrapper for IndexIVF_HNSW::add_batch: takes x as a 2-D float array (n x d) plus two 1-D unsigned int arrays (xids, precomputed_idx); sets ValueError unless d equals the index's d and all three arrays share the same first dimension. size_t values are formatted with %zu. +*/ +%exception add_batch { + $action + if (PyErr_Occurred()) SWIG_fail; +} +%extend ivfhnsw::IndexIVF_HNSW { +void add_batch(const float *x, size_t n, size_t d, const idx_t* xids, size_t n1, const idx_t *precomputed_idx, size_t n2) 
{ + if (d != $self->d) { + PyErr_Format(PyExc_ValueError, + "Input vectors must be of length d=%zu, got %zu", + (size_t)$self->d, d); + return; + } + if (!(n == n1 && n == n2)) { + PyErr_Format(PyExc_ValueError, + "Arrays must have the same first dimension size, got %zu, %zu, %zu", + n, n1, n2); + return; + } + $self->add_batch(n, x, xids, precomputed_idx); +} +} +%ignore add_batch; + %include "IndexIVF_HNSW.h" %include "IndexIVF_HNSW_Grouping.h" From 9c49f2f0b6dbecba4bf2f51ecb94c46be5d57e26 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Tue, 18 Sep 2018 13:06:13 +0000 Subject: [PATCH 34/42] Fix swig output directory in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9b1252e7..bad433ce 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ def build_extension(self, ext): ext.library_dirs.append(os.path.join(self.build_temp, 'lib')) ext.swig_opts.append('-I' + os.path.join(self.build_temp, 'interface')) - ivfhnsw_package_path = os.path.join(self.build_temp, python_src, 'ivfhnsw') + ivfhnsw_package_path = os.path.join(self.build_lib, 'ivfhnsw') os.makedirs(ivfhnsw_package_path, exist_ok=True) ext.swig_opts.extend(['-outdir', ivfhnsw_package_path]) return super().build_extension(ext) From 9af69cc75323e43980ccd7ce9446de226263c451 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Tue, 18 Sep 2018 22:06:53 +0000 Subject: [PATCH 35/42] Include numpy headers --- setup.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index bad433ce..60fe6295 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +import numpy from pprint import pprint from urllib.request import urlretrieve import sys @@ -44,7 +45,7 @@ def build_extension(self, ext): ext = [Extension(name='.'.join(['ivfhnsw', '_' + name]), sources=[os.path.join('interface', '.'.join([name, 'i']))], swig_opts=['-Iinclude', '-c++'], - include_dirs=['include', os.curdir], + include_dirs=['include', os.curdir, 
numpy.get_include()], libraries=['ivfhnsw', 'hnswlib', 'faiss', 'gomp', 'lapack',], extra_compile_args=['-std=c++11', '-static'],) for name in names] @@ -56,7 +57,10 @@ def build_extension(self, ext): ext_modules=ext, package_dir={'': python_src}, packages=find_packages(python_src), - setup_requires=['pytest-runner'], + setup_requires=[ + 'pytest-runner', + 'numpy', + ], install_requires=[ 'numpy', ], From 0e8c7f3c88500778d1cb85e6917c2fda34b527f6 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Wed, 19 Sep 2018 08:11:15 +0000 Subject: [PATCH 36/42] Add numpy include --- setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 60fe6295..bf9bb2f2 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -import numpy from pprint import pprint from urllib.request import urlretrieve import sys @@ -38,6 +37,9 @@ def build_extension(self, ext): ivfhnsw_package_path = os.path.join(self.build_lib, 'ivfhnsw') os.makedirs(ivfhnsw_package_path, exist_ok=True) ext.swig_opts.extend(['-outdir', ivfhnsw_package_path]) + + import numpy + ext.include_dirs.append(numpy.get_include()) return super().build_extension(ext) @@ -45,7 +47,7 @@ def build_extension(self, ext): ext = [Extension(name='.'.join(['ivfhnsw', '_' + name]), sources=[os.path.join('interface', '.'.join([name, 'i']))], swig_opts=['-Iinclude', '-c++'], - include_dirs=['include', os.curdir, numpy.get_include()], + include_dirs=['include', os.curdir], libraries=['ivfhnsw', 'hnswlib', 'faiss', 'gomp', 'lapack',], extra_compile_args=['-std=c++11', '-static'],) for name in names] From ab15d51798a448984b233d4de68e1e310165f467 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Wed, 19 Sep 2018 16:52:43 +0000 Subject: [PATCH 37/42] Use get_ext_fullpath to obtain right dirpath --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bf9bb2f2..ea624f37 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ def build_extension(self, 
ext): ext.library_dirs.append(os.path.join(self.build_temp, 'lib')) ext.swig_opts.append('-I' + os.path.join(self.build_temp, 'interface')) - ivfhnsw_package_path = os.path.join(self.build_lib, 'ivfhnsw') + ivfhnsw_package_path = os.path.dirname(self.get_ext_fullpath(ext.name)) os.makedirs(ivfhnsw_package_path, exist_ok=True) ext.swig_opts.extend(['-outdir', ivfhnsw_package_path]) From 207ba3d60eb85097c7e4019f7540faf2e7d17e1a Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Wed, 19 Sep 2018 20:00:13 +0000 Subject: [PATCH 38/42] Add basic import test --- python-tests/test_wrapper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python-tests/test_wrapper.py b/python-tests/test_wrapper.py index cc819390..5f1f3e5b 100644 --- a/python-tests/test_wrapper.py +++ b/python-tests/test_wrapper.py @@ -1,3 +1,7 @@ +def test_import_ivfhnsw(): + import ivfhnsw + + def test_lowlevel_constructor_and_destructor_wrappers(): from ivfhnsw import _wrapper i = _wrapper.new_IndexIVF_HNSW(4,4,4,4) From 66fbddd350d2e8052e513fa0d83db5d8a38e8f30 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Wed, 19 Sep 2018 21:51:32 +0000 Subject: [PATCH 39/42] Fix test after IndexIVF_HNSW::search method change --- python-tests/test_wrapper.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python-tests/test_wrapper.py b/python-tests/test_wrapper.py index 5f1f3e5b..c77d1852 100644 --- a/python-tests/test_wrapper.py +++ b/python-tests/test_wrapper.py @@ -13,7 +13,6 @@ def test_pipeline(): index = Index(4,4,4,4) index.build_quantizer('', '', '') index.assign([[5,5,5,5]], 2) - distances, labels = index.search([[1,2,3,4], - [6,2,3,2]], 3) + distances, labels = index.search([1,2,3,4], 3) assert distances.shape[0] == 3 assert labels.shape[0] == 3 From ba73362d3bb84432928ab978032e02a25ab6bf5e Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Wed, 19 Sep 2018 21:52:33 +0000 Subject: [PATCH 40/42] Patch setup.py to work with `python setup.py test` --- setup.py | 52 
+++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/setup.py b/setup.py index ea624f37..f0b7fa26 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +import contextlib from pprint import pprint from urllib.request import urlretrieve import sys @@ -7,36 +8,64 @@ from setuptools import setup, Extension, find_packages from setuptools.command.build_ext import build_ext +from setuptools.command.install_scripts import install_scripts from distutils.version import LooseVersion -python_src = 'python-src' +project_dir = os.path.dirname(os.path.abspath(__file__)) +python_src = os.path.join(os.curdir, 'python-src') + + +class custom_install_scripts(install_scripts): + def run(self): + print(self.build_dir) + super().run() class custom_build_ext(build_ext): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._swig_outdir = None + def run(self): + # Store self.inplace flag because it gets overridden somehow + # by `python setup.py test` pipeline + self._real_inplace = self.inplace + print('Inplace:', self.inplace) super().run() self.run_command('build_py') + @contextlib.contextmanager + def set_inplace(self, inplace): + saved_inplace, self.inplace = self.inplace, inplace + yield + self.inplace = saved_inplace + def build_extension(self, ext): - env = os.environ.copy() - cmake_args = [] - build_args = ['--target', 'ivfhnsw'] - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) interface_temp = os.path.join(self.build_temp, 'interface') os.makedirs(interface_temp, exist_ok=True) + # Download numpy.i dependency to be used by swig urlretrieve('https://raw.githubusercontent.com/numpy/numpy/master/tools/swig/numpy.i', os.path.join(interface_temp, 'numpy.i'),) - subprocess.check_call(['cmake', os.path.abspath(os.curdir)] + cmake_args, cwd=self.build_temp, env=env) - subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) + + # Build only ivfhnsw 
static library + build_args = ['--target', 'ivfhnsw'] + cmake_args = [] + env = os.environ.copy() + subprocess.check_call(['cmake', project_dir] + cmake_args, cwd=self.build_temp, env=env) + subprocess.check_call(['cmake', '--build', os.curdir] + build_args, cwd=self.build_temp) + + # Add path to the compiled static libraries ext.library_dirs.append(os.path.join(self.build_temp, 'lib')) + # Add path to the temporary swig interface files directory ext.swig_opts.append('-I' + os.path.join(self.build_temp, 'interface')) - ivfhnsw_package_path = os.path.dirname(self.get_ext_fullpath(ext.name)) - os.makedirs(ivfhnsw_package_path, exist_ok=True) - ext.swig_opts.extend(['-outdir', ivfhnsw_package_path]) + with self.set_inplace(self._real_inplace): + _swig_outdir = os.path.dirname(self.get_ext_fullpath(ext.name)) + os.makedirs(_swig_outdir, exist_ok=True) + ext.swig_opts.extend(['-outdir', _swig_outdir]) + print('SWIG outdir:', _swig_outdir) import numpy ext.include_dirs.append(numpy.get_include()) @@ -70,5 +99,6 @@ def build_extension(self, ext): include_package_data=True, cmdclass={ 'build_ext': custom_build_ext, + 'install_scripts': custom_install_scripts, } ) From 313d308f56a133a2f523afef9f34b2b4593d40d7 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Thu, 20 Sep 2018 14:30:39 +0000 Subject: [PATCH 41/42] Add support for CMAKE_BUILD_TYPE in setup.py --- CMakeLists.txt | 2 ++ setup.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b102ba7f..d499458a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,8 @@ cmake_minimum_required (VERSION 2.8) # ivf-hnsw project project(ivf-hnsw C CXX) +message("Build type: ${CMAKE_BUILD_TYPE}") + add_subdirectory(faiss) add_subdirectory(hnswlib) diff --git a/setup.py b/setup.py index f0b7fa26..6e65ed6c 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ project_dir = os.path.dirname(os.path.abspath(__file__)) python_src = os.path.join(os.curdir, 'python-src') - 
+build_type = os.environ.get('CMAKE_BUILD_TYPE', 'release') class custom_install_scripts(install_scripts): def run(self): @@ -51,7 +51,7 @@ def build_extension(self, ext): # Build only ivfhnsw static library build_args = ['--target', 'ivfhnsw'] - cmake_args = [] + cmake_args = ['-DCMAKE_BUILD_TYPE={}'.format(build_type.upper())] env = os.environ.copy() subprocess.check_call(['cmake', project_dir] + cmake_args, cwd=self.build_temp, env=env) subprocess.check_call(['cmake', '--build', os.curdir] + build_args, cwd=self.build_temp) From 67f7f2978482dbc555eed82b789c9f44b8f8bfd4 Mon Sep 17 00:00:00 2001 From: Borodin Gregory Date: Fri, 21 Sep 2018 12:20:43 +0000 Subject: [PATCH 42/42] Add interface for IndexIVF_HNSW::train_pq --- interface/wrapper.i | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/interface/wrapper.i b/interface/wrapper.i index 0ad7cc2b..534dcbe5 100644 --- a/interface/wrapper.i +++ b/interface/wrapper.i @@ -41,6 +41,26 @@ void assign_numpy(const float *x, size_t n, size_t d, idx_t *labels, size_t k) { } %ignore assign; +/* +Wrapper for IndexIVF_HNSW::train_pq: takes x as a 2-D float array (n x d) and sets ValueError when d differs from the index's d; otherwise forwards to train_pq(n, x). size_t values are formatted with %zu. +*/ +%exception train_pq { + $action + if (PyErr_Occurred()) SWIG_fail; +} +%extend ivfhnsw::IndexIVF_HNSW { +void train_pq(const float *x, size_t n, size_t d) { + if (d != $self->d) { + PyErr_Format(PyExc_ValueError, + "Training vectors must be of length d=%zu, got %zu", + (size_t)$self->d, d); + return; + } + return $self->train_pq(n, x); +} +} +%ignore train_pq; + /* Wrapper for IndexIVF_HNSW::search