diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..a90eb86 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "pybind11"] + path = pybind11 + url = https://github.com/pybind/pybind11.git diff --git a/CMakeLists.txt b/CMakeLists.txt index e0616ee..40cb406 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(Fred LANGUAGES CXX C) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Ofast") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive") #supress error in older gcc set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") @@ -12,24 +12,14 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-trapping-math") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftree-vectorize") #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopt-info-vec") #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopt-info-loop") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--no-undefined") -#set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++ -static") include_directories(${CMAKE_SOURCE_DIR}/include) -find_package(PythonInterp REQUIRED) -find_package(PythonLibs REQUIRED) -find_package(Boost 1.63 COMPONENTS system chrono ${BPY} ${BNPY} REQUIRED) find_package(OpenMP REQUIRED) add_definitions(-D_GLIBCXX_PARALLEL) -include_directories(${Boost_INCLUDE_DIRS}) -include_directories(${PYTHON_INCLUDE_DIRS}) - -link_libraries(${Boost_LIBRARIES}) -link_libraries(${PYTHON_LIBRARIES}) - +find_package(OpenMP) if(OpenMP_CXX_FOUND) link_libraries(OpenMP::OpenMP_CXX) endif() @@ -40,10 +30,13 @@ if(NOT TARGET OpenMP::OpenMP_CXX) PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS}) set_property(TARGET OpenMP::OpenMP_CXX PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS} Threads::Threads) - link_libraries(OpenMP::OpenMP_CXX) + endif() +link_libraries(OpenMP::OpenMP_CXX) + +add_subdirectory(pybind11) -PYTHON_ADD_MODULE(backend +pybind11_add_module(backend src/fred_python_wrapper.cpp src/curve.cpp src/point.cpp diff --git a/py/Fred/__init__.py b/Fred/__init__.py similarity index 100% rename from py/Fred/__init__.py rename to Fred/__init__.py diff --git a/Fred/__pycache__/__init__.cpython-39.pyc b/Fred/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..837d0ee Binary files /dev/null and b/Fred/__pycache__/__init__.cpython-39.pyc differ diff --git a/py/Fred/hd_fs.py b/Fred/hd_fs.py similarity index 100% rename from py/Fred/hd_fs.py rename to Fred/hd_fs.py diff --git a/py/Fred/median.py b/Fred/median.py similarity index 100% rename from py/Fred/median.py rename to Fred/median.py diff --git a/Makefile b/Makefile index b21155d..935f6d3 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,12 @@ +all: pre install + pre: - sudo apt install -y libboost-all-dev - sudo apt-get install -y python3-setuptools - sudo apt-get install -y python3-numpy - sudo apt-get install -y python3-matplotlib - sudo apt-get install -y cmake + git submodule init + git submodule update install: - cd py && /usr/bin/python3 ./setup.py install --user + python setup.py install --user clean: - rm -r py/dist py/build/ py/Fred.egg-info/ - pip3 uninstall Fred -y + rm -r dist build/ Fred_Frechet.egg-info/ & pip uninstall Fred-Frechet -y diff --git a/README.md b/README.md index 78d9d73..d1f1ea9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # Fred ![alt text](https://raw.githubusercontent.com/derohde/Fred/master/logo/logo.png "Fred logo") A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves. +### NOW USING PYBIND11 INSTEAD OF BOOST! + ## Ingredients C++ Backend `import Fred.backend as fred` @@ -52,12 +54,12 @@ By default, Fred will automatically determine the number of threads to use. If y A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.discrete_klcenter` and `fred.discrete_klmedian`. As the name suggests, it stores the distances already computed. -#### discrete (k,l)-center clustering (continuous Fréchet) -- multiple calls +#### discrete (k,l)-center clustering (continuous Fréchet) - from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616) - signature: `fred.discrete_klcenter_multi(k, l, curves, distances, center_domain, random_first_center)` with parameters - `k`: number of centers - `l`: maximum complexity of the centers, only used when center_domain is default value - - `distances`: `fred.Distance_Matrix` + - `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix` - `center_domain`: possible centers, defaults to empty `fred.Curves()`, in this case the input is simplified and used as center domain - `random_first_center`: determines if first center is chosen uniformly at random or first curve is used as first center, optional, defaults to true - returns: `fred.Clustering_Result` with mebers @@ -65,42 +67,18 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di - `time`: running-time - `assignment`: empty if compute_assignment has not been called -#### discrete (k,l)-median clustering (continuous Fréchet) -- multiple calls +#### discrete (k,l)-median clustering (continuous Fréchet) - Algorithm 6 in [**Coresets for (k,l)-Clustering under the Fréchet distance**](https://arxiv.org/pdf/1901.01870.pdf) + simplification - signature: `fred.discrete_klmedian_multi(k, l, curves, distances, center_domain)` with parameters - `k`: number of centers - `l`: maximum complexity of the centers, only used when center_domain is default value - - `distances`: `fred.Distance_Matrix` - - `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain -- returns: `fred.Clustering_Result` with mebers - - `value`: objective value - - `time`: running-time - - `assignment`: empty if compute_assignment has not been called - -#### discrete (k,l)-center clustering (continuous Fréchet) -- oneshot -- from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616) -- signature: `fred.discrete_klcenter(k, l, curves, center_domain, random_first_center)` with parameters - - `k`: number of centers - - `l`: maximum complexity of the centers, only used when center_domain is default value - - `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain - - `random_first_center`: determines if first center is chosen uniformly at random or first curve is used as first center, optional, defaults to true -- returns: `fred.Clustering_Result` with mebers - - `value`: objective value - - `time`: running-time - - `assignment`: empty if compute_assignment has not been called - -#### discrete (k,l)-median clustering (continuous Fréchet) -- oneshot -- Algorithm 6 in [**Coresets for (k,l)-Clustering under the Fréchet distance**](https://arxiv.org/pdf/1901.01870.pdf) + simplification -- signature: `fred.discrete_klmedian(k, l, curves, center_domain)` with parameters - - `k`: number of centers - - `l`: maximum complexity of the centers, only used when center_domain is default value + - `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix` - `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain - returns: `fred.Clustering_Result` with mebers - `value`: objective value - `time`: running-time - `assignment`: empty if compute_assignment has not been called - #### Clustering Result - signature: `fred.Clustering_Result` - methods: `len(fred.Clustering_Result)`: number of centers, `fred.Clustering_Result[i]`: get ith center, `fred.Clustering_Result.compute_assignment(fred.Curves)`: assigns every curve to its nearest center @@ -112,27 +90,23 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di ### Dimension Reduction via Gaussian Random Projection - [Section 2 in **Random Projections and Sampling Algorithms for Clustering of High Dimensional Polygonal Curves**](https://papers.nips.cc/paper/9443-random-projections-and-sampling-algorithms-for-clustering-of-high-dimensional-polygonal-curves) -- signature: `fred.dimension_reduction(curves, epsilon, empirical_constant)` with parameters `epsilon`: (1+epsilon) approximation parameter, `empirical_constant`: use constant of empirical study (faster, but less accurate) +- signature: `fred.dimension_reduction(curves, epsilon, empirical_constant)` with parameters `epsilon`: (1+epsilon) approximation parameter, `empirical_constant`: use constant of empirical study (faster, but less accurate), defaults to `True` - returns: `fred.Curves` collection of curves ## Installation -Get requirements under Ubuntu: `make pre` - -Python3 installation into userdir: `make install` -### If something does not work with Boost +### Requirements -Manual installation of Boost +You have to have installed: + - git + - openmp available (should be a part of your compiler) + +Thats it! -- `mkdir $HOME/boost` (This folder is hardcoded in setup.py, another location won't work.) -- `cd /tmp` -- `wget https://dl.bintray.com/boostorg/release/1.73.0/source/boost_1_73_0.tar.gz` -- `tar -xzf boost_1_73_0.tar.gz` -- `cd boost_1_73_0` -- `./bootstrap.sh --with-python=/usr/bin/python3` -- `./b2 install --prefix=$HOME/boost` +### Installation Procedure -After that, go back to Freds folder and run `make clean` and then `make install` + - Variant 1: simply run `pip install git+https://github.com/derohde/Fred` + - Variant 2: clone repository and run `make` for installation into userdir ## Test Just run `python py/test.py`. @@ -213,10 +187,10 @@ dm = fred.Distance_Matrix() # computing the Fréchet distance is costly, for k in range(2, 6): - clustering = fred.discrete_klcenter_multi(k, 10, curves, dm) + clustering = fred.discrete_klcenter(k, 10, curves, dm) print("clustering cost is {}".format(clustering.value)) - clustering = fred.discrete_klmedian_multi(k, 10, curves, dm) + clustering = fred.discrete_klmedian(k, 10, curves, dm) print("clustering cost is {}".format(clustering.value)) clustering.compute_assignment(curves) diff --git a/include/clustering.hpp b/include/clustering.hpp index 36bfdea..920b1de 100644 --- a/include/clustering.hpp +++ b/include/clustering.hpp @@ -11,8 +11,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #pragma once #include - -#include +#include #include "random.hpp" #include "curve.hpp" @@ -132,7 +131,7 @@ struct Clustering_Result { Clustering_Result gonzalez(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool arya = false, const Curves ¢er_domain = Curves(), const bool random_start_center = true) { - const auto start = boost::chrono::process_real_cpu_clock::now(); + const auto start = std::chrono::high_resolution_clock::now(); Clustering_Result result; if (in.empty()) return result; @@ -233,10 +232,10 @@ Clustering_Result gonzalez(const curve_number_t num_centers, const curve_size_t Curves simpl_centers; for (const auto center: centers) simpl_centers.push_back(simplified_in[center]); - auto end = boost::chrono::process_real_cpu_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); result.centers = simpl_centers; result.value = curr_maxdist; - result.running_time = (end-start).count() / 1000000000.0; + result.running_time = std::chrono::duration_cast(end - start).count(); return result; } @@ -245,7 +244,7 @@ Clustering_Result arya(const curve_number_t num_centers, const curve_size_t ell, } Clustering_Result one_median_sampling(const curve_size_t ell, const Curves &in, const double epsilon, const Curves ¢er_domain = Curves()) { - const auto start = boost::chrono::process_real_cpu_clock::now(); + const auto start = std::chrono::high_resolution_clock::now(); Clustering_Result result; std::vector centers; const Curves &simplified_in = center_domain; @@ -296,15 +295,15 @@ Clustering_Result one_median_sampling(const curve_size_t ell, const Curves &in, } centers.push_back(best_candidate); - auto end = boost::chrono::process_real_cpu_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); result.centers.push_back(simplified_in[centers[0]]); result.value = _center_cost_sum(in, simplified_in, centers, distances); - result.running_time = (end-start).count() / 1000000000.0; + result.running_time = std::chrono::duration_cast(end - start).count(); return result; } Clustering_Result one_median_exhaustive(const curve_size_t ell, const Curves &in, const Curves ¢er_domain = Curves()) { - const auto start = boost::chrono::process_real_cpu_clock::now(); + const auto start = std::chrono::high_resolution_clock::now(); Clustering_Result result; std::vector centers; const Curves &simplified_in = center_domain; @@ -344,15 +343,15 @@ Clustering_Result one_median_exhaustive(const curve_size_t ell, const Curves &in } centers.push_back(best_candidate); - auto end = boost::chrono::process_real_cpu_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); result.centers.push_back(simplified_in[centers[0]]); result.value = best_objective_value; - result.running_time = (end-start).count() / 1000000000.0; + result.running_time = std::chrono::duration_cast(end - start).count(); return result; } Clustering_Result two_two_dtw_one_two_median(const Curves &in, const bool with_assignment = false) { - const auto start = boost::chrono::process_real_cpu_clock::now(); + const auto start = std::chrono::high_resolution_clock::now(); Clustering_Result result; const auto n = in.size(); @@ -431,15 +430,15 @@ Clustering_Result two_two_dtw_one_two_median(const Curves &in, const bool with_a for (const auto &p : S1) cost += p.dist(mu1); for (const auto &p : S2) cost += p.dist(mu2); - auto end = boost::chrono::process_real_cpu_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); result.centers.push_back(center_curve); result.value = cost; - result.running_time = (end-start).count() / 1000000000.0; + result.running_time = std::chrono::duration_cast(end - start).count(); return result; } Clustering_Result two_two_dtw_one_two_median_exact(const Curves &in, const bool with_assignment = false) { - const auto start = boost::chrono::process_real_cpu_clock::now(); + const auto start = std::chrono::high_resolution_clock::now(); Clustering_Result result; Curve best_center(in.dimensions()); const auto infty = std::numeric_limits::infinity(); @@ -508,10 +507,10 @@ Clustering_Result two_two_dtw_one_two_median_exact(const Curves &in, const bool } } - auto end = boost::chrono::process_real_cpu_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); result.centers.push_back(best_center); result.value = best; - result.running_time = (end-start).count() / 1000000000.0; + result.running_time = std::chrono::duration_cast(end - start).count(); return result; } diff --git a/include/coreset.hpp b/include/coreset.hpp index a1a47c8..6533610 100644 --- a/include/coreset.hpp +++ b/include/coreset.hpp @@ -9,16 +9,10 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI */ #pragma once -#include -#include - #include "types.hpp" #include "clustering.hpp" #include "frechet.hpp" -namespace np = boost::python::numpy; -namespace p = boost::python; - namespace Coreset { class Onemedian_Coreset { @@ -62,31 +56,31 @@ class Onemedian_Coreset { } } - inline np::ndarray get_lambda() const { - np::dtype dt = np::dtype::get_builtin(); - p::list l; - np::ndarray result = np::array(l, dt); - for (const auto &elem: lambda) { - l.append(elem); - } - result = np::array(l, dt); - return result; - } +// inline np::ndarray get_lambda() const { +// np::dtype dt = np::dtype::get_builtin(); +// p::list l; +// np::ndarray result = np::array(l, dt); +// for (const auto &elem: lambda) { +// l.append(elem); +// } +// result = np::array(l, dt); +// return result; +// } inline distance_t get_Lambda() const { return Lambda; } - inline np::ndarray get_curves() const { - np::dtype dt = np::dtype::get_builtin(); - p::list l; - np::ndarray result = np::array(l, dt); - for (const auto &elem: coreset) { - l.append(elem); - } - result = np::array(l, dt); - return result; - } +// inline np::ndarray get_curves() const { +// np::dtype dt = np::dtype::get_builtin(); +// p::list l; +// np::ndarray result = np::array(l, dt); +// for (const auto &elem: coreset) { +// l.append(elem); +// } +// result = np::array(l, dt); +// return result; +// } inline distance_t get_cost() const { return cost; diff --git a/include/curve.hpp b/include/curve.hpp index 58f72d0..c73a390 100644 --- a/include/curve.hpp +++ b/include/curve.hpp @@ -14,15 +14,14 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #include #include -#include -#include +#include +#include #include "types.hpp" #include "point.hpp" #include "interval.hpp" -namespace np = boost::python::numpy; -namespace p = boost::python; +namespace py = pybind11; class Curve : private Points { @@ -35,7 +34,7 @@ class Curve : private Points { inline Curve(const dimensions_t dim, const std::string &name = "unnamed curve") : Points(dim), vstart{0}, vend{0}, name{name} {} inline Curve(const curve_size_t m, const dimensions_t dimensions, const std::string &name = "unnamed curve") : Points(m, Point(dimensions)), vstart{0}, vend{m-1}, name{name} {} Curve(const Points &points, const std::string &name = "unnamed curve"); - Curve(const np::ndarray &in, const std::string &name = "unnamed curve"); + Curve(const py::array_t &in, const std::string &name = "unnamed curve"); inline Point& get(const curve_size_t i) { return Points::operator[](vstart + i); @@ -122,13 +121,11 @@ class Curve : private Points { } inline auto as_ndarray() const { - np::dtype dt = np::dtype::get_builtin(); - p::list l; - np::ndarray result = np::array(l, dt); + py::list l; for (const Point &elem : *this) { l.append(elem.as_ndarray()); } - result = np::array(l, dt); + auto result = py::array_t(l); return result; } diff --git a/include/dynamic_time_warping.hpp b/include/dynamic_time_warping.hpp index fdaba8a..4253f9b 100644 --- a/include/dynamic_time_warping.hpp +++ b/include/dynamic_time_warping.hpp @@ -10,8 +10,6 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #pragma once -#include - #include "types.hpp" #include "point.hpp" #include "interval.hpp" diff --git a/include/frechet.hpp b/include/frechet.hpp index 8eaea6c..1b3f85a 100644 --- a/include/frechet.hpp +++ b/include/frechet.hpp @@ -10,8 +10,6 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #pragma once -#include - #include "types.hpp" #include "point.hpp" #include "interval.hpp" diff --git a/include/point.hpp b/include/point.hpp index dacae23..58e92c1 100644 --- a/include/point.hpp +++ b/include/point.hpp @@ -16,14 +16,13 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #include #include -#include -#include +#include +#include #include "types.hpp" #include "interval.hpp" -namespace np = boost::python::numpy; -namespace p = boost::python; +namespace py = pybind11; class Point : public Coordinates { public: @@ -166,14 +165,12 @@ class Point : public Coordinates { return Interval(std::max(0.L, lambda1), std::min(1.L, lambda2)); } - inline auto as_ndarray() const { - np::dtype dt = np::dtype::get_builtin(); - p::list l; - np::ndarray result = np::array(l, dt); + inline auto as_ndarray() const { + py::list l; for (const coordinate_t &elem : *this) { l.append(elem); } - result = np::array(l, dt); + auto result = py::array_t(l); return result; } @@ -222,11 +219,11 @@ class Points : public std::vector { } inline auto as_ndarray() const { - p::list l; + py::list l; for (const Point &elem : *this) { l.append(elem.as_ndarray()); } - auto result = np::array(l); + auto result = py::array_t(l); return result; } diff --git a/include/types.hpp b/include/types.hpp index 121c934..526fffd 100644 --- a/include/types.hpp +++ b/include/types.hpp @@ -10,6 +10,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #pragma once +#include + typedef double distance_t; typedef double coordinate_t; typedef long double parameter_t; diff --git a/py/.gitignore b/py/.gitignore deleted file mode 100644 index 694190f..0000000 --- a/py/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!Fred -!test diff --git a/pybind11 b/pybind11 new file mode 160000 index 0000000..617cb65 --- /dev/null +++ b/pybind11 @@ -0,0 +1 @@ +Subproject commit 617cb653ec513b4e02d7104b05fb75c26d10e79e diff --git a/py/setup.py b/setup.py similarity index 77% rename from py/setup.py rename to setup.py index 87b13cb..79db323 100644 --- a/py/setup.py +++ b/setup.py @@ -16,11 +16,10 @@ def __init__(self, name, sourcedir=''): Extension.__init__(self, name, sources=[]) self.sourcedir = os.path.abspath(sourcedir) - class CMakeBuild(build_ext): def run(self): try: - out = subprocess.check_output(['cmake', '..', '--version']) + out = subprocess.check_output(['cmake', '.', '--version']) except OSError: raise RuntimeError( "CMake must be installed to build the following extensions: " + @@ -39,15 +38,7 @@ def build_extension(self, ext): extdir = os.path.abspath( os.path.dirname(self.get_ext_fullpath(ext.name))) - import sys - bpy = "python{}{}".format(sys.version_info[0], sys.version_info[1]) - bnpy = "numpy{}{}".format(sys.version_info[0], sys.version_info[1]) - - cmake_args = ['-DBPY=' + bpy, '-DBNPY=' + bnpy, - '-DBOOST_ROOT=~/boost', - '-DBOOST_LIBRARYDIR=~/boost/lib', - '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, - '-DPYTHON_EXECUTABLE=' + sys.executable,] + cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir] cfg = 'Debug' if self.debug else 'Release' build_args = ['--config', cfg] @@ -61,7 +52,7 @@ def build_extension(self, ext): build_args += ['--', '/m'] else: cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] - build_args += ['--', '-j5'] + build_args += ['--', '-j'] env = os.environ.copy() env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format( @@ -69,18 +60,19 @@ def build_extension(self, ext): self.distribution.get_version()) if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) - subprocess.check_call(['cmake', "{}/..".format(ext.sourcedir)] + cmake_args, + subprocess.check_call(['cmake', "{}".format(ext.sourcedir)] + cmake_args, cwd=self.build_temp, env=env) subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) setup( - name='Fred', - version='1.0', + name='Fred-Frechet', + version='1.6', author='Dennis Rohde', author_email='dennis.rohde@tu-dortmund.de', description='Frechet Distance and Clustering Library', - long_description='', + long_description='A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves.', + url="http://fred.dennisrohde.work", packages=setuptools.find_packages(), ext_package="Fred", ext_modules=[CMakeExtension('backend')], diff --git a/src/curve.cpp b/src/curve.cpp index 77b3d3b..4529730 100644 --- a/src/curve.cpp +++ b/src/curve.cpp @@ -23,27 +23,24 @@ Curve::Curve(const Points &points, const std::string &name) : Points(points), vs #endif } -Curve::Curve(const np::ndarray &in, const std::string &name) : Points(in.shape(0), in.get_nd() > 1 ? in.shape(1) : 1), name{name}, vstart{0}, vend{Points::size() - 1} { - const dimensions_t n_dimensions = in.get_nd(); - if (n_dimensions > 2){ - std::cerr << "A Curve requires a 1- or 2-dimensional numpy array of type " << typeid(coordinate_t).name() << "." << std::endl; - std::cerr << "Current dimensions: " << n_dimensions << std::endl; - std::cerr << "Current type: " << p::extract(p::str(in.get_dtype())) << std::endl; - std::cerr << "WARNING: constructed empty curve" << std::endl; - return; - } - if (in.get_dtype() != np::dtype::get_builtin()) { - std::cerr << "A Curve requires a 1- or 2-dimensional numpy array of type " << typeid(coordinate_t).name() << "." << std::endl; - std::cerr << "Current dimensions: " << n_dimensions << std::endl; - std::cerr << "Current type: " << p::extract(p::str(in.get_dtype())) << std::endl; - std::cerr << "WARNING: constructed empty curve" << std::endl; - return; - } - const curve_size_t number_points = in.shape(0); - const Py_intptr_t* strides = in.get_strides(); +Curve::Curve(const py::array_t &in, const std::string &name) : Points(in.request().shape[0], in.request().ndim > 1 ? in.request().shape[1] : 1), name{name}, vstart{0}, vend{Points::size() - 1} { + auto buffer = in.request(); + + const dimensions_t n_dimensions = buffer.ndim; + + if (n_dimensions > 2){ + std::cerr << "A Curve requires a 1- or 2-dimensional numpy array of type " << typeid(coordinate_t).name() << "." << std::endl; + std::cerr << "Current dimensions: " << n_dimensions << std::endl; + std::cerr << "WARNING: constructed empty curve" << std::endl; + return; + } + + const curve_size_t number_points = buffer.shape[0]; if (n_dimensions == 2) { - const dimensions_t point_size = in.shape(1); + const dimensions_t point_size = buffer.shape[1]; + + auto accessor = in.unchecked<2>(); #if DEBUG std::cout << "constructing curve of size " << number_points << " and " << point_size << " dimensions" << std::endl; @@ -52,13 +49,15 @@ Curve::Curve(const np::ndarray &in, const std::string &name) : Points(in.shape(0 #pragma omp parallel for simd for (curve_size_t i = 0; i < number_points; ++i) { for(curve_size_t j = 0; j < point_size; ++j){ - Points::operator[](i)[j] = *reinterpret_cast(in.get_data() + i * strides[0] + j * strides[1]); + Points::operator[](i)[j] = accessor(i, j); } } - } else { + } else { + auto accessor = in.unchecked<1>(); + #pragma omp parallel for simd for (curve_size_t i = 0; i < number_points; ++i) { - Points::operator[](i)[0] = *reinterpret_cast(in.get_data() + i * strides[0]); + Points::operator[](i)[0] = accessor(i); } } diff --git a/src/dynamic_time_warping.cpp b/src/dynamic_time_warping.cpp index e9a5483..d93bb04 100644 --- a/src/dynamic_time_warping.cpp +++ b/src/dynamic_time_warping.cpp @@ -10,8 +10,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #include #include - -#include +#include #include "dynamic_time_warping.hpp" @@ -27,7 +26,7 @@ std::string Distance::repr() const { Distance distance(const Curve &curve1, const Curve &curve2) { Distance result; - auto start = boost::chrono::process_real_cpu_clock::now(); + auto start = std::chrono::high_resolution_clock::now(); std::vector> a(curve1.complexity() + 1, std::vector(curve2.complexity() + 1, std::numeric_limits::infinity())); a[0][0] = 0; for (curve_size_t i = 1; i <= curve1.complexity(); ++i) { @@ -37,8 +36,8 @@ Distance distance(const Curve &curve1, const Curve &curve2) { } } auto value = a[curve1.complexity()][curve2.complexity()]; - auto end = boost::chrono::process_real_cpu_clock::now(); - result.time = (end-start).count() / 1000000000.0; + auto end = std::chrono::high_resolution_clock::now(); + result.time = std::chrono::duration_cast(end - start).count(); result.value = value; return result; diff --git a/src/frechet.cpp b/src/frechet.cpp index 7234402..8d10db4 100644 --- a/src/frechet.cpp +++ b/src/frechet.cpp @@ -10,8 +10,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #include #include - -#include +#include #include "frechet.hpp" @@ -42,17 +41,17 @@ Distance distance(const Curve &curve1, const Curve &curve2) { return result; } - auto start = boost::chrono::process_real_cpu_clock::now(); + auto start = std::chrono::high_resolution_clock::now(); const distance_t lb = std::sqrt(std::max(curve1[0].dist_sqr(curve2[0]), curve1[curve1.complexity()-1].dist_sqr(curve2[curve2.complexity()-1]))); const distance_t ub = _greedy_upper_bound(curve1, curve2); - auto end = boost::chrono::process_real_cpu_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); #if DEBUG std::cout << "narrowed to [" << lb << ", " << ub.value << "]" << std::endl; #endif auto dist = _distance(curve1, curve2, ub, lb); - dist.time_bounds = (end-start).count() / 1000000000.0; + dist.time_bounds = std::chrono::duration_cast(end - start).count(); if (round) dist.value = std::round(dist.value * 1e3) / 1e3; return dist; @@ -60,7 +59,7 @@ Distance distance(const Curve &curve1, const Curve &curve2) { Distance _distance(const Curve &curve1, const Curve &curve2, distance_t ub, distance_t lb) { Distance result; - auto start = boost::chrono::process_real_cpu_clock::now(); + auto start = std::chrono::high_resolution_clock::now(); distance_t split = (ub + lb)/2; std::size_t number_searches = 0; @@ -96,9 +95,9 @@ Distance _distance(const Curve &curve1, const Curve &curve2, distance_t ub, dist } distance_t value = (ub + lb)/2.; - auto end = boost::chrono::process_real_cpu_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); result.value = value; - result.time_searches = (end-start).count() / 1000000000.0; + result.time_searches = std::chrono::duration_cast(end - start).count(); result.number_searches = number_searches; return result; } @@ -230,11 +229,11 @@ std::string Distance::repr() const { Distance distance(const Curve &curve1, const Curve &curve2) { Distance result; - auto start = boost::chrono::process_real_cpu_clock::now(); + auto start = std::chrono::high_resolution_clock::now(); std::vector> a(curve1.complexity(), std::vector(curve2.complexity(), -1)); auto value = std::sqrt(_dp(a, curve1.complexity() - 1, curve2.complexity() - 1, curve1, curve2)); - auto end = boost::chrono::process_real_cpu_clock::now(); - result.time = (end-start).count() / 1000000000.0; + auto end = std::chrono::high_resolution_clock::now(); + result.time = std::chrono::duration_cast(end - start).count(); result.value = value; return result; diff --git a/src/fred_python_wrapper.cpp b/src/fred_python_wrapper.cpp index 9166385..4de24eb 100644 --- a/src/fred_python_wrapper.cpp +++ b/src/fred_python_wrapper.cpp @@ -8,7 +8,8 @@ The above copyright notice and this permission notice shall be included in all c THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include +#include +#include #include "curve.hpp" #include "point.hpp" @@ -20,8 +21,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #include "simplification.hpp" #include "dynamic_time_warping.hpp" -using namespace boost::python; -namespace np = boost::python::numpy; +namespace py = pybind11; + namespace fc = Frechet::Continuous; namespace fd = Frechet::Discrete; namespace ddtw = Dynamic_Time_Warping::Discrete; @@ -47,8 +48,6 @@ Curves jl_transform(const Curves &in, const double epsilon, const bool empirical return curvesrp; } -BOOST_PYTHON_FUNCTION_OVERLOADS(jl_transform_overloads, jl_transform, 2, 3); - void set_frechet_epsilon(const double eps) { fc::epsilon = eps; } @@ -75,40 +74,18 @@ Clustering::Clustering_Result dtw_one_median_exact(const Curves &in) { return result; } -Clustering::Clustering_Result klcenter_multi(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Clustering::Distance_Matrix &distances, const Curves ¢er_domain = Curves(), const bool random_start_center = true) { - auto result = Clustering::gonzalez(num_centers, ell, in, distances, false, center_domain, random_start_center); - return result; -} - -BOOST_PYTHON_FUNCTION_OVERLOADS(klcenter_multi_overloads, klcenter_multi, 4, 6); - -Clustering::Clustering_Result klcenter(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, const Curves ¢er_domain = Curves(), const bool random_start_center = true) { - Clustering::Distance_Matrix distances; +Clustering::Clustering_Result klcenter(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Clustering::Distance_Matrix &distances, const Curves ¢er_domain = Curves(), const bool random_start_center = true) { auto result = Clustering::gonzalez(num_centers, ell, in, distances, false, center_domain, random_start_center); return result; } -BOOST_PYTHON_FUNCTION_OVERLOADS(klcenter_overloads, klcenter, 3, 5); - -Clustering::Clustering_Result klmedian_multi(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Clustering::Distance_Matrix distances, const Curves ¢er_domain = Curves()) { - - auto result = Clustering::arya(num_centers, ell, in, distances, center_domain); - - return result; -} - -BOOST_PYTHON_FUNCTION_OVERLOADS(klmedian_multi_overloads, klmedian_multi, 4, 5); - -Clustering::Clustering_Result klmedian(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, const Curves ¢er_domain = Curves()) { +Clustering::Clustering_Result klmedian(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Clustering::Distance_Matrix distances, const Curves ¢er_domain = Curves()) { - auto distances = Clustering::Distance_Matrix(); auto result = Clustering::arya(num_centers, ell, in, distances, center_domain); return result; } -BOOST_PYTHON_FUNCTION_OVERLOADS(klmedian_overloads, klmedian, 3, 4); - // Clustering::Clustering_Result onemedian_sampling(const curve_size_t ell, Curves &in, const double epsilon, const bool with_assignment = false, const Curves ¢er_domain = Curves()) { // // auto result = Clustering::one_median_sampling(ell, in, epsilon, with_assignment); @@ -116,8 +93,6 @@ BOOST_PYTHON_FUNCTION_OVERLOADS(klmedian_overloads, klmedian, 3, 4); // return result; // } // -// BOOST_PYTHON_FUNCTION_OVERLOADS(onemedian_sampling_overloads, onemedian_sampling, 3, 5); -// // Clustering::Clustering_Result onemedian_exhaustive(const curve_size_t ell, Curves &in, const bool with_assignment = false, const Curves ¢er_domain = Curves()) { // // auto result = Clustering::one_median_exhaustive(ell, in, with_assignment); @@ -125,14 +100,11 @@ BOOST_PYTHON_FUNCTION_OVERLOADS(klmedian_overloads, klmedian, 3, 4); // return result; // } // -// BOOST_PYTHON_FUNCTION_OVERLOADS(onemedian_exhaustive_overloads, onemedian_exhaustive, 2, 4); -// // // Coreset::Onemedian_Coreset onemedian_coreset(const Curves &in, const curve_size_t ell, const double epsilon, const double constant = 1) { // return Coreset::Onemedian_Coreset(ell, in, epsilon, constant); // } // -// BOOST_PYTHON_FUNCTION_OVERLOADS(onemedian_coreset_overloads, onemedian_coreset, 3, 4); Curve weak_minimum_error_simplification(const Curve &curve, const curve_size_t l) { Simplification::Subcurve_Shortcut_Graph graph(const_cast(curve)); @@ -157,130 +129,135 @@ void set_number_threads(std::uint64_t number) { omp_set_dynamic(0); omp_set_num_threads(number); } - -BOOST_PYTHON_MODULE(backend) -{ - Py_Initialize(); - np::initialize(); +PYBIND11_MODULE(backend, m) { - scope().attr("default_epsilon_continuous_frechet") = default_epsilon; + m.attr("default_epsilon_continuous_frechet") = default_epsilon; - class_("Point", init()) + py::class_(m, "Point") + .def(py::init()) .def("__len__", &Point::dimensions) .def("__getitem__", &Point::get) .def("__str__", &Point::str) - .def("__iter__", iterator()) + .def("__iter__", [](Point &v) { return py::make_iterator(v.begin(), v.end()); }, py::keep_alive<0, 1>()) .def("__repr__", &Point::repr) - .add_property("values", &Point::as_ndarray) + //.def_property_readonly("values", &Point::as_ndarray) ; - class_("Points", init()) + py::class_(m, "Points") + .def(py::init()) .def("__len__", &Points::number) - .def("__getitem__", &Points::get, return_value_policy()) + .def("__getitem__", &Points::get, py::return_value_policy::reference) .def("__str__", &Points::str) - .def("__iter__", iterator()) + .def("__iter__", [](Points &v) { return py::make_iterator(v.begin(), v.end()); }, py::keep_alive<0, 1>()) .def("__repr__", &Points::repr) - .add_property("values", &Points::as_ndarray) - .add_property("centroid", &Points::centroid) + .def_property_readonly("values", &Points::as_ndarray) + .def_property_readonly("centroid", &Points::centroid) ; - class_("Curve", init()) - .def(init()) - .add_property("dimensions", &Curve::dimensions) - .add_property("complexity", &Curve::complexity) - .add_property("name", &Curve::get_name, &Curve::set_name) - .add_property("values", &Curve::as_ndarray) - .add_property("centroid", &Curve::centroid) - .def("__getitem__", &Curve::get, return_value_policy()) + py::class_(m, "Curve") + .def(py::init>()) + .def(py::init, std::string>()) + .def_property_readonly("dimensions", &Curve::dimensions) + .def_property_readonly("complexity", &Curve::complexity) + .def_property("name", &Curve::get_name, &Curve::set_name) + .def_property_readonly("values", &Curve::as_ndarray) + .def_property_readonly("centroid", &Curve::centroid) + .def("__getitem__", &Curve::get, py::return_value_policy::reference) .def("__len__", &Curve::complexity) .def("__str__", &Curve::str) - .def("__iter__", iterator()) + .def("__iter__", [](Curve &v) { return py::make_iterator(v.begin(), v.end()); }, py::keep_alive<0, 1>()) .def("__repr__", &Curve::repr) ; - class_("Curves", init<>()) - .add_property("m", &Curves::get_m) + py::class_(m, "Curves") + .def(py::init<>()) + .def_property_readonly("m", &Curves::get_m) .def("add", &Curves::add) .def("simplify", &Curves::simplify) - .def("__getitem__", &Curves::get, return_value_policy()) + .def("__getitem__", &Curves::get, py::return_value_policy::reference) .def("__len__", &Curves::number) .def("__str__", &Curves::str) - .def("__iter__", iterator()) + .def("__iter__", [](Curves &v) { return py::make_iterator(v.begin(), v.end()); }, py::keep_alive<0, 1>()) .def("__repr__", &Curves::repr) ; - class_("Continuous_Frechet_Distance", init<>()) - .add_property("time_searches", &fc::Distance::time_searches) - .add_property("time_bounds", &fc::Distance::time_bounds) - .add_property("number_searches", &fc::Distance::number_searches) - .add_property("value", &fc::Distance::value) + py::class_(m, "Continuous_Frechet_Distance") + .def(py::init<>()) + .def_readwrite("time_searches", &fc::Distance::time_searches) + .def_readwrite("time_bounds", &fc::Distance::time_bounds) + .def_readwrite("number_searches", &fc::Distance::number_searches) + .def_readwrite("value", &fc::Distance::value) .def("__repr__", &fc::Distance::repr) ; - class_("Discrete_Frechet_Distance", init<>()) - .add_property("time", &fd::Distance::time) - .add_property("value", &fd::Distance::value) + py::class_(m, "Discrete_Frechet_Distance") + .def(py::init<>()) + .def_readwrite("time", &fd::Distance::time) + .def_readwrite("value", &fd::Distance::value) .def("__repr__", &fd::Distance::repr) ; - class_("Discrete_Dynamic_Time_Warping_Distance", init<>()) - .add_property("time", &ddtw::Distance::time) - .add_property("value", &ddtw::Distance::value) + py::class_(m, "Discrete_Dynamic_Time_Warping_Distance") + .def(py::init<>()) + .def_readwrite("time", &ddtw::Distance::time) + .def_readwrite("value", &ddtw::Distance::value) .def("__repr__", &ddtw::Distance::repr) ; - class_("Distance_Matrix", init<>()); + py::class_(m, "Distance_Matrix") + .def(py::init<>()) + ; - class_("Clustering_Result", init<>()) - .add_property("value", &Clustering::Clustering_Result::value) - .add_property("time", &Clustering::Clustering_Result::running_time) - .add_property("assignment", &Clustering::Clustering_Result::assignment) - .def("__getitem__", &Clustering::Clustering_Result::get, return_value_policy()) + py::class_(m, "Clustering_Result") + .def(py::init<>()) + .def_readwrite("value", &Clustering::Clustering_Result::value) + .def_readwrite("time", &Clustering::Clustering_Result::running_time) + .def_readwrite("assignment", &Clustering::Clustering_Result::assignment) + .def("__getitem__", &Clustering::Clustering_Result::get, py::return_value_policy::reference) .def("__len__", &Clustering::Clustering_Result::size) - .def("__iter__", range(&Clustering::Clustering_Result::cbegin, &Clustering::Clustering_Result::cend)) + .def("__iter__", [](Clustering::Clustering_Result &v) { return py::make_iterator(v.cbegin(), v.cend()); }, py::keep_alive<0, 1>()) .def("compute_assignment", &Clustering::Clustering_Result::compute_assignment) ; - class_("Cluster_Assignment", init<>()) + py::class_(m, "Cluster_Assignment") + .def(py::init<>()) .def("__len__", &Clustering::Cluster_Assignment::size) .def("count", &Clustering::Cluster_Assignment::count) .def("get", &Clustering::Cluster_Assignment::get) ; - /*class_("Onemedian_coreset") - .add_property("lambd", &Coreset::Onemedian_Coreset::get_lambda) - .add_property("Lambd", &Coreset::Onemedian_Coreset::get_Lambda) - .add_property("cost", &Coreset::Onemedian_Coreset::get_cost) + /*py::class_("Onemedian_coreset") + .def_property("lambd", &Coreset::Onemedian_Coreset::get_lambda) + .def_property("Lambd", &Coreset::Onemedian_Coreset::get_Lambda) + .def_property("cost", &Coreset::Onemedian_Coreset::get_cost) .def("curves", &Coreset::Onemedian_Coreset::get_curves) ;*/ - def("set_continuous_frechet_epsilon", set_frechet_epsilon); - def("set_continuous_frechet_rounding", set_frechet_rounding); - def("get_continuous_frechet_epsilon", get_frechet_epsilon); - def("get_continuous_frechet_rounding", get_frechet_rounding); + m.def("set_continuous_frechet_epsilon", &set_frechet_epsilon); + m.def("set_continuous_frechet_rounding", &set_frechet_rounding); + m.def("get_continuous_frechet_epsilon", &get_frechet_epsilon); + m.def("get_continuous_frechet_rounding", &get_frechet_rounding); - def("continuous_frechet", continuous_frechet); - def("discrete_frechet", discrete_frechet); - def("discrete_dynamic_time_warping", discrete_dynamic_time_warping); + m.def("continuous_frechet", &continuous_frechet); + m.def("discrete_frechet", &discrete_frechet); + m.def("discrete_dynamic_time_warping", &discrete_dynamic_time_warping); - def("weak_minimum_error_simplification", weak_minimum_error_simplification); - def("approximate_weak_minimum_link_simplification", approximate_weak_minimum_link_simplification); - def("approximate_weak_minimum_error_simplification", approximate_weak_minimum_error_simplification); + m.def("weak_minimum_error_simplification", &weak_minimum_error_simplification); + m.def("approximate_weak_minimum_link_simplification", &approximate_weak_minimum_link_simplification); + m.def("approximate_weak_minimum_error_simplification", &approximate_weak_minimum_error_simplification); - def("dimension_reduction", jl_transform, jl_transform_overloads()); + m.def("dimension_reduction", &jl_transform, py::arg("in") = Curves(), py::arg("epsilon")= 0.5, py::arg("empirical_constant") = true); - def("discrete_klcenter", klcenter, klcenter_overloads()); - def("discrete_klmedian", klmedian, klmedian_overloads()); - def("discrete_klcenter_multi", klcenter_multi, klcenter_multi_overloads()); - def("discrete_klmedian_multi", klmedian_multi, klmedian_multi_overloads()); + m.def("discrete_klcenter", &klcenter, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("center_domain") = Curves(), py::arg("random_start_center") = true); + m.def("discrete_klmedian", &klmedian, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("center_domain") = Curves()); // these are experimental - def("two_two_dtw_one_two_median", dtw_one_median); - def("two_two_dtw_one_two_median_exact", dtw_one_median_exact); + //m.def("two_two_dtw_one_two_median", dtw_one_median); + //m.def("two_two_dtw_one_two_median_exact", dtw_one_median_exact); //def("discrete_onemedian_sampling", onemedian_sampling, onemedian_sampling_overloads()); //def("discrete_onemedian_exhaustive", onemedian_exhaustive, onemedian_exhaustive_overloads()); //def("onemedian_coreset", onemedian_coreset, onemedian_coreset_overloads()); - def("set_maximum_number_threads", set_number_threads); + m.def("set_maximum_number_threads", set_number_threads); } diff --git a/py/test/test.py b/test/test.py similarity index 100% rename from py/test/test.py rename to test/test.py