Skip to content

Commit

Permalink
Switch to PyBind11
Browse files Browse the repository at this point in the history
  • Loading branch information
derohde committed Aug 16, 2021
1 parent e70b773 commit a464444
Show file tree
Hide file tree
Showing 23 changed files with 209 additions and 292 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "pybind11"]
path = pybind11
url = https://github.com/pybind/pybind11.git
21 changes: 7 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ project(Fred LANGUAGES CXX C)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Ofast")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive") #supress error in older gcc
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
Expand All @@ -12,24 +12,14 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-trapping-math")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftree-vectorize")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopt-info-vec")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopt-info-loop")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--no-undefined")
#set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++ -static")

include_directories(${CMAKE_SOURCE_DIR}/include)

find_package(PythonInterp REQUIRED)
find_package(PythonLibs REQUIRED)
find_package(Boost 1.63 COMPONENTS system chrono ${BPY} ${BNPY} REQUIRED)
find_package(OpenMP REQUIRED)

add_definitions(-D_GLIBCXX_PARALLEL)

include_directories(${Boost_INCLUDE_DIRS})
include_directories(${PYTHON_INCLUDE_DIRS})

link_libraries(${Boost_LIBRARIES})
link_libraries(${PYTHON_LIBRARIES})

find_package(OpenMP)
if(OpenMP_CXX_FOUND)
link_libraries(OpenMP::OpenMP_CXX)
endif()
Expand All @@ -40,10 +30,13 @@ if(NOT TARGET OpenMP::OpenMP_CXX)
PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
set_property(TARGET OpenMP::OpenMP_CXX
PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS} Threads::Threads)
link_libraries(OpenMP::OpenMP_CXX)

endif()
link_libraries(OpenMP::OpenMP_CXX)

add_subdirectory(pybind11)

PYTHON_ADD_MODULE(backend
pybind11_add_module(backend
src/fred_python_wrapper.cpp
src/curve.cpp
src/point.cpp
Expand Down
File renamed without changes.
Binary file added Fred/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
File renamed without changes.
File renamed without changes.
14 changes: 6 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
all: pre install

pre:
sudo apt install -y libboost-all-dev
sudo apt-get install -y python3-setuptools
sudo apt-get install -y python3-numpy
sudo apt-get install -y python3-matplotlib
sudo apt-get install -y cmake
git submodule init
git submodule update

install:
cd py && /usr/bin/python3 ./setup.py install --user
python setup.py install --user

clean:
rm -r py/dist py/build/ py/Fred.egg-info/
pip3 uninstall Fred -y
rm -r dist build/ Fred_Frechet.egg-info/ & pip uninstall Fred-Frechet -y

62 changes: 18 additions & 44 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Fred ![alt text](https://raw.githubusercontent.com/derohde/Fred/master/logo/logo.png "Fred logo")
A fast, scalable and lightweight C++ Fréchet distance library, exposed to Python and focused on (k,l)-clustering of polygonal curves.

### NOW USING PYBIND11 INSTEAD OF BOOST!

## Ingredients C++ Backend
`import Fred.backend as fred`

Expand Down Expand Up @@ -52,55 +54,31 @@ By default, Fred will automatically determine the number of threads to use. If y

A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.discrete_klcenter` and `fred.discrete_klmedian`. As the name suggests, it stores the distances already computed.

#### discrete (k,l)-center clustering (continuous Fréchet) -- multiple calls
#### discrete (k,l)-center clustering (continuous Fréchet)
- from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616)
- signature: `fred.discrete_klcenter(k, l, curves, distances, center_domain, random_first_center)` with parameters
- `k`: number of centers
- `l`: maximum complexity of the centers, only used when center_domain is default value
- `distances`: `fred.Distance_Matrix`
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
- `center_domain`: possible centers, defaults to empty `fred.Curves()`, in this case the input is simplified and used as center domain
- `random_first_center`: determines if first center is chosen uniformly at random or first curve is used as first center, optional, defaults to true
- returns: `fred.Clustering_Result` with members
- `value`: objective value
- `time`: running-time
- `assignment`: empty if compute_assignment has not been called

#### discrete (k,l)-median clustering (continuous Fréchet) -- multiple calls
#### discrete (k,l)-median clustering (continuous Fréchet)
- Algorithm 6 in [**Coresets for (k,l)-Clustering under the Fréchet distance**](https://arxiv.org/pdf/1901.01870.pdf) + simplification
- signature: `fred.discrete_klmedian(k, l, curves, distances, center_domain)` with parameters
- `k`: number of centers
- `l`: maximum complexity of the centers, only used when center_domain is default value
- `distances`: `fred.Distance_Matrix`
- `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain
- returns: `fred.Clustering_Result` with members
- `value`: objective value
- `time`: running-time
- `assignment`: empty if compute_assignment has not been called

#### discrete (k,l)-center clustering (continuous Fréchet) -- oneshot
- from [**Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616)
- signature: `fred.discrete_klcenter(k, l, curves, center_domain, random_first_center)` with parameters
- `k`: number of centers
- `l`: maximum complexity of the centers, only used when center_domain is default value
- `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain
- `random_first_center`: determines if first center is chosen uniformly at random or first curve is used as first center, optional, defaults to true
- returns: `fred.Clustering_Result` with members
- `value`: objective value
- `time`: running-time
- `assignment`: empty if compute_assignment has not been called

#### discrete (k,l)-median clustering (continuous Fréchet) -- oneshot
- Algorithm 6 in [**Coresets for (k,l)-Clustering under the Fréchet distance**](https://arxiv.org/pdf/1901.01870.pdf) + simplification
- signature: `fred.discrete_klmedian(k, l, curves, center_domain)` with parameters
- `k`: number of centers
- `l`: maximum complexity of the centers, only used when center_domain is default value
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
- `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain
- returns: `fred.Clustering_Result` with members
- `value`: objective value
- `time`: running-time
- `assignment`: empty if compute_assignment has not been called


#### Clustering Result
- signature: `fred.Clustering_Result`
- methods: `len(fred.Clustering_Result)`: number of centers, `fred.Clustering_Result[i]`: get ith center, `fred.Clustering_Result.compute_assignment(fred.Curves)`: assigns every curve to its nearest center
Expand All @@ -112,27 +90,23 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di

### Dimension Reduction via Gaussian Random Projection
- [Section 2 in **Random Projections and Sampling Algorithms for Clustering of High Dimensional Polygonal Curves**](https://papers.nips.cc/paper/9443-random-projections-and-sampling-algorithms-for-clustering-of-high-dimensional-polygonal-curves)
- signature: `fred.dimension_reduction(curves, epsilon, empirical_constant)` with parameters `epsilon`: (1+epsilon) approximation parameter, `empirical_constant`: use constant of empirical study (faster, but less accurate)
- signature: `fred.dimension_reduction(curves, epsilon, empirical_constant)` with parameters `epsilon`: (1+epsilon) approximation parameter, `empirical_constant`: use constant of empirical study (faster, but less accurate), defaults to `True`
- returns: `fred.Curves` collection of curves

## Installation
Get requirements under Ubuntu: `make pre`

Python3 installation into userdir: `make install`

### If something does not work with Boost
### Requirements

Manual installation of Boost
You need to have the following installed:
- git
- OpenMP (should be part of your compiler)

That's it!

- `mkdir $HOME/boost` (This folder is hardcoded in setup.py, another location won't work.)
- `cd /tmp`
- `wget https://dl.bintray.com/boostorg/release/1.73.0/source/boost_1_73_0.tar.gz`
- `tar -xzf boost_1_73_0.tar.gz`
- `cd boost_1_73_0`
- `./bootstrap.sh --with-python=/usr/bin/python3`
- `./b2 install --prefix=$HOME/boost`
### Installation Procedure

After that, go back to Fred's folder and run `make clean` and then `make install`
- Variant 1: simply run `pip install git+https://github.com/derohde/Fred`
- Variant 2: clone repository and run `make` for installation into userdir

## Test
Just run `python py/test.py`.
Expand Down Expand Up @@ -213,10 +187,10 @@ dm = fred.Distance_Matrix() # computing the Fréchet distance is costly,

for k in range(2, 6):

clustering = fred.discrete_klcenter_multi(k, 10, curves, dm)
clustering = fred.discrete_klcenter(k, 10, curves, dm)
print("clustering cost is {}".format(clustering.value))

clustering = fred.discrete_klmedian_multi(k, 10, curves, dm)
clustering = fred.discrete_klmedian(k, 10, curves, dm)
print("clustering cost is {}".format(clustering.value))

clustering.compute_assignment(curves)
Expand Down
33 changes: 16 additions & 17 deletions include/clustering.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
#pragma once

#include <unordered_map>

#include <boost/chrono/include.hpp>
#include <chrono>

#include "random.hpp"
#include "curve.hpp"
Expand Down Expand Up @@ -132,7 +131,7 @@ struct Clustering_Result {

Clustering_Result gonzalez(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool arya = false, const Curves &center_domain = Curves(), const bool random_start_center = true) {

const auto start = boost::chrono::process_real_cpu_clock::now();
const auto start = std::chrono::high_resolution_clock::now();
Clustering_Result result;

if (in.empty()) return result;
Expand Down Expand Up @@ -233,10 +232,10 @@ Clustering_Result gonzalez(const curve_number_t num_centers, const curve_size_t
Curves simpl_centers;
for (const auto center: centers) simpl_centers.push_back(simplified_in[center]);

auto end = boost::chrono::process_real_cpu_clock::now();
auto end = std::chrono::high_resolution_clock::now();
result.centers = simpl_centers;
result.value = curr_maxdist;
result.running_time = (end-start).count() / 1000000000.0;
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
return result;
}

Expand All @@ -245,7 +244,7 @@ Clustering_Result arya(const curve_number_t num_centers, const curve_size_t ell,
}

Clustering_Result one_median_sampling(const curve_size_t ell, const Curves &in, const double epsilon, const Curves &center_domain = Curves()) {
const auto start = boost::chrono::process_real_cpu_clock::now();
const auto start = std::chrono::high_resolution_clock::now();
Clustering_Result result;
std::vector<curve_number_t> centers;
const Curves &simplified_in = center_domain;
Expand Down Expand Up @@ -296,15 +295,15 @@ Clustering_Result one_median_sampling(const curve_size_t ell, const Curves &in,
}
centers.push_back(best_candidate);

auto end = boost::chrono::process_real_cpu_clock::now();
auto end = std::chrono::high_resolution_clock::now();
result.centers.push_back(simplified_in[centers[0]]);
result.value = _center_cost_sum(in, simplified_in, centers, distances);
result.running_time = (end-start).count() / 1000000000.0;
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
return result;
}

Clustering_Result one_median_exhaustive(const curve_size_t ell, const Curves &in, const Curves &center_domain = Curves()) {
const auto start = boost::chrono::process_real_cpu_clock::now();
const auto start = std::chrono::high_resolution_clock::now();
Clustering_Result result;
std::vector<curve_number_t> centers;
const Curves &simplified_in = center_domain;
Expand Down Expand Up @@ -344,15 +343,15 @@ Clustering_Result one_median_exhaustive(const curve_size_t ell, const Curves &in
}
centers.push_back(best_candidate);

auto end = boost::chrono::process_real_cpu_clock::now();
auto end = std::chrono::high_resolution_clock::now();
result.centers.push_back(simplified_in[centers[0]]);
result.value = best_objective_value;
result.running_time = (end-start).count() / 1000000000.0;
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
return result;
}

Clustering_Result two_two_dtw_one_two_median(const Curves &in, const bool with_assignment = false) {
const auto start = boost::chrono::process_real_cpu_clock::now();
const auto start = std::chrono::high_resolution_clock::now();
Clustering_Result result;

const auto n = in.size();
Expand Down Expand Up @@ -431,15 +430,15 @@ Clustering_Result two_two_dtw_one_two_median(const Curves &in, const bool with_a
for (const auto &p : S1) cost += p.dist(mu1);
for (const auto &p : S2) cost += p.dist(mu2);

auto end = boost::chrono::process_real_cpu_clock::now();
auto end = std::chrono::high_resolution_clock::now();
result.centers.push_back(center_curve);
result.value = cost;
result.running_time = (end-start).count() / 1000000000.0;
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
return result;
}

Clustering_Result two_two_dtw_one_two_median_exact(const Curves &in, const bool with_assignment = false) {
const auto start = boost::chrono::process_real_cpu_clock::now();
const auto start = std::chrono::high_resolution_clock::now();
Clustering_Result result;
Curve best_center(in.dimensions());
const auto infty = std::numeric_limits<distance_t>::infinity();
Expand Down Expand Up @@ -508,10 +507,10 @@ Clustering_Result two_two_dtw_one_two_median_exact(const Curves &in, const bool
}
}

auto end = boost::chrono::process_real_cpu_clock::now();
auto end = std::chrono::high_resolution_clock::now();
result.centers.push_back(best_center);
result.value = best;
result.running_time = (end-start).count() / 1000000000.0;
result.running_time = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
return result;
}

Expand Down
46 changes: 20 additions & 26 deletions include/coreset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,10 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
*/
#pragma once

#include <boost/python.hpp>
#include <boost/python/numpy.hpp>

#include "types.hpp"
#include "clustering.hpp"
#include "frechet.hpp"

namespace np = boost::python::numpy;
namespace p = boost::python;

namespace Coreset {

class Onemedian_Coreset {
Expand Down Expand Up @@ -62,31 +56,31 @@ class Onemedian_Coreset {
}
}

inline np::ndarray get_lambda() const {
np::dtype dt = np::dtype::get_builtin<distance_t>();
p::list l;
np::ndarray result = np::array(l, dt);
for (const auto &elem: lambda) {
l.append(elem);
}
result = np::array(l, dt);
return result;
}
// inline np::ndarray get_lambda() const {
// np::dtype dt = np::dtype::get_builtin<distance_t>();
// p::list l;
// np::ndarray result = np::array(l, dt);
// for (const auto &elem: lambda) {
// l.append(elem);
// }
// result = np::array(l, dt);
// return result;
// }

inline distance_t get_Lambda() const {
return Lambda;
}

inline np::ndarray get_curves() const {
np::dtype dt = np::dtype::get_builtin<curve_number_t>();
p::list l;
np::ndarray result = np::array(l, dt);
for (const auto &elem: coreset) {
l.append(elem);
}
result = np::array(l, dt);
return result;
}
// inline np::ndarray get_curves() const {
// np::dtype dt = np::dtype::get_builtin<curve_number_t>();
// p::list l;
// np::ndarray result = np::array(l, dt);
// for (const auto &elem: coreset) {
// l.append(elem);
// }
// result = np::array(l, dt);
// return result;
// }

inline distance_t get_cost() const {
return cost;
Expand Down
Loading

0 comments on commit a464444

Please sign in to comment.