Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #114

Open
wants to merge 24 commits into
base: dev
Choose a base branch
from
Open

Dev #114

Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
e8c0df8
warning --
ThomasRetornaz Feb 26, 2018
d70bbc3
fix TestData& operator=(const TestData& other) assignment operator
ThomasRetornaz Feb 26, 2018
8d91b7e
wip issue #107 add transform/reduce algorithm
ThomasRetornaz Feb 26, 2018
c679f81
issue #107 add fill,copy,copy_n algorithm
ThomasRetornaz Feb 26, 2018
4bc2c63
issue #107 gcc compil fix
Feb 28, 2018
22e5357
issue #107 add search max/min
ThomasRetornaz Mar 5, 2018
5c48da0
issue #107 add find,find_if,find_if_not
ThomasRetornaz Mar 5, 2018
ab3e92b
issue #107 fix gcc and release mode for find*
Mar 5, 2018
b0735f5
issue #107 add max_element and min_element
ThomasRetornaz Mar 6, 2018
0025a8f
issue #107 gcc compil/warning fix
Mar 6, 2018
ae48025
issue #107 add count, count_if
ThomasRetornaz Mar 7, 2018
c8d12f4
Merge branch 'dev' of https://github.com/ThomasRetornaz/libsimdpp int…
ThomasRetornaz Mar 7, 2018
dc33d00
issue #107 add all_of, any_of, none_of
Mar 7, 2018
d6a6bfa
issue #107 add replace,replace_if
Mar 8, 2018
f95aa05
issue #107 add equal and lexicographic_compare
Mar 10, 2018
b8b0b34
issue #107 add transform_reduce
Mar 11, 2018
179cc90
issue #107 ras
Mar 11, 2018
f57deb0
issue #107 visual compilation fix
ThomasRetornaz Mar 11, 2018
3d9fb98
issue #107
ThomasRetornaz Apr 9, 2018
d8b2eda
issue #107 gcc and c++11 only compil fix
Apr 9, 2018
9a3636a
issue #107
ThomasRetornaz Apr 11, 2018
6ae2a4a
issue #115 Proof of concept
ThomasRetornaz Jul 16, 2018
5977ee9
#issue 115 linux/gcc fix
Jul 20, 2018
e286519
* warn --
ThomasRetornaz Oct 26, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
* warn --
* add binary flavor of transform reduce bench
ThomasRetornaz committed Oct 26, 2018
commit e28651968e2c6805d20f6d22e415e4da3b376d74
4 changes: 3 additions & 1 deletion bench/insn/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -8,7 +8,9 @@ set(TEST_BENCH_SOURCES

set(BENCH_INSN_ARCH_SOURCES
algorithm/transform_unary.cc
algorithm/transform_binary.cc
algorithm/reduce_unary.cc
algorithm/reduce_binary.cc
load_store.cc
)

@@ -23,7 +25,7 @@ foreach(ARCH ${COMPILABLE_ARCHS}})
if(SIMDPP_MSVC)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
# enable _vectorcall on i386 builds (only works on MSVC 2013)
set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Gv")
#set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Gv")
endif()
elseif(SIMDPP_MSVC_INTEL)
set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "/Qstd=c++11")
155 changes: 155 additions & 0 deletions bench/insn/algorithm/reduce_binary.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/* Copyright (C) 2018 Povilas Kanapickas <povilas@radix.lt>
Copyright (C) 2018 Thomas Retornaz <thomas.retornaz@mines-paris.org>

Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/

#include "benchmark/benchmark.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <vector>

#include <simdpp/simd.h>
//algorithm
#include <simdpp/algorithm/reduce.h>


namespace {

// Addition functor used by the reduce benchmarks.
// Provides a non-template overload for the element type T and a generic
// overload for any other operand type U that supports operator+.
template<typename T>
struct BinaryOpPlus
{
    BinaryOpPlus() {}

    // Element-type overload: returns lhs + rhs converted back to T.
    SIMDPP_INL T operator()(T const &lhs, T const &rhs) const SIMDPP_NOEXCEPT
    {
        return lhs + rhs;
    }

    // Generic overload: same operation for any operand type U.
    template<typename U>
    SIMDPP_INL U operator()(U const &lhs, U const &rhs) const SIMDPP_NOEXCEPT
    {
        return lhs + rhs;
    }
};

// Nullary generator that yields the same value on every call.
template <typename T>
struct GeneratorConstant
{
    // Stores the value to be returned by operator().
    GeneratorConstant(T constant) : m_constant(constant) {}

    // Returns a copy of the stored constant.
    T operator()() { return m_constant; }

    T m_constant;
};


template<typename T, class Generator>
std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
{

using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
vector_aligned_t input(size);
std::generate(input.begin(), input.end(), gen);
return input;
}

/*********************BINARY***************************/

// Benchmark fixture holding the input data for the binary reduce benchmarks.
template<typename T>
class ReduceBinaryFixture : public ::benchmark::Fixture {
public:
    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;

    // Fills the input with st.range(0) elements, all equal to 1.
    void SetUp(const ::benchmark::State& st)
    {
        const auto count = static_cast<size_t>(st.range(0));
        m_inputvect = DataGenerator<T, GeneratorConstant<T>>(count, GeneratorConstant<T>(1));
    }

    // Empties the input between benchmark runs.
    void TearDown(const ::benchmark::State&)
    {
        m_inputvect.clear();
    }

    vector_aligned_t m_inputvect;
};

//UINT64_T
// Times simdpp::reduce over the fixture's uint64_t input using BinaryOpPlus.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
{
    // The range is taken from m_inputvect itself, so st.range(0) is not read
    // again here (the former unused local only triggered a warning).
    const uint64_t init = 0;
    const uint64_t neutral = 0; // passed as reduce's `neutral` argument
    const auto opPlus = BinaryOpPlus<uint64_t>();
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Baseline: times std::accumulate over the same uint64_t input with BinaryOpPlus.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
{
    // st.range(0) is not needed here: the iterators already bound the range.
    const uint64_t init = 0;
    const auto opPlus = BinaryOpPlus<uint64_t>();
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);


//FLOAT
// Times simdpp::reduce over the fixture's float input using BinaryOpPlus.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test, float)(benchmark::State& st)
{
    // The range is taken from m_inputvect itself, so st.range(0) is not read
    // again here (the former unused local only triggered a warning).
    const float init = 0;
    const float neutral = 0; // passed as reduce's `neutral` argument
    const auto opPlus = BinaryOpPlus<float>();
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Baseline: times std::accumulate over the same float input with BinaryOpPlus.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test, float)(benchmark::State& st)
{
    const float init = 0;
    // The functor is instantiated for float, matching the element type.
    // It used to be BinaryOpPlus<uint64_t>, which only worked because the
    // generic operator() overload happened to be selected.
    const auto opPlus = BinaryOpPlus<float>();
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryFLOAT_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);


//DOUBLE
// Times simdpp::reduce over the fixture's double input using BinaryOpPlus.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test, double)(benchmark::State& st)
{
    // The range is taken from m_inputvect itself, so st.range(0) is not read
    // again here (the former unused local only triggered a warning).
    const double init = 0;
    const double neutral = 0; // passed as reduce's `neutral` argument
    const auto opPlus = BinaryOpPlus<double>();
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), init, neutral, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Baseline: times std::accumulate over the same double input with BinaryOpPlus.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test, double)(benchmark::State& st)
{
    const double init = 0;
    // The functor is instantiated for double, matching the element type.
    // It used to be BinaryOpPlus<uint64_t>, which only worked because the
    // generic operator() overload happened to be selected.
    const auto opPlus = BinaryOpPlus<double>();
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.cbegin(), m_inputvect.cend(), init, opPlus));
    }
}
BENCHMARK_REGISTER_F(ReduceBinaryFixture, BinaryDOUBLE_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);



} // namespace
209 changes: 209 additions & 0 deletions bench/insn/algorithm/transform_binary.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/* Copyright (C) 2018 Povilas Kanapickas <povilas@radix.lt>
Copyright (C) 2018 Thomas Retornaz <thomas.retornaz@mines-paris.org>

Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/

#include "benchmark/benchmark.h"
#include <vector>
#include <algorithm>
#include <iterator>
#include <simdpp/simd.h>
//algorithm
#include <simdpp/algorithm/transform.h>


namespace {

// Addition functor used by the binary transform benchmarks.
// Provides a non-template overload for the element type T and a generic
// overload for any other operand type U that supports operator+.
template<typename T>
struct BinaryOpAdd
{
    BinaryOpAdd() {}

    // Element-type overload: returns lhs + rhs converted back to T.
    SIMDPP_INL T operator()(T const &lhs, T const &rhs) const SIMDPP_NOEXCEPT
    {
        return lhs + rhs;
    }

    // Generic overload; simdpp's namespace is opened so its operators are
    // visible to the addition below.
    template<typename U>
    SIMDPP_INL U operator()(U const &lhs, U const &rhs) const SIMDPP_NOEXCEPT
    {
        using namespace simdpp;
        return lhs + rhs;
    }
};


// Nullary generator that yields the same value on every call.
template <typename T>
struct GeneratorConstant
{
    // Stores the value to be returned by operator().
    GeneratorConstant(T constant) : m_constant(constant) {}

    // Returns a copy of the stored constant.
    T operator()() { return m_constant; }

    T m_constant;
};


template<typename T, class Generator>
std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
{

using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
vector_aligned_t input(size);
std::generate(input.begin(), input.end(), gen);
return input;
}

/*********************Binary****************************/

// Benchmark fixture holding two inputs, one output and the functor used by
// the binary transform benchmarks.
template<typename T>
class TransformBinaryFixture : public ::benchmark::Fixture {
public:
    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;

    // Sizes all three buffers to st.range(0); both inputs are filled with 42.
    void SetUp(const ::benchmark::State& st)
    {
        const auto count = static_cast<size_t>(st.range(0));
        const GeneratorConstant<T> fill(42);
        m_inputvect = DataGenerator<T, GeneratorConstant<T>>(count, fill);
        m_inputvect2 = DataGenerator<T, GeneratorConstant<T>>(count, fill);
        m_outputvect.resize(count);
    }

    // Empties all buffers between benchmark runs.
    void TearDown(const ::benchmark::State&)
    {
        m_outputvect.clear();
        m_inputvect2.clear();
        m_inputvect.clear();
    }

    vector_aligned_t m_inputvect;
    vector_aligned_t m_inputvect2;
    vector_aligned_t m_outputvect;
    BinaryOpAdd<T> opPlus = BinaryOpAdd<T>();
};

//UINT8_T
// Times the binary simdpp::transform (element-wise add) on uint8_t data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st)
{
    // Buffers were sized in SetUp, so st.range(0) is not read again here
    // (the former unused local only triggered a warning).
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT8_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Baseline: times the binary std::transform (element-wise add) on uint8_t data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT8_STD_Test, uint8_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second operand is m_inputvect2, as in the SIMD benchmark, so both
        // variants read two distinct input buffers and stay comparable.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT8_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//UINT16_T
// Times the binary simdpp::transform (element-wise add) on uint16_t data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st)
{
    // Buffers were sized in SetUp, so st.range(0) is not read again here
    // (the former unused local only triggered a warning).
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT16_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Baseline: times the binary std::transform (element-wise add) on uint16_t data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT16_STD_Test, uint16_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second operand is m_inputvect2, as in the SIMD benchmark, so both
        // variants read two distinct input buffers and stay comparable.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT16_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//UINT32_T
// Times the binary simdpp::transform (element-wise add) on uint32_t data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st)
{
    // Buffers were sized in SetUp, so st.range(0) is not read again here
    // (the former unused local only triggered a warning).
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT32_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Baseline: times the binary std::transform (element-wise add) on uint32_t data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT32_STD_Test, uint32_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second operand is m_inputvect2, as in the SIMD benchmark, so both
        // variants read two distinct input buffers and stay comparable.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
// Sizes match the SIMD counterpart (32, not 31) so results line up pairwise.
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT32_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//UINT64_T
// Times the binary simdpp::transform (element-wise add) on uint64_t data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
{
    // Buffers were sized in SetUp, so st.range(0) is not read again here
    // (the former unused local only triggered a warning).
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Baseline: times the binary std::transform (element-wise add) on uint64_t data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second operand is m_inputvect2, as in the SIMD benchmark, so both
        // variants read two distinct input buffers and stay comparable.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//FLOAT
// Times the binary simdpp::transform (element-wise add) on float data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryFloat_SIMD_Test, float)(benchmark::State& st)
{
    // Buffers were sized in SetUp, so st.range(0) is not read again here
    // (the former unused local only triggered a warning).
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryFloat_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);


// Baseline: times the binary std::transform (element-wise add) on float data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryFloat_STD_Test, float)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second operand is m_inputvect2, as in the SIMD benchmark, so both
        // variants read two distinct input buffers and stay comparable.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryFloat_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//DOUBLE
// Times the binary simdpp::transform (element-wise add) on double data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryDouble_SIMD_Test, double)(benchmark::State& st)
{
    // Buffers were sized in SetUp, so st.range(0) is not read again here
    // (the former unused local only triggered a warning).
    while (st.KeepRunning())
    {
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_inputvect2.data(), m_outputvect.data(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryDouble_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Baseline: times the binary std::transform (element-wise add) on double data.
BENCHMARK_TEMPLATE_DEFINE_F(TransformBinaryFixture, BinaryDouble_STD_Test, double)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // The second operand is m_inputvect2, as in the SIMD benchmark, so both
        // variants read two distinct input buffers and stay comparable.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_inputvect2.begin(), m_outputvect.begin(), opPlus));
    }
}
BENCHMARK_REGISTER_F(TransformBinaryFixture, BinaryDouble_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

} // namespace
22 changes: 11 additions & 11 deletions bench/insn/algorithm/transform_unary.cc
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ struct UnaryOpAddValue
SIMDPP_INL T operator()(T const &a) const SIMDPP_NOEXCEPT
{
return m_val + a;
}
}

template<typename U>
SIMDPP_INL U operator()(U const &a) const SIMDPP_NOEXCEPT
@@ -102,7 +102,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test, uint1
const auto size= (size_t)st.range(0);
while (st.KeepRunning())
{
simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne);
benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
@@ -112,7 +112,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_STD_Test, uint16
const auto size = (size_t)st.range(0);
while (st.KeepRunning())
{
std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne);
benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
@@ -123,7 +123,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test, uint3
const auto size= (size_t)st.range(0);
while (st.KeepRunning())
{
simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne);
benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
@@ -133,7 +133,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_STD_Test, uint32
const auto size = (size_t)st.range(0);
while (st.KeepRunning())
{
std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne);
benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_STD_Test)->Arg(1)->Arg(10)->Arg(31)->Arg(100)->Arg(1000)->Arg(10000);
@@ -144,7 +144,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test, uint6
const auto size= (size_t)st.range(0);
while (st.KeepRunning())
{
simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne);
benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
@@ -154,7 +154,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_STD_Test, uint64
const auto size = (size_t)st.range(0);
while (st.KeepRunning())
{
std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne);
benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
@@ -165,7 +165,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryFloat_SIMD_Test, float)(
const auto size = (size_t)st.range(0);
while (st.KeepRunning())
{
simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne);
benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
@@ -176,7 +176,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryFloat_STD_Test, float)(b
const auto size = (size_t)st.range(0);
while (st.KeepRunning())
{
std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne);
benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
@@ -187,7 +187,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_SIMD_Test, double
const auto size= (size_t)st.range(0);
while (st.KeepRunning())
{
simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne);
benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryDouble_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
@@ -197,7 +197,7 @@ BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_STD_Test, double)
const auto size = (size_t)st.range(0);
while (st.KeepRunning())
{
std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne);
benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
}
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryDouble_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);
191 changes: 191 additions & 0 deletions bench/insn/load_store.cc
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ namespace {
vector_aligned_t m_outputvect;
};

//UINT8_T
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st)
{
const auto size = (size_t)st.range(0);
@@ -70,4 +71,194 @@ namespace {
}
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT8_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

//UINT16
// Times copying st.range(0) uint16_t elements from m_inputvect to m_outputvect
// one SIMD vector at a time (simdpp::load followed by simdpp::store).
// NOTE(review): assumes both buffers hold whole vectors for every registered
// size — confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st)
{
    using simd_type_T = typename simdpp::simd_traits<uint16_t>::simd_type;
    const auto simd_size = simd_type_T::base_length;
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* src = m_inputvect.data();
        auto* dst = m_outputvect.data();
        for (size_t offset = 0; offset < size; offset += simd_size)
        {
            // The typed local fixes the vector type produced by load().
            simd_type_T chunk = simdpp::load(src + offset);
            simdpp::store(dst + offset, chunk);
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT16_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

// Baseline: times an element-by-element copy of st.range(0) uint16_t values
// from m_inputvect to m_outputvect.
// NOTE(review): assumes both buffers hold at least st.range(0) elements —
// confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT16_STD_Test, uint16_t)(benchmark::State& st)
{
    // The SIMD type alias and vector length were unused in this scalar
    // baseline and have been removed.
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* ptrin = m_inputvect.data();
        auto* ptrout = m_outputvect.data();
        for (size_t i = 0; i < size; ++i)
        {
            *ptrout++ = *ptrin++;
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT16_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

//UINT32
// Times copying st.range(0) uint32_t elements from m_inputvect to m_outputvect
// one SIMD vector at a time (simdpp::load followed by simdpp::store).
// NOTE(review): assumes both buffers hold whole vectors for every registered
// size — confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st)
{
    using simd_type_T = typename simdpp::simd_traits<uint32_t>::simd_type;
    const auto simd_size = simd_type_T::base_length;
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* src = m_inputvect.data();
        auto* dst = m_outputvect.data();
        for (size_t offset = 0; offset < size; offset += simd_size)
        {
            // The typed local fixes the vector type produced by load().
            simd_type_T chunk = simdpp::load(src + offset);
            simdpp::store(dst + offset, chunk);
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT32_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

// Baseline: times an element-by-element copy of st.range(0) uint32_t values
// from m_inputvect to m_outputvect.
// NOTE(review): assumes both buffers hold at least st.range(0) elements —
// confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT32_STD_Test, uint32_t)(benchmark::State& st)
{
    // The SIMD type alias and vector length were unused in this scalar
    // baseline and have been removed.
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* ptrin = m_inputvect.data();
        auto* ptrout = m_outputvect.data();
        for (size_t i = 0; i < size; ++i)
        {
            *ptrout++ = *ptrin++;
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT32_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

//UINT64
// Times copying st.range(0) uint64_t elements from m_inputvect to m_outputvect
// one SIMD vector at a time (simdpp::load followed by simdpp::store).
// NOTE(review): assumes both buffers hold whole vectors for every registered
// size — confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
{
    using simd_type_T = typename simdpp::simd_traits<uint64_t>::simd_type;
    const auto simd_size = simd_type_T::base_length;
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* src = m_inputvect.data();
        auto* dst = m_outputvect.data();
        for (size_t offset = 0; offset < size; offset += simd_size)
        {
            // The typed local fixes the vector type produced by load().
            simd_type_T chunk = simdpp::load(src + offset);
            simdpp::store(dst + offset, chunk);
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT64_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

// Baseline: times an element-by-element copy of st.range(0) uint64_t values
// from m_inputvect to m_outputvect.
// NOTE(review): assumes both buffers hold at least st.range(0) elements —
// confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
{
    // The SIMD type alias and vector length were unused in this scalar
    // baseline and have been removed.
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* ptrin = m_inputvect.data();
        auto* ptrout = m_outputvect.data();
        for (size_t i = 0; i < size; ++i)
        {
            *ptrout++ = *ptrin++;
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryUNINT64_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

//FLOAT
// Times copying st.range(0) float elements from m_inputvect to m_outputvect
// one SIMD vector at a time (simdpp::load followed by simdpp::store).
// NOTE(review): assumes both buffers hold whole vectors for every registered
// size — confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryFloat_SIMD_Test, float)(benchmark::State& st)
{
    using simd_type_T = typename simdpp::simd_traits<float>::simd_type;
    const auto simd_size = simd_type_T::base_length;
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* src = m_inputvect.data();
        auto* dst = m_outputvect.data();
        for (size_t offset = 0; offset < size; offset += simd_size)
        {
            // The typed local fixes the vector type produced by load().
            simd_type_T chunk = simdpp::load(src + offset);
            simdpp::store(dst + offset, chunk);
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryFloat_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

// Baseline: times an element-by-element copy of st.range(0) float values
// from m_inputvect to m_outputvect.
// NOTE(review): assumes both buffers hold at least st.range(0) elements —
// confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryFloat_STD_Test, float)(benchmark::State& st)
{
    // The SIMD type alias and vector length were unused in this scalar
    // baseline and have been removed.
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* ptrin = m_inputvect.data();
        auto* ptrout = m_outputvect.data();
        for (size_t i = 0; i < size; ++i)
        {
            *ptrout++ = *ptrin++;
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryFloat_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

//DOUBLE
// Times copying st.range(0) double elements from m_inputvect to m_outputvect
// one SIMD vector at a time (simdpp::load followed by simdpp::store).
// NOTE(review): assumes both buffers hold whole vectors for every registered
// size — confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryDouble_SIMD_Test, double)(benchmark::State& st)
{
    using simd_type_T = typename simdpp::simd_traits<double>::simd_type;
    const auto simd_size = simd_type_T::base_length;
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* src = m_inputvect.data();
        auto* dst = m_outputvect.data();
        for (size_t offset = 0; offset < size; offset += simd_size)
        {
            // The typed local fixes the vector type produced by load().
            simd_type_T chunk = simdpp::load(src + offset);
            simdpp::store(dst + offset, chunk);
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryDouble_SIMD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);

// Baseline: times an element-by-element copy of st.range(0) double values
// from m_inputvect to m_outputvect.
// NOTE(review): assumes both buffers hold at least st.range(0) elements —
// confirm against LoadStoreFixture::SetUp.
BENCHMARK_TEMPLATE_DEFINE_F(LoadStoreFixture, UnaryDouble_STD_Test, double)(benchmark::State& st)
{
    // The SIMD type alias and vector length were unused in this scalar
    // baseline and have been removed.
    const auto size = static_cast<size_t>(st.range(0));
    while (st.KeepRunning())
    {
        const auto* ptrin = m_inputvect.data();
        auto* ptrout = m_outputvect.data();
        for (size_t i = 0; i < size; ++i)
        {
            *ptrout++ = *ptrin++;
        }
    }
}
BENCHMARK_REGISTER_F(LoadStoreFixture, UnaryDouble_STD_Test)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
} // namespace
2 changes: 1 addition & 1 deletion simdpp/algorithm/helper_input_range.h
Original file line number Diff line number Diff line change
@@ -28,7 +28,7 @@ Extract from contigous range [first,last[
- Note epilogue equals [size_simd_loop,stop[
*/
template<typename T>
const std::pair<size_t,size_t> SIMDPP_INL helper_input_range(const T* first, const T* last)
const std::pair<ptrdiff_t, ptrdiff_t> SIMDPP_INL helper_input_range(const T* first, const T* last)
{
#ifndef SIMDPP_DEBUG //precondition debug mode
if (!first)
2 changes: 1 addition & 1 deletion simdpp/algorithm/reduce.h
Original file line number Diff line number Diff line change
@@ -92,7 +92,7 @@ T reduce(T const* first, T const* last, T init, T neutral, BinOp f) //need neutr
const auto size_prologue_loop = range.first;
const auto size_simd_loop = range.second;

auto i = 0u;
auto i = 0;
simd_type_T accusimd = splat(T(neutral)); //think about product sum

//---prologue
4 changes: 2 additions & 2 deletions simdpp/algorithm/transform.h
Original file line number Diff line number Diff line change
@@ -54,7 +54,7 @@ U* transform(T const* first, T const* last, U* out, UnOp f)
const auto size_prologue_loop = range.first;
const auto size_simd_loop = range.second;

auto i = 0u;
auto i = 0;

//---prologue
for (; i < size_prologue_loop; ++i)
@@ -127,7 +127,7 @@ U* transform(T1 const* first1, T1 const* last1, T2 const* first2, U* out, BinOp
const auto size_simd_loop = range.second;


auto i = 0u;
auto i = 0;

//---prologue
for (; i < size_prologue_loop; ++i)
4 changes: 2 additions & 2 deletions test/insn/transform.cc
Original file line number Diff line number Diff line change
@@ -159,7 +159,7 @@ void test_transform_type_binary(TestResultsSet& ts, TestReporter& tr)
vector_t expected = { 1,3 };

transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus);
for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS
for (auto i = 0u; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS
{
TEST_EQUAL(tr, expected[i], ovect[i]);
}
@@ -171,7 +171,7 @@ void test_transform_type_binary(TestResultsSet& ts, TestReporter& tr)
vector_t expected(150, 1);

transform(ivect1.data(), ivect1.data() + ivect1.size(), ivect2.data(), ovect.data(), opPlus);
for (auto i = 0; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS
for (auto i = 0u; i < ovect.size(); ++i) //TODO make TEST_EQUAL_COLECTIONS
{
TEST_EQUAL(tr, expected[i], ovect[i]);
}