From 46fb877ef1618d9de9a9ba10cee107592b7cdb2d Mon Sep 17 00:00:00 2001 From: tbbdev Date: Tue, 8 Dec 2020 11:36:22 +0300 Subject: [PATCH] Commit oneTBB 2021.1.1 source code --- CMakeLists.txt | 27 +- README.md | 4 +- cmake/README.md | 16 +- cmake/compilers/AppleClang.cmake | 8 +- cmake/config_generation.cmake | 26 +- cmake/modules/FindHWLOC.cmake | 240 +- cmake/scripts/cmake_gen_github_configs.cmake | 22 +- cmake/templates/TBBConfig.cmake.in | 6 +- cmake/templates/TBBConfigVersion.cmake.in | 10 - cmake/utils.cmake | 34 + include/oneapi/tbb.h | 73 + include/oneapi/tbb/blocked_range.h | 163 + include/oneapi/tbb/blocked_range2d.h | 108 + include/oneapi/tbb/blocked_range3d.h | 127 + include/oneapi/tbb/blocked_rangeNd.h | 144 + include/oneapi/tbb/cache_aligned_allocator.h | 187 + include/oneapi/tbb/combinable.h | 69 + include/oneapi/tbb/concurrent_hash_map.h | 1509 ++++++++ include/oneapi/tbb/concurrent_lru_cache.h | 364 ++ include/oneapi/tbb/concurrent_map.h | 288 ++ .../oneapi/tbb/concurrent_priority_queue.h | 482 +++ include/oneapi/tbb/concurrent_queue.h | 591 +++ include/oneapi/tbb/concurrent_set.h | 207 ++ include/oneapi/tbb/concurrent_unordered_map.h | 310 ++ include/oneapi/tbb/concurrent_unordered_set.h | 235 ++ include/oneapi/tbb/concurrent_vector.h | 1094 ++++++ include/{ => oneapi}/tbb/detail/_aggregator.h | 0 .../{ => oneapi}/tbb/detail/_aligned_space.h | 0 .../tbb/detail/_allocator_traits.h | 0 include/{ => oneapi}/tbb/detail/_assert.h | 0 .../tbb/detail/_concurrent_queue_base.h | 0 .../tbb/detail/_concurrent_skip_list.h | 0 .../tbb/detail/_concurrent_unordered_base.h | 0 include/{ => oneapi}/tbb/detail/_config.h | 4 +- .../tbb/detail/_containers_helpers.h | 0 include/{ => oneapi}/tbb/detail/_exception.h | 0 .../tbb/detail/_flow_graph_body_impl.h | 10 +- .../tbb/detail/_flow_graph_cache_impl.h | 0 .../tbb/detail/_flow_graph_impl.h | 9 +- .../tbb/detail/_flow_graph_indexer_impl.h | 0 .../tbb/detail/_flow_graph_item_buffer_impl.h | 0 .../tbb/detail/_flow_graph_join_impl.h | 0 .../tbb/detail/_flow_graph_node_impl.h | 0 .../tbb/detail/_flow_graph_node_set_impl.h | 0 .../tbb/detail/_flow_graph_nodes_deduction.h | 0 .../detail/_flow_graph_tagged_buffer_impl.h | 0 .../tbb/detail/_flow_graph_trace_impl.h | 0 .../tbb/detail/_flow_graph_types_impl.h | 0 .../{ => oneapi}/tbb/detail/_hash_compare.h | 0 include/{ => oneapi}/tbb/detail/_machine.h | 0 .../oneapi/tbb/detail/_namespace_injection.h | 24 + .../{ => oneapi}/tbb/detail/_node_handle.h | 0 .../tbb/detail/_pipeline_filters.h | 0 .../tbb/detail/_pipeline_filters_deduction.h | 0 .../{ => oneapi}/tbb/detail/_range_common.h | 0 include/{ => oneapi}/tbb/detail/_rtm_mutex.h | 0 .../{ => oneapi}/tbb/detail/_rtm_rw_mutex.h | 0 .../{ => oneapi}/tbb/detail/_segment_table.h | 0 .../tbb/detail/_small_object_pool.h | 0 .../tbb/detail/_string_resource.h | 0 include/{ => oneapi}/tbb/detail/_task.h | 53 +- .../tbb/detail/_template_helpers.h | 30 + include/{ => oneapi}/tbb/detail/_utils.h | 0 .../oneapi/tbb/enumerable_thread_specific.h | 1113 ++++++ include/oneapi/tbb/flow_graph.h | 3213 +++++++++++++++++ include/oneapi/tbb/flow_graph_abstractions.h | 51 + include/oneapi/tbb/global_control.h | 188 + include/oneapi/tbb/info.h | 64 + include/oneapi/tbb/memory_pool.h | 272 ++ include/oneapi/tbb/null_mutex.h | 79 + include/oneapi/tbb/null_rw_mutex.h | 84 + include/oneapi/tbb/parallel_for.h | 415 +++ include/oneapi/tbb/parallel_for_each.h | 600 +++ include/oneapi/tbb/parallel_invoke.h | 227 ++ include/oneapi/tbb/parallel_pipeline.h | 153 + 
include/oneapi/tbb/parallel_reduce.h | 687 ++++ include/oneapi/tbb/parallel_scan.h | 590 +++ include/oneapi/tbb/parallel_sort.h | 247 ++ include/oneapi/tbb/partitioner.h | 688 ++++ include/oneapi/tbb/profiling.h | 243 ++ include/oneapi/tbb/queuing_mutex.h | 197 + include/oneapi/tbb/queuing_rw_mutex.h | 199 + include/oneapi/tbb/scalable_allocator.h | 330 ++ include/oneapi/tbb/spin_mutex.h | 179 + include/oneapi/tbb/spin_rw_mutex.h | 307 ++ include/oneapi/tbb/task.h | 37 + include/oneapi/tbb/task_arena.h | 425 +++ include/oneapi/tbb/task_group.h | 556 +++ include/oneapi/tbb/task_scheduler_observer.h | 116 + include/oneapi/tbb/tbb_allocator.h | 124 + include/oneapi/tbb/tbbmalloc_proxy.h | 65 + include/oneapi/tbb/tick_count.h | 99 + include/oneapi/tbb/version.h | 108 + include/tbb/blocked_range.h | 147 +- include/tbb/blocked_range2d.h | 92 +- include/tbb/blocked_range3d.h | 111 +- include/tbb/blocked_rangeNd.h | 129 +- include/tbb/cache_aligned_allocator.h | 171 +- include/tbb/combinable.h | 52 +- include/tbb/concurrent_hash_map.h | 1493 +------- include/tbb/concurrent_lru_cache.h | 349 +- include/tbb/concurrent_map.h | 272 +- include/tbb/concurrent_priority_queue.h | 466 +-- include/tbb/concurrent_queue.h | 575 +-- include/tbb/concurrent_set.h | 191 +- include/tbb/concurrent_unordered_map.h | 294 +- include/tbb/concurrent_unordered_set.h | 219 +- include/tbb/concurrent_vector.h | 1078 +----- include/tbb/enumerable_thread_specific.h | 1097 +----- include/tbb/flow_graph.h | 3189 +--------------- include/tbb/flow_graph_abstractions.h | 36 +- include/tbb/global_control.h | 172 +- include/tbb/info.h | 48 +- include/tbb/memory_pool.h | 257 +- include/tbb/null_mutex.h | 63 +- include/tbb/null_rw_mutex.h | 68 +- include/tbb/parallel_for.h | 399 +- include/tbb/parallel_for_each.h | 584 +-- include/tbb/parallel_invoke.h | 211 +- include/tbb/parallel_pipeline.h | 137 +- include/tbb/parallel_reduce.h | 671 +--- include/tbb/parallel_scan.h | 574 +-- include/tbb/parallel_sort.h | 231 +- include/tbb/partitioner.h | 672 +--- include/tbb/profiling.h | 228 +- include/tbb/queuing_mutex.h | 181 +- include/tbb/queuing_rw_mutex.h | 183 +- include/tbb/scalable_allocator.h | 315 +- include/tbb/spin_mutex.h | 162 +- include/tbb/spin_rw_mutex.h | 290 +- include/tbb/task.h | 21 +- include/tbb/task_arena.h | 405 +-- include/tbb/task_group.h | 527 +-- include/tbb/task_scheduler_observer.h | 100 +- include/tbb/tbb.h | 58 +- include/tbb/tbb_allocator.h | 108 +- include/tbb/tbbmalloc_proxy.h | 50 +- include/tbb/tick_count.h | 82 +- include/tbb/version.h | 84 +- python/CMakeLists.txt | 4 + python/rml/CMakeLists.txt | 12 +- python/rml/ipc_server.cpp | 4 +- python/setup.py | 4 +- python/tbb/api.i | 3 +- src/tbb/CMakeLists.txt | 35 +- src/tbb/allocator.cpp | 8 +- src/tbb/arena.cpp | 6 +- src/tbb/arena.h | 16 +- src/tbb/arena_slot.h | 10 +- src/tbb/assert_impl.h | 2 +- src/tbb/co_context.h | 2 +- src/tbb/concurrent_bounded_queue.cpp | 10 +- src/tbb/concurrent_monitor.cpp | 127 +- src/tbb/concurrent_monitor.h | 337 +- src/tbb/def/lin32-tbb.def | 2 +- src/tbb/def/lin64-tbb.def | 2 +- src/tbb/def/mac64-tbb.def | 2 +- src/tbb/def/win32-tbb.def | 2 +- src/tbb/def/win64-tbb.def | 2 +- src/tbb/dynamic_link.cpp | 4 +- src/tbb/dynamic_link.h | 2 +- src/tbb/exception.cpp | 6 +- src/tbb/global_control.cpp | 10 +- src/tbb/governor.cpp | 6 +- src/tbb/itt_notify.h | 2 +- src/tbb/mailbox.h | 4 +- src/tbb/main.cpp | 3 +- src/tbb/market.cpp | 2 +- src/tbb/market.h | 12 +- src/tbb/misc.cpp | 15 +- src/tbb/misc.h | 8 +- src/tbb/observer_proxy.cpp 
| 4 +- src/tbb/observer_proxy.h | 8 +- src/tbb/parallel_pipeline.cpp | 12 +- src/tbb/private_server.cpp | 2 +- src/tbb/profiling.cpp | 8 +- src/tbb/queuing_rw_mutex.cpp | 6 +- src/tbb/rml_tbb.cpp | 2 +- src/tbb/rml_tbb.h | 2 +- src/tbb/rml_thread_monitor.h | 2 +- src/tbb/rtm_mutex.cpp | 4 +- src/tbb/rtm_rw_mutex.cpp | 4 +- src/tbb/scheduler_common.h | 16 +- src/tbb/semaphore.h | 2 +- src/tbb/small_object_pool.cpp | 6 +- src/tbb/small_object_pool_impl.h | 4 +- src/tbb/task.cpp | 107 +- src/tbb/task_dispatcher.h | 27 +- src/tbb/task_group_context.cpp | 6 +- src/tbb/task_stream.h | 6 +- src/tbb/thread_data.h | 10 +- src/tbb/tls.h | 2 +- src/tbb/version.cpp | 2 +- src/tbb/waiters.h | 134 +- src/tbbbind/CMakeLists.txt | 119 +- src/tbbbind/tbb_bind.cpp | 2 +- src/tbbmalloc/CMakeLists.txt | 12 +- src/tbbmalloc/Customize.h | 6 +- src/tbbmalloc/Synchronize.h | 2 +- src/tbbmalloc/frontend.cpp | 2 +- src/tbbmalloc/tbbmalloc.cpp | 2 +- src/tbbmalloc/tbbmalloc_internal.h | 6 +- src/tbbmalloc_proxy/CMakeLists.txt | 6 +- src/tbbmalloc_proxy/function_replacement.cpp | 4 +- src/tbbmalloc_proxy/proxy.cpp | 2 +- test/CMakeLists.txt | 19 +- test/common/allocator_overload.h | 2 +- test/common/common_arena_constraints.h | 6 +- test/common/concurrency_tracker.h | 2 +- test/common/concurrent_associative_common.h | 2 +- .../common/concurrent_priority_queue_common.h | 4 +- test/common/config.h | 2 +- test/common/cpu_usertime.h | 2 +- test/common/custom_allocators.h | 2 +- test/common/exception_handling.h | 4 +- test/common/fp_control.h | 2 +- test/common/graph_utils.h | 6 +- test/common/parallel_for_each_common.h | 4 +- test/common/parallel_invoke_common.h | 4 +- test/common/parallel_reduce_common.h | 6 +- test/common/spin_barrier.h | 6 +- test/common/test_follows_and_precedes_api.h | 2 +- test/common/utils.h | 6 +- test/common/utils_concurrency_limit.h | 2 +- test/common/utils_dynamic_libs.h | 2 +- test/conformance/conformance_allocators.cpp | 28 +- .../conformance_arena_constraints.cpp | 18 +- test/conformance/conformance_async_node.cpp | 36 +- .../conformance/conformance_blocked_range.cpp | 42 +- .../conformance_blocked_range2d.cpp | 42 +- .../conformance_blocked_range3d.cpp | 46 +- .../conformance_blocked_rangeNd.cpp | 54 +- .../conformance_broadcast_node.cpp | 26 +- test/conformance/conformance_buffer_node.cpp | 26 +- test/conformance/conformance_combinable.cpp | 100 +- .../conformance_composite_node.cpp | 10 +- .../conformance_concurrent_hash_map.cpp | 64 +- .../conformance_concurrent_lru_cache.cpp | 2 +- .../conformance_concurrent_map.cpp | 34 +- .../conformance_concurrent_priority_queue.cpp | 30 +- .../conformance_concurrent_queue.cpp | 126 +- .../conformance_concurrent_set.cpp | 30 +- .../conformance_concurrent_unordered_map.cpp | 42 +- .../conformance_concurrent_unordered_set.cpp | 30 +- .../conformance_concurrent_vector.cpp | 114 +- .../conformance/conformance_continue_node.cpp | 62 +- ...conformance_enumerable_thread_specific.cpp | 228 +- test/conformance/conformance_flowgraph.h | 16 +- .../conformance/conformance_function_node.cpp | 70 +- .../conformance_global_control.cpp | 80 +- test/conformance/conformance_graph.cpp | 52 +- test/conformance/conformance_indexer_node.cpp | 18 +- test/conformance/conformance_input_node.cpp | 38 +- test/conformance/conformance_join_node.cpp | 20 +- test/conformance/conformance_limiter_node.cpp | 32 +- .../conformance_multifunction_node.cpp | 74 +- test/conformance/conformance_mutex.cpp | 102 +- .../conformance_overwrite_node.cpp | 30 +- 
test/conformance/conformance_parallel_for.cpp | 80 +- .../conformance_parallel_for_each.cpp | 12 +- .../conformance_parallel_invoke.cpp | 18 +- .../conformance_parallel_pipeline.cpp | 138 +- .../conformance_parallel_reduce.cpp | 38 +- .../conformance/conformance_parallel_scan.cpp | 28 +- .../conformance/conformance_parallel_sort.cpp | 20 +- .../conformance_priority_queue_node.cpp | 30 +- test/conformance/conformance_queue_node.cpp | 34 +- .../conformance_resumable_tasks.cpp | 14 +- .../conformance_sequencer_node.cpp | 34 +- test/conformance/conformance_split_node.cpp | 18 +- test/conformance/conformance_task_arena.cpp | 40 +- .../conformance_task_group_context.cpp | 12 +- test/conformance/conformance_tick_count.cpp | 40 +- test/conformance/conformance_version.cpp | 10 +- .../conformance_write_once_node.cpp | 30 +- test/tbb/test_arena_priorities.cpp | 14 - test/tbb/test_concurrent_monitor.cpp | 92 + test/tbb/test_dynamic_link.cpp | 2 +- test/tbb/test_enumerable_thread_specific.cpp | 2 +- test/tbb/test_flow_graph_priorities.cpp | 20 +- test/tbb/test_flow_graph_whitebox.cpp | 11 +- test/tbb/test_handle_perror.cpp | 2 +- test/tbb/test_limiter_node.cpp | 24 +- test/tbb/test_mutex.cpp | 4 +- test/tbb/test_task.cpp | 34 +- test/tbb/test_task_arena.cpp | 24 +- test/tbb/test_task_group.cpp | 23 +- test/tbb/test_tbb_header.cpp | 6 +- test/tbbmalloc/test_malloc_compliance.cpp | 2 +- test/tbbmalloc/test_malloc_init_shutdown.cpp | 2 +- test/tbbmalloc/test_malloc_overload.cpp | 2 +- test/tbbmalloc/test_malloc_regression.cpp | 2 +- test/tbbmalloc/test_malloc_whitebox.cpp | 2 +- 293 files changed, 20049 insertions(+), 18797 deletions(-) create mode 100644 cmake/utils.cmake create mode 100644 include/oneapi/tbb.h create mode 100644 include/oneapi/tbb/blocked_range.h create mode 100644 include/oneapi/tbb/blocked_range2d.h create mode 100644 include/oneapi/tbb/blocked_range3d.h create mode 100644 include/oneapi/tbb/blocked_rangeNd.h create mode 100644 include/oneapi/tbb/cache_aligned_allocator.h create mode 100644 include/oneapi/tbb/combinable.h create mode 100644 include/oneapi/tbb/concurrent_hash_map.h create mode 100644 include/oneapi/tbb/concurrent_lru_cache.h create mode 100644 include/oneapi/tbb/concurrent_map.h create mode 100644 include/oneapi/tbb/concurrent_priority_queue.h create mode 100644 include/oneapi/tbb/concurrent_queue.h create mode 100644 include/oneapi/tbb/concurrent_set.h create mode 100644 include/oneapi/tbb/concurrent_unordered_map.h create mode 100644 include/oneapi/tbb/concurrent_unordered_set.h create mode 100644 include/oneapi/tbb/concurrent_vector.h rename include/{ => oneapi}/tbb/detail/_aggregator.h (100%) rename include/{ => oneapi}/tbb/detail/_aligned_space.h (100%) rename include/{ => oneapi}/tbb/detail/_allocator_traits.h (100%) rename include/{ => oneapi}/tbb/detail/_assert.h (100%) rename include/{ => oneapi}/tbb/detail/_concurrent_queue_base.h (100%) rename include/{ => oneapi}/tbb/detail/_concurrent_skip_list.h (100%) rename include/{ => oneapi}/tbb/detail/_concurrent_unordered_base.h (100%) rename include/{ => oneapi}/tbb/detail/_config.h (99%) rename include/{ => oneapi}/tbb/detail/_containers_helpers.h (100%) rename include/{ => oneapi}/tbb/detail/_exception.h (100%) rename include/{ => oneapi}/tbb/detail/_flow_graph_body_impl.h (98%) rename include/{ => oneapi}/tbb/detail/_flow_graph_cache_impl.h (100%) rename include/{ => oneapi}/tbb/detail/_flow_graph_impl.h (99%) rename include/{ => oneapi}/tbb/detail/_flow_graph_indexer_impl.h (100%) rename include/{ => 
oneapi}/tbb/detail/_flow_graph_item_buffer_impl.h (100%) rename include/{ => oneapi}/tbb/detail/_flow_graph_join_impl.h (100%) rename include/{ => oneapi}/tbb/detail/_flow_graph_node_impl.h (100%) rename include/{ => oneapi}/tbb/detail/_flow_graph_node_set_impl.h (100%) rename include/{ => oneapi}/tbb/detail/_flow_graph_nodes_deduction.h (100%) rename include/{ => oneapi}/tbb/detail/_flow_graph_tagged_buffer_impl.h (100%) rename include/{ => oneapi}/tbb/detail/_flow_graph_trace_impl.h (100%) rename include/{ => oneapi}/tbb/detail/_flow_graph_types_impl.h (100%) rename include/{ => oneapi}/tbb/detail/_hash_compare.h (100%) rename include/{ => oneapi}/tbb/detail/_machine.h (100%) create mode 100644 include/oneapi/tbb/detail/_namespace_injection.h rename include/{ => oneapi}/tbb/detail/_node_handle.h (100%) rename include/{ => oneapi}/tbb/detail/_pipeline_filters.h (100%) rename include/{ => oneapi}/tbb/detail/_pipeline_filters_deduction.h (100%) rename include/{ => oneapi}/tbb/detail/_range_common.h (100%) rename include/{ => oneapi}/tbb/detail/_rtm_mutex.h (100%) rename include/{ => oneapi}/tbb/detail/_rtm_rw_mutex.h (100%) rename include/{ => oneapi}/tbb/detail/_segment_table.h (100%) rename include/{ => oneapi}/tbb/detail/_small_object_pool.h (100%) rename include/{ => oneapi}/tbb/detail/_string_resource.h (100%) rename include/{ => oneapi}/tbb/detail/_task.h (76%) rename include/{ => oneapi}/tbb/detail/_template_helpers.h (93%) rename include/{ => oneapi}/tbb/detail/_utils.h (100%) create mode 100644 include/oneapi/tbb/enumerable_thread_specific.h create mode 100644 include/oneapi/tbb/flow_graph.h create mode 100644 include/oneapi/tbb/flow_graph_abstractions.h create mode 100644 include/oneapi/tbb/global_control.h create mode 100644 include/oneapi/tbb/info.h create mode 100644 include/oneapi/tbb/memory_pool.h create mode 100644 include/oneapi/tbb/null_mutex.h create mode 100644 include/oneapi/tbb/null_rw_mutex.h create mode 100644 include/oneapi/tbb/parallel_for.h create mode 100644 include/oneapi/tbb/parallel_for_each.h create mode 100644 include/oneapi/tbb/parallel_invoke.h create mode 100644 include/oneapi/tbb/parallel_pipeline.h create mode 100644 include/oneapi/tbb/parallel_reduce.h create mode 100644 include/oneapi/tbb/parallel_scan.h create mode 100644 include/oneapi/tbb/parallel_sort.h create mode 100644 include/oneapi/tbb/partitioner.h create mode 100644 include/oneapi/tbb/profiling.h create mode 100644 include/oneapi/tbb/queuing_mutex.h create mode 100644 include/oneapi/tbb/queuing_rw_mutex.h create mode 100644 include/oneapi/tbb/scalable_allocator.h create mode 100644 include/oneapi/tbb/spin_mutex.h create mode 100644 include/oneapi/tbb/spin_rw_mutex.h create mode 100644 include/oneapi/tbb/task.h create mode 100644 include/oneapi/tbb/task_arena.h create mode 100644 include/oneapi/tbb/task_group.h create mode 100644 include/oneapi/tbb/task_scheduler_observer.h create mode 100644 include/oneapi/tbb/tbb_allocator.h create mode 100644 include/oneapi/tbb/tbbmalloc_proxy.h create mode 100644 include/oneapi/tbb/tick_count.h create mode 100644 include/oneapi/tbb/version.h create mode 100644 test/tbb/test_concurrent_monitor.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a12cb60548..f71eff14b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ cmake_minimum_required(VERSION 3.1) include(CheckCXXCompilerFlag) +include(GNUInstallDirs) # Enable CMake policies @@ -46,7 +47,7 @@ if (CMAKE_VERSION VERSION_LESS 3.4) enable_language(C) endif() -file(READ 
include/tbb/version.h _tbb_version_info) +file(READ include/oneapi/tbb/version.h _tbb_version_info) string(REGEX REPLACE ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1" _tbb_ver_major "${_tbb_version_info}") string(REGEX REPLACE ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1" _tbb_ver_minor "${_tbb_version_info}") string(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" TBB_INTERFACE_VERSION "${_tbb_version_info}") @@ -55,7 +56,7 @@ set(TBB_BINARY_MINOR_VERSION 1) set(TBBMALLOC_BINARY_VERSION 2) set(TBBBIND_BINARY_VERSION 3) -project(TBB VERSION ${_tbb_ver_major}.${_tbb_ver_minor} LANGUAGES CXX) +project(TBB VERSION ${_tbb_ver_major}.${_tbb_ver_minor}.1 LANGUAGES CXX) unset(_tbb_ver_major) unset(_tbb_ver_minor) @@ -93,7 +94,6 @@ endif() option(TBB_TEST "Enable testing" ON) option(TBB_EXAMPLES "Enable examples" OFF) option(TBB_STRICT "Treat compiler warnings as errors" ON) -option(TBB_NUMA_SUPPORT "Enable NUMA support that depends on Portable Hardware Locality (hwloc) library" OFF) option(TBB_WINDOWS_DRIVER "Build as Universal Windows Driver (UWD)" OFF) option(TBB_NO_APPCONTAINER "Apply /APPCONTAINER:NO (for testing binaries for Windows Store)" OFF) option(TBB4PY_BUILD "Enable tbb4py build" OFF) @@ -187,12 +187,10 @@ else() if (NOT "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "mips") add_subdirectory(src/tbbmalloc) add_subdirectory(src/tbbmalloc_proxy) - if (TBB_NUMA_SUPPORT) - if (APPLE) - message(WARNING "TBBBind build target is disabled due to unsupported environment") - else() - add_subdirectory(src/tbbbind) - endif() + if (APPLE) + message(STATUS "TBBBind build target is disabled due to unsupported environment") + else() + add_subdirectory(src/tbbbind) endif() endif() @@ -200,12 +198,14 @@ else() # Installation instructions include(CMakePackageConfigHelpers) - install(DIRECTORY include - DESTINATION .) + install(DIRECTORY include/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT devel) install(EXPORT ${PROJECT_NAME}Targets NAMESPACE TBB:: - DESTINATION lib/cmake/${PROJECT_NAME}) + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + COMPONENT devel) file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake "include(\${CMAKE_CURRENT_LIST_DIR}/${PROJECT_NAME}Targets.cmake)\n") @@ -214,7 +214,8 @@ else() install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" - DESTINATION lib/cmake/${PROJECT_NAME}) + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + COMPONENT devel) # ------------------------------------------------------------------- endif() diff --git a/README.md b/README.md index 386a7070bf..48f9ade8ad 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# oneAPI Threading Building Blocks (Beta) +# oneAPI Threading Building Blocks [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE.txt) oneAPI Threading Building Blocks (oneTBB) lets you easily write parallel C++ programs that take @@ -28,4 +28,4 @@ By its terms, contributions submitted to the project are also done under that li ------------------------------------------------------------------------ Intel and the Intel logo are trademarks of Intel Corporation or its subsidiaries in the U.S. and/or other countries. -\* Other names and brands may be claimed as the property of others. \ No newline at end of file +\* Other names and brands may be claimed as the property of others. 
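The CMakeLists.txt changes above move the install rules to GNUInstallDirs destinations, split them into `runtime` and `devel` components, and export the library targets under the `TBB::` namespace next to the generated `TBBConfig.cmake`/`TBBConfigVersion.cmake` files. A minimal sketch of a downstream project consuming such an install tree follows; the project name, target name, and install prefix are illustrative assumptions, not part of this patch:

```cmake
# Hypothetical consumer CMakeLists.txt. Assumes oneTBB was installed to a prefix
# visible to CMake, e.g. configured with -DCMAKE_PREFIX_PATH=/opt/oneTBB.
cmake_minimum_required(VERSION 3.1)
project(tbb_consumer CXX)

# Loads <prefix>/lib/cmake/TBB/TBBConfig.cmake, which defines imported targets
# such as TBB::tbb and TBB::tbbmalloc.
find_package(TBB REQUIRED CONFIG)

add_executable(tbb_consumer main.cpp)

# Linking the imported target also propagates oneTBB's include directories.
target_link_libraries(tbb_consumer PRIVATE TBB::tbb)
```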
diff --git a/cmake/README.md b/cmake/README.md index f6be26df58..2b4098504a 100644 --- a/cmake/README.md +++ b/cmake/README.md @@ -6,7 +6,6 @@ The following controls are available during the configure stage: ``` TBB_TEST:BOOL - Enable testing (ON by default) TBB_STRICT:BOOL - Treat compiler warnings as errors (ON by default) -TBB_NUMA_SUPPORT:BOOL - Enable TBBBind build target and task_arena extensions for NUMA support (depends on Portable Hardware Locality (hwloc) library) (OFF by default) TBB_SANITIZE:STRING - Sanitizer parameter, passed to compiler/linker TBB_SIGNTOOL:FILEPATH - Tool for digital signing, used in post install step for libraries if provided. TBB_SIGNTOOL_ARGS:STRING - Additional arguments for TBB_SIGNTOOL, used if TBB_SIGNTOOL is set. @@ -138,6 +137,17 @@ make test_spec **NOTE: be careful about installation: avoid commands like `make install` unless you fully understand the consequences.** +The following install components are supported: +- `runtime` - oneTBB runtime package (core shared libraries and `.dll` files on Windows). +- `devel` - oneTBB development package (header files, CMake integration files, library symbolic links and `.lib` files on Windows). +- `tbb4py` - [oneTBB Module for Python](#onetbb-python-module-support). + +How to install specific components after configuration and build: + +```bash +cmake -DCOMPONENT=<component> [-DBUILD_TYPE=<build-type>] -P cmake_install.cmake +``` + Simple packaging using CPack is supported. The following commands allow to create a simple portable package which includes header files, libraries and integration files for CMake: @@ -184,8 +194,8 @@ Variable | Description `TBB_VERSION` | oneTBB version (format: `<major>.<minor>.<patch>.<tweak>`) `TBB_IMPORTED_TARGETS` | All created oneTBB imported targets (not supported for builds from source code) -Starting from [oneTBB 2021.1-beta08](https://github.com/oneapi-src/oneTBB/releases/tag/v2021.1-beta08) GitHub release TBBConfig files in the binary packages are located under `<tbb-root>/lib[/<arch>]/cmake/TBB`. -For example, for Linux 64-bit `TBB_DIR` should be set to `<tbb-root>/lib/intel64/cmake/TBB`. +Starting from [oneTBB 2021.1](https://github.com/oneapi-src/oneTBB/releases/tag/v2021.1) GitHub release TBBConfig files in the binary packages are located under `<tbb-root>/lib/cmake/TBB`. +For example, `TBB_DIR` should be set to `<tbb-root>/lib/cmake/TBB`. TBBConfig files are automatically created during the build from source code and can be installed together with the library. Also oneTBB provides a helper function that creates TBBConfig files from predefined templates: see `tbb_generate_config` in `cmake/config_generation.cmake`. 
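The `cmake/README.md` section above documents both the per-component installation and the new TBBConfig location inside binary packages. As a complement to the sketch above, a hedged example of pointing `TBB_DIR` directly at an unpacked binary package and requesting specific components; the `/opt/oneTBB` path, target name, and component list are assumptions for illustration only:

```cmake
# Hypothetical snippet; /opt/oneTBB stands for the root of an unpacked binary package.
# In practice TBB_DIR is more commonly passed on the command line:
#   cmake -DTBB_DIR=/opt/oneTBB/lib/cmake/TBB ..
cmake_minimum_required(VERSION 3.1)
project(my_app CXX)

set(TBB_DIR "/opt/oneTBB/lib/cmake/TBB" CACHE PATH "Directory containing TBBConfig.cmake")

# TBBConfig.cmake loops over the requested components and defines the matching
# TBB:: imported targets when the corresponding libraries are found.
find_package(TBB REQUIRED COMPONENTS tbb tbbmalloc)

add_executable(my_app main.cpp)
target_link_libraries(my_app PRIVATE TBB::tbb TBB::tbbmalloc)
```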
diff --git a/cmake/compilers/AppleClang.cmake b/cmake/compilers/AppleClang.cmake index 5e6ed5ea94..3b4f85f5bd 100644 --- a/cmake/compilers/AppleClang.cmake +++ b/cmake/compilers/AppleClang.cmake @@ -18,11 +18,13 @@ set(TBB_WARNING_LEVEL -Wall -Wextra $<$:-Werror>) set(TBB_TEST_WARNING_FLAGS -Wshadow -Wcast-qual -Woverloaded-virtual -Wnon-virtual-dtor) set(TBB_WARNING_SUPPRESS -Wno-parentheses -Wno-non-virtual-dtor -Wno-dangling-else) # For correct ucontext.h structures layout -set(TBB_LIB_COMPILE_FLAGS -D_XOPEN_SOURCE) -set(TBB_BENCH_COMPILE_FLAGS -D_XOPEN_SOURCE) +set(TBB_COMMON_COMPILE_FLAGS -D_XOPEN_SOURCE) set(TBB_MMD_FLAG -MMD) -set(TBB_COMMON_COMPILE_FLAGS -mrtm) + +if (NOT "${CMAKE_OSX_ARCHITECTURES}" MATCHES "^arm64$") + set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -mrtm) +endif() # TBB malloc settings set(TBBMALLOC_LIB_COMPILE_FLAGS -fno-rtti -fno-exceptions) diff --git a/cmake/config_generation.cmake b/cmake/config_generation.cmake index 1d9e4771ea..b34e0a2401 100644 --- a/cmake/config_generation.cmake +++ b/cmake/config_generation.cmake @@ -22,7 +22,6 @@ function(tbb_generate_config) set(options HANDLE_SUBDIRS) set(oneValueArgs INSTALL_DIR SYSTEM_NAME - SIZEOF_VOID_P # 4 for 32 bit, 8 for 64 bit. LIB_REL_PATH INC_REL_PATH DLL_REL_PATH VERSION TBB_BINARY_VERSION @@ -39,8 +38,6 @@ function(tbb_generate_config) file(TO_CMAKE_PATH "${tbb_gen_cfg_DLL_REL_PATH}" TBB_DLL_REL_PATH) file(TO_CMAKE_PATH "${tbb_gen_cfg_INC_REL_PATH}" TBB_INC_REL_PATH) - set(TBB_SIZEOF_VOID_P "${tbb_gen_cfg_SIZEOF_VOID_P}") - set(TBB_VERSION ${tbb_gen_cfg_VERSION}) set(TBB_COMPONENTS_BIN_VERSION " @@ -57,7 +54,11 @@ set(_tbbbind_bin_version ${tbb_gen_cfg_TBBBIND_BINARY_VERSION}) set(TBB_IMPLIB_DEBUG "") if (tbb_gen_cfg_HANDLE_SUBDIRS) set(TBB_HANDLE_SUBDIRS " -set(_tbb_subdir gcc4.8) +if (CMAKE_SIZEOF_VOID_P STREQUAL \"8\") + set(_tbb_subdir intel64/gcc4.8) +else () + set(_tbb_subdir ia32/gcc4.8) +endif() ") endif() elseif (tbb_gen_cfg_SYSTEM_NAME STREQUAL "Darwin") @@ -76,9 +77,9 @@ set(_tbb_subdir gcc4.8) # Expand TBB_LIB_REL_PATH here in IMPORTED_IMPLIB property and # redefine it with TBB_DLL_REL_PATH value to properly fill IMPORTED_LOCATION property in TBBConfig.cmake.in template. 
set(TBB_IMPLIB_RELEASE " - IMPORTED_IMPLIB_RELEASE \"\${CMAKE_CURRENT_LIST_DIR}/${TBB_LIB_REL_PATH}/\${_tbb_subdir}/\${_tbb_component}.lib\"") + IMPORTED_IMPLIB_RELEASE \"\${CMAKE_CURRENT_LIST_DIR}/${TBB_LIB_REL_PATH}/\${_tbb_subdir}/\${_tbb_component}\${_bin_version}.lib\"") set(TBB_IMPLIB_DEBUG " - IMPORTED_IMPLIB_DEBUG \"\${CMAKE_CURRENT_LIST_DIR}/${TBB_LIB_REL_PATH}/\${_tbb_subdir}/\${_tbb_component}_debug.lib\"") + IMPORTED_IMPLIB_DEBUG \"\${CMAKE_CURRENT_LIST_DIR}/${TBB_LIB_REL_PATH}/\${_tbb_subdir}/\${_tbb_component}\${_bin_version}_debug.lib\"") set(TBB_LIB_REL_PATH ${TBB_DLL_REL_PATH}) if (tbb_gen_cfg_HANDLE_SUBDIRS) @@ -87,8 +88,21 @@ set(_tbb_subdir vc14) if (WINDOWS_STORE) set(_tbb_subdir \${_tbb_subdir}_uwp) endif() + +if (CMAKE_SIZEOF_VOID_P STREQUAL \"8\") + set(_tbb_subdir intel64/\${_tbb_subdir}) +else () + set(_tbb_subdir ia32/\${_tbb_subdir}) +endif() ") endif() + + set(TBB_HANDLE_BIN_VERSION " + unset(_bin_version) + if (_tbb_component STREQUAL tbb) + set(_bin_version \${_tbb_bin_version}) + endif() +") else() message(FATAL_ERROR "Unsupported OS name: ${tbb_system_name}") endif() diff --git a/cmake/modules/FindHWLOC.cmake b/cmake/modules/FindHWLOC.cmake index 696c516dc6..90c348c5a8 100644 --- a/cmake/modules/FindHWLOC.cmake +++ b/cmake/modules/FindHWLOC.cmake @@ -14,80 +14,224 @@ include(FindPackageHandleStandardArgs) -# Firstly search for HWLOC in config mode (i.e. search for HWLOCConfig.cmake). +# Check Find module settings +# -------------------------------------------------------------------------------------------------- +if (HWLOC_FIND_VERSION) + if (NOT ${HWLOC_FIND_VERSION} STREQUAL "2" AND NOT ${HWLOC_FIND_VERSION} STREQUAL "1.11") + message(FATAL_ERROR "This find module can find only following HWLOC versions: 1.11, 2") + endif() +else() + set(HWLOC_FIND_VERSION "1.11") +endif() + +if (NOT HWLOC_FIND_VERSION_EXACT) + message(FATAL_ERROR "Please pass exact argument to the find_package() call") +endif() + +# Check required target availability +# -------------------------------------------------------------------------------------------------- +string(REPLACE "." "_" _target_version_suffix "${HWLOC_FIND_VERSION}") +set(_hwloc_target_name "HWLOC::hwloc_${_target_version_suffix}") +if (TARGET ${_hwloc_target_name}) + unset(_hwloc_target_name) + return() +endif() + +# Search for HWLOC in config mode (i.e. search for HWLOCConfig.cmake). +# -------------------------------------------------------------------------------------------------- +unset(HWLOC_FOUND) find_package(HWLOC QUIET CONFIG) if (HWLOC_FOUND) find_package_handle_standard_args(HWLOC CONFIG_MODE) return() endif() -find_program(_hwloc_info_exe - NAMES hwloc-info - PATHS ENV HWLOC_ROOT ENV PATH - PATH_SUFFIXES bin -) +# Variables definition +# -------------------------------------------------------------------------------------------------- +if (WIN32) + list(APPEND _additional_lib_dirs $ENV{PATH} $ENV{LIB}) + list(APPEND _additional_include_dirs $ENV{INCLUDE} $ENV{CPATH}) +else() + list(APPEND _additional_lib_dirs $ENV{LIBRARY_PATH} $ENV{LD_LIBRARY_PATH} $ENV{DYLD_LIBRARY_PATH}) + list(APPEND _additional_include_dirs $ENV{CPATH} $ENV{C_INCLUDE_PATH} $ENV{CPLUS_INCLUDE_PATH} $ENV{INCLUDE_PATH}) +endif() +list(APPEND _additional_tools_dirs $ENV{HWLOC_ROOT} $ENV{PATH}) +set(_hwloc_lib_name libhwloc) -if (_hwloc_info_exe) - execute_process( - COMMAND ${_hwloc_info_exe} "--version" +# We should not replace : by ; on Windows since it brakes the system paths (e.g. C:\...) 
+if (NOT WIN32) + string(REPLACE ":" ";" _additional_lib_dirs "${_additional_lib_dirs}") + string(REPLACE ":" ";" _additional_include_dirs "${_additional_include_dirs}") + string(REPLACE ":" ";" _additional_tools_dirs "${_additional_tools_dirs}") +endif() + +if (${HWLOC_FIND_VERSION} STREQUAL "2") + set(_api_version_pattern "2[0-9a-fA-F]+$") + if (NOT WIN32) + set(_hwloc_lib_name "${_hwloc_lib_name}.so.15") + endif() +elseif (${HWLOC_FIND_VERSION} STREQUAL "1.11") + set(_api_version_pattern "10b[0-9a-fA-F]+$") + if (NOT WIN32) + set(_hwloc_lib_name "${_hwloc_lib_name}.so.5") + endif() +endif() +set(_include_version_pattern "#define HWLOC_API_VERSION 0x000${_api_version_pattern}") + +# Detect the HWLOC version +# -------------------------------------------------------------------------------------------------- +macro(find_required_version _hwloc_info_exe) + execute_process(COMMAND ${_hwloc_info_exe} "--version" OUTPUT_VARIABLE _hwloc_info_output OUTPUT_STRIP_TRAILING_WHITESPACE ) - string(REGEX MATCH "([0-9]+.[0-9]+.[0-9]+)$" HWLOC_VERSION "${_hwloc_info_output}") - if ("${HWLOC_VERSION}" STREQUAL "") - unset(HWLOC_VERSION) + if (_hwloc_info_output) + string(REGEX MATCH "([0-9]+\.[0-9]+\.[0-9]+)$" _current_hwloc_version "${_hwloc_info_output}") + + string(REPLACE "." ";" _version_list ${_current_hwloc_version}) + list(GET _version_list 0 _current_hwloc_version_major) + list(GET _version_list 1 _current_hwloc_version_minor) + + if (${HWLOC_FIND_VERSION_MAJOR} STREQUAL ${_current_hwloc_version_major}) + if(${HWLOC_FIND_VERSION_MINOR} STREQUAL "0" OR ${HWLOC_FIND_VERSION_MINOR} STREQUAL ${_current_hwloc_version_minor}) + set(HWLOC_VERSION ${_current_hwloc_version}) + endif() + endif() + + unset(_hwloc_info_output) + unset(_version_list) + unset(_current_hwloc_version) + unset(_current_hwloc_version_major) + unset(_current_hwloc_version_minor) + unset(_current_hwloc_version_patch) endif() +endmacro() - unset(_hwloc_info_output) - unset(_hwloc_info_exe) +foreach(path ${_additional_tools_dirs}) + unset(HWLOC_VERSION) + find_program(_hwloc_info_exe NAMES hwloc-info HINTS ${path} PATH_SUFFIXES bin NO_DEFAULT_PATH) + if (_hwloc_info_exe) + find_required_version(${_hwloc_info_exe}) + unset(_hwloc_info_exe CACHE) + if (HWLOC_VERSION) + break() + endif() + endif() +endforeach() +if (NOT HWLOC_VERSION) + find_program(_hwloc_info_exe NAMES hwloc-info PATH_SUFFIXES bin) + if (_hwloc_info_exe) + find_required_version(${_hwloc_info_exe}) + unset(_hwloc_info_exe CACHE) + endif() endif() -if (WIN32) - list(APPEND _additional_lib_dirs ENV PATH ENV LIB) - list(APPEND _additional_include_dirs ENV INCLUDE ENV CPATH) - set(_hwloc_lib_name libhwloc) -else() - list(APPEND _additional_lib_dirs ENV LIBRARY_PATH ENV LD_LIBRARY_PATH ENV DYLD_LIBRARY_PATH) - list(APPEND _additional_include_dirs ENV CPATH ENV C_INCLUDE_PATH ENV CPLUS_INCLUDE_PATH ENV INCLUDE_PATH) - set(_hwloc_lib_name hwloc) +# Find the include path +# -------------------------------------------------------------------------------------------------- +foreach(path ${_additional_include_dirs}) + find_path(HWLOC_INCLUDE_PATH NAMES hwloc.h PATHS ${path} NO_DEFAULT_PATH) + + if (HWLOC_INCLUDE_PATH) + file(STRINGS ${HWLOC_INCLUDE_PATH}/hwloc.h _hwloc_api_version + REGEX "${_include_version_pattern}" + ) + if (_hwloc_api_version) + unset(_hwloc_api_version CACHE) + break() + endif() + unset(HWLOC_INCLUDE_PATH CACHE) + endif() +endforeach() + +if (NOT HWLOC_INCLUDE_PATH) + find_path(HWLOC_INCLUDE_PATH NAMES hwloc.h) + if (HWLOC_INCLUDE_PATH) + file(STRINGS 
${HWLOC_INCLUDE_PATH}/hwloc.h _hwloc_api_version + REGEX "${_include_version_pattern}" + ) + if (NOT _hwloc_api_version) + unset(HWLOC_INCLUDE_PATH CACHE) + endif() + unset(_hwloc_api_version CACHE) + endif() endif() -if (NOT TARGET HWLOC::hwloc) - find_path(_hwloc_include_dirs - NAMES hwloc.h - PATHS ${_additional_include_dirs} - PATH_SUFFIXES "hwloc") - - if (_hwloc_include_dirs) - add_library(HWLOC::hwloc SHARED IMPORTED) - set_property(TARGET HWLOC::hwloc APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${_hwloc_include_dirs}) - - find_library(_hwloc_lib_dirs ${_hwloc_lib_name} PATHS ${_additional_lib_dirs}) - if (_hwloc_lib_dirs) - if (WIN32) - set_target_properties(HWLOC::hwloc PROPERTIES - IMPORTED_LOCATION "${_hwloc_lib_dirs}" - IMPORTED_IMPLIB "${_hwloc_lib_dirs}") - else() - set_target_properties(HWLOC::hwloc PROPERTIES - IMPORTED_LOCATION "${_hwloc_lib_dirs}") - endif() +# Find the library path +# -------------------------------------------------------------------------------------------------- +macro(check_hwloc_runtime_version _hwloc_lib_path) + file(WRITE ${CMAKE_BINARY_DIR}/hwloc_version_check/version_check.cpp + "#include <hwloc.h>\n" + "int main() {printf(\"%x\", hwloc_get_api_version());}\n" + ) + try_run(RUN_RESULT COMPILE_RESULT + ${CMAKE_BINARY_DIR}/hwloc_version_check + ${CMAKE_BINARY_DIR}/hwloc_version_check/version_check.cpp + LINK_LIBRARIES ${_hwloc_lib_path} + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${HWLOC_INCLUDE_PATH}" + RUN_OUTPUT_VARIABLE _execution_result + ) + string(REGEX MATCH "${_api_version_pattern}" _runtime_api_version "${_execution_result}") + if (_runtime_api_version) + set(HWLOC_LIBRARY_PATH "${_hwloc_lib_path}") + endif() + unset(_execution_result CACHE) + unset(_api_version_pattern CACHE) +endmacro() - set(HWLOC_FOUND 1) +if (HWLOC_INCLUDE_PATH) + foreach(path ${_additional_lib_dirs}) + find_library(_hwloc_lib_path ${_hwloc_lib_name} PATHS ${path} NO_DEFAULT_PATH) + if (_hwloc_lib_path) + check_hwloc_runtime_version(${_hwloc_lib_path}) + unset(_hwloc_lib_path CACHE) + if (HWLOC_LIBRARY_PATH) + break() + endif() + endif() + endforeach() + if (NOT HWLOC_LIBRARY_PATH) + find_library(_hwloc_lib_path ${_hwloc_lib_name}) + if (_hwloc_lib_path) + check_hwloc_runtime_version(${_hwloc_lib_path}) + unset(_hwloc_lib_path CACHE) endif() endif() endif() -unset(_additional_include_dirs CACHE) -unset(_additional_lib_dirs CACHE) -unset(_hwloc_lib_name CACHE) +# Define the library target +# -------------------------------------------------------------------------------------------------- +if (HWLOC_VERSION AND HWLOC_INCLUDE_PATH AND HWLOC_LIBRARY_PATH) + add_library(${_hwloc_target_name} SHARED IMPORTED) + set_target_properties(${_hwloc_target_name} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_PATH}") + if (WIN32) + set_target_properties(${_hwloc_target_name} PROPERTIES + IMPORTED_LOCATION "${HWLOC_LIBRARY_PATH}" + IMPORTED_IMPLIB "${HWLOC_LIBRARY_PATH}") + else() + set_target_properties(${_hwloc_target_name} PROPERTIES + IMPORTED_LOCATION "${HWLOC_LIBRARY_PATH}") + endif() + + set(HWLOC_${_target_version_suffix}_TARGET_DEFINED TRUE) +endif() find_package_handle_standard_args( HWLOC - REQUIRED_VARS _hwloc_include_dirs _hwloc_lib_dirs + REQUIRED_VARS HWLOC_${_target_version_suffix}_TARGET_DEFINED VERSION_VAR HWLOC_VERSION ) -unset(_hwloc_include_dirs CACHE) +unset(_additional_include_dirs CACHE) +unset(_additional_lib_dirs CACHE) +unset(_additional_tools_dirs CACHE) +unset(_hwloc_lib_name CACHE) unset(_hwloc_lib_dirs CACHE) +unset(_api_version_pattern CACHE) 
+unset(_include_version_pattern CACHE) +unset(_hwloc_target_name) + +unset(HWLOC_INCLUDE_PATH CACHE) +unset(HWLOC_LIBRARY_PATH) + +unset(HWLOC_FIND_VERSION) diff --git a/cmake/scripts/cmake_gen_github_configs.cmake b/cmake/scripts/cmake_gen_github_configs.cmake index 530401cf66..d62c70e98c 100644 --- a/cmake/scripts/cmake_gen_github_configs.cmake +++ b/cmake/scripts/cmake_gen_github_configs.cmake @@ -14,14 +14,13 @@ include(${CMAKE_CURRENT_LIST_DIR}/../config_generation.cmake) -# TBBConfig in TBB provided packages are expected to be placed into: /lib[/]/cmake/TBB -set(WIN_LIN_INC_REL_PATH "../../../../include") -set(DARWIN_INC_REL_PATH "../../../include") +# TBBConfig in TBB provided packages are expected to be placed into: /lib/cmake/TBB +set(INC_REL_PATH "../../../include") set(LIB_REL_PATH "../..") -set(DLL_REL_PATH "../../../../redist") # ia32/intel64 subdir is appended depending on configuration. +set(DLL_REL_PATH "../../../redist") # Parse version info -file(READ ${CMAKE_CURRENT_LIST_DIR}/../../include/tbb/version.h _tbb_version_info) +file(READ ${CMAKE_CURRENT_LIST_DIR}/../../include/oneapi/tbb/version.h _tbb_version_info) string(REGEX REPLACE ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1" _tbb_ver_major "${_tbb_version_info}") string(REGEX REPLACE ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1" _tbb_ver_minor "${_tbb_version_info}") string(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" _tbb_interface_ver "${_tbb_version_info}") @@ -35,11 +34,6 @@ string(REGEX REPLACE ".*TBBBIND_BINARY_VERSION ([0-9]+).*" "\\1" TBBBIND_BINARY_ math(EXPR _tbb_ver_patch "${_tbb_interface_ver} % 1000 / 10") math(EXPR _tbb_ver_tweak "${_tbb_interface_ver} % 10") -# Applicable for beta releases. -if (_tbb_ver_patch EQUAL 0) - math(EXPR _tbb_ver_tweak "${_tbb_ver_tweak} + 6") -endif() - set(COMMON_ARGS LIB_REL_PATH ${LIB_REL_PATH} VERSION ${_tbb_ver_major}.${_tbb_ver_minor}.${_tbb_ver_patch}.${_tbb_ver_tweak} @@ -49,9 +43,7 @@ set(COMMON_ARGS TBBBIND_BINARY_VERSION ${TBBBIND_BINARY_VERSION} ) -tbb_generate_config(INSTALL_DIR ${INSTALL_DIR}/linux-32 SYSTEM_NAME Linux INC_REL_PATH ${WIN_LIN_INC_REL_PATH} SIZEOF_VOID_P 4 HANDLE_SUBDIRS ${COMMON_ARGS}) -tbb_generate_config(INSTALL_DIR ${INSTALL_DIR}/linux-64 SYSTEM_NAME Linux INC_REL_PATH ${WIN_LIN_INC_REL_PATH} SIZEOF_VOID_P 8 HANDLE_SUBDIRS ${COMMON_ARGS}) -tbb_generate_config(INSTALL_DIR ${INSTALL_DIR}/windows-32 SYSTEM_NAME Windows INC_REL_PATH ${WIN_LIN_INC_REL_PATH} SIZEOF_VOID_P 4 HANDLE_SUBDIRS DLL_REL_PATH ${DLL_REL_PATH}/ia32 ${COMMON_ARGS}) -tbb_generate_config(INSTALL_DIR ${INSTALL_DIR}/windows-64 SYSTEM_NAME Windows INC_REL_PATH ${WIN_LIN_INC_REL_PATH} SIZEOF_VOID_P 8 HANDLE_SUBDIRS DLL_REL_PATH ${DLL_REL_PATH}/intel64 ${COMMON_ARGS}) -tbb_generate_config(INSTALL_DIR ${INSTALL_DIR}/darwin SYSTEM_NAME Darwin INC_REL_PATH ${DARWIN_INC_REL_PATH} SIZEOF_VOID_P 8 ${COMMON_ARGS}) +tbb_generate_config(INSTALL_DIR ${INSTALL_DIR}/linux SYSTEM_NAME Linux INC_REL_PATH ${INC_REL_PATH} HANDLE_SUBDIRS ${COMMON_ARGS}) +tbb_generate_config(INSTALL_DIR ${INSTALL_DIR}/windows SYSTEM_NAME Windows INC_REL_PATH ${INC_REL_PATH} HANDLE_SUBDIRS DLL_REL_PATH ${DLL_REL_PATH} ${COMMON_ARGS}) +tbb_generate_config(INSTALL_DIR ${INSTALL_DIR}/darwin SYSTEM_NAME Darwin INC_REL_PATH ${INC_REL_PATH} ${COMMON_ARGS}) message(STATUS "TBBConfig files were created in ${INSTALL_DIR}") diff --git a/cmake/templates/TBBConfig.cmake.in b/cmake/templates/TBBConfig.cmake.in index 299497967c..2d768a2d07 100644 --- a/cmake/templates/TBBConfig.cmake.in +++ 
b/cmake/templates/TBBConfig.cmake.in @@ -43,11 +43,11 @@ unset(_tbbmalloc_proxy_ix) @TBB_HANDLE_SUBDIRS@ foreach (_tbb_component ${TBB_FIND_COMPONENTS}) set(TBB_${_tbb_component}_FOUND 0) - - get_filename_component(_tbb_release_lib "${CMAKE_CURRENT_LIST_DIR}/@TBB_LIB_REL_PATH@/${_tbb_subdir}/@TBB_LIB_PREFIX@${_tbb_component}.@TBB_LIB_EXT@" ABSOLUTE) + @TBB_HANDLE_BIN_VERSION@ + get_filename_component(_tbb_release_lib "${CMAKE_CURRENT_LIST_DIR}/@TBB_LIB_REL_PATH@/${_tbb_subdir}/@TBB_LIB_PREFIX@${_tbb_component}${_bin_version}.@TBB_LIB_EXT@" ABSOLUTE) if (NOT TBB_FIND_RELEASE_ONLY) - get_filename_component(_tbb_debug_lib "${CMAKE_CURRENT_LIST_DIR}/@TBB_LIB_REL_PATH@/${_tbb_subdir}/@TBB_LIB_PREFIX@${_tbb_component}_debug.@TBB_LIB_EXT@" ABSOLUTE) + get_filename_component(_tbb_debug_lib "${CMAKE_CURRENT_LIST_DIR}/@TBB_LIB_REL_PATH@/${_tbb_subdir}/@TBB_LIB_PREFIX@${_tbb_component}${_bin_version}_debug.@TBB_LIB_EXT@" ABSOLUTE) endif() if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}") diff --git a/cmake/templates/TBBConfigVersion.cmake.in b/cmake/templates/TBBConfigVersion.cmake.in index 969a520b65..c7b107b0a9 100644 --- a/cmake/templates/TBBConfigVersion.cmake.in +++ b/cmake/templates/TBBConfigVersion.cmake.in @@ -22,13 +22,3 @@ else() set(PACKAGE_VERSION_EXACT TRUE) endif() endif() - -if ("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "@TBB_SIZEOF_VOID_P@" STREQUAL "") - return() -endif() - -if (NOT CMAKE_SIZEOF_VOID_P STREQUAL "@TBB_SIZEOF_VOID_P@") - math(EXPR installedBits "8 * @TBB_SIZEOF_VOID_P@") - set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)") - set(PACKAGE_VERSION_UNSUITABLE TRUE) -endif() diff --git a/cmake/utils.cmake b/cmake/utils.cmake new file mode 100644 index 0000000000..c7e9a06790 --- /dev/null +++ b/cmake/utils.cmake @@ -0,0 +1,34 @@ +# Copyright (c) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +macro(tbb_install_target target) + install(TARGETS ${target} + EXPORT TBBTargets + LIBRARY + DESTINATION ${CMAKE_INSTALL_LIBDIR} + NAMELINK_SKIP + COMPONENT runtime + RUNTIME + DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT runtime + ARCHIVE + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT devel) + + install(TARGETS ${target} + LIBRARY + DESTINATION ${CMAKE_INSTALL_LIBDIR} + NAMELINK_ONLY + COMPONENT devel) +endmacro() diff --git a/include/oneapi/tbb.h b/include/oneapi/tbb.h new file mode 100644 index 0000000000..011d863a0e --- /dev/null +++ b/include/oneapi/tbb.h @@ -0,0 +1,73 @@ +/* + Copyright (c) 2005-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_tbb_H +#define __TBB_tbb_H + +/** + This header bulk-includes declarations or definitions of all the functionality + provided by TBB (save for tbbmalloc and 3rd party dependent headers). + + If you use only a few TBB constructs, consider including specific headers only. + Any header listed below can be included independently of others. +**/ + +#include "oneapi/tbb/blocked_range.h" +#include "oneapi/tbb/blocked_range2d.h" +#include "oneapi/tbb/blocked_range3d.h" +#if TBB_PREVIEW_BLOCKED_RANGE_ND +#include "tbb/blocked_rangeNd.h" +#endif +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/combinable.h" +#include "oneapi/tbb/concurrent_hash_map.h" +#if TBB_PREVIEW_CONCURRENT_LRU_CACHE +#include "tbb/concurrent_lru_cache.h" +#endif +#include "oneapi/tbb/concurrent_priority_queue.h" +#include "oneapi/tbb/concurrent_queue.h" +#include "oneapi/tbb/concurrent_unordered_map.h" +#include "oneapi/tbb/concurrent_unordered_set.h" +#include "oneapi/tbb/concurrent_map.h" +#include "oneapi/tbb/concurrent_set.h" +#include "oneapi/tbb/concurrent_vector.h" +#include "oneapi/tbb/enumerable_thread_specific.h" +#include "oneapi/tbb/flow_graph.h" +#include "oneapi/tbb/global_control.h" +#include "oneapi/tbb/info.h" +#include "oneapi/tbb/null_mutex.h" +#include "oneapi/tbb/null_rw_mutex.h" +#include "oneapi/tbb/parallel_for.h" +#include "oneapi/tbb/parallel_for_each.h" +#include "oneapi/tbb/parallel_invoke.h" +#include "oneapi/tbb/parallel_pipeline.h" +#include "oneapi/tbb/parallel_reduce.h" +#include "oneapi/tbb/parallel_scan.h" +#include "oneapi/tbb/parallel_sort.h" +#include "oneapi/tbb/partitioner.h" +#include "oneapi/tbb/queuing_mutex.h" +#include "oneapi/tbb/queuing_rw_mutex.h" +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/spin_rw_mutex.h" +#include "oneapi/tbb/task.h" +#include "oneapi/tbb/task_arena.h" +#include "oneapi/tbb/task_group.h" +#include "oneapi/tbb/task_scheduler_observer.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/tick_count.h" +#include "oneapi/tbb/version.h" + +#endif /* __TBB_tbb_H */ diff --git a/include/oneapi/tbb/blocked_range.h b/include/oneapi/tbb/blocked_range.h new file mode 100644 index 0000000000..6452170992 --- /dev/null +++ b/include/oneapi/tbb/blocked_range.h @@ -0,0 +1,163 @@ +/* + Copyright (c) 2005-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_blocked_range_H +#define __TBB_blocked_range_H + +#include + +#include "detail/_range_common.h" +#include "detail/_namespace_injection.h" + +#include "version.h" + +namespace tbb { +namespace detail { +namespace d1 { + +/** \page range_req Requirements on range concept + Class \c R implementing the concept of range must define: + - \code R::R( const R& ); \endcode Copy constructor + - \code R::~R(); \endcode Destructor + - \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges + - \code bool R::empty() const; \endcode True if range is empty + - \code R::R( R& r, split ); \endcode Split range \c r into two subranges. +**/ + +//! A range over which to iterate. +/** @ingroup algorithms */ +template +class blocked_range { +public: + //! Type of a value + /** Called a const_iterator for sake of algorithms that need to treat a blocked_range + as an STL container. */ + using const_iterator = Value; + + //! Type for size of a range + using size_type = std::size_t; + + //! Construct range over half-open interval [begin,end), with the given grainsize. + blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) : + my_end(end_), my_begin(begin_), my_grainsize(grainsize_) + { + __TBB_ASSERT( my_grainsize>0, "grainsize must be positive" ); + } + + //! Beginning of range. + const_iterator begin() const { return my_begin; } + + //! One past last value in range. + const_iterator end() const { return my_end; } + + //! Size of the range + /** Unspecified if end() + friend class blocked_range2d; + + template + friend class blocked_range3d; + + template + friend class blocked_rangeNd_impl; +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_blocked_range_H */ diff --git a/include/oneapi/tbb/blocked_range2d.h b/include/oneapi/tbb/blocked_range2d.h new file mode 100644 index 0000000000..f3d3b8f596 --- /dev/null +++ b/include/oneapi/tbb/blocked_range2d.h @@ -0,0 +1,108 @@ +/* + Copyright (c) 2005-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range2d_H +#define __TBB_blocked_range2d_H + +#include + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A 2-dimensional range that models the Range concept. +/** @ingroup algorithms */ +template +class blocked_range2d { +public: + //! 
Type for size of an iteration range + using row_range_type = blocked_range; + using col_range_type = blocked_range; + +private: + row_range_type my_rows; + col_range_type my_cols; + +public: + blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, + ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : + my_rows(row_begin,row_end,row_grainsize), + my_cols(col_begin,col_end,col_grainsize) + {} + + blocked_range2d( RowValue row_begin, RowValue row_end, + ColValue col_begin, ColValue col_end ) : + my_rows(row_begin,row_end), + my_cols(col_begin,col_end) + {} + + //! True if range is empty + bool empty() const { + // Range is empty if at least one dimension is empty. + return my_rows.empty() || my_cols.empty(); + } + + //! True if range is divisible into two pieces. + bool is_divisible() const { + return my_rows.is_divisible() || my_cols.is_divisible(); + } + + blocked_range2d( blocked_range2d& r, split ) : + my_rows(r.my_rows), + my_cols(r.my_cols) + { + split split_obj; + do_split(r, split_obj); + } + + blocked_range2d( blocked_range2d& r, proportional_split& proportion ) : + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, proportion); + } + + //! The rows of the iteration space + const row_range_type& rows() const { return my_rows; } + + //! The columns of the iteration space + const col_range_type& cols() const { return my_cols; } + +private: + template + void do_split( blocked_range2d& r, Split& split_obj ) { + if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); + } + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range2d; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_range2d_H */ diff --git a/include/oneapi/tbb/blocked_range3d.h b/include/oneapi/tbb/blocked_range3d.h new file mode 100644 index 0000000000..08bb491f0a --- /dev/null +++ b/include/oneapi/tbb/blocked_range3d.h @@ -0,0 +1,127 @@ +/* + Copyright (c) 2005-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range3d_H +#define __TBB_blocked_range3d_H + +#include + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A 3-dimensional range that models the Range concept. +/** @ingroup algorithms */ +template +class blocked_range3d { +public: + //! 
Type for size of an iteration range + using page_range_type = blocked_range; + using row_range_type = blocked_range; + using col_range_type = blocked_range; + +private: + page_range_type my_pages; + row_range_type my_rows; + col_range_type my_cols; + +public: + + blocked_range3d( PageValue page_begin, PageValue page_end, + RowValue row_begin, RowValue row_end, + ColValue col_begin, ColValue col_end ) : + my_pages(page_begin,page_end), + my_rows(row_begin,row_end), + my_cols(col_begin,col_end) + {} + + blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize, + RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, + ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : + my_pages(page_begin,page_end,page_grainsize), + my_rows(row_begin,row_end,row_grainsize), + my_cols(col_begin,col_end,col_grainsize) + {} + + //! True if range is empty + bool empty() const { + // Range is empty if at least one dimension is empty. + return my_pages.empty() || my_rows.empty() || my_cols.empty(); + } + + //! True if range is divisible into two pieces. + bool is_divisible() const { + return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible(); + } + + blocked_range3d( blocked_range3d& r, split split_obj ) : + my_pages(r.my_pages), + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, split_obj); + } + + blocked_range3d( blocked_range3d& r, proportional_split& proportion ) : + my_pages(r.my_pages), + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, proportion); + } + + //! The pages of the iteration space + const page_range_type& pages() const { return my_pages; } + + //! The rows of the iteration space + const row_range_type& rows() const { return my_rows; } + + //! The columns of the iteration space + const col_range_type& cols() const { return my_cols; } + +private: + template + void do_split( blocked_range3d& r, Split& split_obj) { + if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) { + if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); + } + } else { + if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj); + } + } + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range3d; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_range3d_H */ diff --git a/include/oneapi/tbb/blocked_rangeNd.h b/include/oneapi/tbb/blocked_rangeNd.h new file mode 100644 index 0000000000..3b2677d07a --- /dev/null +++ b/include/oneapi/tbb/blocked_rangeNd.h @@ -0,0 +1,144 @@ +/* + Copyright (c) 2017-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_rangeNd_H +#define __TBB_blocked_rangeNd_H + +#if !TBB_PREVIEW_BLOCKED_RANGE_ND + #error Set TBB_PREVIEW_BLOCKED_RANGE_ND to include blocked_rangeNd.h +#endif + +#include // std::any_of +#include +#include +#include // std::is_same, std::enable_if + +#include "detail/_config.h" +#include "detail/_template_helpers.h" // index_sequence, make_index_sequence + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +/* + The blocked_rangeNd_impl uses make_index_sequence to automatically generate a ctor with + exactly N arguments of the type tbb::blocked_range. Such ctor provides an opportunity + to use braced-init-list parameters to initialize each dimension. + Use of parameters, whose representation is a braced-init-list, but they're not + std::initializer_list or a reference to one, produces a non-deduced context + within template argument deduction. + + NOTE: blocked_rangeNd must be exactly a templated alias to the blocked_rangeNd_impl + (and not e.g. a derived class), otherwise it would need to declare its own ctor + facing the same problem that the impl class solves. +*/ + +template> +class blocked_rangeNd_impl; + +template +class blocked_rangeNd_impl> { +public: + //! Type of a value. + using value_type = Value; + +private: + //! Helper type to construct range with N tbb::blocked_range objects. + template + using dim_type_helper = tbb::blocked_range; + +public: + blocked_rangeNd_impl() = delete; + + //! Constructs N-dimensional range over N half-open intervals each represented as tbb::blocked_range. + blocked_rangeNd_impl(const dim_type_helper&... args) : my_dims{ {args...} } {} + + //! Dimensionality of a range. + static constexpr unsigned int ndims() { return N; } + + //! Range in certain dimension. + const tbb::blocked_range& dim(unsigned int dimension) const { + __TBB_ASSERT(dimension < N, "out of bound"); + return my_dims[dimension]; + } + + //------------------------------------------------------------------------ + // Methods that implement Range concept + //------------------------------------------------------------------------ + + //! True if at least one dimension is empty. + bool empty() const { + return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range& d) { + return d.empty(); + }); + } + + //! True if at least one dimension is divisible. + bool is_divisible() const { + return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range& d) { + return d.is_divisible(); + }); + } + + blocked_rangeNd_impl(blocked_rangeNd_impl& r, proportional_split proportion) : my_dims(r.my_dims) { + do_split(r, proportion); + } + + blocked_rangeNd_impl(blocked_rangeNd_impl& r, split proportion) : my_dims(r.my_dims) { + do_split(r, proportion); + } + +private: + static_assert(N != 0, "zero dimensional blocked_rangeNd can't be constructed"); + + //! Ranges in each dimension. 
+ std::array, N> my_dims; + + template + void do_split(blocked_rangeNd_impl& r, split_type proportion) { + static_assert((std::is_same::value || std::is_same::value), "type of split object is incorrect"); + __TBB_ASSERT(r.is_divisible(), "can't split not divisible range"); + + auto my_it = std::max_element(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range& first, const tbb::blocked_range& second) { + return (first.size() * second.grainsize() < second.size() * first.grainsize()); + }); + + auto r_it = r.my_dims.begin() + (my_it - my_dims.begin()); + + my_it->my_begin = tbb::blocked_range::do_split(*r_it, proportion); + + // (!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin)) equals to + // (my_it->my_begin == r_it->my_end), but we can't use operator== due to Value concept + __TBB_ASSERT(!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin), + "blocked_range has been split incorrectly"); + } +}; + +template +using blocked_rangeNd = blocked_rangeNd_impl; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_rangeNd; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_rangeNd_H */ + diff --git a/include/oneapi/tbb/cache_aligned_allocator.h b/include/oneapi/tbb/cache_aligned_allocator.h new file mode 100644 index 0000000000..0630abe7ec --- /dev/null +++ b/include/oneapi/tbb/cache_aligned_allocator.h @@ -0,0 +1,187 @@ +/* + Copyright (c) 2005-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_cache_aligned_allocator_H +#define __TBB_cache_aligned_allocator_H + +#include "detail/_utils.h" +#include "detail/_namespace_injection.h" +#include +#include + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#include +#endif + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); +void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); +std::size_t __TBB_EXPORTED_FUNC cache_line_size(); +} + +namespace d1 { + +template +class cache_aligned_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! Always defined for TBB containers (supported since C++17 for std containers) + using is_always_equal = std::true_type; + + cache_aligned_allocator() = default; + template cache_aligned_allocator(const cache_aligned_allocator&) noexcept {} + + //! Allocate space for n objects, starting on a cache/sector line. + T* allocate(std::size_t n) { + return static_cast(r1::cache_aligned_allocate(n * sizeof(value_type))); + } + + //! Free block of memory that starts on a cache line + void deallocate(T* p, std::size_t) { + r1::cache_aligned_deallocate(p); + } + + //! Largest value for which method allocate might succeed. 
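+
+    /* Illustrative usage sketch (added for exposition, not part of the original header):
+       the allocator drops into any standard container so that each allocation starts on
+       its own cache line, e.g.
+
+           #include "oneapi/tbb/cache_aligned_allocator.h"
+           #include <vector>
+
+           // The vector's backing store begins on a cache-line boundary, which helps
+           // avoid false sharing with neighbouring allocations. Padding individual
+           // elements to a full cache line is still the caller's responsibility.
+           std::vector<int, tbb::cache_aligned_allocator<int>> hot_counters(8);
+    */
+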
+ std::size_t max_size() const noexcept { + return (~std::size_t(0) - r1::cache_line_size()) / sizeof(value_type); + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template struct rebind { + using other = cache_aligned_allocator; + }; + template + void construct(U *p, Args&&... args) + { ::new (p) U(std::forward(args)...); } + void destroy(pointer p) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class cache_aligned_allocator { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template struct rebind { + using other = cache_aligned_allocator; + }; + }; +#endif + +template +bool operator==(const cache_aligned_allocator&, const cache_aligned_allocator&) noexcept { return true; } + +template +bool operator!=(const cache_aligned_allocator&, const cache_aligned_allocator&) noexcept { return false; } + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +//! C++17 memory resource wrapper to ensure cache line size alignment +class cache_aligned_resource : public std::pmr::memory_resource { +public: + cache_aligned_resource() : cache_aligned_resource(std::pmr::get_default_resource()) {} + explicit cache_aligned_resource(std::pmr::memory_resource* upstream) : m_upstream(upstream) {} + + std::pmr::memory_resource* upstream_resource() const { + return m_upstream; + } + +private: + //! We don't know what memory resource set. Use padding to guarantee alignment + void* do_allocate(std::size_t bytes, std::size_t alignment) override { + // TODO: make it common with tbb_allocator.cpp + std::size_t cache_line_alignment = correct_alignment(alignment); + std::size_t space = correct_size(bytes) + cache_line_alignment; + std::uintptr_t base = reinterpret_cast(m_upstream->allocate(space)); + __TBB_ASSERT(base != 0, "Upstream resource returned NULL."); + + // Round up to the next cache line (align the base address) + std::uintptr_t result = (base + cache_line_alignment) & ~(cache_line_alignment - 1); + __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Can`t store a base pointer to the header"); + __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); + + // Record where block actually starts. 
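+        // (Explanatory note added for clarity, not in the original source: `result` is
+        //  `base` rounded up to the next boundary of the corrected alignment, and the
+        //  std::uintptr_t slot immediately before `result` stores `base`, so
+        //  do_deallocate() can hand the original, unpadded block back to the upstream
+        //  resource.)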
+ (reinterpret_cast(result))[-1] = base; + return reinterpret_cast(result); + } + + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) override { + if (ptr) { + // Recover where block actually starts + std::uintptr_t base = (reinterpret_cast(ptr))[-1]; + m_upstream->deallocate(reinterpret_cast(base), correct_size(bytes) + correct_alignment(alignment)); + } + } + + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + if (this == &other) { return true; } +#if __TBB_USE_OPTIONAL_RTTI + const cache_aligned_resource* other_res = dynamic_cast(&other); + return other_res && (upstream_resource() == other_res->upstream_resource()); +#else + return false; +#endif + } + + std::size_t correct_alignment(std::size_t alignment) { + __TBB_ASSERT(tbb::detail::is_power_of_two(alignment), "Alignment is not a power of 2"); +#if __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT + std::size_t cache_line_size = std::hardware_destructive_interference_size; +#else + std::size_t cache_line_size = r1::cache_line_size(); +#endif + return alignment < cache_line_size ? cache_line_size : alignment; + } + + std::size_t correct_size(std::size_t bytes) { + // To handle the case, when small size requested. There could be not + // enough space to store the original pointer. + return bytes < sizeof(std::uintptr_t) ? sizeof(std::uintptr_t) : bytes; + } + + std::pmr::memory_resource* m_upstream; +}; + +#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::cache_aligned_allocator; +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +using detail::d1::cache_aligned_resource; +#endif +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_cache_aligned_allocator_H */ + diff --git a/include/oneapi/tbb/combinable.h b/include/oneapi/tbb/combinable.h new file mode 100644 index 0000000000..774bde76f7 --- /dev/null +++ b/include/oneapi/tbb/combinable.h @@ -0,0 +1,69 @@ +/* + Copyright (c) 2005-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_combinable_H +#define __TBB_combinable_H + +#include "detail/_namespace_injection.h" + +#include "enumerable_thread_specific.h" +#include "cache_aligned_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { +/** \name combinable **/ +//@{ +//! 
Thread-local storage with optional reduction
+/** @ingroup containers */
+template <typename T>
+class combinable {
+    using my_alloc = typename tbb::cache_aligned_allocator<T>;
+    using my_ets_type = typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key>;
+    my_ets_type my_ets;
+
+public:
+    combinable() = default;
+
+    template <typename Finit>
+    explicit combinable(Finit _finit) : my_ets(_finit) { }
+
+    void clear() { my_ets.clear(); }
+
+    T& local() { return my_ets.local(); }
+
+    T& local(bool& exists) { return my_ets.local(exists); }
+
+    // combine_func_t has signature T(T,T) or T(const T&, const T&)
+    template <typename CombineFunc>
+    T combine(CombineFunc f_combine) { return my_ets.combine(f_combine); }
+
+    // combine_func_t has signature void(T) or void(const T&)
+    template <typename CombineFunc>
+    void combine_each(CombineFunc f_combine) { my_ets.combine_each(f_combine); }
+};
+
+} // namespace d1
+} // namespace detail
+
+inline namespace v1 {
+using detail::d1::combinable;
+} // inline namespace v1
+
+} // namespace tbb
+
+#endif /* __TBB_combinable_H */
+
diff --git a/include/oneapi/tbb/concurrent_hash_map.h b/include/oneapi/tbb/concurrent_hash_map.h
new file mode 100644
index 0000000000..4d76297f23
--- /dev/null
+++ b/include/oneapi/tbb/concurrent_hash_map.h
@@ -0,0 +1,1509 @@
+/*
+    Copyright (c) 2005-2020 Intel Corporation
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+*/ + +#ifndef __TBB_concurrent_hash_map_H +#define __TBB_concurrent_hash_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_utils.h" +#include "detail/_assert.h" +#include "detail/_allocator_traits.h" +#include "detail/_containers_helpers.h" +#include "detail/_template_helpers.h" +#include "detail/_hash_compare.h" +#include "detail/_range_common.h" +#include "tbb_allocator.h" +#include "spin_rw_mutex.h" + +#include +#include +#include +#include +#include // Need std::pair +#include // Need std::memset + +namespace tbb { +namespace detail { +namespace d1 { + +struct hash_map_node_base : no_copy { + using mutex_type = spin_rw_mutex; + // Scoped lock type for mutex + using scoped_type = mutex_type::scoped_lock; + // Next node in chain + hash_map_node_base* next; + mutex_type mutex; +}; + +// Incompleteness flag value +static hash_map_node_base* const rehash_req = reinterpret_cast(std::size_t(3)); +// Rehashed empty bucket flag +static hash_map_node_base* const empty_rehashed = reinterpret_cast(std::size_t(0)); + +// base class of concurrent_hash_map + +template +class hash_map_base { +public: + using size_type = std::size_t; + using hashcode_type = std::size_t; + using segment_index_type = std::size_t; + using node_base = hash_map_node_base; + + struct bucket : no_copy { + using mutex_type = spin_rw_mutex; + using scoped_type = mutex_type::scoped_lock; + mutex_type mutex; + std::atomic node_list; + }; + + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits; + using bucket_allocator_type = typename allocator_traits_type::template rebind_alloc; + using bucket_allocator_traits = tbb::detail::allocator_traits; + + // Count of segments in the first block + static constexpr size_type embedded_block = 1; + // Count of segments in the first block + static constexpr size_type embedded_buckets = 1 << embedded_block; + // Count of segments in the first block + static constexpr size_type first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096 + // Size of a pointer / table size + static constexpr size_type pointers_per_table = sizeof(segment_index_type) * 8; // one segment per bit + + using segment_ptr_type = bucket*; + using atomic_segment_type = std::atomic; + using segments_table_type = atomic_segment_type[pointers_per_table]; + + hash_map_base( const allocator_type& alloc ) : my_allocator(alloc), my_mask(embedded_buckets - 1), my_size(0) { + for (size_type i = 0; i != embedded_buckets; ++i) { + my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); + } + + for (size_type segment_index = 0; segment_index < pointers_per_table; ++segment_index) { + auto argument = segment_index < embedded_block ? 
my_embedded_segment + segment_base(segment_index) : nullptr; + bucket_allocator_traits::construct(my_allocator, &my_table[segment_index], argument); + } + + __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); + } + + ~hash_map_base() { + for (size_type segment_index = 0; segment_index < pointers_per_table; ++segment_index) { + bucket_allocator_traits::destroy(my_allocator, &my_table[segment_index]); + } + } + + // segment index of given index in the array + static segment_index_type segment_index_of( size_type index ) { + return segment_index_type(tbb::detail::log2( index|1 )); + } + + // the first array index of given segment + static segment_index_type segment_base( segment_index_type k ) { + return (segment_index_type(1) << k & ~segment_index_type(1)); + } + + // segment size except for k == 0 + static size_type segment_size( segment_index_type k ) { + return size_type(1) << k; // fake value for k==0 + } + + // true if ptr is valid pointer + static bool is_valid( void* ptr ) { + return reinterpret_cast(ptr) > uintptr_t(63); + } + + // Initialize buckets + static void init_buckets( segment_ptr_type ptr, size_type sz, bool is_initial ) { + if (is_initial) { + std::memset(static_cast(ptr), 0, sz * sizeof(bucket)); + } else { + for(size_type i = 0; i < sz; i++, ptr++) { + *reinterpret_cast(&ptr->mutex) = 0; + ptr->node_list.store(rehash_req, std::memory_order_relaxed); + } + } + } + + // Add node n to bucket b + static void add_to_bucket( bucket* b, node_base* n ) { + __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); + n->next = b->node_list.load(std::memory_order_relaxed); + b->node_list.store(n, std::memory_order_relaxed); // its under lock and flag is set + } + + const bucket_allocator_type& get_allocator() const { + return my_allocator; + } + + bucket_allocator_type& get_allocator() { + return my_allocator; + } + + // Enable segment + void enable_segment( segment_index_type k, bool is_initial = false ) { + __TBB_ASSERT( k, "Zero segment must be embedded" ); + size_type sz; + __TBB_ASSERT( !is_valid(my_table[k].load(std::memory_order_relaxed)), "Wrong concurrent assignment"); + if (k >= first_block) { + sz = segment_size(k); + segment_ptr_type ptr = nullptr; + try_call( [&] { + ptr = bucket_allocator_traits::allocate(my_allocator, sz); + } ).on_exception( [&] { + my_table[k].store(nullptr, std::memory_order_relaxed); + }); + + __TBB_ASSERT(ptr, nullptr); + init_buckets(ptr, sz, is_initial); + my_table[k].store(ptr, std::memory_order_release); + sz <<= 1;// double it to get entire capacity of the container + } else { // the first block + __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); + sz = segment_size(first_block); + segment_ptr_type ptr = nullptr; + try_call( [&] { + ptr = bucket_allocator_traits::allocate(my_allocator, sz - embedded_buckets); + } ).on_exception( [&] { + my_table[k].store(nullptr, std::memory_order_relaxed); + }); + + __TBB_ASSERT(ptr, nullptr); + init_buckets(ptr, sz - embedded_buckets, is_initial); + ptr -= segment_base(embedded_block); + for(segment_index_type i = embedded_block; i < first_block; i++) // calc the offsets + my_table[i].store(ptr + segment_base(i), std::memory_order_release); + } + my_mask.store(sz-1, std::memory_order_release); + } + + void delete_segment( segment_index_type s ) { + segment_ptr_type buckets_ptr = my_table[s].load(std::memory_order_relaxed); + size_type sz = segment_size( s ? 
s : 1 ); + + if (s >= first_block) // the first segment or the next + bucket_allocator_traits::deallocate(my_allocator, buckets_ptr, sz); + else if (s == embedded_block && embedded_block != first_block) + bucket_allocator_traits::deallocate(my_allocator, buckets_ptr, + segment_size(first_block) - embedded_buckets); + if (s >= embedded_block) my_table[s].store(nullptr, std::memory_order_relaxed); + } + + // Get bucket by (masked) hashcode + bucket *get_bucket( hashcode_type h ) const noexcept { + segment_index_type s = segment_index_of( h ); + h -= segment_base(s); + segment_ptr_type seg = my_table[s].load(std::memory_order_acquire); + __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); + return &seg[h]; + } + + // detail serial rehashing helper + void mark_rehashed_levels( hashcode_type h ) noexcept { + segment_index_type s = segment_index_of( h ); + while (segment_ptr_type seg = my_table[++s].load(std::memory_order_relaxed)) + if( seg[h].node_list.load(std::memory_order_relaxed) == rehash_req ) { + seg[h].node_list.store(empty_rehashed, std::memory_order_relaxed); + mark_rehashed_levels( h + ((hashcode_type)1<node_list.load(std::memory_order_acquire) != rehash_req ) { + return true; + } + } + return false; + } + + // Insert a node and check for load factor. @return segment index to enable. + segment_index_type insert_new_node( bucket *b, node_base *n, hashcode_type mask ) { + size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted + add_to_bucket( b, n ); + // check load factor + if( sz >= mask ) { // TODO: add custom load_factor + segment_index_type new_seg = tbb::detail::log2( mask+1 ); //optimized segment_index_of + __TBB_ASSERT( is_valid(my_table[new_seg-1].load(std::memory_order_relaxed)), "new allocations must not publish new mask until segment has allocated"); + static const segment_ptr_type is_allocating = segment_ptr_type(2);; + segment_ptr_type disabled = nullptr; + if (!(my_table[new_seg].load(std::memory_order_acquire)) + && my_table[new_seg].compare_exchange_strong(disabled, is_allocating)) + return new_seg; // The value must be processed + } + return 0; + } + + // Prepare enough segments for number of buckets + void reserve(size_type buckets) { + if( !buckets-- ) return; + bool is_initial = !my_size.load(std::memory_order_relaxed); + for (size_type m = my_mask.load(std::memory_order_relaxed); buckets > m; + m = my_mask.load(std::memory_order_relaxed)) + { + enable_segment( segment_index_of( m+1 ), is_initial ); + } + } + + // Swap hash_map_bases + void internal_swap_content(hash_map_base &table) { + using std::swap; + swap_atomics_relaxed(my_mask, table.my_mask); + swap_atomics_relaxed(my_size, table.my_size); + + for(size_type i = 0; i < embedded_buckets; i++) { + auto temp = my_embedded_segment[i].node_list.load(std::memory_order_relaxed); + my_embedded_segment[i].node_list.store(table.my_embedded_segment[i].node_list.load(std::memory_order_relaxed), + std::memory_order_relaxed); + table.my_embedded_segment[i].node_list.store(temp, std::memory_order_relaxed); + } + for(size_type i = embedded_block; i < pointers_per_table; i++) { + auto temp = my_table[i].load(std::memory_order_relaxed); + my_table[i].store(table.my_table[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + table.my_table[i].store(temp, std::memory_order_relaxed); + } + } + + void internal_move(hash_map_base&& other) { + my_mask.store(other.my_mask.load(std::memory_order_relaxed), std::memory_order_relaxed); + 
other.my_mask.store(embedded_buckets - 1, std::memory_order_relaxed); + + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(0, std::memory_order_relaxed); + + for (size_type i = 0; i < embedded_buckets; ++i) { + my_embedded_segment[i].node_list.store(other.my_embedded_segment[i].node_list, std::memory_order_relaxed); + other.my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); + } + + for (size_type i = embedded_block; i < pointers_per_table; ++i) { + my_table[i].store(other.my_table[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + other.my_table[i].store(nullptr, std::memory_order_relaxed); + } + } + +protected: + + bucket_allocator_type my_allocator; + // Hash mask = sum of allocated segment sizes - 1 + std::atomic my_mask; + // Size of container in stored items + std::atomic my_size; // It must be in separate cache line from my_mask due to performance effects + // Zero segment + bucket my_embedded_segment[embedded_buckets]; + // Segment pointers table. Also prevents false sharing between my_mask and my_size + segments_table_type my_table; +}; + +template +class hash_map_range; + +// Meets requirements of a forward iterator for STL +// Value is either the T or const T type of the container. +template +class hash_map_iterator { + using map_type = Container; + using node = typename Container::node; + using map_base = typename Container::base_type; + using node_base = typename map_base::node_base; + using bucket = typename map_base::bucket; +public: + using value_type = Value; + using size_type = typename Container::size_type; + using difference_type = typename Container::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::forward_iterator_tag; + + // Construct undefined iterator + hash_map_iterator(): my_map(), my_index(), my_bucket(), my_node() {} + hash_map_iterator( const hash_map_iterator& other ) : + my_map(other.my_map), + my_index(other.my_index), + my_bucket(other.my_bucket), + my_node(other.my_node) + {} + + hash_map_iterator& operator=( const hash_map_iterator& other ) { + my_map = other.my_map; + my_index = other.my_index; + my_bucket = other.my_bucket; + my_node = other.my_node; + return *this; + } + + Value& operator*() const { + __TBB_ASSERT( map_base::is_valid(my_node), "iterator uninitialized or at end of container?" 
); + return my_node->value(); + } + + Value* operator->() const {return &operator*();} + + hash_map_iterator& operator++() { + my_node = static_cast( my_node->next ); + if( !my_node ) advance_to_next_bucket(); + return *this; + } + + // Post increment + hash_map_iterator operator++(int) { + hash_map_iterator old(*this); + operator++(); + return old; + } +private: + template + friend bool operator==( const hash_map_iterator& i, const hash_map_iterator& j ); + + template + friend bool operator!=( const hash_map_iterator& i, const hash_map_iterator& j ); + + template + friend ptrdiff_t operator-( const hash_map_iterator& i, const hash_map_iterator& j ); + + template + friend class hash_map_iterator; + + template + friend class hash_map_range; + + void advance_to_next_bucket() { // TODO?: refactor to iterator_base class + size_t k = my_index+1; + __TBB_ASSERT( my_bucket, "advancing an invalid iterator?"); + while (k <= my_map->my_mask.load(std::memory_order_relaxed)) { + // Following test uses 2's-complement wizardry + if( k&(k-2) ) // not the beginning of a segment + ++my_bucket; + else my_bucket = my_map->get_bucket( k ); + my_node = static_cast( my_bucket->node_list.load(std::memory_order_relaxed) ); + if( map_base::is_valid(my_node) ) { + my_index = k; return; + } + ++k; + } + my_bucket = 0; my_node = 0; my_index = k; // the end + } + + template + friend class concurrent_hash_map; + + hash_map_iterator( const Container &map, std::size_t index, const bucket *b, node_base *n ) : + my_map(&map), my_index(index), my_bucket(b), my_node(static_cast(n)) + { + if( b && !map_base::is_valid(n) ) + advance_to_next_bucket(); + } + + // concurrent_hash_map over which we are iterating. + const Container *my_map; + // Index in hash table for current item + size_t my_index; + // Pointer to bucket + const bucket* my_bucket; + // Pointer to node that has current item + node* my_node; +}; + +template +bool operator==( const hash_map_iterator& i, const hash_map_iterator& j ) { + return i.my_node == j.my_node && i.my_map == j.my_map; +} + +template +bool operator!=( const hash_map_iterator& i, const hash_map_iterator& j ) { + return i.my_node != j.my_node || i.my_map != j.my_map; +} + +// Range class used with concurrent_hash_map +template +class hash_map_range { + using map_type = typename Iterator::map_type; +public: + // Type for size of a range + using size_type = std::size_t; + using value_type = typename Iterator::value_type; + using reference = typename Iterator::reference; + using difference_type = typename Iterator::difference_type; + using iterator = Iterator; + + // True if range is empty. + bool empty() const {return my_begin == my_end;} + + // True if range can be partitioned into two subranges. + bool is_divisible() const { + return my_midpoint != my_end; + } + + // Split range. 
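+
+    /* Illustrative usage sketch (added for exposition, not part of the original header;
+       process() is a placeholder): this range type is what concurrent_hash_map::range()
+       returns, so a table can be traversed in parallel when no concurrent insertions or
+       erasures are running, e.g.
+
+           using table_t = tbb::concurrent_hash_map<int, std::string>;
+           table_t table;  // assumed to be populated elsewhere
+
+           tbb::parallel_for(table.range(), [](const table_t::range_type& r) {
+               for (auto it = r.begin(); it != r.end(); ++it)
+                   process(it->first, it->second);
+           });
+    */
+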
+ hash_map_range( hash_map_range& r, split ) : + my_end(r.my_end), + my_grainsize(r.my_grainsize) + { + r.my_end = my_begin = r.my_midpoint; + __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" ); + __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" ); + set_midpoint(); + r.set_midpoint(); + } + + // Init range with container and grainsize specified + hash_map_range( const map_type &map, size_type grainsize_ = 1 ) : + my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list.load(std::memory_order_relaxed) ) ), + my_end( Iterator( map, map.my_mask.load(std::memory_order_relaxed) + 1, 0, 0 ) ), + my_grainsize( grainsize_ ) + { + __TBB_ASSERT( grainsize_>0, "grainsize must be positive" ); + set_midpoint(); + } + + const Iterator begin() const { return my_begin; } + const Iterator end() const { return my_end; } + // The grain size for this range. + size_type grainsize() const { return my_grainsize; } + +private: + Iterator my_begin; + Iterator my_end; + mutable Iterator my_midpoint; + size_t my_grainsize; + // Set my_midpoint to point approximately half way between my_begin and my_end. + void set_midpoint() const; + template friend class hash_map_range; +}; + +template +void hash_map_range::set_midpoint() const { + // Split by groups of nodes + size_t m = my_end.my_index-my_begin.my_index; + if( m > my_grainsize ) { + m = my_begin.my_index + m/2u; + auto b = my_begin.my_map->get_bucket(m); + my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list.load(std::memory_order_relaxed)); + } else { + my_midpoint = my_end; + } + __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index, + "my_begin is after my_midpoint" ); + __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index, + "my_midpoint is after my_end" ); + __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end, + "[my_begin, my_midpoint) range should not be empty" ); +} + +template , + typename Allocator = tbb_allocator>> +class concurrent_hash_map : protected hash_map_base { + template + friend class hash_map_iterator; + + template + friend class hash_map_range; + using allocator_traits_type = tbb::detail::allocator_traits; +public: + using base_type = hash_map_base; + using key_type = Key; + using mapped_type = T; + using allocator_type = Allocator; + using value_type = std::pair; + using size_type = typename base_type::size_type; + using difference_type = std::ptrdiff_t; + + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using reference = value_type&; + using const_reference = const value_type&; + using iterator = hash_map_iterator; + using const_iterator = hash_map_iterator; + using range_type = hash_map_range; + using const_range_type = hash_map_range; + + +protected: + static_assert(std::is_same::value, + "value_type of the container must be the same as its allocator's"); + + friend class const_accessor; + class node; + using segment_index_type = typename base_type::segment_index_type; + using segment_ptr_type = typename base_type::segment_ptr_type; + using node_base = typename base_type::node_base; + using bucket = typename base_type::bucket; + using hashcode_type = typename base_type::hashcode_type; + using bucket_allocator_type = typename base_type::bucket_allocator_type; + using node_allocator_type = typename base_type::allocator_traits_type::template rebind_alloc; + using node_allocator_traits = tbb::detail::allocator_traits; + HashCompare my_hash_compare; + + class node 
: public node_base { + public: + node() {} + ~node() {} + pointer storage() { return &my_value; } + value_type& value() { return *storage(); } + private: + union { + value_type my_value; + }; + }; + + void delete_node( node_base *n ) { + node_allocator_type node_allocator(this->get_allocator()); + node_allocator_traits::destroy(node_allocator, static_cast(n)->storage()); + node_allocator_traits::destroy(node_allocator, static_cast(n)); + node_allocator_traits::deallocate(node_allocator, static_cast(n), 1); + } + + template + static node* create_node(bucket_allocator_type& allocator, Args&&... args) { + node_allocator_type node_allocator(allocator); + node* node_ptr = node_allocator_traits::allocate(node_allocator, 1); + auto guard = make_raii_guard([&] { + node_allocator_traits::destroy(node_allocator, node_ptr); + node_allocator_traits::deallocate(node_allocator, node_ptr, 1); + }); + + node_allocator_traits::construct(node_allocator, node_ptr); + node_allocator_traits::construct(node_allocator, node_ptr->storage(), std::forward(args)...); + guard.dismiss(); + return node_ptr; + } + + static node* allocate_node_copy_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ + return create_node(allocator, key, *t); + } + + static node* allocate_node_move_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ + return create_node(allocator, key, std::move(*const_cast(t))); + } + + static node* allocate_node_default_construct(bucket_allocator_type& allocator, const Key &key, const T * ){ + // Emplace construct an empty T object inside the pair + return create_node(allocator, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + } + + static node* do_not_allocate_node(bucket_allocator_type& , const Key &, const T * ){ + __TBB_ASSERT(false,"this dummy function should not be called"); + return nullptr; + } + + node *search_bucket( const key_type &key, bucket *b ) const { + node *n = static_cast( b->node_list.load(std::memory_order_relaxed) ); + while (this->is_valid(n) && !my_hash_compare.equal(key, n->value().first)) + n = static_cast( n->next ); + __TBB_ASSERT(n != rehash_req, "Search can be executed only for rehashed bucket"); + return n; + } + + // bucket accessor is to find, rehash, acquire a lock, and access a bucket + class bucket_accessor : public bucket::scoped_type { + bucket *my_b; + public: + bucket_accessor( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { acquire( base, h, writer ); } + // find a bucket by masked hashcode, optionally rehash, and acquire the lock + inline void acquire( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { + my_b = base->get_bucket( h ); + // TODO: actually, notification is unnecessary here, just hiding double-check + if( my_b->node_list.load(std::memory_order_acquire) == rehash_req + && bucket::scoped_type::try_acquire( my_b->mutex, /*write=*/true ) ) + { + if( my_b->node_list.load(std::memory_order_relaxed) == rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing + } + else bucket::scoped_type::acquire( my_b->mutex, writer ); + __TBB_ASSERT( my_b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); + } + // check whether bucket is locked for write + bool is_writer() { return bucket::scoped_type::m_is_writer; } + // get bucket pointer + bucket *operator() () { return my_b; } + }; + + // TODO refactor to hash_base + void rehash_bucket( bucket *b_new, const hashcode_type hash ) { + __TBB_ASSERT( 
*(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)"); + __TBB_ASSERT( hash > 1, "The lowermost buckets can't be rehashed" ); + b_new->node_list.store(empty_rehashed, std::memory_order_release); // mark rehashed + hashcode_type mask = (1u << tbb::detail::log2(hash)) - 1; // get parent mask from the topmost bit + bucket_accessor b_old( this, hash & mask ); + + mask = (mask<<1) | 1; // get full mask for new bucket + __TBB_ASSERT( (mask&(mask+1))==0 && (hash & mask) == hash, nullptr ); + restart: + node_base* prev = nullptr; + node_base* curr = b_old()->node_list.load(std::memory_order_acquire); + while (this->is_valid(curr)) { + hashcode_type curr_node_hash = my_hash_compare.hash(static_cast(curr)->value().first); + + if ((curr_node_hash & mask) == hash) { + if (!b_old.is_writer()) { + if (!b_old.upgrade_to_writer()) { + goto restart; // node ptr can be invalid due to concurrent erase + } + } + node_base* next = curr->next; + // exclude from b_old + if (prev == nullptr) { + b_old()->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + this->add_to_bucket(b_new, curr); + curr = next; + } else { + prev = curr; + curr = curr->next; + } + } + } + +public: + + class accessor; + // Combines data access, locking, and garbage collection. + class const_accessor : private node::scoped_type /*which derived from no_copy*/ { + friend class concurrent_hash_map; + friend class accessor; + public: + // Type of value + using value_type = const typename concurrent_hash_map::value_type; + + // True if result is empty. + bool empty() const { return !my_node; } + + // Set to null + void release() { + if( my_node ) { + node::scoped_type::release(); + my_node = 0; + } + } + + // Return reference to associated value in hash table. + const_reference operator*() const { + __TBB_ASSERT( my_node, "attempt to dereference empty accessor" ); + return my_node->value(); + } + + // Return pointer to associated value in hash table. + const_pointer operator->() const { + return &operator*(); + } + + // Create empty result + const_accessor() : my_node(nullptr) {} + + // Destroy result after releasing the underlying reference. + ~const_accessor() { + my_node = nullptr; // scoped lock's release() is called in its destructor + } + protected: + bool is_writer() { return node::scoped_type::m_is_writer; } + node *my_node; + hashcode_type my_hash; + }; + + // Allows write access to elements and combines data access, locking, and garbage collection. + class accessor: public const_accessor { + public: + // Type of value + using value_type = typename concurrent_hash_map::value_type; + + // Return reference to associated value in hash table. + reference operator*() const { + __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" ); + return this->my_node->value(); + } + + // Return pointer to associated value in hash table. + pointer operator->() const { + return &operator*(); + } + }; + + explicit concurrent_hash_map( const HashCompare& compare, const allocator_type& a = allocator_type() ) + : base_type(a) + , my_hash_compare(compare) + {} + + concurrent_hash_map() : concurrent_hash_map(HashCompare()) {} + + explicit concurrent_hash_map( const allocator_type& a ) + : concurrent_hash_map(HashCompare(), a) + {} + + // Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. 
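+    /* Illustrative note (added for exposition, not part of the original header; the
+       element count is an assumption): preallocating buckets in the ctor below avoids
+       rehashing while worker threads are inserting, e.g.
+
+           // Expecting on the order of a million elements, reserve roughly that many buckets.
+           tbb::concurrent_hash_map<int, int> table(1 << 20);
+
+       rehash() can grow the bucket count later; this version does not shrink it
+       (see the TODO in rehash()). */
+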
+ concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() ) + : concurrent_hash_map(a) + { + this->reserve(n); + } + + concurrent_hash_map( size_type n, const HashCompare& compare, const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + this->reserve(n); + } + + // Copy constructor + concurrent_hash_map( const concurrent_hash_map &table ) + : concurrent_hash_map(node_allocator_traits::select_on_container_copy_construction(table.get_allocator())) + { + try_call( [&] { + internal_copy(table); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( const concurrent_hash_map &table, const allocator_type &a) + : concurrent_hash_map(a) + { + try_call( [&] { + internal_copy(table); + }).on_exception( [&] { + this->clear(); + }); + } + + // Move constructor + concurrent_hash_map( concurrent_hash_map &&table ) + : concurrent_hash_map(std::move(table.get_allocator())) + { + this->internal_move(std::move(table)); + } + + // Move constructor + concurrent_hash_map( concurrent_hash_map &&table, const allocator_type &a ) + : concurrent_hash_map(a) + { + using is_equal_type = typename node_allocator_traits::is_always_equal; + internal_move_construct_with_allocator(std::move(table), a, is_equal_type()); + } + + // Construction with copying iteration range and given allocator instance + template + concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() ) + : concurrent_hash_map(a) + { + try_call( [&] { + internal_copy(first, last, std::distance(first, last)); + }).on_exception( [&] { + this->clear(); + }); + } + + template + concurrent_hash_map( I first, I last, const HashCompare& compare, const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + try_call( [&] { + internal_copy(first, last, std::distance(first, last)); + }).on_exception( [&] { + this->clear(); + }); + } + + // Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. + concurrent_hash_map( std::initializer_list il, const allocator_type &a = allocator_type() ) + : concurrent_hash_map(a) + { + try_call( [&] { + internal_copy(il.begin(), il.end(), il.size()); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( std::initializer_list il, const HashCompare& compare, const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + try_call( [&] { + internal_copy(il.begin(), il.end(), il.size()); + }).on_exception( [&] { + this->clear(); + }); + } + + // Assignment + concurrent_hash_map& operator=( const concurrent_hash_map &table ) { + if( this != &table ) { + clear(); + copy_assign_allocators(this->my_allocator, table.my_allocator); + internal_copy(table); + } + return *this; + } + + // Move Assignment + concurrent_hash_map& operator=( concurrent_hash_map &&table ) { + if( this != &table ) { + using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; + using is_equal_type = typename node_allocator_traits::is_always_equal; + move_assign_allocators(this->my_allocator, table.my_allocator); + internal_move_assign(std::move(table), tbb::detail::disjunction()); + } + return *this; + } + + // Assignment + concurrent_hash_map& operator=( std::initializer_list il ) { + clear(); + internal_copy(il.begin(), il.end(), il.size()); + return *this; + } + + // Rehashes and optionally resizes the whole table. + /** Useful to optimize performance before or after concurrent operations. 
+ Also enables using of find() and count() concurrent methods in serial context. */ + void rehash(size_type sz = 0) { + this->reserve(sz); // TODO: add reduction of number of buckets as well + hashcode_type mask = this->my_mask.load(std::memory_order_relaxed); + hashcode_type b = (mask+1)>>1; // size or first index of the last segment + __TBB_ASSERT((b&(b-1))==0, nullptr); // zero or power of 2 + bucket *bp = this->get_bucket( b ); // only the last segment should be scanned for rehashing + for(; b <= mask; b++, bp++ ) { + node_base *n = bp->node_list.load(std::memory_order_relaxed); + __TBB_ASSERT( this->is_valid(n) || n == empty_rehashed || n == rehash_req, "Broken detail structure" ); + __TBB_ASSERT( *reinterpret_cast(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" ); + if( n == rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one + hashcode_type h = b; bucket *b_old = bp; + do { + __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); + hashcode_type m = ( 1u<get_bucket( h &= m ); + } while( b_old->node_list.load(std::memory_order_relaxed) == rehash_req ); + // now h - is index of the root rehashed bucket b_old + this->mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments + node_base* prev = nullptr; + node_base* curr = b_old->node_list.load(std::memory_order_relaxed); + while (this->is_valid(curr)) { + hashcode_type curr_node_hash = my_hash_compare.hash(static_cast(curr)->value().first); + + if ((curr_node_hash & mask) != h) { // should be rehashed + node_base* next = curr->next; + // exclude from b_old + if (prev == nullptr) { + b_old->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + bucket *b_new = this->get_bucket(curr_node_hash & mask); + __TBB_ASSERT(b_new->node_list.load(std::memory_order_relaxed) != rehash_req, "hash() function changed for key in table or detail error" ); + this->add_to_bucket(b_new, curr); + curr = next; + } else { + prev = curr; + curr = curr->next; + } + } + } + } + } + + // Clear table + void clear() { + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + this->my_size.store(0, std::memory_order_relaxed); + segment_index_type s = this->segment_index_of( m ); + __TBB_ASSERT( s+1 == this->pointers_per_table || !this->my_table[s+1].load(std::memory_order_relaxed), "wrong mask or concurrent grow" ); + do { + __TBB_ASSERT(this->is_valid(this->my_table[s].load(std::memory_order_relaxed)), "wrong mask or concurrent grow" ); + segment_ptr_type buckets_ptr = this->my_table[s].load(std::memory_order_relaxed); + size_type sz = this->segment_size( s ? s : 1 ); + for( segment_index_type i = 0; i < sz; i++ ) + for( node_base *n = buckets_ptr[i].node_list.load(std::memory_order_relaxed); + this->is_valid(n); n = buckets_ptr[i].node_list.load(std::memory_order_relaxed) ) + { + buckets_ptr[i].node_list.store(n->next, std::memory_order_relaxed); + delete_node( n ); + } + this->delete_segment(s); + } while(s-- > 0); + this->my_mask.store(this->embedded_buckets - 1, std::memory_order_relaxed); + } + + // Clear table and destroy it. 
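+
+    /* Illustrative usage sketch (added for exposition, not part of the original header)
+       for the accessor-based find/insert members declared further below: an accessor
+       holds the per-element lock for as long as it stays in scope, e.g.
+
+           tbb::concurrent_hash_map<std::string, int> counts;
+           {
+               tbb::concurrent_hash_map<std::string, int>::accessor a;
+               counts.insert(a, "key");   // write lock; default-constructs the value if absent
+               a->second += 1;            // other threads contending on "key" wait here
+           }                              // element lock released when `a` is destroyed
+
+           tbb::concurrent_hash_map<std::string, int>::const_accessor ca;
+           if (counts.find(ca, "key")) {  // read (shared) lock on the element
+               int observed = ca->second;
+               (void)observed;
+           }
+    */
+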
+ ~concurrent_hash_map() { clear(); } + + //------------------------------------------------------------------------ + // Parallel algorithm support + //------------------------------------------------------------------------ + range_type range( size_type grainsize=1 ) { + return range_type( *this, grainsize ); + } + const_range_type range( size_type grainsize=1 ) const { + return const_range_type( *this, grainsize ); + } + + //------------------------------------------------------------------------ + // STL support - not thread-safe methods + //------------------------------------------------------------------------ + iterator begin() { return iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + const_iterator begin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + const_iterator cbegin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + iterator end() { return iterator( *this, 0, 0, 0 ); } + const_iterator end() const { return const_iterator( *this, 0, 0, 0 ); } + const_iterator cend() const { return const_iterator( *this, 0, 0, 0 ); } + std::pair equal_range( const Key& key ) { return internal_equal_range( key, end() ); } + std::pair equal_range( const Key& key ) const { return internal_equal_range( key, end() ); } + + // Number of items in table. + size_type size() const { return this->my_size.load(std::memory_order_acquire); } + + // True if size()==0. + bool empty() const { return size() == 0; } + + // Upper bound on size. + size_type max_size() const { + return allocator_traits_type::max_size(base_type::get_allocator()); + } + + // Returns the current number of buckets + size_type bucket_count() const { return this->my_mask.load(std::memory_order_relaxed) + 1; } + + // return allocator object + allocator_type get_allocator() const { return base_type::get_allocator(); } + + // swap two instances. Iterators are invalidated + void swap(concurrent_hash_map& table) { + using pocs_type = typename node_allocator_traits::propagate_on_container_swap; + using is_equal_type = typename node_allocator_traits::is_always_equal; + swap_allocators(this->my_allocator, table.my_allocator); + internal_swap(table, tbb::detail::disjunction()); + } + + //------------------------------------------------------------------------ + // concurrent map operations + //------------------------------------------------------------------------ + + // Return count of items (0 or 1) + size_type count( const Key &key ) const { + return const_cast(this)->lookup(/*insert*/false, key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node ); + } + + // Find item and acquire a read lock on the item. + /** Return true if item is found, false otherwise. */ + bool find( const_accessor &result, const Key &key ) const { + result.release(); + return const_cast(this)->lookup(/*insert*/false, key, nullptr, &result, /*write=*/false, &do_not_allocate_node ); + } + + // Find item and acquire a write lock on the item. + /** Return true if item is found, false otherwise. */ + bool find( accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/false, key, nullptr, &result, /*write=*/true, &do_not_allocate_node ); + } + + // Insert item (if not already present) and acquire a read lock on the item. + /** Returns true if item is new. 
*/ + bool insert( const_accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/true, key, nullptr, &result, /*write=*/false, &allocate_node_default_construct ); + } + + // Insert item (if not already present) and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/true, key, nullptr, &result, /*write=*/true, &allocate_node_default_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, const value_type &value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/false, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, const value_type &value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/true, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + bool insert( const value_type &value ) { + return lookup(/*insert*/true, value.first, &value.second, nullptr, /*write=*/false, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, value_type && value ) { + return generic_move_insert(result, std::move(value)); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, value_type && value ) { + return generic_move_insert(result, std::move(value)); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + bool insert( value_type && value ) { + return generic_move_insert(accessor_not_used(), std::move(value)); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + template + bool emplace( const_accessor &result, Args&&... args ) { + return generic_emplace(result, std::forward(args)...); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + template + bool emplace( accessor &result, Args&&... args ) { + return generic_emplace(result, std::forward(args)...); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + template + bool emplace( Args&&... args ) { + return generic_emplace(accessor_not_used(), std::forward(args)...); + } + + // Insert range [first, last) + template + void insert( I first, I last ) { + for ( ; first != last; ++first ) + insert( *first ); + } + + // Insert initializer list + void insert( std::initializer_list il ) { + insert( il.begin(), il.end() ); + } + + // Erase item. + /** Return true if item was erased by particularly this call. 
*/ + bool erase( const Key &key ) { + node_base *erase_node; + hashcode_type const hash = my_hash_compare.hash(key); + hashcode_type mask = this->my_mask.load(std::memory_order_acquire); + restart: + {//lock scope + // get bucket + bucket_accessor b( this, hash & mask ); + search: + node_base* prev = nullptr; + erase_node = b()->node_list.load(std::memory_order_relaxed); + while (this->is_valid(erase_node) && !my_hash_compare.equal(key, static_cast(erase_node)->value().first ) ) { + prev = erase_node; + erase_node = erase_node->next; + } + + if (erase_node == nullptr) { // not found, but mask could be changed + if (this->check_mask_race(hash, mask)) + goto restart; + return false; + } else if (!b.is_writer() && !b.upgrade_to_writer()) { + if (this->check_mask_race(hash, mask)) // contended upgrade, check mask + goto restart; + goto search; + } + + // remove from container + if (prev == nullptr) { + b()->node_list.store(erase_node->next, std::memory_order_relaxed); + } else { + prev->next = erase_node->next; + } + this->my_size--; + } + { + typename node::scoped_type item_locker( erase_node->mutex, /*write=*/true ); + } + // note: there should be no threads pretending to acquire this mutex again, do not try to upgrade const_accessor! + delete_node(erase_node); // Only one thread can delete it due to write lock on the bucket + return true; + } + + // Erase item by const_accessor. + /** Return true if item was erased by particularly this call. */ + bool erase( const_accessor& item_accessor ) { + return exclude( item_accessor ); + } + + // Erase item by accessor. + /** Return true if item was erased by particularly this call. */ + bool erase( accessor& item_accessor ) { + return exclude( item_accessor ); + } + +protected: + // Insert or find item and optionally acquire a lock on the item. + bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write, node* (*allocate_node)(bucket_allocator_type&, + const Key&, const T*), node *tmp_n = 0) + { + __TBB_ASSERT( !result || !result->my_node, nullptr ); + bool return_value; + hashcode_type const h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_acquire); + segment_index_type grow_segment = 0; + node *n; + restart: + {//lock scope + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + return_value = false; + // get bucket + bucket_accessor b( this, h & m ); + // find a node + n = search_bucket( key, b() ); + if( op_insert ) { + // [opt] insert a key + if( !n ) { + if( !tmp_n ) { + tmp_n = allocate_node(base_type::get_allocator(), key, t); + } + if( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion + // Rerun search_list, in case another thread inserted the item during the upgrade. + n = search_bucket( key, b() ); + if( this->is_valid(n) ) { // unfortunately, it did + b.downgrade_to_reader(); + goto exists; + } + } + if( this->check_mask_race(h, m) ) + goto restart; // b.release() is done in ~b(). + // insert and set flag to grow the container + grow_segment = this->insert_new_node( b(), n = tmp_n, m ); + tmp_n = 0; + return_value = true; + } + } else { // find or count + if( !n ) { + if( this->check_mask_race( h, m ) ) + goto restart; // b.release() is done in ~b(). 
TODO: replace by continue + return false; + } + return_value = true; + } + exists: + if( !result ) goto check_growth; + // TODO: the following seems as generic/regular operation + // acquire the item + if( !result->try_acquire( n->mutex, write ) ) { + for( tbb::detail::atomic_backoff backoff(true);; ) { + if( result->try_acquire( n->mutex, write ) ) break; + if( !backoff.bounded_pause() ) { + // the wait takes really long, restart the operation + b.release(); + __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" ); + yield(); + m = this->my_mask.load(std::memory_order_acquire); + goto restart; + } + } + } + }//lock scope + result->my_node = n; + result->my_hash = h; + check_growth: + // [opt] grow the container + if( grow_segment ) { + this->enable_segment( grow_segment ); + } + if( tmp_n ) // if op_insert only + delete_node( tmp_n ); + return return_value; + } + + struct accessor_not_used { void release(){}}; + friend const_accessor* accessor_location( accessor_not_used const& ){ return nullptr;} + friend const_accessor* accessor_location( const_accessor & a ) { return &a;} + + friend bool is_write_access_needed( accessor const& ) { return true;} + friend bool is_write_access_needed( const_accessor const& ) { return false;} + friend bool is_write_access_needed( accessor_not_used const& ) { return false;} + + template + bool generic_move_insert( Accessor && result, value_type && value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, accessor_location(result), is_write_access_needed(result), &allocate_node_move_construct ); + } + + template + bool generic_emplace( Accessor && result, Args &&... args ) { + result.release(); + node * node_ptr = create_node(base_type::get_allocator(), std::forward(args)...); + return lookup(/*insert*/true, node_ptr->value().first, nullptr, accessor_location(result), is_write_access_needed(result), &do_not_allocate_node, node_ptr ); + } + + // delete item by accessor + bool exclude( const_accessor &item_accessor ) { + __TBB_ASSERT( item_accessor.my_node, nullptr ); + node_base *const exclude_node = item_accessor.my_node; + hashcode_type const hash = item_accessor.my_hash; + hashcode_type mask = this->my_mask.load(std::memory_order_acquire); + do { + // get bucket + bucket_accessor b( this, hash & mask, /*writer=*/true ); + node_base* prev = nullptr; + node_base* curr = b()->node_list.load(std::memory_order_relaxed); + + while (curr && curr != exclude_node) { + prev = curr; + curr = curr->next; + } + + if (curr == nullptr) { // someone else was first + if (this->check_mask_race(hash, mask)) + continue; + item_accessor.release(); + return false; + } + __TBB_ASSERT( curr == exclude_node, nullptr ); + // remove from container + if (prev == nullptr) { + b()->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + + this->my_size--; + break; + } while(true); + if (!item_accessor.is_writer()) { // need to get exclusive lock + item_accessor.upgrade_to_writer(); // return value means nothing here + } + + item_accessor.release(); + delete_node(exclude_node); // Only one thread can delete it + return true; + } + + // Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) + template + std::pair internal_equal_range( const Key& key, I end_ ) const { + hashcode_type h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + __TBB_ASSERT((m&(m+1))==0, "data structure is 
invalid"); + h &= m; + bucket *b = this->get_bucket( h ); + while ( b->node_list.load(std::memory_order_relaxed) == rehash_req ) { + m = ( 1u<get_bucket( h &= m ); + } + node *n = search_bucket( key, b ); + if( !n ) + return std::make_pair(end_, end_); + iterator lower(*this, h, b, n), upper(lower); + return std::make_pair(lower, ++upper); + } + + // Copy "source" to *this, where *this must start out empty. + void internal_copy( const concurrent_hash_map& source ) { + hashcode_type mask = source.my_mask.load(std::memory_order_relaxed); + if( this->my_mask.load(std::memory_order_relaxed) == mask ) { // optimized version + this->reserve(source.my_size.load(std::memory_order_relaxed)); // TODO: load_factor? + bucket *dst = 0, *src = 0; + bool rehash_required = false; + for( hashcode_type k = 0; k <= mask; k++ ) { + if( k & (k-2) ) ++dst,src++; // not the beginning of a segment + else { dst = this->get_bucket( k ); src = source.get_bucket( k ); } + __TBB_ASSERT( dst->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); + node *n = static_cast( src->node_list.load(std::memory_order_relaxed) ); + if( n == rehash_req ) { // source is not rehashed, items are in previous buckets + rehash_required = true; + dst->node_list.store(rehash_req, std::memory_order_relaxed); + } else for(; n; n = static_cast( n->next ) ) { + node* node_ptr = create_node(base_type::get_allocator(), n->value().first, n->value().second); + this->add_to_bucket( dst, node_ptr); + this->my_size.fetch_add(1, std::memory_order_relaxed); + } + } + if( rehash_required ) rehash(); + } else internal_copy(source.begin(), source.end(), source.my_size.load(std::memory_order_relaxed)); + } + + template + void internal_copy( I first, I last, size_type reserve_size ) { + this->reserve(reserve_size); // TODO: load_factor? 
+ hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + for(; first != last; ++first) { + hashcode_type h = my_hash_compare.hash( (*first).first ); + bucket *b = this->get_bucket( h & m ); + __TBB_ASSERT( b->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); + node* node_ptr = create_node(base_type::get_allocator(), (*first).first, (*first).second); + this->add_to_bucket( b, node_ptr ); + ++this->my_size; // TODO: replace by non-atomic op + } + } + + void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type&, + /*is_always_equal=*/std::true_type ) + { + this->internal_move(std::move(other)); + } + + void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type& a, + /*is_always_equal=*/std::false_type ) + { + if (a == other.get_allocator()){ + this->internal_move(std::move(other)); + } else { + try_call( [&] { + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), + other.size()); + }).on_exception( [&] { + this->clear(); + }); + } + } + + void internal_move_assign( concurrent_hash_map&& other, + /*is_always_equal || POCMA = */std::true_type) + { + this->internal_move(std::move(other)); + } + + void internal_move_assign(concurrent_hash_map&& other, /*is_always_equal=*/ std::false_type) { + if (this->my_allocator == other.my_allocator) { + this->internal_move(std::move(other)); + } else { + //do per element move + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), + other.size()); + } + } + + void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::true_type) { + this->internal_swap_content(other); + } + + void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::false_type) { + __TBB_ASSERT(this->my_allocator == other.my_allocator, nullptr); + this->internal_swap_content(other); + } + + // Fast find when no concurrent erasure is used. For internal use inside TBB only! + /** Return pointer to item with given key, or nullptr if no such item exists. + Must not be called concurrently with erasure operations. */ + const_pointer internal_fast_find( const Key& key ) const { + hashcode_type h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_acquire); + node *n; + restart: + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + bucket *b = this->get_bucket( h & m ); + // TODO: actually, notification is unnecessary here, just hiding double-check + if( b->node_list.load(std::memory_order_acquire) == rehash_req ) + { + typename bucket::scoped_type lock; + if( lock.try_acquire( b->mutex, /*write=*/true ) ) { + if( b->node_list.load(std::memory_order_relaxed) == rehash_req) + const_cast(this)->rehash_bucket( b, h & m ); //recursive rehashing + } + else lock.acquire( b->mutex, /*write=*/false ); + __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req,nullptr); + } + n = search_bucket( key, b ); + if( n ) + return n->storage(); + else if( this->check_mask_race( h, m ) ) + goto restart; + return 0; + } +}; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template