Skip to content

Commit

Permalink
Use pinned_vector to prevent string interning realloc
Browse files Browse the repository at this point in the history
Add symbol interning tests

Add vmcontainer for pinned_vector
	pinned_vector reserves virtual memory at runtime using OS APIs to maintain pointer validity

Remove exception and RTTI requirement from range-v3 submodule
  • Loading branch information
Spartan322 committed Jul 2, 2024
1 parent deed8ec commit fdac3c4
Show file tree
Hide file tree
Showing 19 changed files with 489 additions and 36 deletions.
5 changes: 4 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@
ignore = dirty
[submodule "deps/range-v3"]
path = deps/range-v3
url = https://github.com/ericniebler/range-v3
url = https://github.com/spartan322/range-v3
ignore = dirty
[submodule "tests/deps/snitch"]
path = tests/deps/snitch
url = https://github.com/snitch-org/snitch
ignore = dirty
[submodule "deps/vmcontainer"]
path = deps/vmcontainer
url = https://github.com/spartan322/vmcontainer
40 changes: 36 additions & 4 deletions deps/SCsub
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def build_dryad(env):
env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"])
else:
env.Append(CXXFLAGS=["-isystem", include_dir])

env.exposed_includes += env.dryad["INCPATH"]

def build_fmt(env):
Expand Down Expand Up @@ -117,7 +117,7 @@ def build_fmt(env):
env.Append(CXXFLAGS=[""])
env.Append(LIBPATH=[fmt_env.Dir(source_path)])
env.Prepend(LIBS=[library_name])

env.exposed_includes += env.fmt["INCPATH"]

def build_range_v3(env):
Expand All @@ -135,10 +135,42 @@ def build_range_v3(env):
env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"])
else:
env.Append(CXXFLAGS=["-isystem", include_dir])

env.exposed_includes += env.range_v3["INCPATH"]

def build_vmcontainer(env):
    """Build the vendored vmcontainer sources as a static library and expose it through ``env``.

    A clone of ``env`` compiles every ``*.cpp`` found under the vmcontainer include and
    source directories into ``libvmcontainer``. The parent ``env`` is then updated so that
    dependents can include the vmcontainer headers (registered as system/external headers)
    and link against the library.

    Side effects on ``env``: sets ``env.vmcontainer_sources`` and ``env.vmcontainer``,
    appends to ``CPPPATH``, ``CXXFLAGS`` and ``LIBPATH``, prepends to ``LIBS`` and extends
    ``env.exposed_includes``.
    """
    vmcontainer_env = env.Clone()

    include_path = "vmcontainer/lib/include"
    source_path = "vmcontainer/lib/src"
    paths = [include_path, source_path]
    vmcontainer_env.Append(CPPPATH=[[vmcontainer_env.Dir(p) for p in paths]])
    sources = env.GlobRecursive("*.cpp", paths)
    env.vmcontainer_sources = sources

    library_name = "libvmcontainer" + env["LIBSUFFIX"]
    library = vmcontainer_env.StaticLibrary(target=os.path.join(source_path, library_name), source=sources)
    Default(library)

    include_dir = vmcontainer_env.Dir(include_path)
    source_dir = vmcontainer_env.Dir(source_path)

    env.vmcontainer = {}
    env.vmcontainer["INCPATH"] = [include_dir]

    env.Append(CPPPATH=env.vmcontainer["INCPATH"])
    # Pass the include directory with the compiler's "system/external header" flag as well.
    if env.get("is_msvc", False):
        env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"])
    else:
        env.Append(CXXFLAGS=["-isystem", include_dir])
    # NOTE(review): appends an empty flag, mirroring build_fmt; looks like a no-op - confirm.
    env.Append(CXXFLAGS=[""])
    # The library is emitted next to its sources, so that directory is the link search path.
    env.Append(LIBPATH=[source_dir])
    env.Prepend(LIBS=[library_name])

    env.exposed_includes += env.vmcontainer["INCPATH"]

# Register every vendored dependency with the build environment, one call per dependency.
build_dryad(env)
build_fmt(env)
build_lexy(env)
build_range_v3(env)
build_vmcontainer(env)
1 change: 1 addition & 0 deletions deps/vmcontainer
Submodule vmcontainer added at f7851c
255 changes: 253 additions & 2 deletions include/openvic-dataloader/detail/SymbolIntern.hpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,265 @@
#pragma once

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string_view>
#include <type_traits>
#include <utility>

#include <openvic-dataloader/detail/pinned_vector.hpp>

#include <dryad/symbol.hpp>

namespace ovdl {
// Contains all unique symbols, null-terminated, in memory one after the other.
//
// The characters live in a pinned_vector, so growing the buffer is not expected to move
// strings that were stored earlier; insert() hands out pointers into that storage.
// All sizes and capacities in this interface are counted in CharT elements, not bytes.
template<typename CharT>
struct symbol_buffer {
	// Smallest capacity (in elements) the buffer grows to once it needs to grow at all.
	static constexpr auto min_buffer_size = 16 * 1024;

	// Creates a buffer whose element limit is min_buffer_size + 1.
	constexpr symbol_buffer() : _data_buffer(ovdl::detail::max_elements(min_buffer_size + 1)) {}

	// Creates a buffer limited to max_elements elements (but never fewer than
	// min_buffer_size + 1) and immediately reserves min_buffer_size elements of capacity.
	explicit symbol_buffer(std::size_t max_elements)
		: _data_buffer(ovdl::detail::max_elements(std::max<std::size_t>(max_elements, min_buffer_size + 1))) {
		_data_buffer.reserve(min_buffer_size);
	}

	// Drops all stored characters and asks the vector to give its capacity back.
	// Does nothing when no capacity is held.
	void free() {
		if (_data_buffer.capacity() == 0)
			return;

		_data_buffer.clear();
		_data_buffer.shrink_to_fit();
	}

	// Ensures capacity for at least new_capacity elements.
	// Returns false, leaving the buffer untouched, when new_capacity reaches max_size().
	bool reserve(std::size_t new_capacity) {
		if (new_capacity <= _data_buffer.capacity())
			return true;

		if (new_capacity >= _data_buffer.max_size())
			return false;

		// reserve() counts elements, so no sizeof(CharT) scaling here.
		_data_buffer.reserve(new_capacity);

		return true;
	}

	// Ensures there is room to append a string of new_string_length characters plus its
	// terminator. Returns false when the buffer's limit does not allow it.
	bool reserve_new_string(std::size_t new_string_length) {
		// +1 for null-terminator.
		const std::size_t required_size = _data_buffer.size() + new_string_length + 1;
		if (required_size <= _data_buffer.capacity())
			return true;

		// Prefer geometric growth, but never grow to less than the minimum buffer size.
		const std::size_t preferred_capacity = std::max<std::size_t>(required_size * 2, min_buffer_size);
		if (reserve(preferred_capacity))
			return true;

		// Doubling overshoots the limit; the string itself may still fit, so retry with
		// exactly what is needed before reporting failure.
		return reserve(required_size);
	}

	// Appends length characters from str followed by a null terminator and returns a
	// pointer to the first appended character.
	// Precondition: room for length + 1 elements is already reserved (see reserve_new_string).
	const CharT* insert(const CharT* str, std::size_t length) {
		DRYAD_PRECONDITION(_data_buffer.capacity() - _data_buffer.size() >= length + 1);

		// Taken before inserting: this is where the new string is going to start.
		const CharT* begin = _data_buffer.cend();

		// Pointer arithmetic already advances in units of CharT.
		_data_buffer.insert(_data_buffer.cend(), str, str + length);
		_data_buffer.push_back(CharT(0));

		return begin;
	}

	// Returns the null-terminated string that starts at element offset index.
	const CharT* c_str(std::size_t index) const {
		DRYAD_PRECONDITION(index < _data_buffer.size());
		return _data_buffer.data() + index;
	}

	// Number of elements currently stored, terminators included.
	std::size_t size() const {
		return _data_buffer.size();
	}

	// Number of elements that can be stored without reserving more.
	std::size_t capacity() const {
		return _data_buffer.capacity();
	}

	// Upper bound on the number of elements, as reported by the underlying vector.
	std::size_t max_size() const {
		return _data_buffer.max_size();
	}

private:
	detail::pinned_vector<CharT> _data_buffer;
};

// Hash-table traits for a table whose entries are element offsets into a symbol_buffer.
// Lookups can be made either with an existing offset or with a (pointer, length) string
// that has not been stored yet.
template<typename IndexType, typename CharT>
struct symbol_index_hash_traits {
	// Buffer the stored offsets refer to.
	const symbol_buffer<CharT>* buffer;

	using value_type = IndexType;

	// Candidate string for a lookup; not required to be null-terminated.
	struct string_view {
		const CharT* ptr;
		std::size_t length;
	};

	// IndexType(-1) marks an empty slot.
	static constexpr bool is_unoccupied(IndexType index) {
		return index == IndexType(-1);
	}
	static void fill_unoccupied(IndexType* data, std::size_t size) {
		// It has all bits set to 1, so we can do it per-byte.
		std::memset(data, static_cast<unsigned char>(-1), size * sizeof(IndexType));
	}

	static constexpr bool is_equal(IndexType entry, IndexType value) {
		return entry == value;
	}
	// True when the string stored at offset entry is exactly str.
	bool is_equal(IndexType entry, string_view str) const {
		// Compare whole strings: a prefix-only comparison would treat a stored "foobar" as
		// equal to the candidate "foo". The stored string is null-terminated, so a view can
		// be built from the bare pointer. The std:: qualification is needed because the
		// nested string_view struct hides the standard name here.
		const std::basic_string_view<CharT> stored { buffer->c_str(entry) };
		return stored == std::basic_string_view<CharT> { str.ptr, str.length };
	}

	// Hash of the null-terminated string stored at offset entry.
	std::size_t hash(IndexType entry) const {
		auto str = buffer->c_str(entry);
		return dryad::default_hash_algorithm().hash_c_str(str).finish();
	}
	// Hash of a candidate string, computed over its bytes.
	static constexpr std::size_t hash(string_view str) {
		return dryad::default_hash_algorithm()
			.hash_bytes(reinterpret_cast<const unsigned char*>(str.ptr), str.length * sizeof(CharT))
			.finish();
	}
};

// Forward declaration of the interned-string handle; declared with the same class-key
// (struct) as its definition to avoid mismatched-tag warnings.
template<typename CharT = char>
struct symbol;

// Interns strings: every distinct string is stored once in a symbol_buffer and is
// represented by a symbol that points at the stored, null-terminated copy.
// A hash table of buffer offsets is used to find strings that were interned before.
template<typename Id, typename CharT = char, typename IndexType = std::size_t,
	typename MemoryResource = void>
class symbol_interner {
	static_assert(std::is_trivial_v<CharT>);
	static_assert(std::is_unsigned_v<IndexType>);

	using resource_ptr = dryad::_detail::memory_resource_ptr<MemoryResource>;
	using traits = symbol_index_hash_traits<IndexType, CharT>;

public:
	using symbol = ovdl::symbol<CharT>;

	//=== construction ===//
	// Uses the default buffer limit and the default memory resource for the hash table.
	constexpr symbol_interner() : _resource(dryad::_detail::get_memory_resource<MemoryResource>()) {}
	// max_elements is forwarded to the symbol buffer as its element limit.
	constexpr explicit symbol_interner(std::size_t max_elements)
		: _buffer(max_elements),
		  _resource(dryad::_detail::get_memory_resource<MemoryResource>()) {}
	// As above, with an explicit memory resource for the hash table.
	constexpr explicit symbol_interner(std::size_t max_elements, MemoryResource* resource)
		: _buffer(max_elements),
		  _resource(resource) {}

	~symbol_interner() noexcept {
		_buffer.free();
		_map.free(_resource);
	}

	// Takes over other's buffer and hash table.
	symbol_interner(symbol_interner&& other) noexcept
		// Move the buffer instead of copying it: symbols point into the buffer's storage,
		// so that storage has to change owner rather than be duplicated and released.
		: _buffer(std::move(other._buffer)), _map(other._map), _resource(other._resource) {
		// The table is now owned by *this; clear other's handle so its destructor does not
		// free it a second time.
		// NOTE(review): other._buffer is left moved-from; this assumes a moved-from
		// pinned_vector is a valid empty vector that free() can be called on - confirm.
		other._map = {};
	}

	symbol_interner& operator=(symbol_interner&& other) noexcept {
		dryad::_detail::swap(_buffer, other._buffer);
		dryad::_detail::swap(_map, other._map);
		dryad::_detail::swap(_resource, other._resource);
		return *this;
	}

	//=== interning ===//
	// Pre-sizes the buffer and the hash table for the expected number of symbols.
	// Returns false when the buffer could not reserve the requested amount; the hash
	// table is rehashed either way.
	bool reserve(std::size_t number_of_symbols, std::size_t average_symbol_length) {
		auto success = _buffer.reserve(number_of_symbols * average_symbol_length);
		_map.rehash(_resource, _map.to_table_capacity(number_of_symbols), traits { &_buffer });
		return success;
	}

	// Returns the symbol for the length characters starting at str, storing a copy first
	// if the string has not been interned yet.
	// Returns an empty symbol when the buffer cannot make room for the new string.
	symbol intern(const CharT* str, std::size_t length) {
		if (_map.should_rehash())
			_map.rehash(_resource, traits { &_buffer });

		auto entry = _map.lookup_entry(typename traits::string_view { str, length }, traits { &_buffer });
		if (entry)
			// Already interned, return the stored string.
			return symbol(_buffer.c_str(entry.get()));

		// Copy string data to buffer, as we don't have it yet.
		if (!_buffer.reserve_new_string(length)) // Ran out of virtual memory space
			return symbol();

		auto begin = _buffer.insert(str, length);
		// The hash table stores the string's offset from the start of the buffer.
		auto idx = std::distance(_buffer.c_str(0), begin);
		DRYAD_PRECONDITION(idx == IndexType(idx)); // Overflow of index type.

		// Store index in map.
		entry.create(IndexType(idx));

		// Return new symbol.
		return symbol(begin);
	}
	// Interns a string literal; the array's trailing null terminator is not part of the
	// interned text.
	template<std::size_t N>
	symbol intern(const CharT (&literal)[N]) {
		DRYAD_PRECONDITION(literal[N - 1] == CharT(0));
		return intern(literal, N - 1);
	}

private:
	symbol_buffer<CharT> _buffer;
	dryad::_detail::hash_table<traits, 1024> _map;
	DRYAD_EMPTY_MEMBER resource_ptr _resource;

	friend symbol;
};

// Handle to an interned, null-terminated string.
// A symbol is a single pointer; a default-constructed symbol holds a null pointer and
// tests false. Comparisons compare the pointers, not the characters they point at.
template<typename CharT>
struct symbol {
	using char_type = CharT;

	constexpr symbol() = default;
	constexpr explicit symbol(const CharT* begin) : _begin(begin) {}

	// True when the symbol refers to a string.
	constexpr explicit operator bool() const {
		return _begin != nullptr;
	}

	// Pointer to the null-terminated string, or nullptr for an empty symbol.
	constexpr const CharT* c_str() const {
		return _begin;
	}

	// View over the string, excluding the terminator. An empty symbol yields an empty
	// view; building a basic_string_view from a null pointer would be undefined behaviour.
	constexpr std::basic_string_view<CharT> view() const {
		if (_begin == nullptr)
			return std::basic_string_view<CharT>();
		return std::basic_string_view<CharT>(_begin);
	}

	//=== comparison ===//
	friend constexpr bool operator==(symbol lhs, symbol rhs) {
		return lhs._begin == rhs._begin;
	}
	friend constexpr bool operator!=(symbol lhs, symbol rhs) {
		return lhs._begin != rhs._begin;
	}

	friend constexpr bool operator<(symbol lhs, symbol rhs) {
		return lhs._begin < rhs._begin;
	}
	friend constexpr bool operator<=(symbol lhs, symbol rhs) {
		return lhs._begin <= rhs._begin;
	}
	friend constexpr bool operator>(symbol lhs, symbol rhs) {
		return lhs._begin > rhs._begin;
	}
	friend constexpr bool operator>=(symbol lhs, symbol rhs) {
		return lhs._begin >= rhs._begin;
	}

private:
	const CharT* _begin = nullptr;

	template<typename, typename, typename, typename>
	friend class symbol_interner;
};

// Names the symbol and interner types used by the data loader.
struct SymbolIntern {
	// Tag type passed as the interner's Id parameter; only declared, never defined here.
	struct SymbolId;
	using index_type = std::uint32_t;
	// Each alias is declared exactly once, naming the ovdl symbol types defined above;
	// the earlier dryad-based aliases of the same names would be conflicting redefinitions.
	using symbol_type = symbol<char>;
	using symbol_interner_type = symbol_interner<SymbolId, symbol_type::char_type, index_type>;
};
}
3 changes: 3 additions & 0 deletions include/openvic-dataloader/detail/Utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,7 @@ namespace ovdl::detail {

// Shorthand for is_instance_of<Type, Template>::value.
template<typename Type, template<typename...> class Template>
static constexpr auto is_instance_of_v = is_instance_of<Type, Template>::value;

// Satisfied when T, with references and cv-qualifiers stripped, passes is_instance_of_v
// for Template.
template<typename T, template<typename...> typename Template>
concept InstanceOf = is_instance_of_v<std::remove_cvref_t<T>, Template>;
}
15 changes: 15 additions & 0 deletions include/openvic-dataloader/detail/pinned_vector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#include <vmcontainer/detail.hpp>
#include <vmcontainer/pinned_vector.hpp>

namespace ovdl::detail {
static constexpr auto max_elements = mknejp::vmcontainer::max_elements;
static constexpr auto max_bytes = mknejp::vmcontainer::max_bytes;
static constexpr auto max_pages = mknejp::vmcontainer::max_pages;

using pinned_vector_traits = mknejp::vmcontainer::pinned_vector_traits;

template<typename T, typename Traits = pinned_vector_traits>
using pinned_vector = mknejp::vmcontainer::pinned_vector<T, Traits>;
}
4 changes: 0 additions & 4 deletions include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,6 @@ namespace ovdl::v2script::ast {
return _value;
}

// Returns the node's value as a null-terminated string.
// The symbol now carries the string pointer itself, so the interner is not consulted;
// the parameter is kept only so that existing callers continue to compile.
// NOTE(review): assumes _value is a SymbolIntern::symbol_type - confirm against the class.
const char* value([[maybe_unused]] const SymbolIntern::symbol_interner_type& symbols) const {
	return _value.c_str();
}

protected:
explicit FlatValue(dryad::node_ctor ctor, NodeKind kind, SymbolIntern::symbol_type value)
: node_base(ctor, kind),
Expand Down
2 changes: 1 addition & 1 deletion src/openvic-dataloader/AbstractSyntaxTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ AbstractSyntaxTree::symbol_type AbstractSyntaxTree::intern(std::string_view str)
}

// Interns the length characters starting at str and returns the interned,
// null-terminated copy. The symbol holds the string pointer itself, so no interner
// argument is needed to resolve it.
const char* AbstractSyntaxTree::intern_cstr(const char* str, std::size_t length) {
	return intern(str, length).c_str();
}

const char* AbstractSyntaxTree::intern_cstr(std::string_view str) {
Expand Down
Loading

0 comments on commit fdac3c4

Please sign in to comment.