Skip to content

Commit

Permalink
[#25538] docdb: Add mechanism for reserving virtual addresses
Browse files Browse the repository at this point in the history
Summary:
This diff adds `ReservedAddressSpace`, which is a manager for a reserved virtual address space,
and `AddressSpaceNegotiator`, which finds a segment of virtual memory that is unused in both
parent and child process for use with `ReservedAddressSpace`.

Since we call `exec()` (or `posix_spawn()`) when starting up postmaster, all memory mappings
created by tserver beforehand are lost. In order to have a reserved virtual address space
at the same address for both tserver and postmaster, `AddressSpaceNegotiator` has the
parent process propose start addresses for the reserved address space that work for the parent,
and has the child accept or reject each proposal depending on whether it also works for the child. The
block of virtual addresses is reserved with
`mmap(..., PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE)`.

The negotiated address space is then passed to `ReservedAddressSpace`, which provides methods to
lay out `mmap` calls on fixed addresses in the space with `MAP_FIXED`. Since the reserved segment
is at the same address in both child and parent, this provides us with a building block for having
pointers that are meaningful in both processes.

This is needed for future changes to move the table locks lock manager into shared memory. For more
context: D40272.
Jira: DB-14793

Test Plan: Added unit tests: `./yb_build.sh --cxx-test 'util_reserved_address_space-test'`

Reviewers: sergei, bkolagani

Reviewed By: sergei

Subscribers: ybase, rthallam, zdrudi, amitanand

Differential Revision: https://phorge.dev.yugabyte.com/D41083
  • Loading branch information
es1024 committed Jan 28, 2025
1 parent 37381bd commit f17f92e
Show file tree
Hide file tree
Showing 12 changed files with 974 additions and 112 deletions.
10 changes: 10 additions & 0 deletions src/yb/gutil/dynamic_annotations.h
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,16 @@ void __asan_set_death_callback(void (*callback)(void));
((void)(cb))
#endif

/* Reports a shadow scale and shadow offset through the two output pointers.
   NOTE(review): presumably provided by the AddressSanitizer runtime; it is only called through
   the macro below when the build has ASAN enabled -- confirm. */
void __asan_get_shadow_mapping(size_t* shadow_scale, size_t* shadow_offset);

/* ASAN_GET_SHADOW_MAPPING(shadow_scale, shadow_offset):
   - with AddressSanitizer enabled, forwards to __asan_get_shadow_mapping();
   - otherwise stores 0 into both *shadow_scale and *shadow_offset, so callers always get
     initialized values. */
#if defined(__SANITIZE_ADDRESS__) || defined(ADDRESS_SANITIZER)
#define ASAN_GET_SHADOW_MAPPING(shadow_scale, shadow_offset) \
__asan_get_shadow_mapping(shadow_scale, shadow_offset)
#else
#define ASAN_GET_SHADOW_MAPPING(shadow_scale, shadow_offset) \
(*(shadow_scale) = 0, *(shadow_offset) = 0)
#endif

#ifdef __cplusplus
}
#endif
Expand Down
97 changes: 4 additions & 93 deletions src/yb/tserver/tserver_shared_mem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
#include <atomic>
#include <mutex>

#include <boost/interprocess/sync/interprocess_semaphore.hpp>
#include <boost/interprocess/shared_memory_object.hpp>

#include "yb/gutil/casts.h"

#include "yb/util/enums.h"
#include "yb/util/env.h"
#include "yb/util/flags.h"
#include "yb/util/interprocess_semaphore.h"
#include "yb/util/path_util.h"
#include "yb/util/result.h"
#include "yb/util/size_literals.h"
Expand Down Expand Up @@ -55,95 +55,6 @@ std::chrono::system_clock::time_point ToSystem(CoarseTimePoint tp) {
return base + std::chrono::duration_cast<SystemClock::duration>(tp.time_since_epoch());
}

#if defined(BOOST_INTERPROCESS_POSIX_PROCESS_SHARED)
// Counting semaphore built on an unnamed POSIX semaphore (sem_t). The semaphore is initialized
// with pshared = 1, i.e. it is meant to be placed in memory shared between processes.
// All operations report failures through Status instead of errno.
class Semaphore {
 public:
  // Initializes the semaphore with `initial_count`; aborts if sem_init fails.
  explicit Semaphore(unsigned int initial_count) {
    const int ret = sem_init(&impl_, 1, initial_count);
    CHECK_NE(ret, -1);
  }

  // The underlying sem_t must not be copied.
  Semaphore(const Semaphore&) = delete;
  void operator=(const Semaphore&) = delete;

  // Destroys the semaphore; aborts if sem_destroy fails.
  ~Semaphore() {
    CHECK_EQ(sem_destroy(&impl_), 0);
  }

  // Increments the semaphore.
  Status Post() {
    const int rc = sem_post(&impl_);
    return ResToStatus(rc, "Post");
  }

  // Blocks until the semaphore can be decremented.
  Status Wait() {
    const int rc = sem_wait(&impl_);
    return ResToStatus(rc, "Wait");
  }

  // Waits until the semaphore can be decremented or `abs_time` is reached.
  // Returns TimedOut when the deadline passes. An interrupted wait (EINTR) is reported as OK,
  // so callers have to re-check their own condition after this returns.
  template<class TimePoint>
  Status TimedWait(const TimePoint &abs_time) {
    namespace ipcdetail = boost::interprocess::ipcdetail;

    // Posix does not support infinity absolute time so handle it here
    if (ipcdetail::is_pos_infinity(abs_time)) {
      return Wait();
    }

    auto deadline = ipcdetail::timepoint_to_timespec(abs_time);
    const int rc = sem_timedwait(&impl_, &deadline);
    if (rc == 0) {
      return Status::OK();
    }
    if (rc > 0) {
      // buggy glibc, copy the returned error code to errno
      errno = rc;
    }

    switch (errno) {
      case ETIMEDOUT: {
        static const Status timed_out_status = STATUS(TimedOut, "Timed out waiting semaphore");
        return timed_out_status;
      }
      case EINTR:
        return Status::OK();
      default:
        return ResToStatus(rc, "TimedWait");
    }
  }

 private:
  // Maps a sem_* return code to Status: 0 is OK, anything else becomes a RuntimeError that
  // carries the operation name and the current errno.
  static Status ResToStatus(int res, const char* op) {
    if (res != 0) {
      return STATUS_FORMAT(RuntimeError, "$0 on semaphore failed: $1", op, errno);
    }
    return Status::OK();
  }

  sem_t impl_;
};
#else
// Fallback semaphore for platforms without process-shared POSIX semaphores: a thin wrapper
// over boost::interprocess::interprocess_semaphore exposing the same Status-based interface.
class Semaphore {
 public:
  // Creates the semaphore with the given initial count.
  explicit Semaphore(unsigned int initial_count) : impl_(initial_count) {}

  // Increments the semaphore. Always returns OK.
  Status Post() {
    impl_.post();
    return Status::OK();
  }

  // Blocks until the semaphore can be decremented. Always returns OK.
  Status Wait() {
    impl_.wait();
    return Status::OK();
  }

  // Waits until the semaphore can be decremented or `abs_time` is reached; returns TimedOut
  // when timed_wait reports that the wait did not succeed.
  template<class TimePoint>
  Status TimedWait(const TimePoint &abs_time) {
    const bool acquired = impl_.timed_wait(abs_time);
    if (acquired) {
      return Status::OK();
    }
    static const Status timed_out_status = STATUS(TimedOut, "Timed out waiting semaphore");
    return timed_out_status;
  }

 private:
  boost::interprocess::interprocess_semaphore impl_;
};
#endif

// States stored in SharedExchangeHeader::state_, which starts out as kIdle. DoWait() checks
// for kShutdown before anything else.
// NOTE(review): the remaining values look like the steps of one request/response round trip
// (sent -> processing -> response sent) -- confirm against the code that updates state_.
YB_DEFINE_ENUM(SharedExchangeState,
(kIdle)(kRequestSent)(kProcessingRequest)(kResponseSent)(kShutdown));

Expand Down Expand Up @@ -236,7 +147,7 @@ class SharedExchangeHeader {
Status DoWait(
SharedExchangeState expected_state,
std::chrono::system_clock::time_point deadline,
Semaphore* semaphore) {
InterprocessSemaphore* semaphore) {
auto state = state_.load(std::memory_order_acquire);
for (;;) {
if (state == SharedExchangeState::kShutdown) {
Expand All @@ -254,8 +165,8 @@ class SharedExchangeHeader {
}
}

Semaphore request_semaphore_{0};
Semaphore response_semaphore_{0};
InterprocessSemaphore request_semaphore_{0};
InterprocessSemaphore response_semaphore_{0};
std::atomic<SharedExchangeState> state_{SharedExchangeState::kIdle};
size_t data_size_;
std::byte data_[0];
Expand Down
2 changes: 2 additions & 0 deletions src/yb/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ set(UTIL_SRCS
rw_semaphore.cc
rwc_lock.cc
shared_mem.cc
shmem/reserved_address_segment.cc
shmem/robust_mutex.cc
signal_util.cc
slice.cc
Expand Down Expand Up @@ -400,6 +401,7 @@ if (NOT "${YB_BUILD_TYPE}" STREQUAL "asan")
endif()
ADD_YB_TEST(scope_exit-test)
ADD_YB_TEST(shared_mem-test)
ADD_YB_TEST(shmem/reserved_address_segment-test)
ADD_YB_TEST(shmem/robust_mutex-test)
ADD_YB_TEST(slice-test)
ADD_YB_TEST(spinlock_profiling-test)
Expand Down
114 changes: 114 additions & 0 deletions src/yb/util/interprocess_semaphore.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//

#pragma once

#include <semaphore.h>

#include <boost/interprocess/sync/interprocess_semaphore.hpp>

#include "yb/util/result.h"
#include "yb/util/status.h"

namespace yb {

#if defined(BOOST_INTERPROCESS_POSIX_PROCESS_SHARED)
// Counting semaphore built on an unnamed POSIX semaphore (sem_t). The semaphore is initialized
// with pshared = 1, i.e. it is meant to be placed in memory shared between processes.
// All operations report failures through Status instead of errno.
class InterprocessSemaphore {
 public:
  // Initializes the semaphore with `initial_count`; aborts if sem_init fails.
  explicit InterprocessSemaphore(unsigned int initial_count) {
    const int ret = sem_init(&impl_, 1, initial_count);
    CHECK_NE(ret, -1);
  }

  // The underlying sem_t must not be copied.
  InterprocessSemaphore(const InterprocessSemaphore&) = delete;
  void operator=(const InterprocessSemaphore&) = delete;

  // Destroys the semaphore; aborts if sem_destroy fails.
  ~InterprocessSemaphore() {
    CHECK_EQ(sem_destroy(&impl_), 0);
  }

  // Increments the semaphore.
  Status Post() {
    const int rc = sem_post(&impl_);
    return ResToStatus(rc, "Post");
  }

  // Blocks until the semaphore can be decremented.
  Status Wait() {
    const int rc = sem_wait(&impl_);
    return ResToStatus(rc, "Wait");
  }

  // Waits until the semaphore can be decremented or `abs_time` is reached.
  // Returns TimedOut when the deadline passes. An interrupted wait (EINTR) is reported as OK,
  // so callers have to re-check their own condition after this returns.
  template<class TimePoint>
  Status TimedWait(const TimePoint &abs_time) {
    namespace ipcdetail = boost::interprocess::ipcdetail;

    // Posix does not support infinity absolute time so handle it here
    if (ipcdetail::is_pos_infinity(abs_time)) {
      return Wait();
    }

    auto deadline = ipcdetail::timepoint_to_timespec(abs_time);
    const int rc = sem_timedwait(&impl_, &deadline);
    if (rc == 0) {
      return Status::OK();
    }
    if (rc > 0) {
      // buggy glibc, copy the returned error code to errno
      errno = rc;
    }

    switch (errno) {
      case ETIMEDOUT: {
        static const Status timed_out_status = STATUS(TimedOut, "Timed out waiting semaphore");
        return timed_out_status;
      }
      case EINTR:
        return Status::OK();
      default:
        return ResToStatus(rc, "TimedWait");
    }
  }

 private:
  // Maps a sem_* return code to Status: 0 is OK, anything else becomes a RuntimeError that
  // carries the operation name and the current errno.
  static Status ResToStatus(int res, const char* op) {
    if (res != 0) {
      return STATUS_FORMAT(RuntimeError, "$0 on semaphore failed: $1", op, errno);
    }
    return Status::OK();
  }

  sem_t impl_;
};
#else
// Fallback semaphore for platforms without process-shared POSIX semaphores: a thin wrapper
// over boost::interprocess::interprocess_semaphore exposing the same Status-based interface.
class InterprocessSemaphore {
 public:
  // Creates the semaphore with the given initial count.
  explicit InterprocessSemaphore(unsigned int initial_count) : impl_(initial_count) {}

  // Increments the semaphore. Always returns OK.
  Status Post() {
    impl_.post();
    return Status::OK();
  }

  // Blocks until the semaphore can be decremented. Always returns OK.
  Status Wait() {
    impl_.wait();
    return Status::OK();
  }

  // Waits until the semaphore can be decremented or `abs_time` is reached; returns TimedOut
  // when timed_wait reports that the wait did not succeed.
  template<class TimePoint>
  Status TimedWait(const TimePoint &abs_time) {
    const bool acquired = impl_.timed_wait(abs_time);
    if (acquired) {
      return Status::OK();
    }
    static const Status timed_out_status = STATUS(TimedOut, "Timed out waiting semaphore");
    return timed_out_status;
  }

 private:
  boost::interprocess::interprocess_semaphore impl_;
};
#endif

} // namespace yb
7 changes: 6 additions & 1 deletion src/yb/util/math_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,15 @@ constexpr T1 constexpr_max(const T1& t1, const T2& t2, Args&&... args) {
}

template <class T>
constexpr T ceil_div(const T& n, const T& div) {
constexpr T ceil_div(T n, T div) {
return (n + div - 1) / div;
}

// Rounds n up to the nearest multiple of div that is >= n.
//
// T is expected to be an integral type and div must be positive. The result is computed from
// the remainder n % div rather than from (n + div - 1), so no intermediate value exceeds the
// returned value: the function cannot wrap around when the rounded result is representable
// in T. Negative n is rounded towards zero-or-above correctly (e.g. -5 rounded to a multiple
// of 4 is -4).
template<typename T>
constexpr T round_up_multiple_of(T n, T div) {
  const T remainder = n % div;
  if (remainder == 0) {
    return n;  // Already a multiple.
  }
  // In C++ the remainder has the sign of the dividend: a positive remainder means we have to
  // step up to the next multiple, a negative one means dropping it already rounds up.
  return remainder > 0 ? n + (div - remainder) : n - remainder;
}

template <class T>
T max_if_negative(T value) {
if (value < 0) {
Expand Down
16 changes: 10 additions & 6 deletions src/yb/util/shared_mem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -249,12 +249,9 @@ SharedMemorySegment::SharedMemorySegment(void* base_address, int fd, size_t segm
}

SharedMemorySegment::SharedMemorySegment(SharedMemorySegment&& other)
: base_address_(other.base_address_),
fd_(other.fd_),
segment_size_(other.segment_size_) {
other.base_address_ = nullptr;
other.fd_ = -1;
}
: base_address_(std::exchange(other.base_address_, nullptr)),
fd_(std::exchange(other.fd_, -1)),
segment_size_(other.segment_size_) { }

SharedMemorySegment::~SharedMemorySegment() {
if (base_address_ && munmap(base_address_, segment_size_) == -1) {
Expand All @@ -267,6 +264,13 @@ SharedMemorySegment::~SharedMemorySegment() {
}
}

// Move assignment: takes over the mapping, file descriptor and size held by `other`.
//
// A segment already held by *this is released instead of being silently overwritten (which
// would leak its mapping and descriptor). After the call `other` no longer refers to a
// mapping or descriptor (base address is nullptr, fd is -1). Self-assignment leaves the object
// unchanged.
SharedMemorySegment& SharedMemorySegment::operator=(SharedMemorySegment&& other) {
  // Steal other's state into a temporary, then trade it for ours. The temporary ends up with
  // whatever *this owned before and its destructor releases it on scope exit. On self-move
  // the temporary hands the state straight back and is destroyed empty.
  SharedMemorySegment previous(std::move(other));
  std::swap(base_address_, previous.base_address_);
  std::swap(fd_, previous.fd_);
  std::swap(segment_size_, previous.segment_size_);
  return *this;
}

// Returns the address of the start of the shared memory segment, or nullptr if this object
// currently holds no mapping (default-constructed or moved-from).
void* SharedMemorySegment::GetAddress() const {
return base_address_;
}
Expand Down
31 changes: 24 additions & 7 deletions src/yb/util/shared_mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class SharedMemorySegment {
kReadWrite = PROT_READ | PROT_WRITE,
};

SharedMemorySegment() = default;

// Creates a new anonymous shared memory segment with the given size.
static Result<SharedMemorySegment> Create(size_t segment_size);

Expand All @@ -44,6 +46,10 @@ class SharedMemorySegment {

~SharedMemorySegment();

SharedMemorySegment& operator=(SharedMemorySegment&& other);

SharedMemorySegment& operator=(const SharedMemorySegment& other) = delete;

// Returns the address of the start of the shared memory segment.
void* GetAddress() const;

Expand All @@ -54,25 +60,36 @@ class SharedMemorySegment {
SharedMemorySegment(void* base_address, int fd, size_t segment_size);

// The address of the start of the shared memory segment.
void* base_address_;
void* base_address_ = nullptr;

// The file descriptor of the shared memory segment.
int fd_;
int fd_ = -1;

// The size, in bytes, of the shared memory segment.
size_t segment_size_;
size_t segment_size_ = 0;
};

// Utility wrapper for sharing object of specified type.
template <class Object>
class SharedMemoryObject {
public:
SharedMemoryObject() = default;

SharedMemoryObject(SharedMemoryObject&& rhs)
: segment_(std::move(rhs.segment_)), owned_(rhs.owned_) {
rhs.owned_ = false;
}
: segment_(std::move(rhs.segment_)), owned_(std::exchange(rhs.owned_, false)) { }

// Runs Reset(), which destroys the contained Object when this wrapper owns it.
~SharedMemoryObject() {
Reset();
}

// Move assignment: drops the currently held object via Reset(), then takes over rhs's segment
// and ownership flag. rhs is left non-owning.
SharedMemoryObject& operator=(SharedMemoryObject&& rhs) {
  // Self-move must be a no-op: otherwise Reset() would destroy the object while owned_ stays
  // true, and the already-destroyed object would be used and destroyed again later.
  if (this != &rhs) {
    Reset();
    segment_ = std::move(rhs.segment_);
    owned_ = std::exchange(rhs.owned_, false);
  }
  return *this;
}

void Reset() {
if (owned_) {
get()->~Object();
}
Expand Down Expand Up @@ -126,7 +143,7 @@ class SharedMemoryObject {
}

SharedMemorySegment segment_;
bool owned_;
bool owned_ = false;
};

} // namespace yb
Loading

0 comments on commit f17f92e

Please sign in to comment.