Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't generate a mutex for each Locked<T>, share one per object #10117

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 32 additions & 26 deletions lib/base/atomic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

#include <atomic>
#include <mutex>
#include <type_traits>
#include <utility>

namespace icinga
{
Expand Down Expand Up @@ -34,8 +32,10 @@ class Atomic : public std::atomic<T> {
}
};

class LockedMutex;

/**
* Wraps any T into a std::atomic<T>-like interface that locks using a mutex.
 * Wraps any T into an interface similar to std::atomic<T> that locks using a mutex.
*
* In contrast to std::atomic<T>, Locked<T> is also valid for types that are not trivially copyable.
* In case T is trivially copyable, std::atomic<T> is almost certainly the better choice.
Expand All @@ -46,38 +46,44 @@ template<typename T>
class Locked
{
public:
inline T load() const
{
std::unique_lock<std::mutex> lock(m_Mutex);

return m_Value;
}

inline void store(T desired)
{
std::unique_lock<std::mutex> lock(m_Mutex);

m_Value = std::move(desired);
}
T load(LockedMutex& mtx) const;
void store(T desired, LockedMutex& mtx);

private:
mutable std::mutex m_Mutex;
T m_Value;
};

/**
* Type alias for std::atomic<T> if possible, otherwise Locked<T> is used as a fallback.
* Wraps std::mutex, so that only Locked<T> can (un)lock it.
*
* The latter tiny lock scope is enforced this way to prevent deadlocks while passing around mutexes.
*
* @ingroup base
*/
template <typename T>
using AtomicOrLocked =
#if defined(__GNUC__) && __GNUC__ < 5
// GCC does not implement std::is_trivially_copyable until version 5.
typename std::conditional<std::is_fundamental<T>::value || std::is_pointer<T>::value, std::atomic<T>, Locked<T>>::type;
#else /* defined(__GNUC__) && __GNUC__ < 5 */
typename std::conditional<std::is_trivially_copyable<T>::value, std::atomic<T>, Locked<T>>::type;
#endif /* defined(__GNUC__) && __GNUC__ < 5 */
// Mutex intended to be shared by all Locked<T> fields of one object,
// so that each field does not carry its own std::mutex.
class LockedMutex
{
	// Only Locked<T> (for any T) may reach m_Mutex; there is no public
	// (un)lock API, which confines lock scopes to Locked<T>::load()/store()
	// and thereby helps prevent deadlocks while mutexes are passed around.
	template<class T>
	friend class Locked;

private:
	std::mutex m_Mutex;
};

/**
 * Returns a copy of the stored value while holding the given mutex.
 *
 * @param mtx The (per-object) mutex guarding this value.
 * @return A copy of the stored value.
 */
template<class T>
T Locked<T>::load(LockedMutex& mtx) const
{
	// std::lock_guard suffices here: the lock is strictly scope-bound and
	// none of std::unique_lock's deferred-locking/ownership-transfer
	// machinery is needed.
	std::lock_guard lock (mtx.m_Mutex);

	return m_Value;
}

/**
 * Replaces the stored value while holding the given mutex.
 *
 * @param desired The new value, taken by value so callers may move into it.
 * @param mtx The (per-object) mutex guarding this value.
 */
template<class T>
void Locked<T>::store(T desired, LockedMutex& mtx)
{
	// std::lock_guard suffices here: plain scope-bound locking, no need
	// for std::unique_lock's extra state and flexibility.
	std::lock_guard lock (mtx.m_Mutex);

	// desired was passed by value, so moving from it is safe and avoids
	// a second copy of T.
	m_Value = std::move(desired);
}

}

Expand Down
2 changes: 1 addition & 1 deletion lib/base/configobject.ti
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ abstract class ConfigObject : ConfigObjectBase < ConfigType
[config, no_user_modify] String __name (Name);
[config, no_user_modify, required] String "name" (ShortName) {
get {{{
String shortName = m_ShortName.load();
String shortName = m_ShortName.load(m_FieldsMutex);
if (shortName.IsEmpty())
return GetName();
else
Expand Down
2 changes: 1 addition & 1 deletion lib/icinga/host.ti
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class Host : Checkable

[config] String display_name {
get {{{
String displayName = m_DisplayName.load();
String displayName = m_DisplayName.load(m_FieldsMutex);
if (displayName.IsEmpty())
return GetName();
else
Expand Down
2 changes: 1 addition & 1 deletion lib/icinga/hostgroup.ti
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class HostGroup : CustomVarObject
{
[config] String display_name {
get {{{
String displayName = m_DisplayName.load();
String displayName = m_DisplayName.load(m_FieldsMutex);
if (displayName.IsEmpty())
return GetName();
else
Expand Down
2 changes: 1 addition & 1 deletion lib/icinga/service.ti
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class Service : Checkable < ServiceNameComposer

[config] String display_name {
get {{{
String displayName = m_DisplayName.load();
String displayName = m_DisplayName.load(m_FieldsMutex);
if (displayName.IsEmpty())
return GetShortName();
else
Expand Down
2 changes: 1 addition & 1 deletion lib/icinga/servicegroup.ti
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class ServiceGroup : CustomVarObject
{
[config] String display_name {
get {{{
String displayName = m_DisplayName.load();
String displayName = m_DisplayName.load(m_FieldsMutex);
if (displayName.IsEmpty())
return GetName();
else
Expand Down
2 changes: 1 addition & 1 deletion lib/icinga/timeperiod.ti
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class TimePeriod : CustomVarObject
{
[config] String display_name {
get {{{
String displayName = m_DisplayName.load();
String displayName = m_DisplayName.load(m_FieldsMutex);
if (displayName.IsEmpty())
return GetName();
else
Expand Down
2 changes: 1 addition & 1 deletion lib/icinga/user.ti
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class User : CustomVarObject
{
[config] String display_name {
get {{{
String displayName = m_DisplayName.load();
String displayName = m_DisplayName.load(m_FieldsMutex);
if (displayName.IsEmpty())
return GetName();
else
Expand Down
2 changes: 1 addition & 1 deletion lib/icinga/usergroup.ti
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class UserGroup : CustomVarObject
{
[config] String display_name {
get {{{
String displayName = m_DisplayName.load();
String displayName = m_DisplayName.load(m_FieldsMutex);
if (displayName.IsEmpty())
return GetName();
else
Expand Down
4 changes: 2 additions & 2 deletions lib/icingadb/icingadb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ REGISTER_TYPE(IcingaDB);
IcingaDB::IcingaDB()
: m_Rcon(nullptr)
{
m_RconLocked.store(nullptr);
m_RconLocked.store(nullptr, m_FieldsMutex);

m_WorkQueue.SetName("IcingaDB");

Expand Down Expand Up @@ -84,7 +84,7 @@ void IcingaDB::Start(bool runtimeCreated)
m_Rcon = new RedisConnection(GetHost(), GetPort(), GetPath(), GetUsername(), GetPassword(), GetDbIndex(),
GetEnableTls(), GetInsecureNoverify(), GetCertPath(), GetKeyPath(), GetCaPath(), GetCrlPath(),
GetTlsProtocolmin(), GetCipherList(), GetConnectTimeout(), GetDebugInfo());
m_RconLocked.store(m_Rcon);
m_RconLocked.store(m_Rcon, m_FieldsMutex);

for (const Type::Ptr& type : GetTypes()) {
auto ctype (dynamic_cast<ConfigType*>(type.get()));
Expand Down
2 changes: 1 addition & 1 deletion lib/icingadb/icingadb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class IcingaDB : public ObjectImpl<IcingaDB>

inline RedisConnection::Ptr GetConnection()
{
return m_RconLocked.load();
return m_RconLocked.load(m_FieldsMutex);
}

template<class T>
Expand Down
16 changes: 11 additions & 5 deletions tools/mkclass/classcompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -454,8 +454,14 @@ void ClassCompiler::HandleClass(const Klass& klass, const ClassDebugInfo&)
m_Header << "template<>" << std::endl
<< "class ObjectImpl<" << klass.Name << ">"
<< " : public " << (klass.Parent.empty() ? "Object" : klass.Parent) << std::endl
<< "{" << std::endl
<< "public:" << std::endl
<< "{" << std::endl;

if (klass.Parent.empty()) {
m_Header << "protected:" << std::endl
<< "\tmutable LockedMutex m_FieldsMutex;" << std::endl << std::endl;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And the cool "side" effect here is that the amount of mutexes scales linear with the amount of objects. So, in contrast to a central array of mutexes created at reload time, you guaranteed won't ever have too few mutexes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you think there would be a problem with "too few mutexes"? Like you'll have a more or less fixed number of threads started by Icinga 2, so that limits the concurrency and would be a good candidate for sizing the number of mutexes in a std::atomic/libatomic-like implementation.

I'm not opposed to the idea of using one mutex per object, however, the current PR introduces a quite strange interface with AtomicPseudoLocked where you have to specify a mutex which then is ignored and has to be specified in the custom get {{{ }}} implementations. What if mkclass would just emit the required locking code in the relevant getter methods instead? Like depending on the type, it will either be an atomic load or a lock + non-atomic load + unlock. (And similar for the setters of course.)

Nonetheless, I still don't think that would be necessary and having a global array of mutexes would suffice and keep the implementation simpler.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you think there would be a problem with "too few mutexes"?

No. I don't. I don't know.

I don't know how many mutexes are enough – O(?).
I don't know the hashing algorithm mapping tons of Locked<> in memory to the O(?) mutexes in your scenario and how fair it "schedules".
And beyond the things which I know and the (above) ones I know that I don't know, there are things I don't know that I don't know.

The more radically you change a running system in an amount of time, the more likely you hit a (delayed) black swan. Like JSON-RPC crash one (v2.11.3), JSON-RPC crash two (v2.11.4), IcingaDB(?) OOM ref/NC/820479 (v2.?) ...

I'm not opposed to the idea of using one mutex per object,

👍

Btw. I'm neither opposed to yours.
I just lack the knowledge to implement it by myself properly enough to sleep well afterwards.

AtomicPseudoLocked where you have to specify a mutex which then is ignored and has to be specified in the custom get {{{ }}} implementations

No, yes and no.
AtomicPseudoLocked<T> just extends std::atomic<T> with two additional LockedMutex-compliant methods you can call if you wish. They indeed ignore the mutex (God bless compiler optimization!) and call their (public) equivalents which don't take one. Custom get {{{ }}} implementations can also call the latter, not specifying any mutex, but all .ti files in the current diff override get {{{ }}} for strings. So they need the mutex and they actually lock it.

What if mkclass would just emit the required locking code in the relevant getter methods instead? Like depending on the type, it will either be an atomic load or a lock + non-atomic load + unlock.

mkclass is surely a good tool for stuff that would be significantly(?) more difficult in vanilla C++.
But from my experience I'd prefer "native solutions" (e.g vanilla C++) if applicable easily enough.

Copy link
Contributor

@julianbrost julianbrost Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I went down the rabbit hole quite a bit. The GCC implementation for the std::atomic_load(std::shared_ptr<T>*) and related is actually quite trivial:

These functions use a RAII-style locker called _Sp_locker (source) which uses some hash function (I didn't dig deeper into the hash function itself) modulo __gnu_internal::mask to get a mutex index (source) which is then used from a static array of mutexes (source). That mask is actually quite small (0xf) and results in a pool size of 16 (source).

To be honest, that wasn't really what I was expecting and I knew I found something different when initially suggesting this. Luckily I found it again, it was this answer on Stack Overflow for a somewhat related but different question, namely how std::atomic<T> works when T is too large for atomic instructions. The answer there relating to clang++ (in particular the lock_for_pointer(void*) function) look much more like what I was expecting.

For completeness: std::atomic<std::shared_ptr<T>> is a new thing in C++20 and GCC also has an implementation for it which is way different from the one for std::atomic_load(), though I didn't look into it in detail as it doesn't seem to do what we would need for our implementation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the current PR introduces a quite strange interface with AtomicPseudoLocked where you have to specify a mutex which then is ignored

I've fixed it* – everything's Locked<>. (Involuntarily*, however, because I consider this solution development, not a beauty contest. Especially, you admitted yourself you can't summarize what's so "ugly" and why this "uglyness" seems a problem to you.) Anyway:

I've got why that std::atomic<std::shared_ptr<T>> implementation isn't problematic: because it only affects std::atomic<std::shared_ptr<T>>. Well-implemented copying of std::shared_ptr (or at least boost::intrusive_ptr) IMAO involves few enough instructions to protect it via just a spinlock (with yield). So copying one std::shared_ptr doesn't even block the next one for a noticeable time, and a noticeable amount may share one mutex. String plays in another league with its malloc(3) and memcpy(3).

*) If you still consider your approach better, here is my "counter"-suggestion:

  • AtomicOrLocked as such stays (in contrast to current PR state)
  • Locked<> uses the general idea of yours/C++, BUT:
    • (Don't fall from your chair!) There's a pool of std::thread::hardware_concurrency() * 64 mutexes. This is enough for sure (and just 4 MB on an M3 Mac with 1024 cores)
    • Which mutex is used for x is calculated this way: std::hash<decltype(x)*>()(&x) % (std::thread::hardware_concurrency() * 64)
    • As strings are stored in the heap anyway, and to reduce lock time, Locked<String> stores internally not a String, but a Shared<struct{size_t,char[1]}>::Ptr

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've fixed it* – everything's Locked<>.

So you're not using any atomic operations for attribute accesses anymore but always lock a mutex instead? That sounds like a strange thing to do without any performance considerations, especially given your worries about "not enough mutexes".

*) If you still consider your approach better, here is my "counter"-suggestion:

I mean basically it's the fixed-size shared mutex pool suggestion from the beginning, just with some details filled in.

There's a pool of std::thread::hardware_concurrency() * 64 mutexes. This is enough for sure

Probably enough, that even sounds a bit excessive. Was 64 chosen by a fair dice roll or is there more consideration behind that number?

on an M3 Mac with 1024 cores

Sounds like an amazing machine! Where did you get that?

Shared<struct{size_t,char[1]}>::Ptr

I'm not really sure what's that supposed to do? Especially that struct inside instead of simply a string.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've fixed it* – everything's Locked<>.

So you're not any atomic operations for attribute accesses anymore but always lock a mutex instead? That sounds like a strange thing to do without any performance considerations, especially given your worries about "not enough mutexes".

I prefer atomic where possible. But you considered ignoring the mutex in case of atomic too ugly.

That I prefer functionality doesn't mean I don't make compromises to meet your code beauty standards.

*) If you still consider your approach better, here is my "counter"-suggestion:

I mean basically it's the fixed-size shared mutex pool suggestion from the beginning, just with some details filled in.

Sure. "Locked<> uses the general idea of yours (...)"

There's a pool of std::thread::hardware_concurrency() * 64 mutexes. This is enough for sure

Probably enough, that even sounds a bit excessive. Was 64 chosen by a fair dice role or is there more consideration behind that number?

Yes, 64 was primarily chosen because it's a cool number, but also:

I'm glad to hear your feedback because "a bit excessive" is definitely enough.

on an M3 Mac with 1024 cores

Sounds like an amazing machine! Where did you get that?

(I'm an Apple shareholder, you know... 😎 /s)

Our senior consultant @lbetz confirmed that he hasn't seen any customer machine with 1024 cores, yet. And M3 has the larger std::mutex. Now, if you combine those maximums, you'll get my 4 MB from above. For a machine of that (theoretical) size I consider that not excessive.

But I'm open to a less cool sounding power of two at your option.

Shared<struct{size_t,char[1]}>::Ptr

I'm not really sure what's that supposed to do? Especially that struct inside instead of simply a string.

It saves you one malloc(3), especially in this frequently used code path. Any kind of string is fixed size with its payload outsourced to the heap. This struct would unite everything of a string in one "data block".

}

m_Header << "public:" << std::endl
<< "\t" << "DECLARE_PTR_TYPEDEFS(ObjectImpl<" << klass.Name << ">);" << std::endl << std::endl;

/* Validate */
Expand Down Expand Up @@ -815,7 +821,7 @@ void ClassCompiler::HandleClass(const Klass& klass, const ClassDebugInfo&)
<< "{" << std::endl;

if (field.GetAccessor.empty() && !(field.Attributes & FANoStorage))
m_Impl << "\t" << "return m_" << field.GetFriendlyName() << ".load();" << std::endl;
m_Impl << "\treturn m_" << field.GetFriendlyName() << ".load(m_FieldsMutex);" << std::endl;
else
m_Impl << field.GetAccessor << std::endl;

Expand Down Expand Up @@ -853,7 +859,7 @@ void ClassCompiler::HandleClass(const Klass& klass, const ClassDebugInfo&)
<< "\t" << "auto *dobj = dynamic_cast<ConfigObject *>(this);" << std::endl;

if (field.SetAccessor.empty() && !(field.Attributes & FANoStorage))
m_Impl << "\t" << "m_" << field.GetFriendlyName() << ".store(value);" << std::endl;
m_Impl << "\tm_" << field.GetFriendlyName() << ".store(value, m_FieldsMutex);" << std::endl;
else
m_Impl << field.SetAccessor << std::endl << std::endl;

Expand Down Expand Up @@ -1068,7 +1074,7 @@ void ClassCompiler::HandleClass(const Klass& klass, const ClassDebugInfo&)
if (field.Attributes & FANoStorage)
continue;

m_Header << "\tAtomicOrLocked<" << field.Type.GetRealType() << "> m_" << field.GetFriendlyName() << ";" << std::endl;
m_Header << "\tLocked<" << field.Type.GetRealType() << "> m_" << field.GetFriendlyName() << ";" << std::endl;
}

/* signal */
Expand Down
Loading