Skip to content

Commit 0043aba

Browse files
TreeHunter9Artyom Ivanov
andauthored
fix(shutdown): Prevent race condition when GlobalObject destruction routine unlocks global mutex (#8652)
* fix(shutdown): Prevent race condition when ~GlobalObjectHolder() routine unlocks global mutex Unlocking global mutex in GlobalObject destruction routine made it possible for a new attachment to slip in, so it will be creating new GlobalObject and using it, while destroying routine still in action. This can lead to an undefined state of the global objects, such as shared memory, where one thread is actively using it while another thread is destroying it. * fix(shutdown): Reimplement synchronization between shutdown and init routine for GlobalObjectHolder * refactor(shutdown): Remove unused variable * refactor(shutdown): Code adjustments after Vlad review --------- Co-authored-by: Artyom Ivanov <[email protected]>
1 parent ed64eb6 commit 0043aba

File tree

2 files changed

+49
-8
lines changed

2 files changed

+49
-8
lines changed

src/jrd/Database.cpp

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -586,12 +586,39 @@ namespace Jrd
586586
{
587587
MutexLockGuard guard(g_mutex, FB_FUNCTION);
588588

589-
Database::GlobalObjectHolder::DbId* entry = g_hashTable->lookup(id);
589+
// Get entry with incrementing ref counter, so if someone is currently destroying it, the object itself
590+
// will remain alive.
591+
RefPtr<Database::GlobalObjectHolder::DbId> entry(g_hashTable->lookup(id));
592+
if (entry)
593+
{
594+
auto& shutdownMutex = entry->shutdownMutex;
595+
// Check if someone else currently destroying GlobalObject.
596+
if (shutdownMutex.tryEnter(FB_FUNCTION))
597+
{
598+
// No one is destroying GlobalObject, continue init routine.
599+
shutdownMutex.leave();
600+
}
601+
else
602+
{
603+
// Someone is currently destroying GlobalObject, wait until he finish it to eliminate potential
604+
// race conditions.
605+
{
606+
MutexUnlockGuard unlockGuard(g_mutex, FB_FUNCTION);
607+
608+
MutexLockGuard guard(shutdownMutex, FB_FUNCTION);
609+
}
610+
// Now we are the one who owned DbId object.
611+
// It also was removed from hash table, so simply delete it and recreate it next.
612+
fb_assert(entry->holder == nullptr);
613+
entry = nullptr;
614+
}
615+
}
590616
if (!entry)
591617
{
592618
const auto holder = FB_NEW Database::GlobalObjectHolder(id, filename, config);
593-
entry = FB_NEW Database::GlobalObjectHolder::DbId(id, holder);
619+
entry = makeRef(FB_NEW Database::GlobalObjectHolder::DbId(id, holder));
594620
g_hashTable->add(entry);
621+
entry->addRef();
595622
}
596623

597624
entry->holder->addRef();
@@ -601,9 +628,18 @@ namespace Jrd
601628
Database::GlobalObjectHolder::~GlobalObjectHolder()
602629
{
603630
// dtor is executed under g_mutex protection
604-
Database::GlobalObjectHolder::DbId* entry = g_hashTable->lookup(m_id);
605-
if (!g_hashTable->remove(m_id))
606-
fb_assert(false);
631+
632+
// Stole the object from the hash table without incrementing ref counter, so we will be the one who will delete the object
633+
// at the end of this function.
634+
RefPtr<Database::GlobalObjectHolder::DbId> entry(REF_NO_INCR, g_hashTable->lookup(m_id));
635+
fb_assert(entry);
636+
fb_assert(entry->holder == this);
637+
// We need to unlock the global mutex to safely shutdown some managers, so lock shutdown mutex to make sure that
638+
// other threads will wait until we done our shutdown routine.
639+
// This is done to eliminate potential race conditions involving global objects, such as shared memory.
640+
//! Be careful with order of mutex locking: first - `g_mutex`, second - `shutdownMutex`, but after `MutexUnlockGuard`
641+
//! this order will be reversed, so potentially a deadlock situation may occur here.
642+
MutexLockGuard guard(entry->shutdownMutex, FB_FUNCTION);
607643

608644
{ // scope
609645
// here we cleanup what should not be globally protected
@@ -616,7 +652,10 @@ namespace Jrd
616652
m_eventMgr = nullptr;
617653
m_replMgr = nullptr;
618654

619-
delete entry;
655+
if (!g_hashTable->remove(m_id))
656+
fb_assert(false);
657+
658+
entry->holder = nullptr;
620659

621660
fb_assert(m_tempCacheUsage == 0);
622661
}

src/jrd/Database.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ class Database : public pool_alloc<type_dbb>
256256
typedef Firebird::HashTable<DbId, Firebird::DEFAULT_HASH_SIZE,
257257
Firebird::string, DbId, DbId > DbIdHash;
258258

259-
struct DbId : public DbIdHash::Entry, public Firebird::GlobalStorage
259+
struct DbId : public DbIdHash::Entry, public Firebird::GlobalStorage, public Firebird::RefCounted
260260
{
261261
DbId(const Firebird::string& x, GlobalObjectHolder* h)
262262
: id(getPool(), x), holder(h)
@@ -285,7 +285,9 @@ class Database : public pool_alloc<type_dbb>
285285
}
286286

287287
const Firebird::string id;
288-
GlobalObjectHolder* const holder;
288+
GlobalObjectHolder* holder;
289+
// This mutex is working very tight with `g_mutex`, so use it carefully to avoid possible deadlocks.
290+
Firebird::Mutex shutdownMutex;
289291
};
290292

291293
static Firebird::GlobalPtr<DbIdHash> g_hashTable;

0 commit comments

Comments
 (0)