# HG changeset patch # User redestad # Date 1421937492 -3600 # Thu Jan 22 15:38:12 2015 +0100 # Node ID fe6f92712028159cb5cd54f0b3be57fa4e895ec3 # Parent e6a0cfbfdc9abbff0b47af93996335a7a9ae72f4 8069273: Reduce Hot Card Cache Lock contention Reviewed-by: tschatzl diff --git a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp @@ -36,10 +36,12 @@ if (default_use_cache()) { _use_cache = true; - _hot_cache_size = (1 << G1ConcRSLogCacheSize); + _hot_cache_size = (intptr_t)1 << G1ConcRSLogCacheSize; _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); + for (intptr_t i = 0; i < _hot_cache_size; i++) { + _hot_cache[i] = NULL; + } - _n_hot = 0; _hot_cache_idx = 0; // For refining the cards in the hot cache in parallel @@ -64,26 +66,29 @@ // return it for immediate refining. return card_ptr; } + // Otherwise, the card is hot. + jbyte* current_ptr; + intptr_t masked_index; + intptr_t index; + do { + index = _hot_cache_idx; + masked_index = index & (_hot_cache_size - 1); + current_ptr = _hot_cache[masked_index]; + if (current_ptr == card_ptr) { + return NULL; + } + } while (index != Atomic::cmpxchg_ptr(index + 1, &_hot_cache_idx, index)); - // Otherwise, the card is hot. - jbyte* res = NULL; - MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); - if (_n_hot == _hot_cache_size) { - res = _hot_cache[_hot_cache_idx]; - _n_hot--; - } - - // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. - _hot_cache[_hot_cache_idx] = card_ptr; - _hot_cache_idx++; - - if (_hot_cache_idx == _hot_cache_size) { - // Wrap around - _hot_cache_idx = 0; - } - _n_hot++; - - return res; + // Try to store the new card pointer into the cache. Compare-and-swap to guard + // against the unlikely event of a race resulting in another card pointer to + // have already been written to the cache. In this case we will return + // card_ptr in favor of the other option, which would be starting over. This + // should be OK since card_ptr will likely be the older card already when/if + // this ever happens. + jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr, + &_hot_cache[masked_index], + current_ptr); + return (previous_ptr == current_ptr) ? previous_ptr : card_ptr; } void G1HotCardCache::drain(uint worker_i, @@ -96,16 +101,16 @@ assert(_hot_cache != NULL, "Logic"); assert(!use_cache(), "cache should be disabled"); - int start_idx; + intptr_t start_idx; - while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once - int end_idx = start_idx + _hot_cache_par_chunk_size; + while ((start_idx = _hot_cache_par_claimed_idx) < _hot_cache_size) { // read once + intptr_t end_idx = start_idx + _hot_cache_par_chunk_size; if (start_idx == - Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { + Atomic::cmpxchg_ptr(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { // The current worker has successfully claimed the chunk [start_idx..end_idx) - end_idx = MIN2(end_idx, _n_hot); - for (int i = start_idx; i < end_idx; i++) { + end_idx = MIN2(end_idx, _hot_cache_size); + for (intptr_t i = start_idx; i < end_idx; i++) { jbyte* card_ptr = _hot_cache[i]; if (card_ptr != NULL) { if (g1rs->refine_card(card_ptr, worker_i, true)) { @@ -124,6 +129,8 @@ into_cset_dcq->enqueue(card_ptr); } + } else { + break; } } } diff --git a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp @@ -54,21 +54,30 @@ // code, increasing throughput. class G1HotCardCache: public CHeapObj { - G1CollectedHeap* _g1h; + + G1CollectedHeap* _g1h; + + bool _use_cache; + + G1CardCounts _card_counts; // The card cache table - jbyte** _hot_cache; + jbyte** _hot_cache; - int _hot_cache_size; - int _n_hot; - int _hot_cache_idx; + intptr_t _hot_cache_size; - int _hot_cache_par_chunk_size; - volatile int _hot_cache_par_claimed_idx; + int _hot_cache_par_chunk_size; - bool _use_cache; + // Avoids false sharing when concurrently updating _hot_cache_idx or + // _hot_cache_par_claimed_idx. These are never updated at the same time + // thus it's not necessary to separate them as well + char _pad_before[DEFAULT_CACHE_LINE_SIZE]; - G1CardCounts _card_counts; + volatile intptr_t _hot_cache_idx; + + volatile intptr_t _hot_cache_par_claimed_idx; + + char _pad_after[DEFAULT_CACHE_LINE_SIZE]; // The number of cached cards a thread claims when flushing the cache static const int ClaimChunkSize = 32; @@ -113,10 +122,13 @@ void reset_hot_cache() { assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread"); - _hot_cache_idx = 0; _n_hot = 0; + _hot_cache_idx = 0; + for (intptr_t i = 0; i < _hot_cache_size; i++) { + _hot_cache[i] = NULL; + } } - bool hot_cache_is_empty() { return _n_hot == 0; } + bool hot_cache_is_empty() { return _hot_cache[0] == NULL; } // Zeros the values in the card counts table for entire committed heap void reset_card_counts(); diff --git a/src/share/vm/runtime/mutexLocker.cpp b/src/share/vm/runtime/mutexLocker.cpp --- a/src/share/vm/runtime/mutexLocker.cpp +++ b/src/share/vm/runtime/mutexLocker.cpp @@ -120,7 +120,6 @@ Mutex* OldSets_lock = NULL; Monitor* RootRegionScan_lock = NULL; Mutex* MMUTracker_lock = NULL; -Mutex* HotCardCache_lock = NULL; Monitor* GCTaskManager_lock = NULL; @@ -199,7 +198,6 @@ def(OldSets_lock , Mutex , leaf , true, Monitor::_safepoint_check_never); def(RootRegionScan_lock , Monitor, leaf , true, Monitor::_safepoint_check_never); def(MMUTracker_lock , Mutex , leaf , true, Monitor::_safepoint_check_never); - def(HotCardCache_lock , Mutex , special , true, Monitor::_safepoint_check_never); def(EvacFailureStack_lock , Mutex , nonleaf , true, Monitor::_safepoint_check_never); def(StringDedupQueue_lock , Monitor, leaf, true, Monitor::_safepoint_check_never); diff --git a/src/share/vm/runtime/mutexLocker.hpp b/src/share/vm/runtime/mutexLocker.hpp --- a/src/share/vm/runtime/mutexLocker.hpp +++ b/src/share/vm/runtime/mutexLocker.hpp @@ -122,7 +122,6 @@ extern Monitor* RootRegionScan_lock; // used to notify that the CM threads have finished scanning the IM snapshot regions extern Mutex* MMUTracker_lock; // protects the MMU // tracker data structures -extern Mutex* HotCardCache_lock; // protects the hot card cache extern Mutex* Management_lock; // a lock used to serialize JVM management extern Monitor* Service_lock; // a lock used for service thread operation # HG changeset patch # User redestad # Date 1422024712 -3600 # Fri Jan 23 15:51:52 2015 +0100 # Node ID 768089606ee00cd079958f9e6179bac0ea08c945 # Parent fe6f92712028159cb5cd54f0b3be57fa4e895ec3 [mq]: atomicadd diff --git a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp @@ -36,9 +36,9 @@ if (default_use_cache()) { _use_cache = true; - _hot_cache_size = (intptr_t)1 << G1ConcRSLogCacheSize; + _hot_cache_size = (size_t)1 << G1ConcRSLogCacheSize; _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC); - for (intptr_t i = 0; i < _hot_cache_size; i++) { + for (size_t i = 0; i < _hot_cache_size; i++) { _hot_cache[i] = NULL; } @@ -66,19 +66,11 @@ // return it for immediate refining. return card_ptr; } - // Otherwise, the card is hot. - jbyte* current_ptr; - intptr_t masked_index; - intptr_t index; - do { - index = _hot_cache_idx; - masked_index = index & (_hot_cache_size - 1); - current_ptr = _hot_cache[masked_index]; - if (current_ptr == card_ptr) { - return NULL; - } - } while (index != Atomic::cmpxchg_ptr(index + 1, &_hot_cache_idx, index)); - + // Otherwise, the card is hot. + size_t index = Atomic::add(1, &_hot_cache_idx) - 1; + size_t masked_index = index & (_hot_cache_size - 1); + jbyte* current_ptr = _hot_cache[masked_index]; + // Try to store the new card pointer into the cache. Compare-and-swap to guard // against the unlikely event of a race resulting in another card pointer to // have already been written to the cache. In this case we will return @@ -86,8 +78,8 @@ // should be OK since card_ptr will likely be the older card already when/if // this ever happens. jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr, - &_hot_cache[masked_index], - current_ptr); + &_hot_cache[masked_index], + current_ptr); return (previous_ptr == current_ptr) ? previous_ptr : card_ptr; } @@ -101,40 +93,37 @@ assert(_hot_cache != NULL, "Logic"); assert(!use_cache(), "cache should be disabled"); - intptr_t start_idx; - while ((start_idx = _hot_cache_par_claimed_idx) < _hot_cache_size) { // read once - intptr_t end_idx = start_idx + _hot_cache_par_chunk_size; + while (_hot_cache_par_claimed_idx < _hot_cache_size) { // read once + size_t end_idx = Atomic::add(_hot_cache_par_chunk_size, &_hot_cache_par_claimed_idx); + size_t start_idx = end_idx - _hot_cache_par_chunk_size; + // The current worker has successfully claimed the chunk [start_idx..end_idx) + end_idx = MIN2(end_idx, _hot_cache_size); + for (size_t i = start_idx; i < end_idx; i++) { + jbyte* card_ptr = _hot_cache[i]; + if (card_ptr != NULL) { + if (g1rs->refine_card(card_ptr, worker_i, true)) { + // The part of the heap spanned by the card contains references + // that point into the current collection set. + // We need to record the card pointer in the DirtyCardQueueSet + // that we use for such cards. + // + // The only time we care about recording cards that contain + // references that point into the collection set is during + // RSet updating while within an evacuation pause. + // In this case worker_i should be the id of a GC worker thread + assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); + assert(worker_i < ParallelGCThreads, + err_msg("incorrect worker id: %u", worker_i)); - if (start_idx == - Atomic::cmpxchg_ptr(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { - // The current worker has successfully claimed the chunk [start_idx..end_idx) - end_idx = MIN2(end_idx, _hot_cache_size); - for (intptr_t i = start_idx; i < end_idx; i++) { - jbyte* card_ptr = _hot_cache[i]; - if (card_ptr != NULL) { - if (g1rs->refine_card(card_ptr, worker_i, true)) { - // The part of the heap spanned by the card contains references - // that point into the current collection set. - // We need to record the card pointer in the DirtyCardQueueSet - // that we use for such cards. - // - // The only time we care about recording cards that contain - // references that point into the collection set is during - // RSet updating while within an evacuation pause. - // In this case worker_i should be the id of a GC worker thread - assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); - assert(worker_i < ParallelGCThreads, - err_msg("incorrect worker id: %u", worker_i)); - - into_cset_dcq->enqueue(card_ptr); - } - } else { - break; + into_cset_dcq->enqueue(card_ptr); } + } else { + break; } } } + // The existing entries in the hot card cache, which were just refined // above, are discarded prior to re-enabling the cache near the end of the GC. } diff --git a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp --- a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp +++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp @@ -64,7 +64,7 @@ // The card cache table jbyte** _hot_cache; - intptr_t _hot_cache_size; + size_t _hot_cache_size; int _hot_cache_par_chunk_size; @@ -73,9 +73,9 @@ // thus it's not necessary to separate them as well char _pad_before[DEFAULT_CACHE_LINE_SIZE]; - volatile intptr_t _hot_cache_idx; + volatile size_t _hot_cache_idx; - volatile intptr_t _hot_cache_par_claimed_idx; + volatile size_t _hot_cache_par_claimed_idx; char _pad_after[DEFAULT_CACHE_LINE_SIZE]; @@ -123,7 +123,7 @@ assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread"); _hot_cache_idx = 0; - for (intptr_t i = 0; i < _hot_cache_size; i++) { + for (size_t i = 0; i < _hot_cache_size; i++) { _hot_cache[i] = NULL; } }