# HG changeset patch # User zgu # Date 1591029718 14400 # Mon Jun 01 12:41:58 2020 -0400 # Node ID d1a3933c21234edbefb37b2614ae9b889369bd9d # Parent f42ea705a4bfc02ab8b5ae1735dbc261fd367214 8245961: Shenandoah: move some root marking to concurrent phase diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp @@ -174,7 +174,6 @@ rp = NULL; } - _cm->concurrent_scan_code_roots(worker_id, rp); _cm->mark_loop(worker_id, _terminator, rp, true, // cancellable ShenandoahStringDedup::is_enabled()); // perform string dedup @@ -215,6 +214,103 @@ } }; +template +class ShenandoahConcurrentRootsIterator { +private: + ShenandoahVMRoots _vm_roots; + ShenandoahClassLoaderDataRoots + _cld_roots; + ShenandoahNMethodTableSnapshot* _codecache_snapshot; + ShenandoahPhaseTimings::Phase _phase; + +public: + ShenandoahConcurrentRootsIterator(ShenandoahPhaseTimings::Phase phase); + ~ShenandoahConcurrentRootsIterator(); + + void oops_do(OopClosure* oops, uint worker_id); +}; + +template +ShenandoahConcurrentRootsIterator::ShenandoahConcurrentRootsIterator(ShenandoahPhaseTimings::Phase phase) : + _vm_roots(phase), + _cld_roots(phase), + _codecache_snapshot(NULL), + _phase(phase) { + if (!ShenandoahHeap::heap()->unload_classes()) { + if (CONCURRENT) { + CodeCache_lock->lock_without_safepoint_check(); + } else { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint"); + } + _codecache_snapshot = ShenandoahCodeRoots::table()->snapshot_for_iteration(); + } + assert(!CONCURRENT || !ShenandoahHeap::heap()->has_forwarded_objects(), "Not expecting forwarded pointers during concurrent marking"); +} + +template +ShenandoahConcurrentRootsIterator::~ShenandoahConcurrentRootsIterator() { + if (!ShenandoahHeap::heap()->unload_classes()) { + ShenandoahCodeRoots::table()->finish_iteration(_codecache_snapshot); + if (CONCURRENT) { + CodeCache_lock->unlock(); + } + } +} + +template +void ShenandoahConcurrentRootsIterator::oops_do(OopClosure* oops, uint worker_id) { + ShenandoahHeap* const heap = ShenandoahHeap::heap(); + CLDToOopClosure clds_cl(oops, CONCURRENT ? ClassLoaderData::_claim_strong : ClassLoaderData::_claim_none); + _vm_roots.oops_do(oops, worker_id); + + if (!heap->unload_classes()) { + _cld_roots.cld_do(&clds_cl, worker_id); + + ShenandoahWorkerTimingsTracker timer(_phase, ShenandoahPhaseTimings::CodeCacheRoots, worker_id); + CodeBlobToOopClosure blobs(oops, !CodeBlobToOopClosure::FixRelocations); + _codecache_snapshot->parallel_blobs_do(&blobs); + } else { + _cld_roots.always_strong_cld_do(&clds_cl, worker_id); + } +} + +// Process concurrent roots at safepoints +template +class ShenandoahProcessConcurrentRootsTask : public AbstractGangTask { +private: + ShenandoahConcurrentRootsIterator _itr; + ShenandoahConcurrentMark* const _cm; + ReferenceProcessor* _rp; +public: + + ShenandoahProcessConcurrentRootsTask(ShenandoahConcurrentMark* cm, + ShenandoahPhaseTimings::Phase phase); + void work(uint worker_id); +}; + +template +ShenandoahProcessConcurrentRootsTask::ShenandoahProcessConcurrentRootsTask(ShenandoahConcurrentMark* cm, + ShenandoahPhaseTimings::Phase phase) : + AbstractGangTask("Shenandoah STW Concurrent Mark Task"), + _itr(phase), + _cm(cm), + _rp(NULL) { + ShenandoahHeap* heap = ShenandoahHeap::heap(); + if (heap->process_references()) { + _rp = heap->ref_processor(); + shenandoah_assert_rp_isalive_installed(); + } +} + +template +void ShenandoahProcessConcurrentRootsTask::work(uint worker_id) { + ShenandoahParallelWorkerSession worker_session(worker_id); + ShenandoahObjToScanQueue* q = _cm->task_queues()->queue(worker_id); + CLOSURE cl(q, _rp); + _itr.oops_do(&cl, worker_id); +} + + class ShenandoahFinalMarkingTask : public AbstractGangTask { private: ShenandoahConcurrentMark* _cm; @@ -267,13 +363,6 @@ } } - if (heap->is_degenerated_gc_in_progress() || heap->is_full_gc_in_progress()) { - // Full GC does not execute concurrent cycle. - // Degenerated cycle may bypass concurrent cycle. - // So code roots might not be scanned, let's scan here. - _cm->concurrent_scan_code_roots(worker_id, rp); - } - _cm->mark_loop(worker_id, _terminator, rp, false, // not cancellable _dedup_string); @@ -308,8 +397,6 @@ ShenandoahInitMarkRootsTask mark_roots(&root_proc); workers->run_task(&mark_roots); } - - clear_claim_codecache(); } void ShenandoahConcurrentMark::update_roots(ShenandoahPhaseTimings::Phase root_phase) { @@ -390,34 +477,45 @@ } } -void ShenandoahConcurrentMark::concurrent_scan_code_roots(uint worker_id, ReferenceProcessor* rp) { - if (_heap->unload_classes()) { - return; - } +// Mark concurrent roots during concurrent phases +class ShenandoahMarkConcurrentRootsTask : public AbstractGangTask { +private: + SuspendibleThreadSetJoiner _sts_joiner; + ShenandoahConcurrentRootsIterator _itr; + ShenandoahObjToScanQueueSet* const _queue_set; + ReferenceProcessor* const _rp; + +public: + ShenandoahMarkConcurrentRootsTask(ShenandoahObjToScanQueueSet* qs, + ReferenceProcessor* rp, + ShenandoahPhaseTimings::Phase phase); + void work(uint worker_id); +}; - if (claim_codecache()) { - ShenandoahObjToScanQueue* q = task_queues()->queue(worker_id); - MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); - // TODO: We can not honor StringDeduplication here, due to lock ranking - // inversion. So, we may miss some deduplication candidates. - if (_heap->has_forwarded_objects()) { - ShenandoahMarkResolveRefsClosure cl(q, rp); - CodeBlobToOopClosure blobs(&cl, !CodeBlobToOopClosure::FixRelocations); - CodeCache::blobs_do(&blobs); - } else { - ShenandoahMarkRefsClosure cl(q, rp); - CodeBlobToOopClosure blobs(&cl, !CodeBlobToOopClosure::FixRelocations); - CodeCache::blobs_do(&blobs); - } - } +ShenandoahMarkConcurrentRootsTask::ShenandoahMarkConcurrentRootsTask(ShenandoahObjToScanQueueSet* qs, + ReferenceProcessor* rp, + ShenandoahPhaseTimings::Phase phase) : + AbstractGangTask("Shenandoah Concurrent Mark Task"), + _itr(phase), + _queue_set(qs), + _rp(rp) { + assert(!ShenandoahHeap::heap()->has_forwarded_objects(), "Not expected"); +} + +void ShenandoahMarkConcurrentRootsTask::work(uint worker_id) { + ShenandoahConcurrentWorkerSession worker_session(worker_id); + ShenandoahObjToScanQueue* q = _queue_set->queue(worker_id); + ShenandoahMarkResolveRefsClosure cl(q, _rp); + _itr.oops_do(&cl, worker_id); } void ShenandoahConcurrentMark::mark_from_roots() { WorkGang* workers = _heap->workers(); uint nworkers = workers->active_workers(); + ReferenceProcessor* rp = NULL; if (_heap->process_references()) { - ReferenceProcessor* rp = _heap->ref_processor(); + rp = _heap->ref_processor(); rp->set_active_mt_degree(nworkers); // enable ("weak") refs discovery @@ -432,6 +530,13 @@ task_queues()->reserve(nworkers); { + ShenandoahGCPhase phase(ShenandoahPhaseTimings::conc_mark_roots); + // Use separate task to mark concurrent roots, since it may hold ClassLoaderData_lock and CodeCache_lock + ShenandoahMarkConcurrentRootsTask task(task_queues(), rp, ShenandoahPhaseTimings::conc_mark_roots); + workers->run_task(&task); + } + + { TaskTerminator terminator(nworkers, task_queues()); ShenandoahConcurrentMarkingTask task(this, &terminator); workers->run_task(&task); @@ -445,31 +550,54 @@ uint nworkers = _heap->workers()->active_workers(); - // Finally mark everything else we've got in our queues during the previous steps. - // It does two different things for concurrent vs. mark-compact GC: - // - For concurrent GC, it starts with empty task queues, drains the remaining - // SATB buffers, and then completes the marking closure. - // - For mark-compact GC, it starts out with the task queues seeded by initial - // root scan, and completes the closure, thus marking through all live objects - // The implementation is the same, so it's shared here. { - ShenandoahGCPhase phase(full_gc ? - ShenandoahPhaseTimings::full_gc_mark_finish_queues : - ShenandoahPhaseTimings::finish_queues); - task_queues()->reserve(nworkers); - shenandoah_assert_rp_isalive_not_installed(); ShenandoahIsAliveSelector is_alive; ReferenceProcessorIsAliveMutator fix_isalive(_heap->ref_processor(), is_alive.is_alive_closure()); - StrongRootsScope scope(nworkers); - TaskTerminator terminator(nworkers, task_queues()); - ShenandoahFinalMarkingTask task(this, &terminator, ShenandoahStringDedup::is_enabled()); - _heap->workers()->run_task(&task); + + // Full GC does not execute concurrent cycle. + // Degenerated cycle may bypass concurrent cycle. + // So concurrent roots might not be scanned, scan them here. + // Ideally, this should be piggyback to ShenandoahFinalMarkingTask, but it makes time tracking + // very hard. Given full GC and degenerated GC should be rare, let's use separate task. + if (_heap->is_degenerated_gc_in_progress() || _heap->is_full_gc_in_progress()) { + ShenandoahPhaseTimings::Phase phase = _heap->is_full_gc_in_progress() ? + ShenandoahPhaseTimings::full_gc_scan_conc_roots : + ShenandoahPhaseTimings::degen_gc_scan_conc_roots; + ShenandoahGCPhase gc_phase(phase); + if (_heap->has_forwarded_objects()) { + ShenandoahProcessConcurrentRootsTask task(this, phase); + _heap->workers()->run_task(&task); + } else { + ShenandoahProcessConcurrentRootsTask task(this, phase); + _heap->workers()->run_task(&task); + } + } + + + // Finally mark everything else we've got in our queues during the previous steps. + // It does two different things for concurrent vs. mark-compact GC: + // - For concurrent GC, it starts with empty task queues, drains the remaining + // SATB buffers, and then completes the marking closure. + // - For mark-compact GC, it starts out with the task queues seeded by initial + // root scan, and completes the closure, thus marking through all live objects + // The implementation is the same, so it's shared here. + { + ShenandoahGCPhase phase(full_gc ? + ShenandoahPhaseTimings::full_gc_mark_finish_queues : + ShenandoahPhaseTimings::finish_queues); + task_queues()->reserve(nworkers); + + StrongRootsScope scope(nworkers); + TaskTerminator terminator(nworkers, task_queues()); + ShenandoahFinalMarkingTask task(this, &terminator, ShenandoahStringDedup::is_enabled()); + _heap->workers()->run_task(&task); + } + + assert(task_queues()->is_empty(), "Should be empty"); } - assert(task_queues()->is_empty(), "Should be empty"); - // When we're done marking everything, we process weak references. if (_heap->process_references()) { weak_refs_work(full_gc); @@ -942,11 +1070,3 @@ } } } - -bool ShenandoahConcurrentMark::claim_codecache() { - return _claimed_codecache.try_set(); -} - -void ShenandoahConcurrentMark::clear_claim_codecache() { - _claimed_codecache.unset(); -} diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.hpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.hpp @@ -91,16 +91,6 @@ public: void preclean_weak_refs(); -// ---------- Concurrent code cache -// -private: - ShenandoahSharedFlag _claimed_codecache; - -public: - void concurrent_scan_code_roots(uint worker_id, ReferenceProcessor* rp); - bool claim_codecache(); - void clear_claim_codecache(); - // ---------- Helpers // Used from closures, need to be public // diff --git a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp @@ -523,13 +523,13 @@ } ShenandoahNMethodList* ShenandoahNMethodList::acquire() { - assert(CodeCache_lock->owned_by_self(), "Lock must be held"); + assert_locked_or_safepoint(CodeCache_lock); _ref_count++; return this; } void ShenandoahNMethodList::release() { - assert(CodeCache_lock->owned_by_self(), "Lock must be held"); + assert_locked_or_safepoint(CodeCache_lock); _ref_count--; if (_ref_count == 0) { delete this; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.cpp b/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.cpp @@ -103,12 +103,15 @@ case full_gc_scan_roots: case full_gc_update_roots: case full_gc_adjust_roots: + case degen_gc_scan_conc_roots: case degen_gc_update_roots: + case full_gc_scan_conc_roots: case full_gc_purge_class_unload: case full_gc_purge_weak_par: case purge_class_unload: case purge_weak_par: case heap_iteration_roots: + case conc_mark_roots: case conc_weak_roots_work: case conc_strong_roots: return true; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.hpp b/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.hpp @@ -68,6 +68,9 @@ f(resize_tlabs, " Resize TLABs") \ \ f(conc_mark, "Concurrent Marking") \ + f(conc_mark_roots, " Concurrent Mark Roots ") \ + SHENANDOAH_PAR_PHASE_DO(conc_mark_roots, " CM: ", f) \ + \ f(conc_preclean, "Concurrent Precleaning") \ \ f(final_mark_gross, "Pause Final Mark (G)") \ @@ -128,6 +131,8 @@ \ f(degen_gc_gross, "Pause Degenerated GC (G)") \ f(degen_gc, "Pause Degenerated GC (N)") \ + f(degen_gc_scan_conc_roots, " Degen Mark Roots") \ + SHENANDOAH_PAR_PHASE_DO(degen_gc_conc_mark_, " DM: ", f) \ f(degen_gc_update_roots, " Degen Update Roots") \ SHENANDOAH_PAR_PHASE_DO(degen_gc_update_, " DU: ", f) \ \ @@ -137,6 +142,8 @@ f(full_gc_prepare, " Prepare") \ f(full_gc_scan_roots, " Scan Roots") \ SHENANDOAH_PAR_PHASE_DO(full_gc_scan_roots_, " FS: ", f) \ + f(full_gc_scan_conc_roots, " Scan Concurrnet Roots") \ + SHENANDOAH_PAR_PHASE_DO(full_gc_scan_conc_roots, " FCS: ", f) \ f(full_gc_update_roots, " Update Roots") \ SHENANDOAH_PAR_PHASE_DO(full_gc_update_roots_, " FU: ", f) \ f(full_gc_mark, " Mark") \ diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp @@ -28,6 +28,7 @@ #include "classfile/stringTable.hpp" #include "classfile/systemDictionary.hpp" #include "code/codeCache.hpp" +#include "code/nmethod.hpp" #include "gc/shenandoah/shenandoahClosures.inline.hpp" #include "gc/shenandoah/shenandoahConcurrentRoots.hpp" #include "gc/shenandoah/shenandoahRootProcessor.inline.hpp" @@ -199,10 +200,12 @@ ShenandoahRootProcessor(phase), _serial_roots(phase), _thread_roots(phase, n_workers > 1), - _code_roots(phase), - _vm_roots(phase), - _dedup_roots(phase), - _cld_roots(phase) { + _dedup_roots(phase) { + nmethod::oops_do_marking_prologue(); +} + +ShenandoahRootScanner::~ShenandoahRootScanner() { + nmethod::oops_do_marking_epilogue(); } void ShenandoahRootScanner::roots_do(uint worker_id, OopClosure* oops) { @@ -224,11 +227,6 @@ ResourceMark rm; _serial_roots.oops_do(oops, worker_id); - _vm_roots.oops_do(oops, worker_id); - - assert(clds != NULL, "Only possible with CLD closure"); - _cld_roots.cld_do(clds, worker_id); - ShenandoahParallelOopsDoThreadClosure tc_cl(oops, code, tc); _thread_roots.threads_do(&tc_cl, worker_id); @@ -242,8 +240,6 @@ ResourceMark rm; _serial_roots.oops_do(oops, worker_id); - _vm_roots.oops_do(oops, worker_id); - _cld_roots.always_strong_cld_do(clds, worker_id); _thread_roots.threads_do(&tc_cl, worker_id); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp @@ -250,13 +250,11 @@ private: ShenandoahSerialRoots _serial_roots; ShenandoahThreadRoots _thread_roots; - ShenandoahCodeCacheRoots _code_roots; - ShenandoahVMRoots _vm_roots; ShenandoahStringDedupRoots _dedup_roots; - ShenandoahClassLoaderDataRoots - _cld_roots; + public: ShenandoahRootScanner(uint n_workers, ShenandoahPhaseTimings::Phase phase); + ~ShenandoahRootScanner(); // Apply oops, clds and blobs to all strongly reachable roots in the system, // during class unloading cycle