# HG changeset patch
# User zgu
# Date 1591029718 14400
#      Mon Jun 01 12:41:58 2020 -0400
# Node ID d1a3933c21234edbefb37b2614ae9b889369bd9d
# Parent  f42ea705a4bfc02ab8b5ae1735dbc261fd367214
8245961: Shenandoah: move some root marking to concurrent phase

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp
@@ -174,7 +174,6 @@
       rp = NULL;
     }
 
-    _cm->concurrent_scan_code_roots(worker_id, rp);
     _cm->mark_loop(worker_id, _terminator, rp,
                    true, // cancellable
                    ShenandoahStringDedup::is_enabled()); // perform string dedup
@@ -215,6 +214,103 @@
   }
 };
 
+template <bool CONCURRENT, bool SINGLE_THREADED> // iterator over VM, CLD and code cache roots
+class ShenandoahConcurrentRootsIterator {
+private:
+  ShenandoahVMRoots<CONCURRENT>            _vm_roots;
+  ShenandoahClassLoaderDataRoots<CONCURRENT, SINGLE_THREADED>
+                                           _cld_roots;
+  ShenandoahNMethodTableSnapshot*          _codecache_snapshot; // stays NULL when classes are being unloaded
+  ShenandoahPhaseTimings::Phase            _phase;              // phase handed to the code cache worker timer
+
+public:
+  ShenandoahConcurrentRootsIterator(ShenandoahPhaseTimings::Phase phase); // takes the code cache snapshot unless unloading classes
+  ~ShenandoahConcurrentRootsIterator();                                   // finishes the snapshot iteration started by the constructor
+
+  void oops_do(OopClosure* oops, uint worker_id); // applies 'oops' to the roots; worker_id is forwarded to the sub-iterators
+};
+
+template <bool CONCURRENT, bool SINGLE_THREADED> // constructor: sets up the root iterators for 'phase'
+ShenandoahConcurrentRootsIterator<CONCURRENT, SINGLE_THREADED>::ShenandoahConcurrentRootsIterator(ShenandoahPhaseTimings::Phase phase) :
+  _vm_roots(phase),
+  _cld_roots(phase),
+  _codecache_snapshot(NULL),
+  _phase(phase) {
+  if (!ShenandoahHeap::heap()->unload_classes()) { // the code cache is only snapshotted when classes are not being unloaded
+    if (CONCURRENT) {
+      CodeCache_lock->lock_without_safepoint_check(); // stays locked until the destructor unlocks it
+    } else {
+      assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint");
+    }
+    _codecache_snapshot = ShenandoahCodeRoots::table()->snapshot_for_iteration(); // handed back via finish_iteration() in the destructor
+  }
+  assert(!CONCURRENT || !ShenandoahHeap::heap()->has_forwarded_objects(), "Not expecting forwarded pointers during concurrent marking");
+}
+
+template <bool CONCURRENT, bool SINGLE_THREADED> // destructor: releases what the constructor acquired
+ShenandoahConcurrentRootsIterator<CONCURRENT, SINGLE_THREADED>::~ShenandoahConcurrentRootsIterator() {
+  if (!ShenandoahHeap::heap()->unload_classes()) { // NOTE(review): relies on unload_classes() being unchanged since construction - confirm
+    ShenandoahCodeRoots::table()->finish_iteration(_codecache_snapshot);
+    if (CONCURRENT) {
+      CodeCache_lock->unlock(); // matches lock_without_safepoint_check() in the constructor
+    }
+  }
+}
+
+template <bool CONCURRENT, bool SINGLE_THREADED> // applies 'oops' to every root this iterator covers
+void ShenandoahConcurrentRootsIterator<CONCURRENT, SINGLE_THREADED>::oops_do(OopClosure* oops, uint worker_id) {
+  ShenandoahHeap* const heap = ShenandoahHeap::heap();
+  CLDToOopClosure clds_cl(oops, CONCURRENT ? ClassLoaderData::_claim_strong : ClassLoaderData::_claim_none); // CLD claim mode depends on CONCURRENT
+  _vm_roots.oops_do(oops, worker_id);
+
+  if (!heap->unload_classes()) { // not unloading classes: CLDs via cld_do() plus the code cache blobs
+    _cld_roots.cld_do(&clds_cl, worker_id);
+
+    ShenandoahWorkerTimingsTracker timer(_phase, ShenandoahPhaseTimings::CodeCacheRoots, worker_id);
+    CodeBlobToOopClosure blobs(oops, !CodeBlobToOopClosure::FixRelocations);
+    _codecache_snapshot->parallel_blobs_do(&blobs); // snapshot was taken in the constructor under the same condition
+  } else { // unloading classes: CLDs via always_strong_cld_do() only, no code cache scan
+    _cld_roots.always_strong_cld_do(&clds_cl, worker_id);
+  }
+}
+
+// Gang task that processes the concurrent roots at a safepoint; CLOSURE is the oop closure type each worker instantiates
+template <typename CLOSURE>
+class ShenandoahProcessConcurrentRootsTask : public AbstractGangTask {
+private:
+  ShenandoahConcurrentRootsIterator<false /* concurrent */, false /* single_thread */> _itr;
+  ShenandoahConcurrentMark* const _cm; // provides the per-worker task queues
+  ReferenceProcessor*             _rp; // NULL unless the heap processes references
+public:
+
+  ShenandoahProcessConcurrentRootsTask(ShenandoahConcurrentMark* cm,
+                                       ShenandoahPhaseTimings::Phase phase);
+  void work(uint worker_id); // per-worker entry point
+};
+
+template <typename CLOSURE> // constructor: builds the root iterator for 'phase' and looks up the reference processor
+ShenandoahProcessConcurrentRootsTask<CLOSURE>::ShenandoahProcessConcurrentRootsTask(ShenandoahConcurrentMark* cm,
+                                                                                    ShenandoahPhaseTimings::Phase phase) :
+  AbstractGangTask("Shenandoah STW Concurrent Mark Task"),
+  _itr(phase),
+  _cm(cm),
+  _rp(NULL) {
+  ShenandoahHeap* heap = ShenandoahHeap::heap();
+  if (heap->process_references()) { // the reference processor is only picked up when reference processing is enabled
+    _rp = heap->ref_processor();
+    shenandoah_assert_rp_isalive_installed();
+  }
+}
+
+template <typename CLOSURE> // per-worker entry point: runs a CLOSURE over the concurrent roots
+void ShenandoahProcessConcurrentRootsTask<CLOSURE>::work(uint worker_id) {
+  ShenandoahParallelWorkerSession worker_session(worker_id);
+  ShenandoahObjToScanQueue* q = _cm->task_queues()->queue(worker_id); // queue selected by worker_id
+  CLOSURE cl(q, _rp); // built per worker from its queue and the (possibly NULL) reference processor
+  _itr.oops_do(&cl, worker_id);
+}
+
+
 class ShenandoahFinalMarkingTask : public AbstractGangTask {
 private:
   ShenandoahConcurrentMark* _cm;
@@ -267,13 +363,6 @@
       }
     }
 
-    if (heap->is_degenerated_gc_in_progress() || heap->is_full_gc_in_progress()) {
-      // Full GC does not execute concurrent cycle.
-      // Degenerated cycle may bypass concurrent cycle.
-      // So code roots might not be scanned, let's scan here.
-      _cm->concurrent_scan_code_roots(worker_id, rp);
-    }
-
     _cm->mark_loop(worker_id, _terminator, rp,
                    false, // not cancellable
                    _dedup_string);
@@ -308,8 +397,6 @@
     ShenandoahInitMarkRootsTask<NONE> mark_roots(&root_proc);
     workers->run_task(&mark_roots);
   }
-
-  clear_claim_codecache();
 }
 
 void ShenandoahConcurrentMark::update_roots(ShenandoahPhaseTimings::Phase root_phase) {
@@ -390,34 +477,45 @@
   }
 }
 
-void ShenandoahConcurrentMark::concurrent_scan_code_roots(uint worker_id, ReferenceProcessor* rp) {
-  if (_heap->unload_classes()) {
-    return;
-  }
+// Gang task that marks the concurrent roots during the concurrent mark phase
+class ShenandoahMarkConcurrentRootsTask : public AbstractGangTask {
+private:
+  SuspendibleThreadSetJoiner         _sts_joiner; // NOTE(review): presumably keeps the creating thread in the suspendible thread set for the task's lifetime - confirm
+  ShenandoahConcurrentRootsIterator<true /* concurrent */, false /* single-threaded */> _itr; // concurrent flavor: takes CodeCache_lock when classes are not unloaded
+  ShenandoahObjToScanQueueSet* const _queue_set;  // per-worker queues, looked up by worker_id
+  ReferenceProcessor* const          _rp;         // passed through to the marking closure
+
+public:
+  ShenandoahMarkConcurrentRootsTask(ShenandoahObjToScanQueueSet* qs,
+                                    ReferenceProcessor* rp,
+                                    ShenandoahPhaseTimings::Phase phase);
+  void work(uint worker_id); // per-worker entry point
+};
 
-  if (claim_codecache()) {
-    ShenandoahObjToScanQueue* q = task_queues()->queue(worker_id);
-    MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    // TODO: We can not honor StringDeduplication here, due to lock ranking
-    // inversion. So, we may miss some deduplication candidates.
-    if (_heap->has_forwarded_objects()) {
-      ShenandoahMarkResolveRefsClosure cl(q, rp);
-      CodeBlobToOopClosure blobs(&cl, !CodeBlobToOopClosure::FixRelocations);
-      CodeCache::blobs_do(&blobs);
-    } else {
-      ShenandoahMarkRefsClosure cl(q, rp);
-      CodeBlobToOopClosure blobs(&cl, !CodeBlobToOopClosure::FixRelocations);
-      CodeCache::blobs_do(&blobs);
-    }
-  }
+ShenandoahMarkConcurrentRootsTask::ShenandoahMarkConcurrentRootsTask(ShenandoahObjToScanQueueSet* qs,
+                                                                     ReferenceProcessor* rp,
+                                                                     ShenandoahPhaseTimings::Phase phase) :
+  AbstractGangTask("Shenandoah Concurrent Mark Task"),
+  _itr(phase), // constructing the iterator locks CodeCache_lock when classes are not being unloaded
+  _queue_set(qs),
+  _rp(rp) { // rp is NULL when mark_from_roots() runs without reference processing
+  assert(!ShenandoahHeap::heap()->has_forwarded_objects(), "Not expected"); // this task is not expected to run while forwarded objects exist
+}
+
+void ShenandoahMarkConcurrentRootsTask::work(uint worker_id) { // per-worker entry point: marks the concurrent roots into this worker's queue
+  ShenandoahConcurrentWorkerSession worker_session(worker_id);
+  ShenandoahObjToScanQueue* q = _queue_set->queue(worker_id);
+  ShenandoahMarkResolveRefsClosure cl(q, _rp); // NOTE(review): the constructor asserts no forwarded objects, so ShenandoahMarkRefsClosure may suffice - confirm
+  _itr.oops_do(&cl, worker_id);
 }
 
 void ShenandoahConcurrentMark::mark_from_roots() {
   WorkGang* workers = _heap->workers();
   uint nworkers = workers->active_workers();
 
+  ReferenceProcessor* rp = NULL;
   if (_heap->process_references()) {
-    ReferenceProcessor* rp = _heap->ref_processor();
+    rp = _heap->ref_processor();
     rp->set_active_mt_degree(nworkers);
 
     // enable ("weak") refs discovery
@@ -432,6 +530,13 @@
   task_queues()->reserve(nworkers);
 
   {
+    ShenandoahGCPhase phase(ShenandoahPhaseTimings::conc_mark_roots);
+    // Use a separate task to mark concurrent roots, since it may hold ClassLoaderData_lock and CodeCache_lock
+    ShenandoahMarkConcurrentRootsTask task(task_queues(), rp, ShenandoahPhaseTimings::conc_mark_roots);
+    workers->run_task(&task);
+  }
+
+  {
     TaskTerminator terminator(nworkers, task_queues());
     ShenandoahConcurrentMarkingTask task(this, &terminator);
     workers->run_task(&task);
@@ -445,31 +550,54 @@
 
   uint nworkers = _heap->workers()->active_workers();
 
-  // Finally mark everything else we've got in our queues during the previous steps.
-  // It does two different things for concurrent vs. mark-compact GC:
-  // - For concurrent GC, it starts with empty task queues, drains the remaining
-  //   SATB buffers, and then completes the marking closure.
-  // - For mark-compact GC, it starts out with the task queues seeded by initial
-  //   root scan, and completes the closure, thus marking through all live objects
-  // The implementation is the same, so it's shared here.
   {
-    ShenandoahGCPhase phase(full_gc ?
-                            ShenandoahPhaseTimings::full_gc_mark_finish_queues :
-                            ShenandoahPhaseTimings::finish_queues);
-    task_queues()->reserve(nworkers);
-
     shenandoah_assert_rp_isalive_not_installed();
     ShenandoahIsAliveSelector is_alive;
     ReferenceProcessorIsAliveMutator fix_isalive(_heap->ref_processor(), is_alive.is_alive_closure());
 
-    StrongRootsScope scope(nworkers);
-    TaskTerminator terminator(nworkers, task_queues());
-    ShenandoahFinalMarkingTask task(this, &terminator, ShenandoahStringDedup::is_enabled());
-    _heap->workers()->run_task(&task);
+
+    // Full GC does not execute concurrent cycle.
+    // Degenerated cycle may bypass concurrent cycle.
+    // So concurrent roots might not be scanned, scan them here.
+    // Ideally, this would be piggybacked onto ShenandoahFinalMarkingTask, but that makes time tracking
+    // very hard. Given that full GC and degenerated GC should be rare, use a separate task instead.
+    if (_heap->is_degenerated_gc_in_progress() || _heap->is_full_gc_in_progress()) {
+      ShenandoahPhaseTimings::Phase phase = _heap->is_full_gc_in_progress() ?
+                                            ShenandoahPhaseTimings::full_gc_scan_conc_roots :
+                                            ShenandoahPhaseTimings::degen_gc_scan_conc_roots;
+      ShenandoahGCPhase gc_phase(phase);
+      if (_heap->has_forwarded_objects()) {
+        ShenandoahProcessConcurrentRootsTask<ShenandoahMarkResolveRefsClosure> task(this, phase);
+        _heap->workers()->run_task(&task);
+      } else {
+        ShenandoahProcessConcurrentRootsTask<ShenandoahMarkRefsClosure> task(this, phase);
+        _heap->workers()->run_task(&task);
+      }
+    }
+
+
+    // Finally mark everything else we've got in our queues during the previous steps.
+    // It does two different things for concurrent vs. mark-compact GC:
+    // - For concurrent GC, it starts with empty task queues, drains the remaining
+    //   SATB buffers, and then completes the marking closure.
+    // - For mark-compact GC, it starts out with the task queues seeded by initial
+    //   root scan, and completes the closure, thus marking through all live objects
+    // The implementation is the same, so it's shared here.
+    {
+      ShenandoahGCPhase phase(full_gc ?
+                              ShenandoahPhaseTimings::full_gc_mark_finish_queues :
+                              ShenandoahPhaseTimings::finish_queues);
+      task_queues()->reserve(nworkers);
+
+      StrongRootsScope scope(nworkers);
+      TaskTerminator terminator(nworkers, task_queues());
+      ShenandoahFinalMarkingTask task(this, &terminator, ShenandoahStringDedup::is_enabled());
+      _heap->workers()->run_task(&task);
+    }
+
+    assert(task_queues()->is_empty(), "Should be empty");
   }
 
-  assert(task_queues()->is_empty(), "Should be empty");
-
   // When we're done marking everything, we process weak references.
   if (_heap->process_references()) {
     weak_refs_work(full_gc);
@@ -942,11 +1070,3 @@
     }
   }
 }
-
-bool ShenandoahConcurrentMark::claim_codecache() {
-  return _claimed_codecache.try_set();
-}
-
-void ShenandoahConcurrentMark::clear_claim_codecache() {
-  _claimed_codecache.unset();
-}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.hpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.hpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.hpp
@@ -91,16 +91,6 @@
 public:
   void preclean_weak_refs();
 
-// ---------- Concurrent code cache
-//
-private:
-  ShenandoahSharedFlag _claimed_codecache;
-
-public:
-  void concurrent_scan_code_roots(uint worker_id, ReferenceProcessor* rp);
-  bool claim_codecache();
-  void clear_claim_codecache();
-
 // ---------- Helpers
 // Used from closures, need to be public
 //
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahNMethod.cpp
@@ -523,13 +523,13 @@
 }
 
 ShenandoahNMethodList* ShenandoahNMethodList::acquire() {
-  assert(CodeCache_lock->owned_by_self(), "Lock must be held");
+  assert_locked_or_safepoint(CodeCache_lock);
   _ref_count++;
   return this;
 }
 
 void ShenandoahNMethodList::release() {
-  assert(CodeCache_lock->owned_by_self(), "Lock must be held");
+  assert_locked_or_safepoint(CodeCache_lock);
   _ref_count--;
   if (_ref_count == 0) {
     delete this;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.cpp b/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.cpp
@@ -103,12 +103,15 @@
     case full_gc_scan_roots:
     case full_gc_update_roots:
     case full_gc_adjust_roots:
+    case degen_gc_scan_conc_roots:
     case degen_gc_update_roots:
+    case full_gc_scan_conc_roots:
     case full_gc_purge_class_unload:
     case full_gc_purge_weak_par:
     case purge_class_unload:
     case purge_weak_par:
     case heap_iteration_roots:
+    case conc_mark_roots:
     case conc_weak_roots_work:
     case conc_strong_roots:
       return true;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.hpp b/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.hpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahPhaseTimings.hpp
@@ -68,6 +68,9 @@
   f(resize_tlabs,                                   "  Resize TLABs")                  \
                                                                                        \
   f(conc_mark,                                      "Concurrent Marking")              \
+  f(conc_mark_roots,                                "  Concurrent Mark Roots ")        \
+  SHENANDOAH_PAR_PHASE_DO(conc_mark_roots,          "    CM: ", f)                     \
+                                                                                       \
   f(conc_preclean,                                  "Concurrent Precleaning")          \
                                                                                        \
   f(final_mark_gross,                               "Pause Final Mark (G)")            \
@@ -128,6 +131,8 @@
                                                                                        \
   f(degen_gc_gross,                                 "Pause Degenerated GC (G)")        \
   f(degen_gc,                                       "Pause Degenerated GC (N)")        \
+  f(degen_gc_scan_conc_roots,                       "  Degen Mark Roots")              \
+  SHENANDOAH_PAR_PHASE_DO(degen_gc_conc_mark_,      "    DM: ", f)                     \
   f(degen_gc_update_roots,                          "  Degen Update Roots")            \
   SHENANDOAH_PAR_PHASE_DO(degen_gc_update_,         "    DU: ", f)                     \
                                                                                        \
@@ -137,6 +142,8 @@
   f(full_gc_prepare,                                "  Prepare")                       \
   f(full_gc_scan_roots,                             "  Scan Roots")                    \
   SHENANDOAH_PAR_PHASE_DO(full_gc_scan_roots_,      "    FS: ", f)                     \
+  f(full_gc_scan_conc_roots,                        "  Scan Concurrent Roots")         \
+  SHENANDOAH_PAR_PHASE_DO(full_gc_scan_conc_roots,  "   FCS: ", f)                     \
   f(full_gc_update_roots,                           "  Update Roots")                  \
   SHENANDOAH_PAR_PHASE_DO(full_gc_update_roots_,    "    FU: ", f)                     \
   f(full_gc_mark,                                   "  Mark")                          \
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.cpp
@@ -28,6 +28,7 @@
 #include "classfile/stringTable.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "code/codeCache.hpp"
+#include "code/nmethod.hpp"
 #include "gc/shenandoah/shenandoahClosures.inline.hpp"
 #include "gc/shenandoah/shenandoahConcurrentRoots.hpp"
 #include "gc/shenandoah/shenandoahRootProcessor.inline.hpp"
@@ -199,10 +200,12 @@
   ShenandoahRootProcessor(phase),
   _serial_roots(phase),
   _thread_roots(phase, n_workers > 1),
-  _code_roots(phase),
-  _vm_roots(phase),
-  _dedup_roots(phase),
-  _cld_roots(phase) {
+  _dedup_roots(phase) {
+  nmethod::oops_do_marking_prologue();
+}
+
+ShenandoahRootScanner::~ShenandoahRootScanner() { // destructor: closes the nmethod marking scope opened by the constructor
+  nmethod::oops_do_marking_epilogue(); // pairs with nmethod::oops_do_marking_prologue() in the constructor
 }
 
 void ShenandoahRootScanner::roots_do(uint worker_id, OopClosure* oops) {
@@ -224,11 +227,6 @@
   ResourceMark rm;
 
   _serial_roots.oops_do(oops, worker_id);
-  _vm_roots.oops_do(oops, worker_id);
-
-  assert(clds != NULL, "Only possible with CLD closure");
-  _cld_roots.cld_do(clds, worker_id);
-
   ShenandoahParallelOopsDoThreadClosure tc_cl(oops, code, tc);
   _thread_roots.threads_do(&tc_cl, worker_id);
 
@@ -242,8 +240,6 @@
   ResourceMark rm;
 
   _serial_roots.oops_do(oops, worker_id);
-  _vm_roots.oops_do(oops, worker_id);
-  _cld_roots.always_strong_cld_do(clds, worker_id);
   _thread_roots.threads_do(&tc_cl, worker_id);
 }
 
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp
@@ -250,13 +250,11 @@
 private:
   ShenandoahSerialRoots                                     _serial_roots;
   ShenandoahThreadRoots                                     _thread_roots;
-  ShenandoahCodeCacheRoots                                  _code_roots;
-  ShenandoahVMRoots<false /*concurrent*/ >                  _vm_roots;
   ShenandoahStringDedupRoots                                _dedup_roots;
-  ShenandoahClassLoaderDataRoots<false /*concurrent*/, false /*single threaded*/>
-                                                            _cld_roots;
+
 public:
   ShenandoahRootScanner(uint n_workers, ShenandoahPhaseTimings::Phase phase);
+  ~ShenandoahRootScanner();
 
   // Apply oops, clds and blobs to all strongly reachable roots in the system,
   // during class unloading cycle