1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1StringDedup.hpp"
  36 #include "gc/g1/heapRegion.inline.hpp"
  37 #include "gc/g1/heapRegionRemSet.hpp"
  38 #include "gc/g1/heapRegionSet.inline.hpp"
  39 #include "gc/g1/suspendibleThreadSet.hpp"
  40 #include "gc/shared/gcId.hpp"
  41 #include "gc/shared/gcTimer.hpp"
  42 #include "gc/shared/gcTrace.hpp"
  43 #include "gc/shared/gcTraceTime.inline.hpp"
  44 #include "gc/shared/genOopClosures.inline.hpp"
  45 #include "gc/shared/referencePolicy.hpp"
  46 #include "gc/shared/strongRootsScope.hpp"
  47 #include "gc/shared/taskqueue.inline.hpp"
  48 #include "gc/shared/vmGCOperations.hpp"
  49 #include "logging/log.hpp"
  50 #include "memory/allocation.hpp"
  51 #include "memory/resourceArea.hpp"
  52 #include "oops/oop.inline.hpp"
  53 #include "runtime/atomic.inline.hpp"
  54 #include "runtime/handles.inline.hpp"
  55 #include "runtime/java.hpp"
  56 #include "runtime/prefetch.inline.hpp"
  57 #include "services/memTracker.hpp"
  58 
  59 // Concurrent marking bit map wrapper
  60 
  61 CMBitMapRO::CMBitMapRO(int shifter) :
  62   _bm(),
  63   _shifter(shifter) {
  64   _bmStartWord = 0;
  65   _bmWordSize = 0;
  66 }
  67 
  68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  69                                                const HeapWord* limit) const {
  70   // First we must round addr *up* to a possible object boundary.
  71   addr = (HeapWord*)align_size_up((intptr_t)addr,
  72                                   HeapWordSize << _shifter);
  73   size_t addrOffset = heapWordToOffset(addr);
  74   assert(limit != NULL, "limit must not be NULL");
  75   size_t limitOffset = heapWordToOffset(limit);
  76   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  77   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  78   assert(nextAddr >= addr, "get_next_one postcondition");
  79   assert(nextAddr == limit || isMarked(nextAddr),
  80          "get_next_one postcondition");
  81   return nextAddr;
  82 }
  83 
  84 #ifndef PRODUCT
  85 bool CMBitMapRO::covers(MemRegion heap_rs) const {
  86   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  87   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  88          "size inconsistency");
  89   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  90          _bmWordSize  == heap_rs.word_size();
  91 }
  92 #endif
  93 
  94 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  95   _bm.print_on_error(st, prefix);
  96 }
  97 
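// Returns the size, in bytes, of the bitmap backing store needed to cover a
// heap of the given size (one bit per mark_distance() bytes of heap), rounded
// up to the reservation allocation granularity.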
  98 size_t CMBitMap::compute_size(size_t heap_size) {
  99   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 100 }
 101 
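// Number of heap bytes covered by a single byte of the mark bitmap:
// one bit per MinObjAlignmentInBytes of heap.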
 102 size_t CMBitMap::mark_distance() {
 103   return MinObjAlignmentInBytes * BitsPerByte;
 104 }
 105 
 106 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 107   _bmStartWord = heap.start();
 108   _bmWordSize = heap.word_size();
 109 
 110   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 111   _bm.set_size(_bmWordSize >> _shifter);
 112 
 113   storage->set_mapping_changed_listener(&_listener);
 114 }
 115 
 116 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 117   if (zero_filled) {
 118     return;
 119   }
 120   // We need to clear the bitmap on commit, removing any existing information.
 121   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 122   _bm->clearRange(mr);
 123 }
 124 
 125 // Closure used for clearing the given mark bitmap.
 126 class ClearBitmapHRClosure : public HeapRegionClosure {
 127  private:
 128   ConcurrentMark* _cm;
 129   CMBitMap* _bitmap;
 130   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 131  public:
 132   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 133     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 134   }
 135 
 136   virtual bool doHeapRegion(HeapRegion* r) {
 137     size_t const chunk_size_in_words = M / HeapWordSize;
 138 
 139     HeapWord* cur = r->bottom();
 140     HeapWord* const end = r->end();
 141 
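    // Clear the bitmap for this region in fixed-size chunks rather than all at
    // once, so that when _may_yield is set we can honor yield requests (and
    // notice an abort) between chunks.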
 142     while (cur < end) {
 143       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 144       _bitmap->clearRange(mr);
 145 
 146       cur += chunk_size_in_words;
 147 
 148       // Abort iteration if after yielding the marking has been aborted.
 149       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 150         return true;
 151       }
 152       // Repeat the asserts from before the start of the closure. We will do them
 153       // as asserts here to minimize their overhead on the product. However, we
 154       // will have them as guarantees at the beginning / end of the bitmap
 155       // clearing to get some checking in the product.
 156       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 157       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 158     }
 159 
 160     return false;
 161   }
 162 };
 163 
 164 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 165   ClearBitmapHRClosure* _cl;
 166   HeapRegionClaimer     _hrclaimer;
 167   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 168 
 169 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 172 
 173   void work(uint worker_id) {
 174     SuspendibleThreadSetJoiner sts_join(_suspendible);
 175     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 176   }
 177 };
 178 
 179 void CMBitMap::clearAll() {
 180   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 181   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 182   uint n_workers = g1h->workers()->active_workers();
 183   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 184   g1h->workers()->run_task(&task);
 185   guarantee(cl.complete(), "Must have completed iteration.");
 187 }
 188 
 189 void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 191   assert(!mr.is_empty(), "unexpected empty region");
 192   // convert address range into offset range
 193   _bm.at_put_range(heapWordToOffset(mr.start()),
 194                    heapWordToOffset(mr.end()), false);
 195 }
 196 
 197 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 198   _base(NULL), _cm(cm)
 199 {}
 200 
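// Reserve and commit the backing store for the global mark stack and set up
// the bookkeeping fields. Returns false (after printing a warning) if either
// the reservation or the commit fails.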
 201 bool CMMarkStack::allocate(size_t capacity) {
 202   // allocate a stack of the requisite depth
 203   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 204   if (!rs.is_reserved()) {
 205     warning("ConcurrentMark MarkStack allocation failure");
 206     return false;
 207   }
 208   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 209   if (!_virtual_space.initialize(rs, rs.size())) {
 210     warning("ConcurrentMark MarkStack backing store failure");
 211     // Release the virtual memory reserved for the marking stack
 212     rs.release();
 213     return false;
 214   }
 215   assert(_virtual_space.committed_size() == rs.size(),
 216          "Didn't reserve backing store for all of ConcurrentMark stack?");
 217   _base = (oop*) _virtual_space.low();
 218   setEmpty();
 219   _capacity = (jint) capacity;
 220   _saved_index = -1;
 221   _should_expand = false;
 222   return true;
 223 }
 224 
 225 void CMMarkStack::expand() {
  // Called during remark if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
 228   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 229   // Clear expansion flag
 230   _should_expand = false;
 231   if (_capacity == (jint) MarkStackSizeMax) {
 232     log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
 233     return;
 234   }
 235   // Double capacity if possible
 236   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 237   // Do not give up existing stack until we have managed to
 238   // get the double capacity that we desired.
 239   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 240                                                            sizeof(oop)));
 241   if (rs.is_reserved()) {
 242     // Release the backing store associated with old stack
 243     _virtual_space.release();
 244     // Reinitialize virtual space for new stack
 245     if (!_virtual_space.initialize(rs, rs.size())) {
 246       fatal("Not enough swap for expanded marking stack capacity");
 247     }
 248     _base = (oop*)(_virtual_space.low());
 249     _index = 0;
 250     _capacity = new_capacity;
 251   } else {
    // Failed to double the capacity; continue with the current capacity.
 253     log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
 254                   _capacity / K, new_capacity / K);
 255   }
 256 }
 257 
 258 void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
 260   // marking stack overflow, record that we should, if
 261   // possible, expand the stack.
 262   _should_expand = _cm->has_overflown();
 263 }
 264 
 265 CMMarkStack::~CMMarkStack() {
 266   if (_base != NULL) {
 267     _base = NULL;
 268     _virtual_space.release();
 269   }
 270 }
 271 
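// Pushes the n entries in ptr_arr onto the stack as a single block while
// holding ParGCRareEvent_lock; if they do not all fit, sets the overflow
// flag and pushes nothing.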
 272 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 273   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 274   jint start = _index;
 275   jint next_index = start + n;
 276   if (next_index > _capacity) {
 277     _overflow = true;
 278     return;
 279   }
 280   // Otherwise.
 281   _index = next_index;
 282   for (int i = 0; i < n; i++) {
 283     int ind = start + i;
 284     assert(ind < _capacity, "By overflow test above.");
 285     _base[ind] = ptr_arr[i];
 286   }
 287 }
 288 
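// Pops up to max entries into ptr_arr (as a single block, under the same
// lock) and stores the number actually popped in *n. Returns false if the
// stack was empty.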
 289 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 290   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 291   jint index = _index;
 292   if (index == 0) {
 293     *n = 0;
 294     return false;
 295   } else {
 296     int k = MIN2(max, index);
 297     jint  new_ind = index - k;
 298     for (int j = 0; j < k; j++) {
 299       ptr_arr[j] = _base[new_ind + j];
 300     }
 301     _index = new_ind;
 302     *n = k;
 303     return true;
 304   }
 305 }
 306 
 307 void CMMarkStack::note_start_of_gc() {
 308   assert(_saved_index == -1,
 309          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 310   _saved_index = _index;
 311 }
 312 
 313 void CMMarkStack::note_end_of_gc() {
 314   // This is intentionally a guarantee, instead of an assert. If we
 315   // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
 317   // only check this once per GC anyway, so it won't be a performance
 318   // issue in any way.
 319   guarantee(_saved_index == _index,
 320             "saved index: %d index: %d", _saved_index, _index);
 321   _saved_index = -1;
 322 }
 323 
 324 CMRootRegions::CMRootRegions() :
 325   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 326   _should_abort(false),  _next_survivor(NULL) { }
 327 
 328 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 329   _young_list = g1h->young_list();
 330   _cm = cm;
 331 }
 332 
 333 void CMRootRegions::prepare_for_scan() {
 334   assert(!scan_in_progress(), "pre-condition");
 335 
 336   // Currently, only survivors can be root regions.
 337   assert(_next_survivor == NULL, "pre-condition");
 338   _next_survivor = _young_list->first_survivor_region();
 339   _scan_in_progress = (_next_survivor != NULL);
 340   _should_abort = false;
 341 }
 342 
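// Hands out the next unclaimed root region (currently always a survivor
// region) to the calling worker, or returns NULL if there is nothing left
// to claim or scanning should be aborted.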
 343 HeapRegion* CMRootRegions::claim_next() {
 344   if (_should_abort) {
 345     // If someone has set the should_abort flag, we return NULL to
 346     // force the caller to bail out of their loop.
 347     return NULL;
 348   }
 349 
 350   // Currently, only survivors can be root regions.
 351   HeapRegion* res = _next_survivor;
 352   if (res != NULL) {
 353     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 354     // Read it again in case it changed while we were waiting for the lock.
 355     res = _next_survivor;
 356     if (res != NULL) {
 357       if (res == _young_list->last_survivor_region()) {
 358         // We just claimed the last survivor so store NULL to indicate
 359         // that we're done.
 360         _next_survivor = NULL;
 361       } else {
 362         _next_survivor = res->get_next_young_region();
 363       }
 364     } else {
 365       // Someone else claimed the last survivor while we were trying
 366       // to take the lock so nothing else to do.
 367     }
 368   }
 369   assert(res == NULL || res->is_survivor(), "post-condition");
 370 
 371   return res;
 372 }
 373 
 374 void CMRootRegions::scan_finished() {
 375   assert(scan_in_progress(), "pre-condition");
 376 
 377   // Currently, only survivors can be root regions.
 378   if (!_should_abort) {
 379     assert(_next_survivor == NULL, "we should have claimed all survivors");
 380   }
 381   _next_survivor = NULL;
 382 
 383   {
 384     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 385     _scan_in_progress = false;
 386     RootRegionScan_lock->notify_all();
 387   }
 388 }
 389 
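// Blocks the caller until root region scanning has finished. Returns true
// if the caller actually had to wait, false if scanning was already done.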
 390 bool CMRootRegions::wait_until_scan_finished() {
 391   if (!scan_in_progress()) return false;
 392 
 393   {
 394     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 395     while (scan_in_progress()) {
 396       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 397     }
 398   }
 399   return true;
 400 }
 401 
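// Heuristic used when the number of concurrent marking threads is derived
// from ParallelGCThreads: use roughly a quarter of the parallel GC threads
// (rounded), but always at least one.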
 402 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 403   return MAX2((n_par_threads + 2) / 4, 1U);
 404 }
 405 
 406 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 407   _g1h(g1h),
 408   _markBitMap1(),
 409   _markBitMap2(),
 410   _parallel_marking_threads(0),
 411   _max_parallel_marking_threads(0),
 412   _sleep_factor(0.0),
 413   _marking_task_overhead(1.0),
 414   _cleanup_list("Cleanup List"),
 415   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 416   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 417             CardTableModRefBS::card_shift,
 418             false /* in_resource_area*/),
 419 
 420   _prevMarkBitMap(&_markBitMap1),
 421   _nextMarkBitMap(&_markBitMap2),
 422 
 423   _markStack(this),
 424   // _finger set in set_non_marking_state
 425 
 426   _max_worker_id(ParallelGCThreads),
 427   // _active_tasks set in set_non_marking_state
 428   // _tasks set inside the constructor
 429   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 430   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 431 
 432   _has_overflown(false),
 433   _concurrent(false),
 434   _has_aborted(false),
 435   _restart_for_overflow(false),
 436   _concurrent_marking_in_progress(false),
 437   _concurrent_phase_started(false),
 438 
 439   // _verbose_level set below
 440 
 441   _init_times(),
 442   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 443   _cleanup_times(),
 444   _total_counting_time(0.0),
 445   _total_rs_scrub_time(0.0),
 446 
 447   _parallel_workers(NULL),
 448 
 449   _count_card_bitmaps(NULL),
 450   _count_marked_bytes(NULL),
 451   _completed_initialization(false) {
 452 
 453   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 454   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 455 
 456   // Create & start a ConcurrentMark thread.
 457   _cmThread = new ConcurrentMarkThread(this);
 458   assert(cmThread() != NULL, "CM Thread should have been created");
 459   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 460   if (_cmThread->osthread() == NULL) {
 461       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 462   }
 463 
 464   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 465   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 466   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 467 
 468   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 469   satb_qs.set_buffer_size(G1SATBBufferSize);
 470 
 471   _root_regions.init(_g1h, this);
 472 
 473   if (ConcGCThreads > ParallelGCThreads) {
 474     warning("Can't have more ConcGCThreads (%u) "
 475             "than ParallelGCThreads (%u).",
 476             ConcGCThreads, ParallelGCThreads);
 477     return;
 478   }
 479   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 480     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 481     // if both are set
 482     _sleep_factor             = 0.0;
 483     _marking_task_overhead    = 1.0;
 484   } else if (G1MarkingOverheadPercent > 0) {
 485     // We will calculate the number of parallel marking threads based
 486     // on a target overhead with respect to the soft real-time goal
 487     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 488     double overall_cm_overhead =
 489       (double) MaxGCPauseMillis * marking_overhead /
 490       (double) GCPauseIntervalMillis;
 491     double cpu_ratio = 1.0 / (double) os::processor_count();
 492     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 493     double marking_task_overhead =
 494       overall_cm_overhead / marking_thread_num *
 495                                               (double) os::processor_count();
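    // A marking thread that has just spent t seconds of virtual time marking
    // will sleep for t * sleep_factor, so its long-run CPU share stays close
    // to marking_task_overhead.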
 496     double sleep_factor =
 497                        (1.0 - marking_task_overhead) / marking_task_overhead;
 498 
 499     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 500     _sleep_factor             = sleep_factor;
 501     _marking_task_overhead    = marking_task_overhead;
 502   } else {
 503     // Calculate the number of parallel marking threads by scaling
 504     // the number of parallel GC threads.
 505     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 506     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 507     _sleep_factor             = 0.0;
 508     _marking_task_overhead    = 1.0;
 509   }
 510 
 511   assert(ConcGCThreads > 0, "Should have been set");
 512   _parallel_marking_threads = ConcGCThreads;
 513   _max_parallel_marking_threads = _parallel_marking_threads;
 514 
 515   _parallel_workers = new WorkGang("G1 Marker",
 516        _max_parallel_marking_threads, false, true);
 517   if (_parallel_workers == NULL) {
 518     vm_exit_during_initialization("Failed necessary allocation.");
 519   } else {
 520     _parallel_workers->initialize_workers();
 521   }
 522 
 523   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 524     size_t mark_stack_size =
 525       MIN2(MarkStackSizeMax,
 526           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 527     // Verify that the calculated value for MarkStackSize is in range.
 528     // It would be nice to use the private utility routine from Arguments.
 529     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 530       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 531               "must be between 1 and " SIZE_FORMAT,
 532               mark_stack_size, MarkStackSizeMax);
 533       return;
 534     }
 535     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 536   } else {
 537     // Verify MarkStackSize is in range.
 538     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 539       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 540         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 541           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 542                   "must be between 1 and " SIZE_FORMAT,
 543                   MarkStackSize, MarkStackSizeMax);
 544           return;
 545         }
 546       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 547         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 548           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 549                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 550                   MarkStackSize, MarkStackSizeMax);
 551           return;
 552         }
 553       }
 554     }
 555   }
 556 
 557   if (!_markStack.allocate(MarkStackSize)) {
 558     warning("Failed to allocate CM marking stack");
 559     return;
 560   }
 561 
 562   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 563   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 564 
 565   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 566   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 567 
 568   BitMap::idx_t card_bm_size = _card_bm.size();
 569 
 570   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 571   _active_tasks = _max_worker_id;
 572 
 573   uint max_regions = _g1h->max_regions();
 574   for (uint i = 0; i < _max_worker_id; ++i) {
 575     CMTaskQueue* task_queue = new CMTaskQueue();
 576     task_queue->initialize();
 577     _task_queues->register_queue(i, task_queue);
 578 
 579     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 580     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 581 
 582     _tasks[i] = new CMTask(i, this,
 583                            _count_marked_bytes[i],
 584                            &_count_card_bitmaps[i],
 585                            task_queue, _task_queues);
 586 
 587     _accum_task_vtime[i] = 0.0;
 588   }
 589 
 590   // Calculate the card number for the bottom of the heap. Used
 591   // in biasing indexes into the accounting card bitmaps.
 592   _heap_bottom_card_num =
 593     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 594                                 CardTableModRefBS::card_shift);
 595 
 596   // Clear all the liveness counting data
 597   clear_all_count_data();
 598 
 599   // so that the call below can read a sensible value
 600   _heap_start = g1h->reserved_region().start();
 601   set_non_marking_state();
 602   _completed_initialization = true;
 603 }
 604 
 605 void ConcurrentMark::reset() {
 606   // Starting values for these two. This should be called in a STW
 607   // phase.
 608   MemRegion reserved = _g1h->g1_reserved();
 609   _heap_start = reserved.start();
 610   _heap_end   = reserved.end();
 611 
 612   // Separated the asserts so that we know which one fires.
 613   assert(_heap_start != NULL, "heap bounds should look ok");
 614   assert(_heap_end != NULL, "heap bounds should look ok");
 615   assert(_heap_start < _heap_end, "heap bounds should look ok");
 616 
 617   // Reset all the marking data structures and any necessary flags
 618   reset_marking_state();
 619 
 620   // We do reset all of them, since different phases will use
 621   // different number of active threads. So, it's easiest to have all
 622   // of them ready.
 623   for (uint i = 0; i < _max_worker_id; ++i) {
 624     _tasks[i]->reset(_nextMarkBitMap);
 625   }
 626 
 627   // we need this to make sure that the flag is on during the evac
 628   // pause with initial mark piggy-backed
 629   set_concurrent_marking_in_progress();
 630 }
 631 
 632 
 633 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 634   _markStack.set_should_expand();
 635   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 636   if (clear_overflow) {
 637     clear_has_overflown();
 638   } else {
 639     assert(has_overflown(), "pre-condition");
 640   }
 641   _finger = _heap_start;
 642 
 643   for (uint i = 0; i < _max_worker_id; ++i) {
 644     CMTaskQueue* queue = _task_queues->queue(i);
 645     queue->set_empty();
 646   }
 647 }
 648 
 649 void ConcurrentMark::set_concurrency(uint active_tasks) {
 650   assert(active_tasks <= _max_worker_id, "we should not have more");
 651 
 652   _active_tasks = active_tasks;
 653   // Need to update the three data structures below according to the
 654   // number of active threads for this phase.
 655   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 656   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 657   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 658 }
 659 
 660 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 661   set_concurrency(active_tasks);
 662 
 663   _concurrent = concurrent;
 664   // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }
 667 
 668   if (concurrent) {
 669     set_concurrent_marking_in_progress();
 670   } else {
 671     // We currently assume that the concurrent flag has been set to
 672     // false before we start remark. At this point we should also be
 673     // in a STW phase.
 674     assert(!concurrent_marking_in_progress(), "invariant");
 675     assert(out_of_regions(),
 676            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 677            p2i(_finger), p2i(_heap_end));
 678   }
 679 }
 680 
 681 void ConcurrentMark::set_non_marking_state() {
 682   // We set the global marking state to some default values when we're
 683   // not doing marking.
 684   reset_marking_state();
 685   _active_tasks = 0;
 686   clear_concurrent_marking_in_progress();
 687 }
 688 
 689 ConcurrentMark::~ConcurrentMark() {
 690   // The ConcurrentMark instance is never freed.
 691   ShouldNotReachHere();
 692 }
 693 
 694 void ConcurrentMark::clearNextBitmap() {
 695   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 696 
 697   // Make sure that the concurrent mark thread looks to still be in
 698   // the current cycle.
 699   guarantee(cmThread()->during_cycle(), "invariant");
 700 
 701   // We are finishing up the current cycle by clearing the next
 702   // marking bitmap and getting it ready for the next cycle. During
 703   // this time no other cycle can start. So, let's make sure that this
 704   // is the case.
 705   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 706 
 707   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 708   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 709   _parallel_workers->run_task(&task);
 710 
 711   // Clear the liveness counting data. If the marking has been aborted, the abort()
 712   // call already did that.
 713   if (cl.complete()) {
 714     clear_all_count_data();
 715   }
 716 
 717   // Repeat the asserts from above.
 718   guarantee(cmThread()->during_cycle(), "invariant");
 719   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 720 }
 721 
 722 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 723   CMBitMap* _bitmap;
 724   bool _error;
 725  public:
 726   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 727   }
 728 
 729   virtual bool doHeapRegion(HeapRegion* r) {
 730     // This closure can be called concurrently to the mutator, so we must make sure
 731     // that the result of the getNextMarkedWordAddress() call is compared to the
 732     // value passed to it as limit to detect any found bits.
 733     // end never changes in G1.
 734     HeapWord* end = r->end();
 735     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 736   }
 737 };
 738 
 739 bool ConcurrentMark::nextMarkBitmapIsClear() {
 740   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 741   _g1h->heap_region_iterate(&cl);
 742   return cl.complete();
 743 }
 744 
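// Applied to every region at the start of marking so that each region can
// record its start-of-marking state (note_start_of_marking() is expected to
// capture, among other things, the region's next top-at-mark-start).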
 745 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 746 public:
 747   bool doHeapRegion(HeapRegion* r) {
 748     r->note_start_of_marking();
 749     return false;
 750   }
 751 };
 752 
 753 void ConcurrentMark::checkpointRootsInitialPre() {
 754   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 755   G1CollectorPolicy* g1p = g1h->g1_policy();
 756 
 757   _has_aborted = false;
 758 
 759   // Initialize marking structures. This has to be done in a STW phase.
 760   reset();
 761 
 762   // For each region note start of marking.
 763   NoteStartOfMarkHRClosure startcl;
 764   g1h->heap_region_iterate(&startcl);
 765 }
 766 
 767 
 768 void ConcurrentMark::checkpointRootsInitialPost() {
 769   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 770 
 771   // Start Concurrent Marking weak-reference discovery.
 772   ReferenceProcessor* rp = g1h->ref_processor_cm();
 773   // enable ("weak") refs discovery
 774   rp->enable_discovery();
 775   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 776 
 777   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 780   satb_mq_set.set_active_all_threads(true, /* new active value */
 781                                      false /* expected_active */);
 782 
 783   _root_regions.prepare_for_scan();
 784 
 785   // update_g1_committed() will be called at the end of an evac pause
 786   // when marking is on. So, it's also called at the end of the
 787   // initial-mark pause to update the heap end, if the heap expands
 788   // during it. No need to call it here.
 789 }
 790 
 791 /*
 792  * Notice that in the next two methods, we actually leave the STS
 793  * during the barrier sync and join it immediately afterwards. If we
 794  * do not do this, the following deadlock can occur: one thread could
 795  * be in the barrier sync code, waiting for the other thread to also
 796  * sync up, whereas another one could be trying to yield, while also
 797  * waiting for the other threads to sync up too.
 798  *
 799  * Note, however, that this code is also used during remark and in
 800  * this case we should not attempt to leave / enter the STS, otherwise
 801  * we'll either hit an assert (debug / fastdebug) or deadlock
 802  * (product). So we should only leave / enter the STS if we are
 803  * operating concurrently.
 804  *
 805  * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 808  * barrier is one of the last things do_marking_step() does, and it
 809  * doesn't manipulate any data structures afterwards.
 810  */
 811 
 812 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 813   bool barrier_aborted;
 814   {
 815     SuspendibleThreadSetLeaver sts_leave(concurrent());
 816     barrier_aborted = !_first_overflow_barrier_sync.enter();
 817   }
 818 
 819   // at this point everyone should have synced up and not be doing any
 820   // more work
 821 
 822   if (barrier_aborted) {
 823     // If the barrier aborted we ignore the overflow condition and
 824     // just abort the whole marking phase as quickly as possible.
 825     return;
 826   }
 827 
 828   // If we're executing the concurrent phase of marking, reset the marking
 829   // state; otherwise the marking state is reset after reference processing,
 830   // during the remark pause.
 831   // If we reset here as a result of an overflow during the remark we will
 832   // see assertion failures from any subsequent set_concurrency_and_phase()
 833   // calls.
 834   if (concurrent()) {
    // let the task associated with worker 0 do this
 836     if (worker_id == 0) {
 837       // task 0 is responsible for clearing the global data structures
 838       // We should be here because of an overflow. During STW we should
 839       // not clear the overflow flag since we rely on it being true when
 840       // we exit this method to abort the pause and restart concurrent
 841       // marking.
 842       reset_marking_state(true /* clear_overflow */);
 843 
 844       log_info(gc)("Concurrent Mark reset for overflow");
 845     }
 846   }
 847 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
 850 }
 851 
 852 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 853   SuspendibleThreadSetLeaver sts_leave(concurrent());
 854   _second_overflow_barrier_sync.enter();
 855 
 856   // at this point everything should be re-initialized and ready to go
 857 }
 858 
 859 class CMConcurrentMarkingTask: public AbstractGangTask {
 860 private:
 861   ConcurrentMark*       _cm;
 862   ConcurrentMarkThread* _cmt;
 863 
 864 public:
 865   void work(uint worker_id) {
 866     assert(Thread::current()->is_ConcurrentGC_thread(),
 867            "this should only be done by a conc GC thread");
 868     ResourceMark rm;
 869 
 870     double start_vtime = os::elapsedVTime();
 871 
 872     {
 873       SuspendibleThreadSetJoiner sts_join;
 874 
 875       assert(worker_id < _cm->active_tasks(), "invariant");
 876       CMTask* the_task = _cm->task(worker_id);
 877       the_task->record_start_time();
 878       if (!_cm->has_aborted()) {
 879         do {
 880           double start_vtime_sec = os::elapsedVTime();
 881           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 882 
 883           the_task->do_marking_step(mark_step_duration_ms,
 884                                     true  /* do_termination */,
 885                                     false /* is_serial*/);
 886 
 887           double end_vtime_sec = os::elapsedVTime();
 888           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 889           _cm->clear_has_overflown();
 890 
 891           _cm->do_yield_check(worker_id);
 892 
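          // If this task aborted its marking step (but marking as a whole has
          // not been aborted), back off by sleeping in proportion to the
          // virtual time just consumed, scaled by sleep_factor, before retrying.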
 893           jlong sleep_time_ms;
 894           if (!_cm->has_aborted() && the_task->has_aborted()) {
 895             sleep_time_ms =
 896               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 897             {
 898               SuspendibleThreadSetLeaver sts_leave;
 899               os::sleep(Thread::current(), sleep_time_ms, false);
 900             }
 901           }
 902         } while (!_cm->has_aborted() && the_task->has_aborted());
 903       }
 904       the_task->record_end_time();
 905       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 906     }
 907 
 908     double end_vtime = os::elapsedVTime();
 909     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 910   }
 911 
 912   CMConcurrentMarkingTask(ConcurrentMark* cm,
 913                           ConcurrentMarkThread* cmt) :
 914       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 915 
 916   ~CMConcurrentMarkingTask() { }
 917 };
 918 
 919 // Calculates the number of active workers for a concurrent
 920 // phase.
 921 uint ConcurrentMark::calc_parallel_marking_threads() {
 922   uint n_conc_workers = 0;
 923   if (!UseDynamicNumberOfGCThreads ||
 924       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 925        !ForceDynamicNumberOfGCThreads)) {
 926     n_conc_workers = max_parallel_marking_threads();
 927   } else {
 928     n_conc_workers =
 929       AdaptiveSizePolicy::calc_default_active_workers(
 930                                    max_parallel_marking_threads(),
 931                                    1, /* Minimum workers */
 932                                    parallel_marking_threads(),
 933                                    Threads::number_of_non_daemon_threads());
 934     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 935     // that scaling has already gone into "_max_parallel_marking_threads".
 936   }
 937   assert(n_conc_workers > 0, "Always need at least 1");
 938   return n_conc_workers;
 939 }
 940 
 941 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
 942   // Currently, only survivors can be root regions.
 943   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 944   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 945 
 946   const uintx interval = PrefetchScanIntervalInBytes;
 947   HeapWord* curr = hr->bottom();
 948   const HeapWord* end = hr->top();
 949   while (curr < end) {
 950     Prefetch::read(curr, interval);
 951     oop obj = oop(curr);
 952     int size = obj->oop_iterate_size(&cl);
 953     assert(size == obj->size(), "sanity");
 954     curr += size;
 955   }
 956 }
 957 
 958 class CMRootRegionScanTask : public AbstractGangTask {
 959 private:
 960   ConcurrentMark* _cm;
 961 
 962 public:
 963   CMRootRegionScanTask(ConcurrentMark* cm) :
 964     AbstractGangTask("Root Region Scan"), _cm(cm) { }
 965 
 966   void work(uint worker_id) {
 967     assert(Thread::current()->is_ConcurrentGC_thread(),
 968            "this should only be done by a conc GC thread");
 969 
 970     CMRootRegions* root_regions = _cm->root_regions();
 971     HeapRegion* hr = root_regions->claim_next();
 972     while (hr != NULL) {
 973       _cm->scanRootRegion(hr, worker_id);
 974       hr = root_regions->claim_next();
 975     }
 976   }
 977 };
 978 
 979 void ConcurrentMark::scanRootRegions() {
 980   // Start of concurrent marking.
 981   ClassLoaderDataGraph::clear_claimed_marks();
 982 
 983   // scan_in_progress() will have been set to true only if there was
 984   // at least one root region to scan. So, if it's false, we
 985   // should not attempt to do any further work.
 986   if (root_regions()->scan_in_progress()) {
 987     GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");
 988 
 989     _parallel_marking_threads = calc_parallel_marking_threads();
 990     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
 991            "Maximum number of marking threads exceeded");
 992     uint active_workers = MAX2(1U, parallel_marking_threads());
 993 
 994     CMRootRegionScanTask task(this);
 995     _parallel_workers->set_active_workers(active_workers);
 996     _parallel_workers->run_task(&task);
 997 
 998     // It's possible that has_aborted() is true here without actually
 999     // aborting the survivor scan earlier. This is OK as it's
1000     // mainly used for sanity checking.
1001     root_regions()->scan_finished();
1002   }
1003 }
1004 
1005 void ConcurrentMark::register_concurrent_phase_start(const char* title) {
1006   assert(!_concurrent_phase_started, "Sanity");
1007   _concurrent_phase_started = true;
1008   _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
1009 }
1010 
1011 void ConcurrentMark::register_concurrent_phase_end() {
1012   if (_concurrent_phase_started) {
1013     _concurrent_phase_started = false;
1014     _g1h->gc_timer_cm()->register_gc_concurrent_end();
1015   }
1016 }
1017 
1018 void ConcurrentMark::markFromRoots() {
1019   // we might be tempted to assert that:
1020   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1021   //        "inconsistent argument?");
1022   // However that wouldn't be right, because it's possible that
1023   // a safepoint is indeed in progress as a younger generation
1024   // stop-the-world GC happens even as we mark in this generation.
1025 
1026   _restart_for_overflow = false;
1027 
1028   // _g1h has _n_par_threads
1029   _parallel_marking_threads = calc_parallel_marking_threads();
1030   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1031     "Maximum number of marking threads exceeded");
1032 
1033   uint active_workers = MAX2(1U, parallel_marking_threads());
1034   assert(active_workers > 0, "Should have been set");
1035 
1036   // Parallel task terminator is set in "set_concurrency_and_phase()"
1037   set_concurrency_and_phase(active_workers, true /* concurrent */);
1038 
1039   CMConcurrentMarkingTask markingTask(this, cmThread());
1040   _parallel_workers->set_active_workers(active_workers);
1041   _parallel_workers->run_task(&markingTask);
1042   print_stats();
1043 }
1044 
1045 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1046   // world is stopped at this checkpoint
1047   assert(SafepointSynchronize::is_at_safepoint(),
1048          "world should be stopped");
1049 
1050   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1051 
1052   // If a full collection has happened, we shouldn't do this.
1053   if (has_aborted()) {
1054     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1055     return;
1056   }
1057 
1058   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1059 
1060   if (VerifyDuringGC) {
1061     HandleMark hm;  // handle scope
1062     g1h->prepare_for_verify();
1063     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1064   }
1065   g1h->verifier()->check_bitmaps("Remark Start");
1066 
1067   G1CollectorPolicy* g1p = g1h->g1_policy();
1068   g1p->record_concurrent_mark_remark_start();
1069 
1070   double start = os::elapsedTime();
1071 
1072   checkpointRootsFinalWork();
1073 
1074   double mark_work_end = os::elapsedTime();
1075 
1076   weakRefsWork(clear_all_soft_refs);
1077 
1078   if (has_overflown()) {
1079     // Oops.  We overflowed.  Restart concurrent marking.
1080     _restart_for_overflow = true;
1081     log_develop_trace(gc)("Remark led to restart for overflow.");
1082 
1083     // Verify the heap w.r.t. the previous marking bitmap.
1084     if (VerifyDuringGC) {
1085       HandleMark hm;  // handle scope
1086       g1h->prepare_for_verify();
1087       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1088     }
1089 
1090     // Clear the marking state because we will be restarting
1091     // marking due to overflowing the global mark stack.
1092     reset_marking_state();
1093   } else {
1094     {
1095       GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());
1096 
1097       // Aggregate the per-task counting data that we have accumulated
1098       // while marking.
1099       aggregate_count_data();
1100     }
1101 
1102     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1103     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1106     satb_mq_set.set_active_all_threads(false, /* new active value */
1107                                        true /* expected_active */);
1108 
1109     if (VerifyDuringGC) {
1110       HandleMark hm;  // handle scope
1111       g1h->prepare_for_verify();
1112       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1113     }
1114     g1h->verifier()->check_bitmaps("Remark End");
1115     assert(!restart_for_overflow(), "sanity");
1116     // Completely reset the marking state since marking completed
1117     set_non_marking_state();
1118   }
1119 
1120   // Expand the marking stack, if we have to and if we can.
1121   if (_markStack.should_expand()) {
1122     _markStack.expand();
1123   }
1124 
1125   // Statistics
1126   double now = os::elapsedTime();
1127   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1128   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1129   _remark_times.add((now - start) * 1000.0);
1130 
1131   g1p->record_concurrent_mark_remark_end();
1132 
1133   G1CMIsAliveClosure is_alive(g1h);
1134   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1135 }
1136 
1137 // Base class of the closures that finalize and verify the
1138 // liveness counting data.
1139 class CMCountDataClosureBase: public HeapRegionClosure {
1140 protected:
1141   G1CollectedHeap* _g1h;
1142   ConcurrentMark* _cm;
1143   CardTableModRefBS* _ct_bs;
1144 
1145   BitMap* _region_bm;
1146   BitMap* _card_bm;
1147 
  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit in the region
  // bitmap to 1.
1151   void set_bit_for_region(HeapRegion* hr) {
1152     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1153     _region_bm->par_at_put(index, true);
1154   }
1155 
1156 public:
1157   CMCountDataClosureBase(G1CollectedHeap* g1h,
1158                          BitMap* region_bm, BitMap* card_bm):
1159     _g1h(g1h), _cm(g1h->concurrent_mark()),
1160     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1161     _region_bm(region_bm), _card_bm(card_bm) { }
1162 };
1163 
1164 // Closure that calculates the # live objects per region. Used
1165 // for verification purposes during the cleanup pause.
1166 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1167   CMBitMapRO* _bm;
1168   size_t _region_marked_bytes;
1169 
1170 public:
1171   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1172                          BitMap* region_bm, BitMap* card_bm) :
1173     CMCountDataClosureBase(g1h, region_bm, card_bm),
1174     _bm(bm), _region_marked_bytes(0) { }
1175 
1176   bool doHeapRegion(HeapRegion* hr) {
1177     HeapWord* ntams = hr->next_top_at_mark_start();
1178     HeapWord* start = hr->bottom();
1179 
1180     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1181            "Preconditions not met - "
1182            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1183            p2i(start), p2i(ntams), p2i(hr->end()));
1184 
1185     // Find the first marked object at or after "start".
1186     start = _bm->getNextMarkedWordAddress(start, ntams);
1187 
1188     size_t marked_bytes = 0;
1189 
1190     while (start < ntams) {
1191       oop obj = oop(start);
1192       int obj_sz = obj->size();
1193       HeapWord* obj_end = start + obj_sz;
1194 
1195       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1196       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1197 
1198       // Note: if we're looking at the last region in heap - obj_end
1199       // could be actually just beyond the end of the heap; end_idx
1200       // will then correspond to a (non-existent) card that is also
1201       // just beyond the heap.
1202       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1203         // end of object is not card aligned - increment to cover
1204         // all the cards spanned by the object
1205         end_idx += 1;
1206       }
1207 
1208       // Set the bits in the card BM for the cards spanned by this object.
1209       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1210 
1211       // Add the size of this object to the number of marked bytes.
1212       marked_bytes += (size_t)obj_sz * HeapWordSize;
1213 
1214       // This will happen if we are handling a humongous object that spans
1215       // several heap regions.
1216       if (obj_end > hr->end()) {
1217         break;
1218       }
1219       // Find the next marked object after this one.
1220       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1221     }
1222 
1223     // Mark the allocated-since-marking portion...
1224     HeapWord* top = hr->top();
1225     if (ntams < top) {
1226       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1227       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1228 
1229       // Note: if we're looking at the last region in heap - top
1230       // could be actually just beyond the end of the heap; end_idx
1231       // will then correspond to a (non-existent) card that is also
1232       // just beyond the heap.
1233       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1234         // end of object is not card aligned - increment to cover
1235         // all the cards spanned by the object
1236         end_idx += 1;
1237       }
1238       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1239 
1240       // This definitely means the region has live objects.
1241       set_bit_for_region(hr);
1242     }
1243 
1244     // Update the live region bitmap.
1245     if (marked_bytes > 0) {
1246       set_bit_for_region(hr);
1247     }
1248 
1249     // Set the marked bytes for the current region so that
1250     // it can be queried by a calling verification routine
1251     _region_marked_bytes = marked_bytes;
1252 
1253     return false;
1254   }
1255 
1256   size_t region_marked_bytes() const { return _region_marked_bytes; }
1257 };
1258 
1259 // Heap region closure used for verifying the counting data
1260 // that was accumulated concurrently and aggregated during
1261 // the remark pause. This closure is applied to the heap
1262 // regions during the STW cleanup pause.
1263 
1264 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1265   G1CollectedHeap* _g1h;
1266   ConcurrentMark* _cm;
1267   CalcLiveObjectsClosure _calc_cl;
1268   BitMap* _region_bm;   // Region BM to be verified
1269   BitMap* _card_bm;     // Card BM to be verified
1270 
1271   BitMap* _exp_region_bm; // Expected Region BM values
1272   BitMap* _exp_card_bm;   // Expected card BM values
1273 
1274   int _failures;
1275 
1276 public:
1277   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1278                                 BitMap* region_bm,
1279                                 BitMap* card_bm,
1280                                 BitMap* exp_region_bm,
1281                                 BitMap* exp_card_bm) :
1282     _g1h(g1h), _cm(g1h->concurrent_mark()),
1283     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1284     _region_bm(region_bm), _card_bm(card_bm),
1285     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1286     _failures(0) { }
1287 
1288   int failures() const { return _failures; }
1289 
1290   bool doHeapRegion(HeapRegion* hr) {
1291     int failures = 0;
1292 
1293     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1294     // this region and set the corresponding bits in the expected region
1295     // and card bitmaps.
1296     bool res = _calc_cl.doHeapRegion(hr);
1297     assert(res == false, "should be continuing");
1298 
1299     // Verify the marked bytes for this region.
1300     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1301     size_t act_marked_bytes = hr->next_marked_bytes();
1302 
1303     if (exp_marked_bytes > act_marked_bytes) {
1304       if (hr->is_starts_humongous()) {
1305         // For start_humongous regions, the size of the whole object will be
1306         // in exp_marked_bytes.
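        // A humongous object spanning num_regions regions must be larger than
        // (num_regions - 1) region granules and at most num_regions granules;
        // anything outside that range is an inconsistency.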
1307         HeapRegion* region = hr;
1308         int num_regions;
1309         for (num_regions = 0; region != NULL; num_regions++) {
1310           region = _g1h->next_region_in_humongous(region);
1311         }
1312         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1313           failures += 1;
1314         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1315           failures += 1;
1316         }
1317       } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting for some objects during the actual marking.
1320         failures += 1;
1321       }
1322     }
1323 
1324     // Verify the bit, for this region, in the actual and expected
1325     // (which was just calculated) region bit maps.
1326     // We're not OK if the bit in the calculated expected region
1327     // bitmap is set and the bit in the actual region bitmap is not.
1328     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1329 
1330     bool expected = _exp_region_bm->at(index);
1331     bool actual = _region_bm->at(index);
1332     if (expected && !actual) {
1333       failures += 1;
1334     }
1335 
1336     // Verify that the card bit maps for the cards spanned by the current
1337     // region match. We have an error if we have a set bit in the expected
1338     // bit map and the corresponding bit in the actual bitmap is not set.
1339 
1340     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1341     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1342 
1343     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1344       expected = _exp_card_bm->at(i);
1345       actual = _card_bm->at(i);
1346 
1347       if (expected && !actual) {
1348         failures += 1;
1349       }
1350     }
1351 
1352     _failures += failures;
1353 
1354     // We could stop iteration over the heap when we
1355     // find the first violating region by returning true.
1356     return false;
1357   }
1358 };
1359 
1360 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1361 protected:
1362   G1CollectedHeap* _g1h;
1363   ConcurrentMark* _cm;
1364   BitMap* _actual_region_bm;
1365   BitMap* _actual_card_bm;
1366 
1367   uint    _n_workers;
1368 
1369   BitMap* _expected_region_bm;
1370   BitMap* _expected_card_bm;
1371 
1372   int  _failures;
1373 
1374   HeapRegionClaimer _hrclaimer;
1375 
1376 public:
1377   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1378                             BitMap* region_bm, BitMap* card_bm,
1379                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1380     : AbstractGangTask("G1 verify final counting"),
1381       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1382       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1383       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1384       _failures(0),
1385       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1386     assert(VerifyDuringGC, "don't call this otherwise");
1387     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1388     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1389   }
1390 
1391   void work(uint worker_id) {
1392     assert(worker_id < _n_workers, "invariant");
1393 
1394     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1395                                             _actual_region_bm, _actual_card_bm,
1396                                             _expected_region_bm,
1397                                             _expected_card_bm);
1398 
1399     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1400 
1401     Atomic::add(verify_cl.failures(), &_failures);
1402   }
1403 
1404   int failures() const { return _failures; }
1405 };
1406 
1407 // Closure that finalizes the liveness counting data.
1408 // Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top)
1410 // (which contains the implicitly live objects) in the
1411 // card liveness bitmap. Also sets the bit for each region,
1412 // containing live data, in the region liveness bitmap.
1413 
1414 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1415  public:
1416   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1417                               BitMap* region_bm,
1418                               BitMap* card_bm) :
1419     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1420 
1421   bool doHeapRegion(HeapRegion* hr) {
1422     HeapWord* ntams = hr->next_top_at_mark_start();
1423     HeapWord* top   = hr->top();
1424 
1425     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1426 
1427     // Mark the allocated-since-marking portion...
1428     if (ntams < top) {
1429       // This definitely means the region has live objects.
1430       set_bit_for_region(hr);
1431 
1432       // Now set the bits in the card bitmap for [ntams, top)
1433       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1434       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1435 
1436       // Note: if we're looking at the last region in the heap, top
1437       // could actually be just beyond the end of the heap; end_idx
1438       // will then correspond to a (non-existent) card that is also
1439       // just beyond the heap.
1440       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1441         // end of object is not card aligned - increment to cover
1442         // all the cards spanned by the object
1443         end_idx += 1;
1444       }
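           // For example, if top falls in the middle of a card, end_idx initially
           // refers to that partially covered card; the increment above makes the
           // half-open range [start_idx, end_idx) include it.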
1445 
1446       assert(end_idx <= _card_bm->size(),
1447              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1448              end_idx, _card_bm->size());
1449       assert(start_idx < _card_bm->size(),
1450              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1451              start_idx, _card_bm->size());
1452 
1453       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1454     }
1455 
1456     // Set the bit for the region if it contains live data
1457     if (hr->next_marked_bytes() > 0) {
1458       set_bit_for_region(hr);
1459     }
1460 
1461     return false;
1462   }
1463 };
1464 
1465 class G1ParFinalCountTask: public AbstractGangTask {
1466 protected:
1467   G1CollectedHeap* _g1h;
1468   ConcurrentMark* _cm;
1469   BitMap* _actual_region_bm;
1470   BitMap* _actual_card_bm;
1471 
1472   uint    _n_workers;
1473   HeapRegionClaimer _hrclaimer;
1474 
1475 public:
1476   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1477     : AbstractGangTask("G1 final counting"),
1478       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1479       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1480       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1481   }
1482 
1483   void work(uint worker_id) {
1484     assert(worker_id < _n_workers, "invariant");
1485 
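         // Each worker finalizes the liveness counting data for the heap
         // regions it claims via the HeapRegionClaimer.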
1486     FinalCountDataUpdateClosure final_update_cl(_g1h,
1487                                                 _actual_region_bm,
1488                                                 _actual_card_bm);
1489 
1490     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1491   }
1492 };
1493 
1494 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1495   G1CollectedHeap* _g1;
1496   size_t _freed_bytes;
1497   FreeRegionList* _local_cleanup_list;
1498   uint _old_regions_removed;
1499   uint _humongous_regions_removed;
1500   HRRSCleanupTask* _hrrs_cleanup_task;
1501 
1502 public:
1503   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1504                              FreeRegionList* local_cleanup_list,
1505                              HRRSCleanupTask* hrrs_cleanup_task) :
1506     _g1(g1),
1507     _freed_bytes(0),
1508     _local_cleanup_list(local_cleanup_list),
1509     _old_regions_removed(0),
1510     _humongous_regions_removed(0),
1511     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1512 
1513   size_t freed_bytes() { return _freed_bytes; }
1514   uint old_regions_removed() const { return _old_regions_removed; }
1515   uint humongous_regions_removed() const { return _humongous_regions_removed; }
1516 
1517   bool doHeapRegion(HeapRegion *hr) {
1518     if (hr->is_archive()) {
1519       return false;
1520     }
1521     // Regions are iterated here with this task's own HeapRegionClaimer,
1522     // independently of the claimer used by the FinalCount task.
1523     _g1->reset_gc_time_stamps(hr);
1524     hr->note_end_of_marking();
1525 
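         // Reclaim regions that are used but contain no live data (and are
         // not young): they are freed into the worker-local cleanup list.
         // For all other regions, the remembered set is given a chance to do
         // its cleanup work via the HRRS cleanup task.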
1526     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1527       _freed_bytes += hr->used();
1528       hr->set_containing_set(NULL);
1529       if (hr->is_humongous()) {
1530         _humongous_regions_removed++;
1531         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1532       } else {
1533         _old_regions_removed++;
1534         _g1->free_region(hr, _local_cleanup_list, true);
1535       }
1536     } else {
1537       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1538     }
1539 
1540     return false;
1541   }
1542 };
1543 
1544 class G1ParNoteEndTask: public AbstractGangTask {
1545   friend class G1NoteEndOfConcMarkClosure;
1546 
1547 protected:
1548   G1CollectedHeap* _g1h;
1549   FreeRegionList* _cleanup_list;
1550   HeapRegionClaimer _hrclaimer;
1551 
1552 public:
1553   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1554       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1555   }
1556 
1557   void work(uint worker_id) {
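         // Each worker notes the end of marking for the regions it claims and
         // collects fully-dead regions into a worker-local free list, which is
         // appended to the global cleanup list under the lock below.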
1558     FreeRegionList local_cleanup_list("Local Cleanup List");
1559     HRRSCleanupTask hrrs_cleanup_task;
1560     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1561                                            &hrrs_cleanup_task);
1562     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1563     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1564 
1565     // Now update the lists
1566     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1567     {
1568       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1569       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1570 
1571       // If we iterate over the global cleanup list at the end of
1572       // cleanup to do this printing we will not guarantee to only
1573       // generate output for the newly-reclaimed regions (the list
1574       // might not be empty at the beginning of cleanup; we might
1575       // still be working on its previous contents). So we do the
1576       // printing here, before we append the new regions to the global
1577       // cleanup list.
1578 
1579       G1HRPrinter* hr_printer = _g1h->hr_printer();
1580       if (hr_printer->is_active()) {
1581         FreeRegionListIterator iter(&local_cleanup_list);
1582         while (iter.more_available()) {
1583           HeapRegion* hr = iter.get_next();
1584           hr_printer->cleanup(hr);
1585         }
1586       }
1587 
1588       _cleanup_list->add_ordered(&local_cleanup_list);
1589       assert(local_cleanup_list.is_empty(), "post-condition");
1590 
1591       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1592     }
1593   }
1594 };
1595 
1596 void ConcurrentMark::cleanup() {
1597   // world is stopped at this checkpoint
1598   assert(SafepointSynchronize::is_at_safepoint(),
1599          "world should be stopped");
1600   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1601 
1602   // If a full collection has happened, we shouldn't do this.
1603   if (has_aborted()) {
1604     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1605     return;
1606   }
1607 
1608   g1h->verifier()->verify_region_sets_optional();
1609 
1610   if (VerifyDuringGC) {
1611     HandleMark hm;  // handle scope
1612     g1h->prepare_for_verify();
1613     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1614   }
1615   g1h->verifier()->check_bitmaps("Cleanup Start");
1616 
1617   G1CollectorPolicy* g1p = g1h->g1_policy();
1618   g1p->record_concurrent_mark_cleanup_start();
1619 
1620   double start = os::elapsedTime();
1621 
1622   HeapRegionRemSet::reset_for_cleanup_tasks();
1623 
1624   // Do counting once more with the world stopped for good measure.
1625   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1626 
1627   g1h->workers()->run_task(&g1_par_count_task);
1628 
1629   if (VerifyDuringGC) {
1630     // Verify that the counting data accumulated during marking matches
1631     // that calculated by walking the marking bitmap.
1632 
1633     // Bitmaps to hold expected values
1634     BitMap expected_region_bm(_region_bm.size(), true);
1635     BitMap expected_card_bm(_card_bm.size(), true);
1636 
1637     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1638                                                  &_region_bm,
1639                                                  &_card_bm,
1640                                                  &expected_region_bm,
1641                                                  &expected_card_bm);
1642 
1643     g1h->workers()->run_task(&g1_par_verify_task);
1644 
1645     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1646   }
1647 
1648   size_t start_used_bytes = g1h->used();
1649   g1h->collector_state()->set_mark_in_progress(false);
1650 
1651   double count_end = os::elapsedTime();
1652   double this_final_counting_time = (count_end - start);
1653   _total_counting_time += this_final_counting_time;
1654 
1655   if (log_is_enabled(Trace, gc, liveness)) {
1656     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1657     _g1h->heap_region_iterate(&cl);
1658   }
1659 
1660   // Install newly created mark bitMap as "prev".
1661   swapMarkBitMaps();
1662 
1663   g1h->reset_gc_time_stamp();
1664 
1665   uint n_workers = _g1h->workers()->active_workers();
1666 
1667   // Note end of marking in all heap regions.
1668   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1669   g1h->workers()->run_task(&g1_par_note_end_task);
1670   g1h->check_gc_time_stamps();
1671 
1672   if (!cleanup_list_is_empty()) {
1673     // The cleanup list is not empty, so we'll have to process it
1674     // concurrently. Notify anyone else that might be wanting free
1675     // regions that there will be more free regions coming soon.
1676     g1h->set_free_regions_coming();
1677   }
1678 
1679   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1680   // call below, since scrubbing affects the metric by which we sort the heap regions.
1681   if (G1ScrubRemSets) {
1682     double rs_scrub_start = os::elapsedTime();
1683     g1h->scrub_rem_set(&_region_bm, &_card_bm);
1684     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1685   }
1686 
1687   // this will also free any regions totally full of garbage objects,
1688   // and sort the regions.
1689   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1690 
1691   // Statistics.
1692   double end = os::elapsedTime();
1693   _cleanup_times.add((end - start) * 1000.0);
1694 
1695   // Clean up will have freed any regions completely full of garbage.
1696   // Update the soft reference policy with the new heap occupancy.
1697   Universe::update_heap_info_at_gc();
1698 
1699   if (VerifyDuringGC) {
1700     HandleMark hm;  // handle scope
1701     g1h->prepare_for_verify();
1702     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1703   }
1704 
1705   g1h->verifier()->check_bitmaps("Cleanup End");
1706 
1707   g1h->verifier()->verify_region_sets_optional();
1708 
1709   // We need to make this be a "collection" so any collection pause that
1710   // races with it goes around and waits for completeCleanup to finish.
1711   g1h->increment_total_collections();
1712 
1713   // Clean out dead classes and update Metaspace sizes.
1714   if (ClassUnloadingWithConcurrentMark) {
1715     ClassLoaderDataGraph::purge();
1716   }
1717   MetaspaceGC::compute_new_size();
1718 
1719   // We reclaimed old regions so we should calculate the sizes to make
1720   // sure we update the old gen/space data.
1721   g1h->g1mm()->update_sizes();
1722   g1h->allocation_context_stats().update_after_mark();
1723 
1724   g1h->trace_heap_after_concurrent_cycle();
1725 }
1726 
1727 void ConcurrentMark::completeCleanup() {
1728   if (has_aborted()) return;
1729 
1730   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1731 
1732   _cleanup_list.verify_optional();
1733   FreeRegionList tmp_free_list("Tmp Free List");
1734 
1735   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1736                                   "cleanup list has %u entries",
1737                                   _cleanup_list.length());
1738 
1739   // No one else should be accessing the _cleanup_list at this point,
1740   // so it is not necessary to take any locks
1741   while (!_cleanup_list.is_empty()) {
1742     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1743     assert(hr != NULL, "Got NULL from a non-empty list");
1744     hr->par_clear();
1745     tmp_free_list.add_ordered(hr);
1746 
1747     // Instead of adding one region at a time to the secondary_free_list,
1748     // we accumulate them in the local list and move them a few at a
1749     // time. This also cuts down on the number of notify_all() calls
1750     // we do during this process. We'll also append the local list when
1751     // _cleanup_list is empty (which means we just removed the last
1752     // region from the _cleanup_list).
1753     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1754         _cleanup_list.is_empty()) {
1755       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1756                                       "appending %u entries to the secondary_free_list, "
1757                                       "cleanup list still has %u entries",
1758                                       tmp_free_list.length(),
1759                                       _cleanup_list.length());
1760 
1761       {
1762         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1763         g1h->secondary_free_list_add(&tmp_free_list);
1764         SecondaryFreeList_lock->notify_all();
1765       }
1766 #ifndef PRODUCT
1767       if (G1StressConcRegionFreeing) {
1768         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1769           os::sleep(Thread::current(), (jlong) 1, false);
1770         }
1771       }
1772 #endif
1773     }
1774   }
1775   assert(tmp_free_list.is_empty(), "post-condition");
1776 }
1777 
1778 // Supporting Object and Oop closures for reference discovery
1779 // and processing during marking.
1780 
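     // Referents outside the G1 reserved heap are always considered alive;
     // objects inside the heap are considered alive unless is_obj_ill()
     // says otherwise.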
1781 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1782   HeapWord* addr = (HeapWord*)obj;
1783   return addr != NULL &&
1784          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1785 }
1786 
1787 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1788 // Uses the CMTask associated with a worker thread (for serial reference
1789 // processing the CMTask for worker 0 is used) to preserve (mark) and
1790 // trace referent objects.
1791 //
1792 // Using the CMTask and embedded local queues avoids having the worker
1793 // threads operate on the global mark stack. This reduces the risk
1794 // of overflowing the stack - which we would rather avoid at this late
1795 // stage. Using the tasks' local queues also removes the potential
1796 // for the workers to interfere with each other, which could occur if
1797 // they operated on the global stack.
1798 
1799 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1800   ConcurrentMark* _cm;
1801   CMTask*         _task;
1802   int             _ref_counter_limit;
1803   int             _ref_counter;
1804   bool            _is_serial;
1805  public:
1806   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1807     _cm(cm), _task(task), _is_serial(is_serial),
1808     _ref_counter_limit(G1RefProcDrainInterval) {
1809     assert(_ref_counter_limit > 0, "sanity");
1810     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1811     _ref_counter = _ref_counter_limit;
1812   }
1813 
1814   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1815   virtual void do_oop(      oop* p) { do_oop_work(p); }
1816 
1817   template <class T> void do_oop_work(T* p) {
1818     if (!_cm->has_overflown()) {
1819       oop obj = oopDesc::load_decode_heap_oop(p);
1820       _task->deal_with_reference(obj);
1821       _ref_counter--;
1822 
1823       if (_ref_counter == 0) {
1824         // We have dealt with _ref_counter_limit references, pushing them
1825         // and objects reachable from them on to the local stack (and
1826         // possibly the global stack). Call CMTask::do_marking_step() to
1827         // process these entries.
1828         //
1829         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1830         // there's nothing more to do (i.e. we're done with the entries that
1831         // were pushed as a result of the CMTask::deal_with_reference() calls
1832         // above) or we overflow.
1833         //
1834         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1835         // flag while there may still be some work to do. (See the comment at
1836         // the beginning of CMTask::do_marking_step() for those conditions -
1837         // one of which is reaching the specified time target.) It is only
1838         // when CMTask::do_marking_step() returns without setting the
1839         // has_aborted() flag that the marking step has completed.
1840         do {
1841           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1842           _task->do_marking_step(mark_step_duration_ms,
1843                                  false      /* do_termination */,
1844                                  _is_serial);
1845         } while (_task->has_aborted() && !_cm->has_overflown());
1846         _ref_counter = _ref_counter_limit;
1847       }
1848     }
1849   }
1850 };
1851 
1852 // 'Drain' closure used by both serial and parallel reference processing.
1853 // Uses the CMTask associated with a given worker thread (for serial
1854 // reference processing the CMTask for worker 0 is used). Calls the
1855 // do_marking_step routine, with an unbelievably large timeout value,
1856 // to drain the marking data structures of the remaining entries
1857 // added by the 'keep alive' oop closure above.
1858 
1859 class G1CMDrainMarkingStackClosure: public VoidClosure {
1860   ConcurrentMark* _cm;
1861   CMTask*         _task;
1862   bool            _is_serial;
1863  public:
1864   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1865     _cm(cm), _task(task), _is_serial(is_serial) {
1866     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1867   }
1868 
1869   void do_void() {
1870     do {
1871       // We call CMTask::do_marking_step() to completely drain the local
1872       // and global marking stacks of entries pushed by the 'keep alive'
1873       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1874       //
1875       // CMTask::do_marking_step() is called in a loop, which we'll exit
1876       // if there's nothing more to do (i.e. we've completely drained the
1877       // entries that were pushed as a result of applying the 'keep alive'
1878       // closure to the entries on the discovered ref lists) or we overflow
1879       // the global marking stack.
1880       //
1881       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1882       // flag while there may still be some work to do. (See the comment at
1883       // the beginning of CMTask::do_marking_step() for those conditions -
1884       // one of which is reaching the specified time target.) It is only
1885       // when CMTask::do_marking_step() returns without setting the
1886       // has_aborted() flag that the marking step has completed.
1887 
1888       _task->do_marking_step(1000000000.0 /* something very large */,
1889                              true         /* do_termination */,
1890                              _is_serial);
1891     } while (_task->has_aborted() && !_cm->has_overflown());
1892   }
1893 };
1894 
1895 // Implementation of AbstractRefProcTaskExecutor for parallel
1896 // reference processing at the end of G1 concurrent marking
1897 
1898 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1899 private:
1900   G1CollectedHeap* _g1h;
1901   ConcurrentMark*  _cm;
1902   WorkGang*        _workers;
1903   uint             _active_workers;
1904 
1905 public:
1906   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1907                           ConcurrentMark* cm,
1908                           WorkGang* workers,
1909                           uint n_workers) :
1910     _g1h(g1h), _cm(cm),
1911     _workers(workers), _active_workers(n_workers) { }
1912 
1913   // Executes the given task using concurrent marking worker threads.
1914   virtual void execute(ProcessTask& task);
1915   virtual void execute(EnqueueTask& task);
1916 };
1917 
1918 class G1CMRefProcTaskProxy: public AbstractGangTask {
1919   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1920   ProcessTask&     _proc_task;
1921   G1CollectedHeap* _g1h;
1922   ConcurrentMark*  _cm;
1923 
1924 public:
1925   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1926                        G1CollectedHeap* g1h,
1927                        ConcurrentMark* cm) :
1928     AbstractGangTask("Process reference objects in parallel"),
1929     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1930     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1931     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1932   }
1933 
1934   virtual void work(uint worker_id) {
1935     ResourceMark rm;
1936     HandleMark hm;
1937     CMTask* task = _cm->task(worker_id);
1938     G1CMIsAliveClosure g1_is_alive(_g1h);
1939     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1940     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1941 
1942     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1943   }
1944 };
1945 
1946 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1947   assert(_workers != NULL, "Need parallel worker threads.");
1948   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1949 
1950   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1951 
1952   // We need to reset the concurrency level before each
1953   // proxy task execution, so that the termination protocol
1954   // and overflow handling in CMTask::do_marking_step() knows
1955   // how many workers to wait for.
1956   _cm->set_concurrency(_active_workers);
1957   _workers->run_task(&proc_task_proxy);
1958 }
1959 
1960 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1961   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1962   EnqueueTask& _enq_task;
1963 
1964 public:
1965   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1966     AbstractGangTask("Enqueue reference objects in parallel"),
1967     _enq_task(enq_task) { }
1968 
1969   virtual void work(uint worker_id) {
1970     _enq_task.work(worker_id);
1971   }
1972 };
1973 
1974 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1975   assert(_workers != NULL, "Need parallel worker threads.");
1976   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1977 
1978   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1979 
1980   // Not strictly necessary but...
1981   //
1982   // We need to reset the concurrency level before each
1983   // proxy task execution, so that the termination protocol
1984   // and overflow handling in CMTask::do_marking_step() knows
1985   // how many workers to wait for.
1986   _cm->set_concurrency(_active_workers);
1987   _workers->run_task(&enq_task_proxy);
1988 }
1989 
1990 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
1991   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
1992 }
1993 
1994 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1995   if (has_overflown()) {
1996     // Skip processing the discovered references if we have
1997     // overflown the global marking stack. Reference objects
1998     // only get discovered once, so it is OK not to
1999     // de-populate the discovered reference lists. We could have done so,
2000     // but the only benefit would be that, when marking restarts,
2001     // fewer reference objects are discovered.
2002     return;
2003   }
2004 
2005   ResourceMark rm;
2006   HandleMark   hm;
2007 
2008   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2009 
2010   // Is alive closure.
2011   G1CMIsAliveClosure g1_is_alive(g1h);
2012 
2013   // Inner scope to exclude the cleaning of the string and symbol
2014   // tables from the displayed time.
2015   {
2016     GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
2017 
2018     ReferenceProcessor* rp = g1h->ref_processor_cm();
2019 
2020     // See the comment in G1CollectedHeap::ref_processing_init()
2021     // about how reference processing currently works in G1.
2022 
2023     // Set the soft reference policy
2024     rp->setup_policy(clear_all_soft_refs);
2025     assert(_markStack.isEmpty(), "mark stack should be empty");
2026 
2027     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2028     // in serial reference processing. Note these closures are also
2029     // used for serially processing (by the current thread) the
2030     // JNI references during parallel reference processing.
2031     //
2032     // These closures do not need to synchronize with the worker
2033     // threads involved in parallel reference processing as these
2034     // instances are executed serially by the current thread (e.g.
2035     // reference processing is not multi-threaded and is thus
2036     // performed by the current thread instead of a gang worker).
2037     //
2038     // The gang tasks involved in parallel reference processing create
2039     // their own instances of these closures, which do their own
2040     // synchronization among themselves.
2041     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2042     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2043 
2044     // We need at least one active thread. If reference processing
2045     // is not multi-threaded we use the current (VMThread) thread,
2046     // otherwise we use the work gang from the G1CollectedHeap and
2047     // we utilize all the worker threads we can.
2048     bool processing_is_mt = rp->processing_is_mt();
2049     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
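         // Clamp the number of workers to the range [1, _max_worker_id].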
2050     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2051 
2052     // Parallel processing task executor.
2053     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2054                                               g1h->workers(), active_workers);
2055     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2056 
2057     // Set the concurrency level. The phase was already set prior to
2058     // executing the remark task.
2059     set_concurrency(active_workers);
2060 
2061     // Set the degree of MT processing here.  If the discovery was done MT,
2062     // the number of threads involved during discovery could differ from
2063     // the number of active workers.  This is OK as long as the discovered
2064     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2065     rp->set_active_mt_degree(active_workers);
2066 
2067     // Process the weak references.
2068     const ReferenceProcessorStats& stats =
2069         rp->process_discovered_references(&g1_is_alive,
2070                                           &g1_keep_alive,
2071                                           &g1_drain_mark_stack,
2072                                           executor,
2073                                           g1h->gc_timer_cm());
2074     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2075 
2076     // The do_oop work routines of the keep_alive and drain_marking_stack
2077     // oop closures will set the has_overflown flag if we overflow the
2078     // global marking stack.
2079 
2080     assert(_markStack.overflow() || _markStack.isEmpty(),
2081             "mark stack should be empty (unless it overflowed)");
2082 
2083     if (_markStack.overflow()) {
2084       // This should have been done already when we tried to push an
2085       // entry on to the global mark stack. But let's do it again.
2086       set_has_overflown();
2087     }
2088 
2089     assert(rp->num_q() == active_workers, "Number of reference queues should match the number of active workers");
2090 
2091     rp->enqueue_discovered_references(executor);
2092 
2093     rp->verify_no_references_recorded();
2094     assert(!rp->discovery_enabled(), "Post condition");
2095   }
2096 
2097   if (has_overflown()) {
2098     // We can not trust g1_is_alive if the marking stack overflowed
2099     return;
2100   }
2101 
2102   assert(_markStack.isEmpty(), "Marking should have completed");
2103 
2104   // Unload Klasses, String, Symbols, Code Cache, etc.
2105   {
2106     GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());
2107 
2108     if (ClassUnloadingWithConcurrentMark) {
2109       bool purged_classes;
2110 
2111       {
2112         GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
2113         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2114       }
2115 
2116       {
2117         GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
2118         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2119       }
2120     }
2121 
2122     if (G1StringDedup::is_enabled()) {
2123       GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
2124       G1StringDedup::unlink(&g1_is_alive);
2125     }
2126   }
2127 }
2128 
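     // Swap the "prev" and "next" marking bitmaps: the bitmap just populated
     // by this marking cycle ("next") becomes "prev", and the old "prev"
     // bitmap will be cleared and reused as "next" for the following cycle.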
2129 void ConcurrentMark::swapMarkBitMaps() {
2130   CMBitMapRO* temp = _prevMarkBitMap;
2131   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2132   _nextMarkBitMap  = (CMBitMap*)  temp;
2133 }
2134 
2135 // Closure for marking entries in SATB buffers.
2136 class CMSATBBufferClosure : public SATBBufferClosure {
2137 private:
2138   CMTask* _task;
2139   G1CollectedHeap* _g1h;
2140 
2141   // This is very similar to CMTask::deal_with_reference, but with
2142   // more relaxed requirements for the argument, so this must be more
2143   // circumspect about treating the argument as an object.
2144   void do_entry(void* entry) const {
2145     _task->increment_refs_reached();
2146     HeapRegion* hr = _g1h->heap_region_containing(entry);
2147     if (entry < hr->next_top_at_mark_start()) {
2148       // Until we get here, we don't know whether entry refers to a valid
2149       // object; it could instead have been a stale reference.
2150       oop obj = static_cast<oop>(entry);
2151       assert(obj->is_oop(true /* ignore mark word */),
2152              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2153       _task->make_reference_grey(obj, hr);
2154     }
2155   }
2156 
2157 public:
2158   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2159     : _task(task), _g1h(g1h) { }
2160 
2161   virtual void do_buffer(void** buffer, size_t size) {
2162     for (size_t i = 0; i < size; ++i) {
2163       do_entry(buffer[i]);
2164     }
2165   }
2166 };
2167 
2168 class G1RemarkThreadsClosure : public ThreadClosure {
2169   CMSATBBufferClosure _cm_satb_cl;
2170   G1CMOopClosure _cm_cl;
2171   MarkingCodeBlobClosure _code_cl;
2172   int _thread_parity;
2173 
2174  public:
2175   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2176     _cm_satb_cl(task, g1h),
2177     _cm_cl(g1h, g1h->concurrent_mark(), task),
2178     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2179     _thread_parity(Threads::thread_claim_parity()) {}
2180 
2181   void do_thread(Thread* thread) {
2182     if (thread->is_Java_thread()) {
2183       if (thread->claim_oops_do(true, _thread_parity)) {
2184         JavaThread* jt = (JavaThread*)thread;
2185 
2186         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2187         // however, oops reachable from nmethods have very complex lifecycles:
2188         // * Alive if on the stack of an executing method
2189         // * Weakly reachable otherwise
2190         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2191         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2192         jt->nmethods_do(&_code_cl);
2193 
2194         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2195       }
2196     } else if (thread->is_VM_thread()) {
2197       if (thread->claim_oops_do(true, _thread_parity)) {
2198         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2199       }
2200     }
2201   }
2202 };
2203 
2204 class CMRemarkTask: public AbstractGangTask {
2205 private:
2206   ConcurrentMark* _cm;
2207 public:
2208   void work(uint worker_id) {
2209     // Since all available tasks are actually started, we should
2210     // only proceed if we're supposed to be active.
2211     if (worker_id < _cm->active_tasks()) {
2212       CMTask* task = _cm->task(worker_id);
2213       task->record_start_time();
2214       {
2215         ResourceMark rm;
2216         HandleMark hm;
2217 
2218         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2219         Threads::threads_do(&threads_f);
2220       }
2221 
2222       do {
2223         task->do_marking_step(1000000000.0 /* something very large */,
2224                               true         /* do_termination       */,
2225                               false        /* is_serial            */);
2226       } while (task->has_aborted() && !_cm->has_overflown());
2227       // If we overflow, then we do not want to restart. We instead
2228       // want to abort remark and do concurrent marking again.
2229       task->record_end_time();
2230     }
2231   }
2232 
2233   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2234     AbstractGangTask("Par Remark"), _cm(cm) {
2235     _cm->terminator()->reset_for_reuse(active_workers);
2236   }
2237 };
2238 
2239 void ConcurrentMark::checkpointRootsFinalWork() {
2240   ResourceMark rm;
2241   HandleMark   hm;
2242   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2243 
2244   GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2245 
2246   g1h->ensure_parsability(false);
2247 
2248   // this is remark, so we'll use up all active threads
2249   uint active_workers = g1h->workers()->active_workers();
2250   set_concurrency_and_phase(active_workers, false /* concurrent */);
2251   // Leave _parallel_marking_threads at its value
2252   // originally calculated in the ConcurrentMark
2253   // constructor and pass the number of active workers
2254   // through to the task via the work gang.
2255 
2256   {
2257     StrongRootsScope srs(active_workers);
2258 
2259     CMRemarkTask remarkTask(this, active_workers);
2260     // We will start all available threads, even if we decide that the
2261     // active_workers will be fewer. The extra ones will just bail out
2262     // immediately.
2263     g1h->workers()->run_task(&remarkTask);
2264   }
2265 
2266   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2267   guarantee(has_overflown() ||
2268             satb_mq_set.completed_buffers_num() == 0,
2269             "Invariant: has_overflown = %s, num buffers = %d",
2270             BOOL_TO_STR(has_overflown()),
2271             satb_mq_set.completed_buffers_num());
2272 
2273   print_stats();
2274 }
2275 
2276 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2277   // Note we are overriding the read-only view of the prev map here, via
2278   // the cast.
2279   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2280 }
2281 
2282 HeapRegion*
2283 ConcurrentMark::claim_region(uint worker_id) {
2284   // "checkpoint" the finger
2285   HeapWord* finger = _finger;
2286 
2287   // _heap_end will not change underneath our feet; it only changes at
2288   // yield points.
2289   while (finger < _heap_end) {
2290     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2291 
2292     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2293 
2294     // heap_region_containing() above may return NULL as we always scan and
2295     // claim up to the end of the heap. In this case, just jump to the next region.
2296     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2297 
2298     // Is the gap between reading the finger and doing the CAS too long?
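         // Attempt to claim the region by atomically advancing the global
         // finger from 'finger' to 'end'. If another worker moved the finger
         // first, the CAS fails; we then re-read the finger and retry.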
2299     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2300     if (res == finger && curr_region != NULL) {
2301       // we succeeded
2302       HeapWord*   bottom        = curr_region->bottom();
2303       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2304 
2305       // Notice that _finger == end cannot be guaranteed here since
2306       // someone else might have moved the finger even further.
2307       assert(_finger >= end, "the finger should have moved forward");
2308 
2309       if (limit > bottom) {
2310         return curr_region;
2311       } else {
2312         assert(limit == bottom,
2313                "the region limit should be at bottom");
2314         // we return NULL and the caller should try calling
2315         // claim_region() again.
2316         return NULL;
2317       }
2318     } else {
2319       assert(_finger > finger, "the finger should have moved forward");
2320       // read it again
2321       finger = _finger;
2322     }
2323   }
2324 
2325   return NULL;
2326 }
2327 
2328 #ifndef PRODUCT
2329 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2330 private:
2331   G1CollectedHeap* _g1h;
2332   const char* _phase;
2333   int _info;
2334 
2335 public:
2336   VerifyNoCSetOops(const char* phase, int info = -1) :
2337     _g1h(G1CollectedHeap::heap()),
2338     _phase(phase),
2339     _info(info)
2340   { }
2341 
2342   void operator()(oop obj) const {
2343     guarantee(obj->is_oop(),
2344               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2345               p2i(obj), _phase, _info);
2346     guarantee(!_g1h->obj_in_cs(obj),
2347               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2348               p2i(obj), _phase, _info);
2349   }
2350 };
2351 
2352 void ConcurrentMark::verify_no_cset_oops() {
2353   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2354   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2355     return;
2356   }
2357 
2358   // Verify entries on the global mark stack
2359   _markStack.iterate(VerifyNoCSetOops("Stack"));
2360 
2361   // Verify entries on the task queues
2362   for (uint i = 0; i < _max_worker_id; ++i) {
2363     CMTaskQueue* queue = _task_queues->queue(i);
2364     queue->iterate(VerifyNoCSetOops("Queue", i));
2365   }
2366 
2367   // Verify the global finger
2368   HeapWord* global_finger = finger();
2369   if (global_finger != NULL && global_finger < _heap_end) {
2370     // Since we always iterate over all regions, we might get a NULL HeapRegion
2371     // here.
2372     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2373     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2374               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2375               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2376   }
2377 
2378   // Verify the task fingers
2379   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2380   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2381     CMTask* task = _tasks[i];
2382     HeapWord* task_finger = task->finger();
2383     if (task_finger != NULL && task_finger < _heap_end) {
2384       // See above note on the global finger verification.
2385       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2386       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2387                 !task_hr->in_collection_set(),
2388                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2389                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2390     }
2391   }
2392 }
2393 #endif // PRODUCT
2394 
2395 // Aggregate the counting data that was constructed concurrently
2396 // with marking.
2397 class AggregateCountDataHRClosure: public HeapRegionClosure {
2398   G1CollectedHeap* _g1h;
2399   ConcurrentMark* _cm;
2400   CardTableModRefBS* _ct_bs;
2401   BitMap* _cm_card_bm;
2402   uint _max_worker_id;
2403 
2404  public:
2405   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2406                               BitMap* cm_card_bm,
2407                               uint max_worker_id) :
2408     _g1h(g1h), _cm(g1h->concurrent_mark()),
2409     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2410     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2411 
2412   bool doHeapRegion(HeapRegion* hr) {
2413     HeapWord* start = hr->bottom();
2414     HeapWord* limit = hr->next_top_at_mark_start();
2415     HeapWord* end = hr->end();
2416 
2417     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2418            "Preconditions not met - "
2419            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2420            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2421            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2422 
2423     assert(hr->next_marked_bytes() == 0, "Precondition");
2424 
2425     if (start == limit) {
2426       // NTAMS of this region has not been set so nothing to do.
2427       return false;
2428     }
2429 
2430     // 'start' should be in the heap.
2431     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2432     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2433     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2434 
2435     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2436     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2437     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2438 
2439     // If ntams is not card aligned then we bump the card bitmap index
2440     // for limit so that we get all the cards spanned by
2441     // the object ending at ntams.
2442     // Note: if this is the last region in the heap then ntams
2443     // could actually be just beyond the end of the heap;
2444     // limit_idx will then correspond to a (non-existent) card
2445     // that is also outside the heap.
2446     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2447       limit_idx += 1;
2448     }
2449 
2450     assert(limit_idx <= end_idx, "or else use atomics");
2451 
2452     // Aggregate the "stripe" in the count data associated with hr.
2453     uint hrm_index = hr->hrm_index();
2454     size_t marked_bytes = 0;
2455 
2456     for (uint i = 0; i < _max_worker_id; i += 1) {
2457       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2458       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2459 
2460       // Fetch the marked_bytes in this region for task i and
2461       // add it to the running total for this region.
2462       marked_bytes += marked_bytes_array[hrm_index];
2463 
2464       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2465       // into the global card bitmap.
2466       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2467 
2468       while (scan_idx < limit_idx) {
2469         assert(task_card_bm->at(scan_idx) == true, "should be");
2470         _cm_card_bm->set_bit(scan_idx);
2471         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2472 
2473         // BitMap::get_next_one_offset() can handle the case when
2474         // its left_offset parameter is greater than its right_offset
2475         // parameter. It does, however, have an early exit if
2476         // left_offset == right_offset. So let's limit the value
2477         // passed in for left offset here.
2478         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2479         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2480       }
2481     }
2482 
2483     // Update the marked bytes for this region.
2484     hr->add_to_marked_bytes(marked_bytes);
2485 
2486     // Next heap region
2487     return false;
2488   }
2489 };
2490 
2491 class G1AggregateCountDataTask: public AbstractGangTask {
2492 protected:
2493   G1CollectedHeap* _g1h;
2494   ConcurrentMark* _cm;
2495   BitMap* _cm_card_bm;
2496   uint _max_worker_id;
2497   uint _active_workers;
2498   HeapRegionClaimer _hrclaimer;
2499 
2500 public:
2501   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2502                            ConcurrentMark* cm,
2503                            BitMap* cm_card_bm,
2504                            uint max_worker_id,
2505                            uint n_workers) :
2506       AbstractGangTask("Count Aggregation"),
2507       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2508       _max_worker_id(max_worker_id),
2509       _active_workers(n_workers),
2510       _hrclaimer(_active_workers) {
2511   }
2512 
2513   void work(uint worker_id) {
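         // Each worker aggregates the per-worker counting data for the regions
         // it claims into the global card bitmap and into each region's
         // marked-bytes total.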
2514     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2515 
2516     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2517   }
2518 };
2519 
2520 
2521 void ConcurrentMark::aggregate_count_data() {
2522   uint n_workers = _g1h->workers()->active_workers();
2523 
2524   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2525                                            _max_worker_id, n_workers);
2526 
2527   _g1h->workers()->run_task(&g1_par_agg_task);
2528 }
2529 
2530 // Clear the per-worker arrays used to store the per-region counting data
2531 void ConcurrentMark::clear_all_count_data() {
2532   // Clear the global card bitmap - it will be filled during
2533   // liveness count aggregation (during remark) and the
2534   // final counting task.
2535   _card_bm.clear();
2536 
2537   // Clear the global region bitmap - it will be filled as part
2538   // of the final counting task.
2539   _region_bm.clear();
2540 
2541   uint max_regions = _g1h->max_regions();
2542   assert(_max_worker_id > 0, "uninitialized");
2543 
2544   for (uint i = 0; i < _max_worker_id; i += 1) {
2545     BitMap* task_card_bm = count_card_bitmap_for(i);
2546     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2547 
2548     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2549     assert(marked_bytes_array != NULL, "uninitialized");
2550 
2551     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2552     task_card_bm->clear();
2553   }
2554 }
2555 
2556 void ConcurrentMark::print_stats() {
2557   if (!log_is_enabled(Debug, gc, stats)) {
2558     return;
2559   }
2560   log_debug(gc, stats)("---------------------------------------------------------------------");
2561   for (size_t i = 0; i < _active_tasks; ++i) {
2562     _tasks[i]->print_stats();
2563     log_debug(gc, stats)("---------------------------------------------------------------------");
2564   }
2565 }
2566 
2567 // abandon current marking iteration due to a Full GC
2568 void ConcurrentMark::abort() {
2569   if (!cmThread()->during_cycle() || _has_aborted) {
2570     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2571     return;
2572   }
2573 
2574   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2575   // concurrent bitmap clearing.
2576   _nextMarkBitMap->clearAll();
2577 
2578   // Note we cannot clear the previous marking bitmap here
2579   // since VerifyDuringGC verifies the objects marked during
2580   // a full GC against the previous bitmap.
2581 
2582   // Clear the liveness counting data
2583   clear_all_count_data();
2584   // Empty mark stack
2585   reset_marking_state();
2586   for (uint i = 0; i < _max_worker_id; ++i) {
2587     _tasks[i]->clear_region_fields();
2588   }
2589   _first_overflow_barrier_sync.abort();
2590   _second_overflow_barrier_sync.abort();
2591   _has_aborted = true;
2592 
2593   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2594   satb_mq_set.abandon_partial_marking();
2595   // This can be called either during or outside marking; we'll read
2596   // the expected_active value from the SATB queue set.
2597   satb_mq_set.set_active_all_threads(
2598                                  false, /* new active value */
2599                                  satb_mq_set.is_active() /* expected_active */);
2600 
2601   _g1h->trace_heap_after_concurrent_cycle();
2602 
2603   // Close any open concurrent phase timing
2604   register_concurrent_phase_end();
2605 
2606   _g1h->register_concurrent_cycle_end();
2607 }
2608 
2609 static void print_ms_time_info(const char* prefix, const char* name,
2610                                NumberSeq& ns) {
2611   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2612                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2613   if (ns.num() > 0) {
2614     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2615                            prefix, ns.sd(), ns.maximum());
2616   }
2617 }
2618 
2619 void ConcurrentMark::print_summary_info() {
2620   LogHandle(gc, marking) log;
2621   if (!log.is_trace()) {
2622     return;
2623   }
2624 
2625   log.trace(" Concurrent marking:");
2626   print_ms_time_info("  ", "init marks", _init_times);
2627   print_ms_time_info("  ", "remarks", _remark_times);
2628   {
2629     print_ms_time_info("     ", "final marks", _remark_mark_times);
2630     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2631 
2632   }
2633   print_ms_time_info("  ", "cleanups", _cleanup_times);
2634   log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2635             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2636   if (G1ScrubRemSets) {
2637     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2638               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2639   }
2640   log.trace("  Total stop_world time = %8.2f s.",
2641             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2642   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2643             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2644 }
2645 
2646 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2647   _parallel_workers->print_worker_threads_on(st);
2648 }
2649 
2650 void ConcurrentMark::print_on_error(outputStream* st) const {
2651   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2652       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2653   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2654   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2655 }
2656 
2657 // We take a break if someone is trying to stop the world.
2658 bool ConcurrentMark::do_yield_check(uint worker_id) {
2659   if (SuspendibleThreadSet::should_yield()) {
2660     if (worker_id == 0) {
2661       _g1h->g1_policy()->record_concurrent_pause();
2662     }
2663     SuspendibleThreadSet::yield();
2664     return true;
2665   } else {
2666     return false;
2667   }
2668 }
2669 
2670 // Closure for iteration over bitmaps
2671 class CMBitMapClosure : public BitMapClosure {
2672 private:
2673   // the bitmap that is being iterated over
2674   CMBitMap*                   _nextMarkBitMap;
2675   ConcurrentMark*             _cm;
2676   CMTask*                     _task;
2677 
2678 public:
2679   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2680     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2681 
2682   bool do_bit(size_t offset) {
2683     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2684     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2685     assert(addr < _cm->finger(), "invariant");
2686     assert(addr >= _task->finger(), "invariant");
2687 
2688     // We move that task's local finger along.
2689     _task->move_finger_to(addr);
2690 
2691     _task->scan_object(oop(addr));
2692     // we only partially drain the local queue and global stack
2693     _task->drain_local_queue(true);
2694     _task->drain_global_stack(true);
2695 
2696     // if the has_aborted flag has been raised, we need to bail out of
2697     // the iteration
2698     return !_task->has_aborted();
2699   }
2700 };
2701 
2702 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2703   ReferenceProcessor* result = NULL;
2704   if (G1UseConcMarkReferenceProcessing) {
2705     result = g1h->ref_processor_cm();
2706     assert(result != NULL, "should not be NULL");
2707   }
2708   return result;
2709 }
2710 
2711 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2712                                ConcurrentMark* cm,
2713                                CMTask* task)
2714   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2715     _g1h(g1h), _cm(cm), _task(task)
2716 { }
2717 
2718 void CMTask::setup_for_region(HeapRegion* hr) {
2719   assert(hr != NULL,
2720         "claim_region() should have filtered out NULL regions");
2721   _curr_region  = hr;
2722   _finger       = hr->bottom();
2723   update_region_limit();
2724 }
2725 
2726 void CMTask::update_region_limit() {
2727   HeapRegion* hr            = _curr_region;
2728   HeapWord* bottom          = hr->bottom();
2729   HeapWord* limit           = hr->next_top_at_mark_start();
2730 
2731   if (limit == bottom) {
2732     // The region was collected underneath our feet.
2733     // We set the finger to bottom to ensure that the bitmap
2734     // iteration that will follow this will not do anything.
2735     // (this is not a condition that holds when we set the region up,
2736     // as the region is not supposed to be empty in the first place)
2737     _finger = bottom;
2738   } else if (limit >= _region_limit) {
2739     assert(limit >= _finger, "peace of mind");
2740   } else {
2741     assert(limit < _region_limit, "only way to get here");
2742     // This can happen under some pretty unusual circumstances.  An
2743     // evacuation pause empties the region underneath our feet (NTAMS
2744     // at bottom). We then do some allocation in the region (NTAMS
2745     // stays at bottom), followed by the region being used as a GC
2746     // alloc region (NTAMS will move to top() and the objects
2747     // originally below it will be grayed). All objects now marked in
2748     // the region are explicitly grayed, if below the global finger,
2749     // and in fact we do not need to scan anything else. So, we simply
2750     // set _finger to limit to ensure that the bitmap iteration
2751     // doesn't do anything.
2752     _finger = limit;
2753   }
2754 
2755   _region_limit = limit;
2756 }
2757 
2758 void CMTask::giveup_current_region() {
2759   assert(_curr_region != NULL, "invariant");
2760   clear_region_fields();
2761 }
2762 
2763 void CMTask::clear_region_fields() {
2764   // Values for these three fields that indicate that we're not
2765   // holding on to a region.
2766   _curr_region   = NULL;
2767   _finger        = NULL;
2768   _region_limit  = NULL;
2769 }
2770 
2771 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2772   if (cm_oop_closure == NULL) {
2773     assert(_cm_oop_closure != NULL, "invariant");
2774   } else {
2775     assert(_cm_oop_closure == NULL, "invariant");
2776   }
2777   _cm_oop_closure = cm_oop_closure;
2778 }
2779 
2780 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2781   guarantee(nextMarkBitMap != NULL, "invariant");
2782   _nextMarkBitMap                = nextMarkBitMap;
2783   clear_region_fields();
2784 
2785   _calls                         = 0;
2786   _elapsed_time_ms               = 0.0;
2787   _termination_time_ms           = 0.0;
2788   _termination_start_time_ms     = 0.0;
2789 }
2790 
2791 bool CMTask::should_exit_termination() {
2792   regular_clock_call();
2793   // This is called when we are in the termination protocol. We should
2794   // quit if, for some reason, this task wants to abort or the global
2795   // stack is not empty (this means that we can get work from it).
2796   return !_cm->mark_stack_empty() || has_aborted();
2797 }
2798 
2799 void CMTask::reached_limit() {
2800   assert(_words_scanned >= _words_scanned_limit ||
2801          _refs_reached >= _refs_reached_limit ,
2802          "shouldn't have been called otherwise");
2803   regular_clock_call();
2804 }
2805 
2806 void CMTask::regular_clock_call() {
2807   if (has_aborted()) return;
2808 
2809   // First, we need to recalculate the words scanned and refs reached
2810   // limits for the next clock call.
2811   recalculate_limits();
2812 
2813   // During the regular clock call we do the following
2814 
2815   // (1) If an overflow has been flagged, then we abort.
2816   if (_cm->has_overflown()) {
2817     set_has_aborted();
2818     return;
2819   }
2820 
2821   // If we are not concurrent (i.e. we're doing remark) we don't need
2822   // to check anything else. The other steps are only needed during
2823   // the concurrent marking phase.
2824   if (!concurrent()) return;
2825 
2826   // (2) If marking has been aborted for Full GC, then we also abort.
2827   if (_cm->has_aborted()) {
2828     set_has_aborted();
2829     return;
2830   }
2831 
2832   double curr_time_ms = os::elapsedVTime() * 1000.0;
2833 
2834   // (3) We check whether we should yield. If we have to, then we abort.
2835   if (SuspendibleThreadSet::should_yield()) {
2836     // We should yield. To do this we abort the task. The caller is
2837     // responsible for yielding.
2838     set_has_aborted();
2839     return;
2840   }
2841 
2842   // (4) We check whether we've reached our time quota. If we have,
2843   // then we abort.
2844   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2845   if (elapsed_time_ms > _time_target_ms) {
2846     set_has_aborted();
2847     _has_timed_out = true;
2848     return;
2849   }
2850 
2851   // (5) Finally, we check whether there are enough completed SATB
2852   // buffers available for processing. If there are, we abort.
2853   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2854   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2855     // we do need to process SATB buffers, so we'll abort and restart
2856     // the marking task to do so
2857     set_has_aborted();
2858     return;
2859   }
2860 }
2861 
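     // Reset the work-based clock triggers: once the task has scanned another
     // words_scanned_period words, or visited another refs_reached_period
     // references, reached_limit() will invoke regular_clock_call() again.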
2862 void CMTask::recalculate_limits() {
2863   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2864   _words_scanned_limit      = _real_words_scanned_limit;
2865 
2866   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2867   _refs_reached_limit       = _real_refs_reached_limit;
2868 }
2869 
2870 void CMTask::decrease_limits() {
2871   // This is called when we believe that we're going to do an infrequent
2872   // operation which will increase the per byte scanned cost (i.e. move
2873   // entries to/from the global stack). It basically tries to decrease the
2874   // scanning limit so that the clock is called earlier.
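       //
       // Worked example: if words_scanned_period is P, recalculate_limits()
       // places the limit P words ahead of the current scan count; this call
       // pulls it back by 3P/4, so only about P/4 words of scanning remain
       // before the next regular_clock_call(). The same applies to the
       // refs_reached limit.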
2875 
2876   _words_scanned_limit = _real_words_scanned_limit -
2877     3 * words_scanned_period / 4;
2878   _refs_reached_limit  = _real_refs_reached_limit -
2879     3 * refs_reached_period / 4;
2880 }
2881 
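     // Moves up to global_stack_transfer_size entries from the task-local
     // queue to the global mark stack in one batch, so that the mutex-protected
     // global stack is touched as infrequently as possible. If the push fails
     // because the global stack is full, the task is flagged as aborted so that
     // the overflow protocol can kick in.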
2882 void CMTask::move_entries_to_global_stack() {
2883   // local array where we'll store the entries that will be popped
2884   // from the local queue
2885   oop buffer[global_stack_transfer_size];
2886 
2887   int n = 0;
2888   oop obj;
2889   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2890     buffer[n] = obj;
2891     ++n;
2892   }
2893 
2894   if (n > 0) {
2895     // we popped at least one entry from the local queue
2896 
2897     if (!_cm->mark_stack_push(buffer, n)) {
2898       set_has_aborted();
2899     }
2900   }
2901 
2902   // this operation was quite expensive, so decrease the limits
2903   decrease_limits();
2904 }
2905 
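     // Moves up to global_stack_transfer_size entries from the global mark
     // stack to the task-local queue in one batch. This is only called when the
     // local queue is empty or well below its limit, so the local pushes are
     // expected to succeed.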
2906 void CMTask::get_entries_from_global_stack() {
2907   // local array where we'll store the entries that will be popped
2908   // from the global stack.
2909   oop buffer[global_stack_transfer_size];
2910   int n;
2911   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2912   assert(n <= global_stack_transfer_size,
2913          "we should not pop more than the given limit");
2914   if (n > 0) {
2915     // yes, we did actually pop at least one entry
2916     for (int i = 0; i < n; ++i) {
2917       bool success = _task_queue->push(buffer[i]);
2918       // We only call this when the local queue is empty or under a
2919       // given target limit. So, we do not expect this push to fail.
2920       assert(success, "invariant");
2921     }
2922   }
2923 
2924   // this operation was quite expensive, so decrease the limits
2925   decrease_limits();
2926 }
2927 
2928 void CMTask::drain_local_queue(bool partially) {
2929   if (has_aborted()) return;
2930 
2931   // Decide what the target size is, depending on whether we're going to
2932   // drain it partially (so that other tasks can steal if they run out
2933   // of things to do) or totally (at the very end).
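       // For a partial drain we keep at most a third of the queue's capacity,
       // capped at GCDrainStackTargetSize, so that a reasonable number of
       // entries remain available for stealing.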
2934   size_t target_size;
2935   if (partially) {
2936     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2937   } else {
2938     target_size = 0;
2939   }
2940 
2941   if (_task_queue->size() > target_size) {
2942     oop obj;
2943     bool ret = _task_queue->pop_local(obj);
2944     while (ret) {
2945       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
2946       assert(!_g1h->is_on_master_free_list(
2947                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2948 
2949       scan_object(obj);
2950 
2951       if (_task_queue->size() <= target_size || has_aborted()) {
2952         ret = false;
2953       } else {
2954         ret = _task_queue->pop_local(obj);
2955       }
2956     }
2957   }
2958 }
2959 
2960 void CMTask::drain_global_stack(bool partially) {
2961   if (has_aborted()) return;
2962 
2963   // We have a policy to drain the local queue before we attempt to
2964   // drain the global stack.
2965   assert(partially || _task_queue->size() == 0, "invariant");
2966 
2967   // Decide what the target size is, depending on whether we're going to
2968   // drain it partially (so that other tasks can steal if they run out
2969   // of things to do) or totally (at the very end).  Notice that,
2970   // because we move entries from the global stack in chunks or
2971   // because another task might be doing the same, we might in fact
2972   // drop below the target. But, this is not a problem.
2973   size_t target_size;
2974   if (partially) {
2975     target_size = _cm->partial_mark_stack_size_target();
2976   } else {
2977     target_size = 0;
2978   }
2979 
2980   if (_cm->mark_stack_size() > target_size) {
2981     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2982       get_entries_from_global_stack();
2983       drain_local_queue(partially);
2984     }
2985   }
2986 }
2987 
2988 // The SATB queue code makes several assumptions about whether to call the
2989 // par or non-par versions of its methods. This is why some of the code is
2990 // replicated. We should really get rid of the single-threaded version
2991 // of the code to simplify things.
2992 void CMTask::drain_satb_buffers() {
2993   if (has_aborted()) return;
2994 
2995   // We set this so that the regular clock knows that we're in the
2996   // middle of draining buffers and doesn't set the abort flag when it
2997   // notices that SATB buffers are available for draining. It'd be
2998   // very counterproductive if it did that. :-)
2999   _draining_satb_buffers = true;
3000 
3001   CMSATBBufferClosure satb_cl(this, _g1h);
3002   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3003 
3004   // This keeps claiming and applying the closure to completed buffers
3005   // until we run out of buffers or we need to abort.
3006   while (!has_aborted() &&
3007          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3008     regular_clock_call();
3009   }
3010 
3011   _draining_satb_buffers = false;
3012 
3013   assert(has_aborted() ||
3014          concurrent() ||
3015          satb_mq_set.completed_buffers_num() == 0, "invariant");
3016 
3017   // again, this was a potentially expensive operation, so decrease the
3018   // limits to get the regular clock call early
3019   decrease_limits();
3020 }
3021 
3022 void CMTask::print_stats() {
3023   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
3024                        _worker_id, _calls);
3025   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3026                        _elapsed_time_ms, _termination_time_ms);
3027   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3028                        _step_times_ms.num(), _step_times_ms.avg(),
3029                        _step_times_ms.sd());
3030   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
3031                        _step_times_ms.maximum(), _step_times_ms.sum());
3032 }
3033 
3034 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3035   return _task_queues->steal(worker_id, hash_seed, obj);
3036 }
3037 
3038 /*****************************************************************************
3039 
3040     The do_marking_step(time_target_ms, ...) method is the building
3041     block of the parallel marking framework. It can be called in parallel
3042     with other invocations of do_marking_step() on different tasks
3043     (but only one per task, obviously) and concurrently with the
3044     mutator threads, or during remark, hence it eliminates the need
3045     for two versions of the code. When called during remark, it will
3046     pick up from where the task left off during the concurrent marking
3047     phase. Interestingly, tasks are also claimable during evacuation
3048     pauses, since do_marking_step() ensures that it aborts before
3049     it needs to yield.
3050 
3051     The data structures that it uses to do marking work are the
3052     following:
3053 
3054       (1) Marking Bitmap. If there are gray objects that appear only
3055       on the bitmap (this happens either when dealing with an overflow
3056       or when the initial marking phase has simply marked the roots
3057       and didn't push them on the stack), then tasks claim heap
3058       regions whose bitmap they then scan to find gray objects. A
3059       global finger indicates where the end of the last claimed region
3060       is. A local finger indicates how far into the region a task has
3061       scanned. The two fingers are used to determine how to gray an
3062       object (i.e. whether simply marking it is OK, as it will be
3063       visited by a task in the future, or whether it needs to be also
3064       pushed on a stack).
3065 
3066       (2) Local Queue. The task's own local queue, which the task can
3067       access reasonably efficiently. Other tasks can steal from
3068       it when they run out of work. Throughout the marking phase, a
3069       task attempts to keep its local queue short but not totally
3070       empty, so that entries are available for stealing by other
3071       tasks. Only when there is no more work will a task totally
3072       drain its local queue.
3073 
3074       (3) Global Mark Stack. This handles local queue overflow. During
3075       marking, only batches of entries are moved between it and the
3076       local queues, as access to it requires a mutex and finer-grained
3077       interaction with it might cause contention. If it
3078       overflows, then the marking phase should restart and iterate
3079       over the bitmap to identify gray objects. Throughout the marking
3080       phase, tasks attempt to keep the global mark stack short but
3081       not totally empty, so that entries are available for
3082       popping by other tasks. Only when there is no more work will
3083       tasks totally drain the global mark stack.
3084 
3085       (4) SATB Buffer Queue. This is where completed SATB buffers are
3086       made available. Buffers are regularly removed from this queue
3087       and scanned for roots, so that the queue doesn't get too
3088       long. During remark, all completed buffers are processed, as
3089       well as the filled in parts of any uncompleted buffers.
3090 
3091     The do_marking_step() method tries to abort when the time target
3092     has been reached. There are a few other cases when the
3093     do_marking_step() method also aborts:
3094 
3095       (1) When the marking phase has been aborted (after a Full GC).
3096 
3097       (2) When a global overflow (on the global stack) has been
3098       triggered. Before the task aborts, it will actually sync up with
3099       the other tasks to ensure that all the marking data structures
3100       (local queues, stacks, fingers etc.)  are re-initialized so that
3101       when do_marking_step() completes, the marking phase can
3102       immediately restart.
3103 
3104       (3) When enough completed SATB buffers are available. The
3105       do_marking_step() method only tries to drain SATB buffers right
3106       at the beginning. So, if enough buffers are available, the
3107       marking step aborts and the SATB buffers are processed at
3108       the beginning of the next invocation.
3109 
3110       (4) To yield. When we have to yield, we abort and do the yield
3111       right at the end of do_marking_step(). This saves us from a lot
3112       of hassle as, by yielding, we might allow a Full GC. If this
3113       happens then objects will be compacted underneath our feet, the
3114       heap might shrink, etc. We avoid checking for all this by just
3115       aborting and doing the yield right at the end.
3116 
3117     From the above it follows that the do_marking_step() method should
3118     be called in a loop (or, otherwise, regularly) until it completes.
3119 
3120     If a marking step completes without its has_aborted() flag being
3121     true, it means it has completed the current marking phase (and
3122     also all other marking tasks have done so and have all synced up).
3123 
3124     A method called regular_clock_call() is invoked "regularly" (in
3125     sub ms intervals) throughout marking. It is this clock method that
3126     checks all the abort conditions which were mentioned above and
3127     decides when the task should abort. A work-based scheme is used to
3128     trigger this clock method: when the number of object words the
3129     marking phase has scanned or the number of references the marking
3130     phase has visited reaches a given limit. Additional invocations of
3131     the clock method have been planted in a few other strategic places
3132     too. The initial reason for the clock method was to avoid calling
3133     vtime too often, as it is quite expensive. So, once it was in
3134     place, it was natural to piggy-back all the other conditions on it
3135     too and not constantly check them throughout the code.
3136 
3137     If do_termination is true then do_marking_step will enter its
3138     termination protocol.
3139 
3140     The value of is_serial must be true when do_marking_step is being
3141     called serially (i.e. by the VMThread) and do_marking_step should
3142     skip any synchronization in the termination and overflow code.
3143     Examples include the serial remark code and the serial reference
3144     processing closures.
3145 
3146     The value of is_serial must be false when do_marking_step is
3147     being called by any of the worker threads in a work gang.
3148     Examples include the concurrent marking code (CMMarkingTask),
3149     the MT remark code, and the MT reference processing closures.
3150 
3151  *****************************************************************************/
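
     // Illustrative sketch (not part of the implementation): the shape of the
     // driver loop described above. The function and parameter names below are
     // made up for illustration; the real drivers are the concurrent marking
     // and remark worker tasks, which additionally check whether the whole
     // marking cycle has been aborted before retrying.
     //
     //   void example_driver(CMTask* task, double target_ms) {
     //     do {
     //       task->do_marking_step(target_ms,
     //                             true  /* do_termination */,
     //                             false /* is_serial */);
     //       // If the step aborted (overflow, time-out, yield request,
     //       // pending SATB buffers, ...), yield / clean up as needed and
     //       // then call do_marking_step() again.
     //     } while (task->has_aborted());
     //   }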
3152 
3153 void CMTask::do_marking_step(double time_target_ms,
3154                              bool do_termination,
3155                              bool is_serial) {
3156   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3157   assert(concurrent() == _cm->concurrent(), "they should be the same");
3158 
3159   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3160   assert(_task_queues != NULL, "invariant");
3161   assert(_task_queue != NULL, "invariant");
3162   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3163 
3164   assert(!_claimed,
3165          "only one thread should claim this task at any one time");
3166 
3167   // OK, this doesn't safeguard against all possible scenarios, as it is
3168   // possible for two threads to set the _claimed flag at the same
3169   // time. But it is only for debugging purposes anyway and it will
3170   // catch most problems.
3171   _claimed = true;
3172 
3173   _start_time_ms = os::elapsedVTime() * 1000.0;
3174 
3175   // If do_stealing is true then do_marking_step will attempt to
3176   // steal work from the other CMTasks. It only makes sense to
3177   // enable stealing when the termination protocol is enabled
3178   // and do_marking_step() is not being called serially.
3179   bool do_stealing = do_termination && !is_serial;
3180 
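       // Trim the time target by the predicted overshoot of previous marking
       // steps (the difference between actual and target durations, recorded at
       // the end of this method when a step times out), so that on average the
       // step finishes close to the requested target.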
3181   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3182   _time_target_ms = time_target_ms - diff_prediction_ms;
3183 
3184   // set up the variables that are used in the work-based scheme to
3185   // call the regular clock method
3186   _words_scanned = 0;
3187   _refs_reached  = 0;
3188   recalculate_limits();
3189 
3190   // clear all flags
3191   clear_has_aborted();
3192   _has_timed_out = false;
3193   _draining_satb_buffers = false;
3194 
3195   ++_calls;
3196 
3197   // Set up the bitmap and oop closures. Anything that uses them is
3198   // eventually called from this method, so it is OK to allocate these
3199   // statically.
3200   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3201   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3202   set_cm_oop_closure(&cm_oop_closure);
3203 
3204   if (_cm->has_overflown()) {
3205     // This can happen if the mark stack overflows during a GC pause
3206     // and this task, after a yield point, restarts. We have to abort
3207     // as we need to get into the overflow protocol which happens
3208     // right at the end of this task.
3209     set_has_aborted();
3210   }
3211 
3212   // First drain any available SATB buffers. After this, we will not
3213   // look at SATB buffers before the next invocation of this method.
3214   // If enough completed SATB buffers are queued up, the regular clock
3215   // will abort this task so that it restarts.
3216   drain_satb_buffers();
3217   // ...then partially drain the local queue and the global stack
3218   drain_local_queue(true);
3219   drain_global_stack(true);
3220 
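       // Main marking loop: repeatedly claim a region, scan the part of its
       // bitmap between the local finger and the region limit (draining the
       // local queue and global stack as we go), until we either run out of
       // regions or abort.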
3221   do {
3222     if (!has_aborted() && _curr_region != NULL) {
3223       // This means that we're already holding on to a region.
3224       assert(_finger != NULL, "if region is not NULL, then the finger "
3225              "should not be NULL either");
3226 
3227       // We might have restarted this task after an evacuation pause
3228       // which might have evacuated the region we're holding on to
3229       // underneath our feet. Let's read its limit again to make sure
3230       // that we do not iterate over a region of the heap that
3231       // contains garbage (update_region_limit() will also move
3232       // _finger to the start of the region if it is found empty).
3233       update_region_limit();
3234       // We will start from _finger not from the start of the region,
3235       // as we might be restarting this task after aborting half-way
3236       // through scanning this region. In this case, _finger points to
3237       // the address where we last found a marked object. If this is a
3238       // fresh region, _finger points to the region's bottom().
3239       MemRegion mr = MemRegion(_finger, _region_limit);
3240 
3241       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3242              "humongous regions should go around loop once only");
3243 
3244       // Some special cases:
3245       // If the memory region is empty, we can just give up the region.
3246       // If the current region is humongous then we only need to check
3247       // the bitmap for the bit associated with the start of the object,
3248       // scan the object if it's live, and give up the region.
3249       // Otherwise, let's iterate over the bitmap of the part of the region
3250       // that is left.
3251       // If the iteration is successful, give up the region.
3252       if (mr.is_empty()) {
3253         giveup_current_region();
3254         regular_clock_call();
3255       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3256         if (_nextMarkBitMap->isMarked(mr.start())) {
3257           // The object is marked - apply the closure
3258           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3259           bitmap_closure.do_bit(offset);
3260         }
3261         // Even if this task aborted while scanning the humongous object
3262         // we can (and should) give up the current region.
3263         giveup_current_region();
3264         regular_clock_call();
3265       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3266         giveup_current_region();
3267         regular_clock_call();
3268       } else {
3269         assert(has_aborted(), "currently the only way to do so");
3270         // The only way to abort the bitmap iteration is to return
3271         // false from the do_bit() method. However, inside the
3272         // do_bit() method we move the _finger to point to the
3273         // object currently being looked at. So, if we bail out, we
3274         // have definitely set _finger to something non-null.
3275         assert(_finger != NULL, "invariant");
3276 
3277         // Region iteration was actually aborted. So now _finger
3278         // points to the address of the object we last scanned. If we
3279         // leave it there, when we restart this task, we will rescan
3280         // the object. It is easy to avoid this. We move the finger by
3281         // enough to point to the next possible object header (the
3282         // bitmap knows by how much we need to move it as it knows its
3283         // granularity).
3284         assert(_finger < _region_limit, "invariant");
3285         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3286         // Check if bitmap iteration was aborted while scanning the last object
3287         if (new_finger >= _region_limit) {
3288           giveup_current_region();
3289         } else {
3290           move_finger_to(new_finger);
3291         }
3292       }
3293     }
3294     // At this point we have either completed iterating over the
3295     // region we were holding on to, or we have aborted.
3296 
3297     // We then partially drain the local queue and the global stack.
3298     // (Do we really need this?)
3299     drain_local_queue(true);
3300     drain_global_stack(true);
3301 
3302     // Read the note on the claim_region() method on why it might
3303     // return NULL with potentially more regions available for
3304     // claiming and why we have to check out_of_regions() to determine
3305     // whether we're done or not.
3306     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3307       // We are going to try to claim a new region. We should have
3308       // given up on the previous one.
3309       // Separated the asserts so that we know which one fires.
3310       assert(_curr_region  == NULL, "invariant");
3311       assert(_finger       == NULL, "invariant");
3312       assert(_region_limit == NULL, "invariant");
3313       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3314       if (claimed_region != NULL) {
3315         // Yes, we managed to claim one
3316         setup_for_region(claimed_region);
3317         assert(_curr_region == claimed_region, "invariant");
3318       }
3319       // It is important to call the regular clock here. It might take
3320       // a while to claim a region if, for example, we hit a large
3321       // block of empty regions. So we need to call the regular clock
3322       // method once round the loop to make sure it's called
3323       // frequently enough.
3324       regular_clock_call();
3325     }
3326 
3327     if (!has_aborted() && _curr_region == NULL) {
3328       assert(_cm->out_of_regions(),
3329              "at this point we should be out of regions");
3330     }
3331   } while ( _curr_region != NULL && !has_aborted());
3332 
3333   if (!has_aborted()) {
3334     // We cannot check whether the global stack is empty, since other
3335     // tasks might be pushing objects to it concurrently.
3336     assert(_cm->out_of_regions(),
3337            "at this point we should be out of regions");
3338     // Try to reduce the number of available SATB buffers so that
3339     // remark has less work to do.
3340     drain_satb_buffers();
3341   }
3342 
3343   // Since we've done everything else, we can now totally drain the
3344   // local queue and global stack.
3345   drain_local_queue(false);
3346   drain_global_stack(false);
3347 
3348   // Attempt to steal work from other tasks' queues.
3349   if (do_stealing && !has_aborted()) {
3350     // We have not aborted. This means that we have finished all that
3351     // we could. Let's try to do some stealing...
3352 
3353     // We cannot check whether the global stack is empty, since other
3354     // tasks might be pushing objects to it concurrently.
3355     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3356            "only way to reach here");
3357     while (!has_aborted()) {
3358       oop obj;
3359       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3360         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3361                "any stolen object should be marked");
3362         scan_object(obj);
3363 
3364         // And since we're towards the end, let's totally drain the
3365         // local queue and global stack.
3366         drain_local_queue(false);
3367         drain_global_stack(false);
3368       } else {
3369         break;
3370       }
3371     }
3372   }
3373 
3374   // If we still haven't aborted, let's try to get into the
3375   // termination protocol.
3376   if (do_termination && !has_aborted()) {
3377     // We cannot check whether the global stack is empty, since other
3378     // tasks might be concurrently pushing objects on it.
3379     // Separated the asserts so that we know which one fires.
3380     assert(_cm->out_of_regions(), "only way to reach here");
3381     assert(_task_queue->size() == 0, "only way to reach here");
3382     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3383 
3384     // The CMTask class also extends the TerminatorTerminator class,
3385     // hence its should_exit_termination() method will also decide
3386     // whether to exit the termination protocol or not.
3387     bool finished = (is_serial ||
3388                      _cm->terminator()->offer_termination(this));
3389     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3390     _termination_time_ms +=
3391       termination_end_time_ms - _termination_start_time_ms;
3392 
3393     if (finished) {
3394       // We're all done.
3395 
3396       if (_worker_id == 0) {
3397         // let's allow task 0 to do this
3398         if (concurrent()) {
3399           assert(_cm->concurrent_marking_in_progress(), "invariant");
3400           // we need to set this to false before the next
3401           // safepoint. This way we ensure that the marking phase
3402           // doesn't observe any more heap expansions.
3403           _cm->clear_concurrent_marking_in_progress();
3404         }
3405       }
3406 
3407       // We can now guarantee that the global stack is empty, since
3408       // all other tasks have finished. We separated the guarantees so
3409       // that, if a condition is false, we can immediately find out
3410       // which one.
3411       guarantee(_cm->out_of_regions(), "only way to reach here");
3412       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3413       guarantee(_task_queue->size() == 0, "only way to reach here");
3414       guarantee(!_cm->has_overflown(), "only way to reach here");
3415       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3416     } else {
3417       // Apparently there's more work to do. Let's abort this task. The
3418       // caller will restart it and we can hopefully find more things to do.
3419       set_has_aborted();
3420     }
3421   }
3422 
3423   // Mainly for debugging purposes to make sure that a pointer to the
3424   // closure which was statically allocated in this frame doesn't
3425   // escape it by accident.
3426   set_cm_oop_closure(NULL);
3427   double end_time_ms = os::elapsedVTime() * 1000.0;
3428   double elapsed_time_ms = end_time_ms - _start_time_ms;
3429   // Update the step history.
3430   _step_times_ms.add(elapsed_time_ms);
3431 
3432   if (has_aborted()) {
3433     // The task was aborted for some reason.
3434     if (_has_timed_out) {
3435       double diff_ms = elapsed_time_ms - _time_target_ms;
3436       // Keep statistics of how well we did with respect to hitting
3437       // our target only if we actually timed out (if we aborted for
3438       // other reasons, then the results might get skewed).
3439       _marking_step_diffs_ms.add(diff_ms);
3440     }
3441 
3442     if (_cm->has_overflown()) {
3443       // This is the interesting one. We aborted because a global
3444       // overflow was raised. This means we have to restart the
3445       // marking phase and start iterating over regions. However, in
3446       // order to do this we have to make sure that all tasks stop
3447       // what they are doing and re-initialize in a safe manner. We
3448       // will achieve this with the use of two barrier sync points.
3449 
3450       if (!is_serial) {
3451         // We only need to enter the sync barrier if being called
3452         // from a parallel context
3453         _cm->enter_first_sync_barrier(_worker_id);
3454 
3455         // When we exit this sync barrier we know that all tasks have
3456         // stopped doing marking work. So, it's now safe to
3457         // re-initialize our data structures. At the end of this method,
3458         // task 0 will clear the global data structures.
3459       }
3460 
3461       // We clear the local state of this task...
3462       clear_region_fields();
3463 
3464       if (!is_serial) {
3465         // ...and enter the second barrier.
3466         _cm->enter_second_sync_barrier(_worker_id);
3467       }
3468       // At this point, if we're during the concurrent phase of
3469       // marking, everything has been re-initialized and we're
3470       // ready to restart.
3471     }
3472   }
3473 
3474   _claimed = false;
3475 }
3476 
3477 CMTask::CMTask(uint worker_id,
3478                ConcurrentMark* cm,
3479                size_t* marked_bytes,
3480                BitMap* card_bm,
3481                CMTaskQueue* task_queue,
3482                CMTaskQueueSet* task_queues)
3483   : _g1h(G1CollectedHeap::heap()),
3484     _worker_id(worker_id), _cm(cm),
3485     _claimed(false),
3486     _nextMarkBitMap(NULL), _hash_seed(17),
3487     _task_queue(task_queue),
3488     _task_queues(task_queues),
3489     _cm_oop_closure(NULL),
3490     _marked_bytes_array(marked_bytes),
3491     _card_bm(card_bm) {
3492   guarantee(task_queue != NULL, "invariant");
3493   guarantee(task_queues != NULL, "invariant");
3494 
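       // Seed the step duration difference predictor (used by do_marking_step()
       // to trim its time target), presumably so that the very first prediction
       // is not based on an empty sample sequence.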
3495   _marking_step_diffs_ms.add(0.5);
3496 }
3497 
3498 // These are formatting macros that are used below to ensure
3499 // consistent formatting. The *_H_* versions are used to format the
3500 // header for a particular value and they should be kept consistent
3501 // with the corresponding macro. Also note that most of the macros add
3502 // the necessary white space (as a prefix) which makes them a bit
3503 // easier to compose.
3504 
3505 // All the output lines are prefixed with this string to be able to
3506 // identify them easily in a large log file.
3507 #define G1PPRL_LINE_PREFIX            "###"
3508 
3509 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3510 #ifdef _LP64
3511 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3512 #else // _LP64
3513 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3514 #endif // _LP64
3515 
3516 // For per-region info
3517 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3518 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3519 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3520 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3521 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3522 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3523 
3524 // For summary info
3525 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3526 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3527 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3528 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3529 
3530 G1PrintRegionLivenessInfoClosure::
3531 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3532   : _total_used_bytes(0), _total_capacity_bytes(0),
3533     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3534     _hum_used_bytes(0), _hum_capacity_bytes(0),
3535     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3536     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3537   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3538   MemRegion g1_reserved = g1h->g1_reserved();
3539   double now = os::elapsedTime();
3540 
3541   // Print the header of the output.
3542   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3543   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3544                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3545                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3546                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3547                           HeapRegion::GrainBytes);
3548   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3549   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3550                           G1PPRL_TYPE_H_FORMAT
3551                           G1PPRL_ADDR_BASE_H_FORMAT
3552                           G1PPRL_BYTE_H_FORMAT
3553                           G1PPRL_BYTE_H_FORMAT
3554                           G1PPRL_BYTE_H_FORMAT
3555                           G1PPRL_DOUBLE_H_FORMAT
3556                           G1PPRL_BYTE_H_FORMAT
3557                           G1PPRL_BYTE_H_FORMAT,
3558                           "type", "address-range",
3559                           "used", "prev-live", "next-live", "gc-eff",
3560                           "remset", "code-roots");
3561   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3562                           G1PPRL_TYPE_H_FORMAT
3563                           G1PPRL_ADDR_BASE_H_FORMAT
3564                           G1PPRL_BYTE_H_FORMAT
3565                           G1PPRL_BYTE_H_FORMAT
3566                           G1PPRL_BYTE_H_FORMAT
3567                           G1PPRL_DOUBLE_H_FORMAT
3568                           G1PPRL_BYTE_H_FORMAT
3569                           G1PPRL_BYTE_H_FORMAT,
3570                           "", "",
3571                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3572                           "(bytes)", "(bytes)");
3573 }
3574 
3575 // It takes as a parameter a pointer to one of the _hum_* fields, deduces
3576 // the corresponding value for a region in a humongous region
3577 // series (either the region size, or what's left if the _hum_* field
3578 // is < the region size), and updates the _hum_* field accordingly.
3579 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3580   size_t bytes = 0;
3581   // The > 0 check is to deal with the prev and next live bytes which
3582   // could be 0.
3583   if (*hum_bytes > 0) {
3584     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3585     *hum_bytes -= bytes;
3586   }
3587   return bytes;
3588 }
3589 
3590 // It deduces the values for a region in a humongous region series
3591 // from the _hum_* fields and updates those accordingly. It assumes
3592 // that the _hum_* fields have already been set up from the "starts
3593 // humongous" region and we visit the regions in address order.
3594 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3595                                                      size_t* capacity_bytes,
3596                                                      size_t* prev_live_bytes,
3597                                                      size_t* next_live_bytes) {
3598   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3599   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3600   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3601   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3602   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3603 }
3604 
3605 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3606   const char* type       = r->get_type_str();
3607   HeapWord* bottom       = r->bottom();
3608   HeapWord* end          = r->end();
3609   size_t capacity_bytes  = r->capacity();
3610   size_t used_bytes      = r->used();
3611   size_t prev_live_bytes = r->live_bytes();
3612   size_t next_live_bytes = r->next_live_bytes();
3613   double gc_eff          = r->gc_efficiency();
3614   size_t remset_bytes    = r->rem_set()->mem_size();
3615   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3616 
3617   if (r->is_starts_humongous()) {
3618     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3619            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3620            "they should have been zeroed after the last time we used them");
3621     // Set up the _hum_* fields.
3622     _hum_capacity_bytes  = capacity_bytes;
3623     _hum_used_bytes      = used_bytes;
3624     _hum_prev_live_bytes = prev_live_bytes;
3625     _hum_next_live_bytes = next_live_bytes;
3626     get_hum_bytes(&used_bytes, &capacity_bytes,
3627                   &prev_live_bytes, &next_live_bytes);
3628     end = bottom + HeapRegion::GrainWords;
3629   } else if (r->is_continues_humongous()) {
3630     get_hum_bytes(&used_bytes, &capacity_bytes,
3631                   &prev_live_bytes, &next_live_bytes);
3632     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3633   }
3634 
3635   _total_used_bytes      += used_bytes;
3636   _total_capacity_bytes  += capacity_bytes;
3637   _total_prev_live_bytes += prev_live_bytes;
3638   _total_next_live_bytes += next_live_bytes;
3639   _total_remset_bytes    += remset_bytes;
3640   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3641 
3642   // Print a line for this particular region.
3643   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3644                           G1PPRL_TYPE_FORMAT
3645                           G1PPRL_ADDR_BASE_FORMAT
3646                           G1PPRL_BYTE_FORMAT
3647                           G1PPRL_BYTE_FORMAT
3648                           G1PPRL_BYTE_FORMAT
3649                           G1PPRL_DOUBLE_FORMAT
3650                           G1PPRL_BYTE_FORMAT
3651                           G1PPRL_BYTE_FORMAT,
3652                           type, p2i(bottom), p2i(end),
3653                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3654                           remset_bytes, strong_code_roots_bytes);
3655 
3656   return false;
3657 }
3658 
3659 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3660   // add static memory usages to remembered set sizes
3661   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3662   // Print the footer of the output.
3663   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3664   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3665                          " SUMMARY"
3666                          G1PPRL_SUM_MB_FORMAT("capacity")
3667                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3668                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3669                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3670                          G1PPRL_SUM_MB_FORMAT("remset")
3671                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3672                          bytes_to_mb(_total_capacity_bytes),
3673                          bytes_to_mb(_total_used_bytes),
3674                          perc(_total_used_bytes, _total_capacity_bytes),
3675                          bytes_to_mb(_total_prev_live_bytes),
3676                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3677                          bytes_to_mb(_total_next_live_bytes),
3678                          perc(_total_next_live_bytes, _total_capacity_bytes),
3679                          bytes_to_mb(_total_remset_bytes),
3680                          bytes_to_mb(_total_strong_code_roots_bytes));
3681 }