/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}
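// Addressing (informal): the bit map keeps one bit per 2^_shifter heap
// words, so bit i covers the heap word at _bmStartWord + (i << _shifter),
// heapWordToOffset(addr) is (addr - _bmStartWord) >> _shifter, and
// offsetToHeapWord() is its inverse. For example, with _shifter == 3 a
// bit map over a 1M-word heap needs 1M >> 3 == 128K bits.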
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size()>>LogHeapWordSize;
}
#endif
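// Typical use (sketch, mirroring CalcLiveObjectsClosure below): walk the
// marked objects in [bottom, ntams) by alternating getNextMarkedWordAddress()
// with the object's size:
//
//   HeapWord* addr = bm->getNextMarkedWordAddress(bottom, ntams);
//   while (addr < ntams) {
//     oop obj = oop(addr);
//     // ... visit obj ...
//     addr = bm->getNextMarkedWordAddress(addr + obj->size(), ntams);
//   }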
bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  // Clip the range to the portion the bit map covers; the original code
  // discarded the result of intersection(), which is a no-op.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}
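// Sizing note (illustration only): the stack stores `capacity` oops, so a
// capacity of 128K entries reserves 128K * sizeof(oop) == 1M of backing
// store on a 64-bit VM. expand() below doubles the capacity, bounded by
// MarkStackSizeMax, each time remark overflows the stack.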
void CMMarkStack::expand() {
  // Called during remark if we've overflowed the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
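// The two par_push variants above share the same lock-free protocol
// (simplified sketch, illustration only): claim a slot range by CAS-ing
// _index forward, then fill the claimed slots:
//
//   jint index, next_index;
//   do {
//     index = _index;                                  // snapshot
//     next_index = index + n;                          // range we want
//     if (next_index > _capacity) { _overflow = true; return; }
//   } while (Atomic::cmpxchg(next_index, &_index, index) != index);
//   // slots [index, next_index) are now ours to write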
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}
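// Intended use (sketch; see CMRootRegionScanTask below): each marking
// worker repeatedly claims the next unclaimed root region until
// claim_next() returns NULL:
//
//   HeapRegion* hr = root_regions->claim_next();
//   while (hr != NULL) {
//     scan(hr);                    // hypothetical per-region work
//     hr = root_regions->claim_next();
//   }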
HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
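// For example (illustration only): with ParallelGCThreads == 8 this
// yields (8 + 2) / 4 == 2 concurrent marking threads, and with 13 it
// yields 3; the MAX2 keeps the result at least 1.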
ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(MinObjAlignment - 1),
  _markBitMap2(MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
            "than ParallelGCThreads (" UINT32_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }
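  // Worked example of the G1MarkingOverheadPercent path above
  // (illustrative numbers, not defaults): with MaxGCPauseMillis=200,
  // GCPauseIntervalMillis=1000, G1MarkingOverheadPercent=10 and 8
  // processors, overall_cm_overhead = 200 * 0.10 / 1000 = 0.02 of one
  // CPU per second; cpu_ratio = 1/8, so marking_thread_num =
  // ceil(0.02 / 0.125) = 1 thread, marking_task_overhead = 0.02 * 8 =
  // 0.16, and sleep_factor = (1 - 0.16) / 0.16 = 5.25, i.e. the marking
  // thread sleeps 5.25 ms for every 1 ms it runs.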
  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}
void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions;
  // these will come at the end of evacuation pauses, when the tasks
  // are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the number of remaining forced overflows decreases
  // at every remark and we eventually stop forcing them.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all threads to
  // have their SATB queues' active flag set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
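// Aside on SATB (informal sketch, not code from this file): once the
// queues are activated above, every reference store in the mutator
// logs the value about to be overwritten, conceptually:
//
//   if (satb_queue_active) {
//     satb_enqueue(*field);   // remember the old ("snapshot") value
//   }
//   *field = new_value;
//
// which is what lets marking complete against the snapshot taken at
// the beginning of the cycle.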
/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */
void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // let the task associated with worker 0 do this
  if (worker_id == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    reset_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT
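// Testing note (informal): ForceOverflowSettings exists purely so the
// overflow/restart machinery can be exercised in non-product builds.
// For example, running with -XX:G1ConcMarkForceOverflow=2 should make
// should_force() report an artificial overflow the first two times the
// mechanism is consulted, driving the reset-for-overflow path above
// before marking is allowed to complete normally.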
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_stealing    */,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};
void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}
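// Driver-side view (informal sketch of the ConcurrentMarkThread loop,
// for orientation only): if remark overflows the global mark stack,
// checkpointRootsFinal() below sets _restart_for_overflow and the
// concurrent mark thread simply marks again:
//
//   do {
//     cm->markFromRoots();
//     // ... remark safepoint -> cm->checkpointRootsFinal(...) ...
//   } while (cm->restart_for_overflow());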
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all threads to
    // have their SATB queues' active flag set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};
// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};
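// Card-range arithmetic above, worked through (illustration only,
// assuming the usual 512-byte cards, i.e. card_shift == 9): an object
// occupying bytes [1024, 1724) from the heap bottom has
// start_idx == 1024 >> 9 == 2 and card_bitmap_index_for(obj_end) ==
// 1724 >> 9 == 3; since 1724 is not card aligned, end_idx becomes 4
// and the half-open range [2, 4) marks exactly the two cards (2 and 3)
// the object touches.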
// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};
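// Note on the verification above (informal): the comparison is
// deliberately one-sided. A bit set in the expected (recomputed)
// bitmaps but clear in the actual ones is an error, while extra bits
// in the actual data are tolerated, since the concurrently accumulated
// data may legitimately cover more than a recomputation based solely
// on the next mark bitmap.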
class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
              "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};
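// Note (informal): heap_region_par_iterate_chunked() hands out chunks
// of regions to the workers, tagging each visited region with the given
// claim value so that a region is processed exactly once per pass;
// distinct claim values (VerifyCountClaimValue here, FinalCountClaimValue
// below) keep the verification and final-count passes from interfering.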
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have its bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
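      // In that case no adjustment should be needed: the reserved heap
      // ends on a card boundary, so the is_in_g1_reserved() check below
      // simply skips the alignment test for an address outside the heap.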
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     end_idx, _card_bm->size()));
      assert(start_idx < _card_bm->size(),
             err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     start_idx, _card_bm->size()));

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  uint _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  OldRegionSet* _old_proxy_set;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task) :
    _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _old_proxy_set(old_proxy_set),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *hr) {
    if (hr->continuesHumongous()) {
      return false;
    }
    // We use a claim value of zero here because all regions
    // were claimed with value 1 in the FinalCount task.
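    // Reset the region's GC time stamp, record its end-of-marking
    // liveness information, and free the region immediately (via the
    // local cleanup list) if it turned out to be completely empty.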
    _g1->reset_gc_time_stamps(hr);
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
    _g1->free_region_if_empty(hr,
                              &_freed_bytes,
                              _local_cleanup_list,
                              _old_proxy_set,
                              _humongous_proxy_set,
                              _hrrs_cleanup_task,
                              true /* par */);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) {
      _max_region_time = region_time;
    }
    return false;
  }

  size_t max_live_bytes() { return _max_live_bytes; }
  uint regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(uint worker_id) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
                                           &old_proxy_set,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
                                            _g1h->workers()->active_workers(),
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing, we cannot guarantee that we only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.
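      // The regions on the local list are exactly the ones this worker
      // reclaimed during this cleanup, so each region is printed once.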

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm) { }

  void work(uint worker_id) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  HRSPhaseSetter x(HRSPhaseCleanup);
  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  uint n_workers;

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    g1h->set_par_threads();
    n_workers = g1h->n_par_threads();
    assert(g1h->n_par_threads() == n_workers,
           "Should not have been reset");
    g1h->workers()->run_task(&g1_par_count_task);
    // Done with the parallel phase so reset to 0.
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    n_workers = 1;
    g1_par_count_task.work(0);
  }

  if (VerifyDuringGC) {
    // Verify that the counting data accumulated during marking matches
    // that calculated by walking the marking bitmap.
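    // The expected values are recomputed from scratch here by walking
    // the next marking bitmap; a failure is recorded wherever the
    // concurrently accumulated data claims less liveness than the
    // recomputed data.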

    // Bitmaps to hold expected values
    BitMap expected_region_bm(_region_bm.size(), false);
    BitMap expected_card_bm(_card_bm.size(), false);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
                                                 &_card_bm,
                                                 &expected_region_bm,
                                                 &expected_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_verify_task);
      // Done with the parallel phase so reset to 0.
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
             "sanity check");
    } else {
      g1_par_verify_task.work(0);
    }

    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  }

  size_t start_used_bytes = g1h->used();
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitmap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // The remembered set scrubbing must be done before the
  // record_concurrent_mark_cleanup_end() call below, since it affects
  // the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  g1h->verify_region_sets_optional();
}

void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference
// processing. Uses the CMTask associated with a worker thread (for serial
// reference processing the CMTask for worker 0 is used) to preserve
// (mark) and trace referent objects.
//
// Using the CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage.
// Also using the tasks' local queues removes the potential for the
// workers to interfere with each other, which could occur if they
// operated on the global stack.

class G1CMKeepAliveAndDrainClosure: public OopClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      oop obj = oopDesc::load_decode_heap_oop(p);
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] we're looking at location "
                               "*"PTR_FORMAT" = "PTR_FORMAT,
                               _task->worker_id(), p, (void*) obj);
      }

      _task->deal_with_reference(obj);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call CMTask::do_marking_step() to
        // process these entries.
        //
        // We call CMTask::do_marking_step() in a loop, which we'll exit
        // if there's nothing more to do (i.e. we're done with the entries
        // that were pushed as a result of the CMTask::deal_with_reference()
        // calls above) or we overflow.
        //
        // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false      /* do_stealing    */,
                                 false      /* do_termination */,
                                 _is_serial /* is_serial      */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
      }
    }
  }
};

// 'Drain' closure used by both serial and parallel reference processing.
// Uses the CMTask associated with a given worker thread (for serial
// reference processing the CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.
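// Unlike the 'keep alive' closure, this one also enables termination,
// so that the draining worker can enter, and cleanly exit, the
// termination protocol once there are no entries left to process.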

class G1CMDrainMarkingStackClosure: public VoidClosure {
  ConcurrentMark* _cm;
  CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
                               _task->worker_id(), BOOL_TO_STR(_is_serial));
      }

      // We call CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      // It only makes sense to allow stealing in CMTask::do_marking_step()
      // if this closure is being instantiated for parallel reference
      // processing.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             !_is_serial /* do_stealing    */,
                             true        /* do_termination */,
                             _is_serial  /* is_serial      */);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  WorkGang*        _workers;
  int              _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
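  // Both overrides are implemented below, after the proxy task classes
  // that they use to wrap the given task.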
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the phase for each task execution so that
  // the termination protocol of CMTask::do_marking_step works.
  _cm->set_phase(_active_workers, false /* concurrent */);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  _g1h->set_par_threads(0);
}

class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  _g1h->set_par_threads(0);
}

void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    if (G1Log::finer()) {
      gclog_or_tty->put(' ');
    }
    TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
    // JNI references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (i.e.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);

    // Parallel processing task executor.
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);

    // Set the degree of MT processing here. If the discovery was done MT,
    // the number of threads involved during discovery could differ from
    // the number of active workers. This is OK as long as the discovered
    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    // Process the weak references.
    rp->process_discovered_references(&g1_is_alive,
                                      &g1_keep_alive,
                                      &g1_drain_mark_stack,
                                      executor);

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");

    if (_markStack.overflow()) {
      // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    assert(rp->num_q() == active_workers, "why not");

    rp->enqueue_discovered_references(executor);

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}

void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap  = (CMBitMap*)  temp;
}

class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  bool            _is_serial;
public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
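    // Workers with ids beyond the number of active tasks simply
    // return without doing any marking work.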
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */,
                              true /* do_stealing    */,
                              true /* do_termination */,
                              _is_serial);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
    AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};

void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    uint active_workers = 1;
    set_phase(active_workers, false /* concurrent */);

    // Note - if there's no work gang then the VMThread will be
    // the thread to execute the remark - serially. We have
    // to pass true for the is_serial parameter so that
    // CMTask::do_marking_step() doesn't enter the sync
    // barriers in the event of an overflow. Doing so will
    // cause an assert that the current thread is not a
    // concurrent GC thread.
    CMRemarkTask remarkTask(this, active_workers, true /* is_serial */);
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

#ifndef PRODUCT

class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption  vo,
                           bool          all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
      bool marked = _g1h->is_marked(obj, _vo);

      if (over_tams) {
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};

class PrintReachableObjectClosure : public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;
  HeapRegion*      _hr;

public:
  PrintReachableObjectClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all,
                              HeapRegion*   hr) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
    bool marked = _g1h->is_marked(o, _vo);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ? " M" : "");
      PrintReachableOopClosure oopCl(_out, _vo, _all);
      o->oop_iterate_no_header(&oopCl);
    }
  }
};

class PrintReachableRegionClosure : public HeapRegionClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream*    _out;
  VerifyOption     _vo;
  bool             _all;

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "TAMS: "PTR_FORMAT, b, e, t, p);
    _out->cr();

    HeapWord* from = b;
    HeapWord* to   = t;

    if (to > from) {
      _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
      _out->cr();
      PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
      hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
      _out->cr();
    }

    return false;
  }

  PrintReachableRegionClosure(outputStream* out,
                              VerifyOption  vo,
                              bool          all) :
    _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
};

void ConcurrentMark::print_reachable(const char* str,
                                     VerifyOption vo,
                                     bool all) {
  gclog_or_tty->cr();
  gclog_or_tty->print_cr("== Doing heap dump... ");

  if (G1PrintReachableBaseFile == NULL) {
    gclog_or_tty->print_cr("  #### error: no base file defined");
    return;
  }

  if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
      (JVM_MAXPATHLEN - 1)) {
    gclog_or_tty->print_cr("  #### error: file name too long");
    return;
  }

  char file_name[JVM_MAXPATHLEN];
  sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  gclog_or_tty->print_cr("  dumping to file %s", file_name);

  fileStream fout(file_name);
  if (!fout.is_open()) {
    gclog_or_tty->print_cr("  #### error: could not open file");
    return;
  }

  outputStream* out = &fout;
  out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
  out->cr();

  out->print_cr("--- ITERATING OVER REGIONS");
  out->cr();
  PrintReachableRegionClosure rcl(out, vo, all);
  _g1h->heap_region_iterate(&rcl);
  out->cr();

  gclog_or_tty->print_cr("  done");
  gclog_or_tty->flush();
}

#endif // PRODUCT

void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
}

void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  _nextMarkBitMap->clearRange(mr);
}

void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
  clearRangePrevBitmap(mr);
  clearRangeNextBitmap(mr);
}

HeapRegion*
ConcurrentMark::claim_region(uint worker_id) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // Note on how this code handles humongous regions. In the
    // normal case the finger will reach the start of a "starts
    // humongous" (SH) region. Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord*   bottom      = curr_region->bottom();
    HeapWord*   end         = curr_region->end();
    HeapWord*   limit       = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             worker_id, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] we were successful with region = "
                               PTR_FORMAT, worker_id, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
                                 "returning it ", worker_id, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
                                 "returning NULL", worker_id, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               worker_id, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

#ifndef PRODUCT
enum VerifyNoCSetOopsPhase {
  VerifyNoCSetOopsStack,
  VerifyNoCSetOopsQueues,
  VerifyNoCSetOopsSATBCompleted,
  VerifyNoCSetOopsSATBThread
};

class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
private:
  G1CollectedHeap* _g1h;
  VerifyNoCSetOopsPhase _phase;
  int _info;

  const char* phase_str() {
    switch (_phase) {
    case VerifyNoCSetOopsStack:         return "Stack";
    case VerifyNoCSetOopsQueues:        return "Queue";
    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
    default:                            ShouldNotReachHere();
    }
    return NULL;
  }

  void do_object_work(oop obj) {
    guarantee(!_g1h->obj_in_cs(obj),
              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
                      (void*) obj, phase_str(), _info));
  }

public:
  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }

  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
    _phase = phase;
    _info = info;
  }

  virtual void do_oop(oop* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    do_object_work(obj);
  }

  virtual void do_oop(narrowOop* p) {
    // We should not come across narrow oops while scanning marking
    // stacks and SATB buffers.
    ShouldNotReachHere();
  }

  virtual void do_object(oop obj) {
    do_object_work(obj);
  }
};

void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
                                         bool verify_enqueued_buffers,
                                         bool verify_thread_buffers,
                                         bool verify_fingers) {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->mark_in_progress()) {
    return;
  }

  VerifyNoCSetOopsClosure cl;

  if (verify_stacks) {
    // Verify entries on the global mark stack
    cl.set_phase(VerifyNoCSetOopsStack);
    _markStack.oops_do(&cl);

    // Verify entries on the task queues
    for (uint i = 0; i < _max_worker_id; i += 1) {
      cl.set_phase(VerifyNoCSetOopsQueues, i);
      CMTaskQueue* queue = _task_queues->queue(i);
      queue->oops_do(&cl);
    }
  }

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

  // Verify entries on the enqueued SATB buffers
  if (verify_enqueued_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
    satb_qs.iterate_completed_buffers_read_only(&cl);
  }

  // Verify entries on the per-thread SATB buffers
  if (verify_thread_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBThread);
    satb_qs.iterate_thread_buffers_read_only(&cl);
  }

  if (verify_fingers) {
    // Verify the global finger
    HeapWord* global_finger = finger();
    if (global_finger != NULL && global_finger < _heap_end) {
      // The global finger always points to a heap region boundary. We
      // use heap_region_containing_raw() to get the containing region
      // given that the global finger could be pointing to a free region
      // which subsequently becomes continues humongous. If that
      // happens, heap_region_containing() will return the bottom of the
      // corresponding starts humongous region and the check below will
      // not hold any more.
      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
      guarantee(global_finger == global_hr->bottom(),
                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
                        global_finger, HR_FORMAT_PARAMS(global_hr)));
    }

    // Verify the task fingers
    assert(parallel_marking_threads() <= _max_worker_id, "sanity");
    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
      CMTask* task = _tasks[i];
      HeapWord* task_finger = task->finger();
      if (task_finger != NULL && task_finger < _heap_end) {
        // See above note on the global finger verification.
        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
        guarantee(task_finger == task_hr->bottom() ||
                  !task_hr->in_collection_set(),
                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
                          task_finger, HR_FORMAT_PARAMS(task_hr)));
      }
    }
  }
}
#endif // PRODUCT

// Aggregate the counting data that was constructed concurrently
// with marking.
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have its bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, limit, hr->top(), hr->end()));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could be actually just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrs_index = hr->hrs_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrs_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
    hr->add_to_marked_bytes(marked_bytes);

    // Next heap region
    return false;
  }
};

class G1AggregateCountDataTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _cm_card_bm;
  uint _max_worker_id;
  int _active_workers;

public:
  G1AggregateCountDataTask(G1CollectedHeap* g1h,
                           ConcurrentMark* cm,
                           BitMap* cm_card_bm,
                           uint max_worker_id,
                           int n_workers) :
    AbstractGangTask("Count Aggregation"),
    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
    _max_worker_id(max_worker_id),
    _active_workers(n_workers) { }

  void work(uint worker_id) {
    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
                                            _active_workers,
                                            HeapRegion::AggregateCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&cl);
    }
  }
};


void ConcurrentMark::aggregate_count_data() {
  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                        _g1h->workers()->active_workers() :
                        1);

  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                           _max_worker_id, n_workers);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");
    _g1h->set_par_threads(n_workers);
    _g1h->workers()->run_task(&g1_par_agg_task);
    _g1h->set_par_threads(0);

    assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
           "sanity check");
    _g1h->reset_heap_region_claim_values();
  } else {
    g1_par_agg_task.work(0);
  }
}

// Clear the per-worker arrays used to store the per-region counting data
void ConcurrentMark::clear_all_count_data() {
  // Clear the global card bitmap - it will be filled during
  // liveness count aggregation (during remark) and the
  // final counting task.
  _card_bm.clear();

  // Clear the global region bitmap - it will be filled as part
  // of the final counting task.
  _region_bm.clear();

  uint max_regions = _g1h->max_regions();
  assert(_max_worker_id > 0, "uninitialized");

  for (uint i = 0; i < _max_worker_id; i += 1) {
    BitMap* task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_array_for(i);

    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
    assert(marked_bytes_array != NULL, "uninitialized");

    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
    task_card_bm->clear();
  }
}

void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Clear the liveness counting data
  clear_all_count_data();
  // Empty mark stack
  reset_marking_state();
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}

static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ?
void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
                         "(%8.2f s marking).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum());
}

void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  if (use_parallel_marking_threads()) {
    _parallel_workers->print_worker_threads_on(st);
  }
}

// We take a break if someone is trying to stop the world.
bool ConcurrentMark::do_yield_check(uint worker_id) {
  if (should_yield()) {
    if (worker_id == 0) {
      _g1h->g1_policy()->record_concurrent_pause();
    }
    cmThread()->yield();
    return true;
  } else {
    return false;
  }
}

bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}

bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}

bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return containing_card_is_marked(start) &&
         containing_card_is_marked(last);
}

#ifndef PRODUCT
// for debugging purposes
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (uint i = 0; i < _max_worker_id; ++i) {
    gclog_or_tty->print("   %u: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
#endif

void CMTask::scan_object(oop obj) {
  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");

  if (_cm->verbose_high()) {
    gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
                           _worker_id, (void*) obj);
  }

  size_t obj_size = obj->size();
  _words_scanned += obj_size;

  obj->oop_iterate(_cm_oop_closure);
  statsOnly( ++_objs_scanned );
  check_limits();
}

// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap*                   _nextMarkBitMap;
  ConcurrentMark*             _cm;
  CMTask*                     _task;

public:
  CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
    _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");

    statsOnly( _task->increase_objs_found_on_bitmap() );
    assert(addr >= _task->finger(), "invariant");

    // We move that task's local finger along.
    _task->move_finger_to(addr);

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};
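// The two finger asserts in do_bit() encode the invariant that the
// closure only visits addresses between the task's local finger and the
// global finger. A minimal sketch of the graying rule that invariant
// supports (gray_object and its parameters are illustrative, not a real
// helper in these sources):
#if 0
static void gray_object(HeapWord* addr, HeapWord* global_finger,
                        CMBitMap* bitmap, CMTaskQueue* queue) {
  bitmap->mark(addr);          // above the finger, marking alone suffices:
  if (addr < global_finger) {  // the bitmap iteration will still reach it
    queue->push(oop(addr));    // below the finger it must be pushed, or it
  }                            // would never be visited again
}
#endif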
// Closure for iterating over objects, currently only used for
// processing SATB buffers.
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out NULL regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
                           _worker_id, hr);
  }

  _curr_region = hr;
  _finger      = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr   = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit  = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _worker_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
                           _worker_id, _curr_region);
  }
  clear_region_fields();
}
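// Taken together, the three methods above give a region its lifecycle
// within a marking task. A minimal sketch of how a caller drives one
// region, assuming the bitmap iteration happens in between
// (scan_one_region is illustrative, not a real helper):
#if 0
static void scan_one_region(CMTask* task, HeapRegion* hr) {
  task->setup_for_region(hr);      // sets _curr_region, _finger, _region_limit
  // ... iterate the mark bitmap over [_finger, _region_limit) ...
  task->giveup_current_region();   // clears the three region fields again
}
#endif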
void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region  = NULL;
  _finger       = NULL;
  _region_limit = NULL;
}

void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}

void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls                     = 0;
  _elapsed_time_ms           = 0.0;
  _termination_time_ms       = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes           = 0;
  _local_pops             = 0;
  _local_max_size         = 0;
  _objs_scanned           = 0;
  _global_pushes          = 0;
  _global_pops            = 0;
  _global_max_size        = 0;
  _global_transfers_to    = 0;
  _global_transfers_from  = 0;
  _regions_claimed        = 0;
  _objs_found_on_bitmap   = 0;
  _satb_buffers_processed = 0;
  _steal_attempts         = 0;
  _steals                 = 0;
  _aborted                = 0;
  _aborted_overflow       = 0;
  _aborted_cm_aborted     = 0;
  _aborted_yield          = 0;
  _aborted_timed_out      = 0;
  _aborted_satb           = 0;
  _aborted_termination    = 0;
#endif // _MARKING_STATS_
}

bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}

void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
         "shouldn't have been called otherwise");
  regular_clock_call();
}
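// reached_limit() is the slow-path entry of the work-based clock scheme
// described in the comment block before do_marking_step(): counters are
// bumped cheaply on the hot paths, and the expensive checks only run
// once a counter crosses its limit. A minimal standalone sketch of the
// idea (the type and the period value are illustrative assumptions):
#if 0
struct WorkBasedClock {
  size_t _scanned;
  size_t _scanned_limit;
  static const size_t period = 12*1024;   // stand-in for words_scanned_period

  void count_words(size_t n) {
    _scanned += n;                        // cheap fast-path bookkeeping
    if (_scanned >= _scanned_limit) {     // rare: time to run the clock
      _scanned_limit = _scanned + period; // cf. recalculate_limits()
      // ... check overflow / abort / yield / time quota / SATB here ...
    }
  }
};
#endif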
void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _worker_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
                             _worker_id);
    }
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}
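// A quick worked example of how decrease_limits() below interacts with
// the limits just set: if the scanning period is P, the limit drops from
// _real_words_scanned_limit to _real_words_scanned_limit - 3P/4, so at
// most a quarter period of scanning work remains before the next clock
// call fires. (P here stands for the words_scanned_period and
// refs_reached_period constants.)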
void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
                               _worker_id);
      }
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
                               _worker_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
                             _worker_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}
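// Both transfer methods above move entries in fixed-size batches so the
// mutex-protected global stack is touched once per chunk rather than once
// per entry. A minimal sketch of the batching half of that pattern,
// assuming a HotSpot-style task queue with pop_local() (pop_batch itself
// is illustrative, not a real helper):
#if 0
template <typename T, int N>
static int pop_batch(GenericTaskQueue<T>* q, T (&buffer)[N]) {
  int n = 0;
  T entry;
  while (n < N && q->pop_local(entry)) {
    buffer[n++] = entry;          // fill a local chunk lock-free...
  }
  return n;                       // ...then hand it over in one locked call
}
#endif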
void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
                             _worker_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
                             _worker_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
                             _worker_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
                             _worker_id, _cm->mark_stack_size());
    }
  }
}
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_worker_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_worker_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_worker_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
                         _worker_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to,_global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}
/*****************************************************************************

 The do_marking_step(time_target_ms, ...) method is the building
 block of the parallel marking framework. It can be called in parallel
 with other invocations of do_marking_step() on different tasks
 (but only one per task, obviously) and concurrently with the
 mutator threads, or during remark, hence it eliminates the need
 for two versions of the code. When called during remark, it will
 pick up from where the task left off during the concurrent marking
 phase. Interestingly, tasks are also claimable during evacuation
 pauses, since do_marking_step() ensures that it aborts before
 it needs to yield.

 The data structures that it uses to do marking work are the
 following:

   (1) Marking Bitmap. If there are gray objects that appear only
   on the bitmap (this happens either when dealing with an overflow
   or when the initial marking phase has simply marked the roots
   and didn't push them on the stack), then tasks claim heap
   regions whose bitmap they then scan to find gray objects. A
   global finger indicates where the end of the last claimed region
   is. A local finger indicates how far into the region a task has
   scanned. The two fingers are used to determine how to gray an
   object (i.e. whether simply marking it is OK, as it will be
   visited by a task in the future, or whether it needs to be also
   pushed on a stack).

   (2) Local Queue. The local queue of the task which is accessed
   reasonably efficiently by the task. Other tasks can steal from
   it when they run out of work. Throughout the marking phase, a
   task attempts to keep its local queue short but not totally
   empty, so that entries are available for stealing by other
   tasks. Only when there is no more work will a task totally
   drain its local queue.

   (3) Global Mark Stack. This handles local queue overflow. During
   marking only sets of entries are moved between it and the local
   queues, as access to it requires a mutex and finer-grain
   interaction with it might cause contention. If it
   overflows, then the marking phase should restart and iterate
   over the bitmap to identify gray objects. Throughout the marking
   phase, tasks attempt to keep the global mark stack at a small
   length but not totally empty, so that entries are available for
   popping by other tasks. Only when there is no more work will
   tasks totally drain the global mark stack.

   (4) SATB Buffer Queue. This is where completed SATB buffers are
   made available. Buffers are regularly removed from this queue
   and scanned for roots, so that the queue doesn't get too
   long. During remark, all completed buffers are processed, as
   well as the filled in parts of any uncompleted buffers.

 The do_marking_step() method tries to abort when the time target
 has been reached. There are a few other cases when the
 do_marking_step() method also aborts:

   (1) When the marking phase has been aborted (after a Full GC).

   (2) When a global overflow (on the global stack) has been
   triggered. Before the task aborts, it will actually sync up with
   the other tasks to ensure that all the marking data structures
   (local queues, stacks, fingers etc.) are re-initialised so that
   when do_marking_step() completes, the marking phase can
   immediately restart.

   (3) When enough completed SATB buffers are available. The
   do_marking_step() method only tries to drain SATB buffers right
   at the beginning. So, if enough buffers are available, the
   marking step aborts and the SATB buffers are processed at
   the beginning of the next invocation.

   (4) To yield. When we have to yield, we abort and yield
   right at the end of do_marking_step(). This saves us from a lot
   of hassle as, by yielding we might allow a Full GC. If this
   happens then objects will be compacted underneath our feet, the
   heap might shrink, etc. We save checking for this by just
   aborting and doing the yield right at the end.

 From the above it follows that the do_marking_step() method should
 be called in a loop (or, otherwise, regularly) until it completes.

 If a marking step completes without its has_aborted() flag being
 true, it means it has completed the current marking phase (and
 also all other marking tasks have done so and have all synced up).

 A method called regular_clock_call() is invoked "regularly" (in
 sub ms intervals) throughout marking. It is this clock method that
 checks all the abort conditions which were mentioned above and
 decides when the task should abort. A work-based scheme is used to
 trigger this clock method: when the number of object words the
 marking phase has scanned or the number of references the marking
 phase has visited reach a given limit. Additional invocations of
 the clock method have been planted in a few other strategic places
 too. The initial reason for the clock method was to avoid calling
 vtime too regularly, as it is quite expensive. So, once it was in
 place, it was natural to piggy-back all the other conditions on it
 too and not constantly check them throughout the code.

 If do_stealing is true then do_marking_step will attempt to steal
 work from the other CMTasks. It only makes sense to enable
 stealing when being called by multiple threads.

 If do_termination is true then do_marking_step will enter its
 termination protocol.

 The value of is_serial must be true when do_marking_step is being
 called serially (i.e. by the VMThread) and do_marking_step should
 skip any synchronization in the termination and overflow code.
 Examples include the serial remark code and the serial reference
 processing closures.

 The value of is_serial must be false when do_marking_step is
 being called by any of the worker threads in a work gang.
 Examples include the concurrent marking code (CMMarkingTask),
 the MT remark code, and the MT reference processing closures.

 *****************************************************************************/
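// A minimal sketch of the driving loop described above, modeled on the
// concurrent marking caller (the wrapper function and the 10ms target
// are illustrative assumptions):
#if 0
static void drive_marking(CMTask* the_task, ConcurrentMark* cm) {
  do {
    the_task->do_marking_step(10.0  /* time target in ms */,
                              true  /* do_stealing */,
                              true  /* do_termination */,
                              false /* is_serial: called from a work gang */);
    // If the step aborted in order to yield, the yield happens here,
    // in the caller, before the next step is attempted.
  } while (!cm->has_aborted() && the_task->has_aborted());
}
#endif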
void CMTask::do_marking_step(double time_target_ms,
                             bool do_stealing,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _worker_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "HR_FORMAT,
                               _worker_id, _finger, _region_limit,
                               HR_FORMAT_PARAMS(_curr_region));
      }

      assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%u] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _worker_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
                                 _worker_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task; it
      // will be restarted and we can hopefully find more things to do.
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] apparently there is more work to do",
                               _worker_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
      }

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialise our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
                               _worker_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// It takes as a parameter a reference to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}
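// A quick worked example of the distribution above (assuming, for
// illustration, a 1 MB region size): a humongous object of 2.5 MB sets
// _hum_used_bytes to 2.5 MB at the "starts humongous" region; the three
// regions of the series then receive 1 MB, 1 MB and 0.5 MB in address
// order, after which the field is back to zero, as the asserts expect.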
bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type = "";
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
    type = "SURV";
  } else if (r->is_young()) {
    type = "EDEN";
  } else if (r->startsHumongous()) {
    type = "HUMS";

    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    type = "HUMC";
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  } else {
    type = "OLD";
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT,
                 type, bottom, end,
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes));
  _out->cr();
}
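// A minimal usage sketch for the closure above, mirroring how the
// cleanup pause drives it (the wrapper function is illustrative, not
// part of the real sources):
#if 0
static void print_region_liveness(G1CollectedHeap* g1h, outputStream* out) {
  G1PrintRegionLivenessInfoClosure cl(out, "Post-Cleanup");  // prints header
  g1h->heap_region_iterate(&cl);   // one "###" line per heap region
}                                  // destructor prints the summary footer
#endif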