rev 4008 : 8001985: G1: Backport fix for 7200261 to hsx24
Summary: The automatic backport of the fix for 7200261 did not apply cleanly to hsx24 - there were two rejected hunks that had to be fixed up by hand.
Reviewed-by:
/*
 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
75 addr = (HeapWord*)align_size_up((intptr_t)addr, 76 HeapWordSize << _shifter); 77 size_t addrOffset = heapWordToOffset(addr); 78 if (limit == NULL) { 79 limit = _bmStartWord + _bmWordSize; 80 } 81 size_t limitOffset = heapWordToOffset(limit); 82 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 83 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 84 assert(nextAddr >= addr, "get_next_one postcondition"); 85 assert(nextAddr == limit || isMarked(nextAddr), 86 "get_next_one postcondition"); 87 return nextAddr; 88 } 89 90 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, 91 HeapWord* limit) const { 92 size_t addrOffset = heapWordToOffset(addr); 93 if (limit == NULL) { 94 limit = _bmStartWord + _bmWordSize; 95 } 96 size_t limitOffset = heapWordToOffset(limit); 97 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); 98 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 99 assert(nextAddr >= addr, "get_next_one postcondition"); 100 assert(nextAddr == limit || !isMarked(nextAddr), 101 "get_next_one postcondition"); 102 return nextAddr; 103 } 104 105 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { 106 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); 107 return (int) (diff >> _shifter); 108 } 109 110 #ifndef PRODUCT 111 bool CMBitMapRO::covers(ReservedSpace rs) const { 112 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 113 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize, 114 "size inconsistency"); 115 return _bmStartWord == (HeapWord*)(rs.base()) && 116 _bmWordSize == rs.size()>>LogHeapWordSize; 117 } 118 #endif 119 120 void CMBitMap::clearAll() { 121 _bm.clear(); 122 return; 123 } 124 125 void CMBitMap::markRange(MemRegion mr) { 126 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 127 assert(!mr.is_empty(), "unexpected empty region"); 128 assert((offsetToHeapWord(heapWordToOffset(mr.end())) == 129 ((HeapWord *) mr.end())), 130 "markRange memory region end is not card aligned"); 131 // convert address range into offset range 132 _bm.at_put_range(heapWordToOffset(mr.start()), 133 heapWordToOffset(mr.end()), true); 134 } 135 136 void CMBitMap::clearRange(MemRegion mr) { 137 mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 138 assert(!mr.is_empty(), "unexpected empty region"); 139 // convert address range into offset range 140 _bm.at_put_range(heapWordToOffset(mr.start()), 141 heapWordToOffset(mr.end()), false); 142 } 143 144 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, 145 HeapWord* end_addr) { 146 HeapWord* start = getNextMarkedWordAddress(addr); 147 start = MIN2(start, end_addr); 148 HeapWord* end = getNextUnmarkedWordAddress(start); 149 end = MIN2(end, end_addr); 150 assert(start <= end, "Consistency check"); 151 MemRegion mr(start, end); 152 if (!mr.is_empty()) { 153 clearRange(mr); 154 } 155 return mr; 156 } 157 158 CMMarkStack::CMMarkStack(ConcurrentMark* cm) : 159 _base(NULL), _cm(cm) 160 #ifdef ASSERT 161 , _drain_in_progress(false) 162 , _drain_in_progress_yields(false) 163 #endif 164 {} 165 166 void CMMarkStack::allocate(size_t size) { 167 _base = NEW_C_HEAP_ARRAY(oop, size, mtGC); 168 if (_base == NULL) { 169 vm_exit_during_initialization("Failed to allocate CM region mark stack"); 170 } 171 _index = 0; 172 _capacity = (jint) size; 173 _saved_index = -1; 174 NOT_PRODUCT(_max_depth = 0); 175 } 176 177 CMMarkStack::~CMMarkStack() { 178 if (_base != NULL) { 179 FREE_C_HEAP_ARRAY(oop, _base, mtGC); 180 } 181 } 182 183 void 
CMMarkStack::par_push(oop ptr) { 184 while (true) { 185 if (isFull()) { 186 _overflow = true; 187 return; 188 } 189 // Otherwise... 190 jint index = _index; 191 jint next_index = index+1; 192 jint res = Atomic::cmpxchg(next_index, &_index, index); 193 if (res == index) { 194 _base[index] = ptr; 195 // Note that we don't maintain this atomically. We could, but it 196 // doesn't seem necessary. 197 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 198 return; 199 } 200 // Otherwise, we need to try again. 201 } 202 } 203 204 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { 205 while (true) { 206 if (isFull()) { 207 _overflow = true; 208 return; 209 } 210 // Otherwise... 211 jint index = _index; 212 jint next_index = index + n; 213 if (next_index > _capacity) { 214 _overflow = true; 215 return; 216 } 217 jint res = Atomic::cmpxchg(next_index, &_index, index); 218 if (res == index) { 219 for (int i = 0; i < n; i++) { 220 int ind = index + i; 221 assert(ind < _capacity, "By overflow test above."); 222 _base[ind] = ptr_arr[i]; 223 } 224 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 225 return; 226 } 227 // Otherwise, we need to try again. 228 } 229 } 230 231 232 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { 233 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 234 jint start = _index; 235 jint next_index = start + n; 236 if (next_index > _capacity) { 237 _overflow = true; 238 return; 239 } 240 // Otherwise. 241 _index = next_index; 242 for (int i = 0; i < n; i++) { 243 int ind = start + i; 244 assert(ind < _capacity, "By overflow test above."); 245 _base[ind] = ptr_arr[i]; 246 } 247 } 248 249 250 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { 251 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 252 jint index = _index; 253 if (index == 0) { 254 *n = 0; 255 return false; 256 } else { 257 int k = MIN2(max, index); 258 jint new_ind = index - k; 259 for (int j = 0; j < k; j++) { 260 ptr_arr[j] = _base[new_ind + j]; 261 } 262 _index = new_ind; 263 *n = k; 264 return true; 265 } 266 } 267 268 template<class OopClosureClass> 269 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) { 270 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after 271 || SafepointSynchronize::is_at_safepoint(), 272 "Drain recursion must be yield-safe."); 273 bool res = true; 274 debug_only(_drain_in_progress = true); 275 debug_only(_drain_in_progress_yields = yield_after); 276 while (!isEmpty()) { 277 oop newOop = pop(); 278 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop"); 279 assert(newOop->is_oop(), "Expected an oop"); 280 assert(bm == NULL || bm->isMarked((HeapWord*)newOop), 281 "only grey objects on this stack"); 282 newOop->oop_iterate(cl); 283 if (yield_after && _cm->do_yield_check()) { 284 res = false; 285 break; 286 } 287 } 288 debug_only(_drain_in_progress = false); 289 return res; 290 } 291 292 void CMMarkStack::note_start_of_gc() { 293 assert(_saved_index == -1, 294 "note_start_of_gc()/end_of_gc() bracketed incorrectly"); 295 _saved_index = _index; 296 } 297 298 void CMMarkStack::note_end_of_gc() { 299 // This is intentionally a guarantee, instead of an assert. If we 300 // accidentally add something to the mark stack during GC, it 301 // will be a correctness issue so it's better if we crash. we'll 302 // only check this once per GC anyway, so it won't be a performance 303 // issue in any way. 
304 guarantee(_saved_index == _index, 305 err_msg("saved index: %d index: %d", _saved_index, _index)); 306 _saved_index = -1; 307 } 308 309 void CMMarkStack::oops_do(OopClosure* f) { 310 assert(_saved_index == _index, 311 err_msg("saved index: %d index: %d", _saved_index, _index)); 312 for (int i = 0; i < _index; i += 1) { 313 f->do_oop(&_base[i]); 314 } 315 } 316 317 bool ConcurrentMark::not_yet_marked(oop obj) const { 318 return (_g1h->is_obj_ill(obj) 319 || (_g1h->is_in_permanent(obj) 320 && !nextMarkBitMap()->isMarked((HeapWord*)obj))); 321 } 322 323 CMRootRegions::CMRootRegions() : 324 _young_list(NULL), _cm(NULL), _scan_in_progress(false), 325 _should_abort(false), _next_survivor(NULL) { } 326 327 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) { 328 _young_list = g1h->young_list(); 329 _cm = cm; 330 } 331 332 void CMRootRegions::prepare_for_scan() { 333 assert(!scan_in_progress(), "pre-condition"); 334 335 // Currently, only survivors can be root regions. 336 assert(_next_survivor == NULL, "pre-condition"); 337 _next_survivor = _young_list->first_survivor_region(); 338 _scan_in_progress = (_next_survivor != NULL); 339 _should_abort = false; 340 } 341 342 HeapRegion* CMRootRegions::claim_next() { 343 if (_should_abort) { 344 // If someone has set the should_abort flag, we return NULL to 345 // force the caller to bail out of their loop. 346 return NULL; 347 } 348 349 // Currently, only survivors can be root regions. 350 HeapRegion* res = _next_survivor; 351 if (res != NULL) { 352 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 353 // Read it again in case it changed while we were waiting for the lock. 354 res = _next_survivor; 355 if (res != NULL) { 356 if (res == _young_list->last_survivor_region()) { 357 // We just claimed the last survivor so store NULL to indicate 358 // that we're done. 359 _next_survivor = NULL; 360 } else { 361 _next_survivor = res->get_next_young_region(); 362 } 363 } else { 364 // Someone else claimed the last survivor while we were trying 365 // to take the lock so nothing else to do. 366 } 367 } 368 assert(res == NULL || res->is_survivor(), "post-condition"); 369 370 return res; 371 } 372 373 void CMRootRegions::scan_finished() { 374 assert(scan_in_progress(), "pre-condition"); 375 376 // Currently, only survivors can be root regions. 
377 if (!_should_abort) { 378 assert(_next_survivor == NULL, "we should have claimed all survivors"); 379 } 380 _next_survivor = NULL; 381 382 { 383 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 384 _scan_in_progress = false; 385 RootRegionScan_lock->notify_all(); 386 } 387 } 388 389 bool CMRootRegions::wait_until_scan_finished() { 390 if (!scan_in_progress()) return false; 391 392 { 393 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 394 while (scan_in_progress()) { 395 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 396 } 397 } 398 return true; 399 } 400 401 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away 402 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list 403 #endif // _MSC_VER 404 405 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 406 return MAX2((n_par_threads + 2) / 4, 1U); 407 } 408 409 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) : 410 _markBitMap1(rs, MinObjAlignment - 1), 411 _markBitMap2(rs, MinObjAlignment - 1), 412 413 _parallel_marking_threads(0), 414 _max_parallel_marking_threads(0), 415 _sleep_factor(0.0), 416 _marking_task_overhead(1.0), 417 _cleanup_sleep_factor(0.0), 418 _cleanup_task_overhead(1.0), 419 _cleanup_list("Cleanup List"), 420 _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/), 421 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> 422 CardTableModRefBS::card_shift, 423 false /* in_resource_area*/), 424 425 _prevMarkBitMap(&_markBitMap1), 426 _nextMarkBitMap(&_markBitMap2), 427 428 _markStack(this), 429 // _finger set in set_non_marking_state 430 431 _max_task_num(MAX2((uint)ParallelGCThreads, 1U)), 432 // _active_tasks set in set_non_marking_state 433 // _tasks set inside the constructor 434 _task_queues(new CMTaskQueueSet((int) _max_task_num)), 435 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)), 436 437 _has_overflown(false), 438 _concurrent(false), 439 _has_aborted(false), 440 _restart_for_overflow(false), 441 _concurrent_marking_in_progress(false), 442 443 // _verbose_level set below 444 445 _init_times(), 446 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 447 _cleanup_times(), 448 _total_counting_time(0.0), 449 _total_rs_scrub_time(0.0), 450 451 _parallel_workers(NULL), 452 453 _count_card_bitmaps(NULL), 454 _count_marked_bytes(NULL) { 455 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel; 456 if (verbose_level < no_verbose) { 457 verbose_level = no_verbose; 458 } 459 if (verbose_level > high_verbose) { 460 verbose_level = high_verbose; 461 } 462 _verbose_level = verbose_level; 463 464 if (verbose_low()) { 465 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " 466 "heap end = "PTR_FORMAT, _heap_start, _heap_end); 467 } 468 469 _markStack.allocate(MarkStackSize); 470 471 // Create & start a ConcurrentMark thread. 
472 _cmThread = new ConcurrentMarkThread(this); 473 assert(cmThread() != NULL, "CM Thread should have been created"); 474 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 475 476 _g1h = G1CollectedHeap::heap(); 477 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 478 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); 479 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); 480 481 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 482 satb_qs.set_buffer_size(G1SATBBufferSize); 483 484 _root_regions.init(_g1h, this); 485 486 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC); 487 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC); 488 489 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC); 490 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC); 491 492 BitMap::idx_t card_bm_size = _card_bm.size(); 493 494 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 495 _active_tasks = _max_task_num; 496 for (int i = 0; i < (int) _max_task_num; ++i) { 497 CMTaskQueue* task_queue = new CMTaskQueue(); 498 task_queue->initialize(); 499 _task_queues->register_queue(i, task_queue); 500 501 _count_card_bitmaps[i] = BitMap(card_bm_size, false); 502 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC); 503 504 _tasks[i] = new CMTask(i, this, 505 _count_marked_bytes[i], 506 &_count_card_bitmaps[i], 507 task_queue, _task_queues); 508 509 _accum_task_vtime[i] = 0.0; 510 } 511 512 // Calculate the card number for the bottom of the heap. Used 513 // in biasing indexes into the accounting card bitmaps. 514 _heap_bottom_card_num = 515 intptr_t(uintptr_t(_g1h->reserved_region().start()) >> 516 CardTableModRefBS::card_shift); 517 518 // Clear all the liveness counting data 519 clear_all_count_data(); 520 521 if (ConcGCThreads > ParallelGCThreads) { 522 vm_exit_during_initialization("Can't have more ConcGCThreads " 523 "than ParallelGCThreads."); 524 } 525 if (ParallelGCThreads == 0) { 526 // if we are not running with any parallel GC threads we will not 527 // spawn any marking threads either 528 _parallel_marking_threads = 0; 529 _max_parallel_marking_threads = 0; 530 _sleep_factor = 0.0; 531 _marking_task_overhead = 1.0; 532 } else { 533 if (ConcGCThreads > 0) { 534 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent 535 // if both are set 536 537 _parallel_marking_threads = (uint) ConcGCThreads; 538 _max_parallel_marking_threads = _parallel_marking_threads; 539 _sleep_factor = 0.0; 540 _marking_task_overhead = 1.0; 541 } else if (G1MarkingOverheadPercent > 0) { 542 // we will calculate the number of parallel marking threads 543 // based on a target overhead with respect to the soft real-time 544 // goal 545 546 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 547 double overall_cm_overhead = 548 (double) MaxGCPauseMillis * marking_overhead / 549 (double) GCPauseIntervalMillis; 550 double cpu_ratio = 1.0 / (double) os::processor_count(); 551 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 552 double marking_task_overhead = 553 overall_cm_overhead / marking_thread_num * 554 (double) os::processor_count(); 555 double sleep_factor = 556 (1.0 - marking_task_overhead) / marking_task_overhead; 557 558 _parallel_marking_threads = (uint) marking_thread_num; 559 _max_parallel_marking_threads = _parallel_marking_threads; 560 _sleep_factor = sleep_factor; 561 _marking_task_overhead = marking_task_overhead; 562 } else { 
563 _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads); 564 _max_parallel_marking_threads = _parallel_marking_threads; 565 _sleep_factor = 0.0; 566 _marking_task_overhead = 1.0; 567 } 568 569 if (parallel_marking_threads() > 1) { 570 _cleanup_task_overhead = 1.0; 571 } else { 572 _cleanup_task_overhead = marking_task_overhead(); 573 } 574 _cleanup_sleep_factor = 575 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); 576 577 #if 0 578 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); 579 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); 580 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); 581 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); 582 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); 583 #endif 584 585 guarantee(parallel_marking_threads() > 0, "peace of mind"); 586 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads", 587 _max_parallel_marking_threads, false, true); 588 if (_parallel_workers == NULL) { 589 vm_exit_during_initialization("Failed necessary allocation."); 590 } else { 591 _parallel_workers->initialize_workers(); 592 } 593 } 594 595 // so that the call below can read a sensible value 596 _heap_start = (HeapWord*) rs.base(); 597 set_non_marking_state(); 598 } 599 600 void ConcurrentMark::update_g1_committed(bool force) { 601 // If concurrent marking is not in progress, then we do not need to 602 // update _heap_end. 603 if (!concurrent_marking_in_progress() && !force) return; 604 605 MemRegion committed = _g1h->g1_committed(); 606 assert(committed.start() == _heap_start, "start shouldn't change"); 607 HeapWord* new_end = committed.end(); 608 if (new_end > _heap_end) { 609 // The heap has been expanded. 610 611 _heap_end = new_end; 612 } 613 // Notice that the heap can also shrink. However, this only happens 614 // during a Full GC (at least currently) and the entire marking 615 // phase will bail out and the task will not be restarted. So, let's 616 // do nothing. 617 } 618 619 void ConcurrentMark::reset() { 620 // Starting values for these two. This should be called in a STW 621 // phase. CM will be notified of any future g1_committed expansions 622 // will be at the end of evacuation pauses, when tasks are 623 // inactive. 624 MemRegion committed = _g1h->g1_committed(); 625 _heap_start = committed.start(); 626 _heap_end = committed.end(); 627 628 // Separated the asserts so that we know which one fires. 629 assert(_heap_start != NULL, "heap bounds should look ok"); 630 assert(_heap_end != NULL, "heap bounds should look ok"); 631 assert(_heap_start < _heap_end, "heap bounds should look ok"); 632 633 // reset all the marking data structures and any necessary flags 634 clear_marking_state(); 635 636 if (verbose_low()) { 637 gclog_or_tty->print_cr("[global] resetting"); 638 } 639 640 // We do reset all of them, since different phases will use 641 // different number of active threads. So, it's easiest to have all 642 // of them ready. 
643 for (int i = 0; i < (int) _max_task_num; ++i) { 644 _tasks[i]->reset(_nextMarkBitMap); 645 } 646 647 // we need this to make sure that the flag is on during the evac 648 // pause with initial mark piggy-backed 649 set_concurrent_marking_in_progress(); 650 } 651 652 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) { 653 assert(active_tasks <= _max_task_num, "we should not have more"); 654 655 _active_tasks = active_tasks; 656 // Need to update the three data structures below according to the 657 // number of active threads for this phase. 658 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 659 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 660 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 661 662 _concurrent = concurrent; 663 // We propagate this to all tasks, not just the active ones. 664 for (int i = 0; i < (int) _max_task_num; ++i) 665 _tasks[i]->set_concurrent(concurrent); 666 667 if (concurrent) { 668 set_concurrent_marking_in_progress(); 669 } else { 670 // We currently assume that the concurrent flag has been set to 671 // false before we start remark. At this point we should also be 672 // in a STW phase. 673 assert(!concurrent_marking_in_progress(), "invariant"); 674 assert(_finger == _heap_end, "only way to get here"); 675 update_g1_committed(true); 676 } 677 } 678 679 void ConcurrentMark::set_non_marking_state() { 680 // We set the global marking state to some default values when we're 681 // not doing marking. 682 clear_marking_state(); 683 _active_tasks = 0; 684 clear_concurrent_marking_in_progress(); 685 } 686 687 ConcurrentMark::~ConcurrentMark() { 688 // The ConcurrentMark instance is never freed. 689 ShouldNotReachHere(); 690 } 691 692 void ConcurrentMark::clearNextBitmap() { 693 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 694 G1CollectorPolicy* g1p = g1h->g1_policy(); 695 696 // Make sure that the concurrent mark thread looks to still be in 697 // the current cycle. 698 guarantee(cmThread()->during_cycle(), "invariant"); 699 700 // We are finishing up the current cycle by clearing the next 701 // marking bitmap and getting it ready for the next cycle. During 702 // this time no other cycle can start. So, let's make sure that this 703 // is the case. 704 guarantee(!g1h->mark_in_progress(), "invariant"); 705 706 // clear the mark bitmap (no grey objects to start with). 707 // We need to do this in chunks and offer to yield in between 708 // each chunk. 709 HeapWord* start = _nextMarkBitMap->startWord(); 710 HeapWord* end = _nextMarkBitMap->endWord(); 711 HeapWord* cur = start; 712 size_t chunkSize = M; 713 while (cur < end) { 714 HeapWord* next = cur + chunkSize; 715 if (next > end) { 716 next = end; 717 } 718 MemRegion mr(cur,next); 719 _nextMarkBitMap->clearRange(mr); 720 cur = next; 721 do_yield_check(); 722 723 // Repeat the asserts from above. We'll do them as asserts here to 724 // minimize their overhead on the product. However, we'll have 725 // them as guarantees at the beginning / end of the bitmap 726 // clearing to get some checking in the product. 727 assert(cmThread()->during_cycle(), "invariant"); 728 assert(!g1h->mark_in_progress(), "invariant"); 729 } 730 731 // Clear the liveness counting data 732 clear_all_count_data(); 733 734 // Repeat the asserts from above. 
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that it gets suspended for a Full GC or that an
 * evacuation pause occurs. This is actually safe, since entering the
 * sync barrier is one of the last things do_marking_step() does, and
 * it doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (G1Log::fine()) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if
(!_cm->has_aborted() && the_task->has_aborted()) { 946 sleep_time_ms = 947 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 948 ConcurrentGCThread::stsLeave(); 949 os::sleep(Thread::current(), sleep_time_ms, false); 950 ConcurrentGCThread::stsJoin(); 951 } 952 double end_time2_sec = os::elapsedTime(); 953 double elapsed_time2_sec = end_time2_sec - start_time_sec; 954 955 #if 0 956 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " 957 "overhead %1.4lf", 958 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, 959 the_task->conc_overhead(os::elapsedTime()) * 8.0); 960 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", 961 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 962 #endif 963 } while (!_cm->has_aborted() && the_task->has_aborted()); 964 } 965 the_task->record_end_time(); 966 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant"); 967 968 ConcurrentGCThread::stsLeave(); 969 970 double end_vtime = os::elapsedVTime(); 971 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime); 972 } 973 974 CMConcurrentMarkingTask(ConcurrentMark* cm, 975 ConcurrentMarkThread* cmt) : 976 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 977 978 ~CMConcurrentMarkingTask() { } 979 }; 980 981 // Calculates the number of active workers for a concurrent 982 // phase. 983 uint ConcurrentMark::calc_parallel_marking_threads() { 984 if (G1CollectedHeap::use_parallel_gc_threads()) { 985 uint n_conc_workers = 0; 986 if (!UseDynamicNumberOfGCThreads || 987 (!FLAG_IS_DEFAULT(ConcGCThreads) && 988 !ForceDynamicNumberOfGCThreads)) { 989 n_conc_workers = max_parallel_marking_threads(); 990 } else { 991 n_conc_workers = 992 AdaptiveSizePolicy::calc_default_active_workers( 993 max_parallel_marking_threads(), 994 1, /* Minimum workers */ 995 parallel_marking_threads(), 996 Threads::number_of_non_daemon_threads()); 997 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 998 // that scaling has already gone into "_max_parallel_marking_threads". 999 } 1000 assert(n_conc_workers > 0, "Always need at least 1"); 1001 return n_conc_workers; 1002 } 1003 // If we are not running with any parallel GC threads we will not 1004 // have spawned any marking threads either. Hence the number of 1005 // concurrent workers should be 0. 1006 return 0; 1007 } 1008 1009 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) { 1010 // Currently, only survivors can be root regions. 
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (parallel_marking_threads() > 0) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
If the region is "starts humongous" it will also set 1203 // to 1 the bits on the region bitmap that correspond to its 1204 // associated "continues humongous" regions. 1205 void set_bit_for_region(HeapRegion* hr) { 1206 assert(!hr->continuesHumongous(), "should have filtered those out"); 1207 1208 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1209 if (!hr->startsHumongous()) { 1210 // Normal (non-humongous) case: just set the bit. 1211 _region_bm->par_at_put(index, true); 1212 } else { 1213 // Starts humongous case: calculate how many regions are part of 1214 // this humongous region and then set the bit range. 1215 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index(); 1216 _region_bm->par_at_put_range(index, end_index, true); 1217 } 1218 } 1219 1220 public: 1221 CMCountDataClosureBase(G1CollectedHeap* g1h, 1222 BitMap* region_bm, BitMap* card_bm): 1223 _g1h(g1h), _cm(g1h->concurrent_mark()), 1224 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 1225 _region_bm(region_bm), _card_bm(card_bm) { } 1226 }; 1227 1228 // Closure that calculates the # live objects per region. Used 1229 // for verification purposes during the cleanup pause. 1230 class CalcLiveObjectsClosure: public CMCountDataClosureBase { 1231 CMBitMapRO* _bm; 1232 size_t _region_marked_bytes; 1233 1234 public: 1235 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h, 1236 BitMap* region_bm, BitMap* card_bm) : 1237 CMCountDataClosureBase(g1h, region_bm, card_bm), 1238 _bm(bm), _region_marked_bytes(0) { } 1239 1240 bool doHeapRegion(HeapRegion* hr) { 1241 1242 if (hr->continuesHumongous()) { 1243 // We will ignore these here and process them when their 1244 // associated "starts humongous" region is processed (see 1245 // set_bit_for_heap_region()). Note that we cannot rely on their 1246 // associated "starts humongous" region to have their bit set to 1247 // 1 since, due to the region chunking in the parallel region 1248 // iteration, a "continues humongous" region might be visited 1249 // before its associated "starts humongous". 1250 return false; 1251 } 1252 1253 HeapWord* ntams = hr->next_top_at_mark_start(); 1254 HeapWord* start = hr->bottom(); 1255 1256 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(), 1257 err_msg("Preconditions not met - " 1258 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT, 1259 start, ntams, hr->end())); 1260 1261 // Find the first marked object at or after "start". 1262 start = _bm->getNextMarkedWordAddress(start, ntams); 1263 1264 size_t marked_bytes = 0; 1265 1266 while (start < ntams) { 1267 oop obj = oop(start); 1268 int obj_sz = obj->size(); 1269 HeapWord* obj_end = start + obj_sz; 1270 1271 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 1272 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end); 1273 1274 // Note: if we're looking at the last region in heap - obj_end 1275 // could be actually just beyond the end of the heap; end_idx 1276 // will then correspond to a (non-existent) card that is also 1277 // just beyond the heap. 1278 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) { 1279 // end of object is not card aligned - increment to cover 1280 // all the cards spanned by the object 1281 end_idx += 1; 1282 } 1283 1284 // Set the bits in the card BM for the cards spanned by this object. 1285 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1286 1287 // Add the size of this object to the number of marked bytes. 
1288 marked_bytes += (size_t)obj_sz * HeapWordSize; 1289 1290 // Find the next marked object after this one. 1291 start = _bm->getNextMarkedWordAddress(obj_end, ntams); 1292 } 1293 1294 // Mark the allocated-since-marking portion... 1295 HeapWord* top = hr->top(); 1296 if (ntams < top) { 1297 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1298 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1299 1300 // Note: if we're looking at the last region in heap - top 1301 // could be actually just beyond the end of the heap; end_idx 1302 // will then correspond to a (non-existent) card that is also 1303 // just beyond the heap. 1304 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1305 // end of object is not card aligned - increment to cover 1306 // all the cards spanned by the object 1307 end_idx += 1; 1308 } 1309 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1310 1311 // This definitely means the region has live objects. 1312 set_bit_for_region(hr); 1313 } 1314 1315 // Update the live region bitmap. 1316 if (marked_bytes > 0) { 1317 set_bit_for_region(hr); 1318 } 1319 1320 // Set the marked bytes for the current region so that 1321 // it can be queried by a calling verificiation routine 1322 _region_marked_bytes = marked_bytes; 1323 1324 return false; 1325 } 1326 1327 size_t region_marked_bytes() const { return _region_marked_bytes; } 1328 }; 1329 1330 // Heap region closure used for verifying the counting data 1331 // that was accumulated concurrently and aggregated during 1332 // the remark pause. This closure is applied to the heap 1333 // regions during the STW cleanup pause. 1334 1335 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { 1336 G1CollectedHeap* _g1h; 1337 ConcurrentMark* _cm; 1338 CalcLiveObjectsClosure _calc_cl; 1339 BitMap* _region_bm; // Region BM to be verified 1340 BitMap* _card_bm; // Card BM to be verified 1341 bool _verbose; // verbose output? 1342 1343 BitMap* _exp_region_bm; // Expected Region BM values 1344 BitMap* _exp_card_bm; // Expected card BM values 1345 1346 int _failures; 1347 1348 public: 1349 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h, 1350 BitMap* region_bm, 1351 BitMap* card_bm, 1352 BitMap* exp_region_bm, 1353 BitMap* exp_card_bm, 1354 bool verbose) : 1355 _g1h(g1h), _cm(g1h->concurrent_mark()), 1356 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm), 1357 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose), 1358 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), 1359 _failures(0) { } 1360 1361 int failures() const { return _failures; } 1362 1363 bool doHeapRegion(HeapRegion* hr) { 1364 if (hr->continuesHumongous()) { 1365 // We will ignore these here and process them when their 1366 // associated "starts humongous" region is processed (see 1367 // set_bit_for_heap_region()). Note that we cannot rely on their 1368 // associated "starts humongous" region to have their bit set to 1369 // 1 since, due to the region chunking in the parallel region 1370 // iteration, a "continues humongous" region might be visited 1371 // before its associated "starts humongous". 1372 return false; 1373 } 1374 1375 int failures = 0; 1376 1377 // Call the CalcLiveObjectsClosure to walk the marking bitmap for 1378 // this region and set the corresponding bits in the expected region 1379 // and card bitmaps. 1380 bool res = _calc_cl.doHeapRegion(hr); 1381 assert(res == false, "should be continuing"); 1382 1383 MutexLockerEx x((_verbose ? 
ParGCRareEvent_lock : NULL), 1384 Mutex::_no_safepoint_check_flag); 1385 1386 // Verify the marked bytes for this region. 1387 size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); 1388 size_t act_marked_bytes = hr->next_marked_bytes(); 1389 1390 // We're not OK if expected marked bytes > actual marked bytes. It means 1391 // we have missed accounting some objects during the actual marking. 1392 if (exp_marked_bytes > act_marked_bytes) { 1393 if (_verbose) { 1394 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: " 1395 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT, 1396 hr->hrs_index(), exp_marked_bytes, act_marked_bytes); 1397 } 1398 failures += 1; 1399 } 1400 1401 // Verify the bit, for this region, in the actual and expected 1402 // (which was just calculated) region bit maps. 1403 // We're not OK if the bit in the calculated expected region 1404 // bitmap is set and the bit in the actual region bitmap is not. 1405 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1406 1407 bool expected = _exp_region_bm->at(index); 1408 bool actual = _region_bm->at(index); 1409 if (expected && !actual) { 1410 if (_verbose) { 1411 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1412 "expected: %s, actual: %s", 1413 hr->hrs_index(), 1414 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1415 } 1416 failures += 1; 1417 } 1418 1419 // Verify that the card bit maps for the cards spanned by the current 1420 // region match. We have an error if we have a set bit in the expected 1421 // bit map and the corresponding bit in the actual bitmap is not set. 1422 1423 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1424 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1425 1426 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1427 expected = _exp_card_bm->at(i); 1428 actual = _card_bm->at(i); 1429 1430 if (expected && !actual) { 1431 if (_verbose) { 1432 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1433 "expected: %s, actual: %s", 1434 hr->hrs_index(), i, 1435 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1436 } 1437 failures += 1; 1438 } 1439 } 1440 1441 if (failures > 0 && _verbose) { 1442 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1443 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1444 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(), 1445 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1446 } 1447 1448 _failures += failures; 1449 1450 // We could stop iteration over the heap when we 1451 // find the first violating region by returning true. 
1452 return false; 1453 } 1454 }; 1455 1456 1457 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1458 protected: 1459 G1CollectedHeap* _g1h; 1460 ConcurrentMark* _cm; 1461 BitMap* _actual_region_bm; 1462 BitMap* _actual_card_bm; 1463 1464 uint _n_workers; 1465 1466 BitMap* _expected_region_bm; 1467 BitMap* _expected_card_bm; 1468 1469 int _failures; 1470 bool _verbose; 1471 1472 public: 1473 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1474 BitMap* region_bm, BitMap* card_bm, 1475 BitMap* expected_region_bm, BitMap* expected_card_bm) 1476 : AbstractGangTask("G1 verify final counting"), 1477 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1478 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1479 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1480 _failures(0), _verbose(false), 1481 _n_workers(0) { 1482 assert(VerifyDuringGC, "don't call this otherwise"); 1483 1484 // Use the value already set as the number of active threads 1485 // in the call to run_task(). 1486 if (G1CollectedHeap::use_parallel_gc_threads()) { 1487 assert( _g1h->workers()->active_workers() > 0, 1488 "Should have been previously set"); 1489 _n_workers = _g1h->workers()->active_workers(); 1490 } else { 1491 _n_workers = 1; 1492 } 1493 1494 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1495 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1496 1497 _verbose = _cm->verbose_medium(); 1498 } 1499 1500 void work(uint worker_id) { 1501 assert(worker_id < _n_workers, "invariant"); 1502 1503 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1504 _actual_region_bm, _actual_card_bm, 1505 _expected_region_bm, 1506 _expected_card_bm, 1507 _verbose); 1508 1509 if (G1CollectedHeap::use_parallel_gc_threads()) { 1510 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1511 worker_id, 1512 _n_workers, 1513 HeapRegion::VerifyCountClaimValue); 1514 } else { 1515 _g1h->heap_region_iterate(&verify_cl); 1516 } 1517 1518 Atomic::add(verify_cl.failures(), &_failures); 1519 } 1520 1521 int failures() const { return _failures; } 1522 }; 1523 1524 // Closure that finalizes the liveness counting data. 1525 // Used during the cleanup pause. 1526 // Sets the bits corresponding to the interval [NTAMS, top] 1527 // (which contains the implicitly live objects) in the 1528 // card liveness bitmap. Also sets the bit for each region, 1529 // containing live data, in the region liveness bitmap. 1530 1531 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1532 public: 1533 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1534 BitMap* region_bm, 1535 BitMap* card_bm) : 1536 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1537 1538 bool doHeapRegion(HeapRegion* hr) { 1539 1540 if (hr->continuesHumongous()) { 1541 // We will ignore these here and process them when their 1542 // associated "starts humongous" region is processed (see 1543 // set_bit_for_heap_region()). Note that we cannot rely on their 1544 // associated "starts humongous" region to have their bit set to 1545 // 1 since, due to the region chunking in the parallel region 1546 // iteration, a "continues humongous" region might be visited 1547 // before its associated "starts humongous". 1548 return false; 1549 } 1550 1551 HeapWord* ntams = hr->next_top_at_mark_start(); 1552 HeapWord* top = hr->top(); 1553 1554 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1555 1556 // Mark the allocated-since-marking portion... 
1557 if (ntams < top) { 1558 // This definitely means the region has live objects. 1559 set_bit_for_region(hr); 1560 1561 // Now set the bits in the card bitmap for [ntams, top) 1562 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1563 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1564 1565 // Note: if we're looking at the last region in heap - top 1566 // could be actually just beyond the end of the heap; end_idx 1567 // will then correspond to a (non-existent) card that is also 1568 // just beyond the heap. 1569 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1570 // end of object is not card aligned - increment to cover 1571 // all the cards spanned by the object 1572 end_idx += 1; 1573 } 1574 1575 assert(end_idx <= _card_bm->size(), 1576 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1577 end_idx, _card_bm->size())); 1578 assert(start_idx < _card_bm->size(), 1579 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1580 start_idx, _card_bm->size())); 1581 1582 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1583 } 1584 1585 // Set the bit for the region if it contains live data 1586 if (hr->next_marked_bytes() > 0) { 1587 set_bit_for_region(hr); 1588 } 1589 1590 return false; 1591 } 1592 }; 1593 1594 class G1ParFinalCountTask: public AbstractGangTask { 1595 protected: 1596 G1CollectedHeap* _g1h; 1597 ConcurrentMark* _cm; 1598 BitMap* _actual_region_bm; 1599 BitMap* _actual_card_bm; 1600 1601 uint _n_workers; 1602 1603 public: 1604 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1605 : AbstractGangTask("G1 final counting"), 1606 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1607 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1608 _n_workers(0) { 1609 // Use the value already set as the number of active threads 1610 // in the call to run_task(). 
1611 if (G1CollectedHeap::use_parallel_gc_threads()) { 1612 assert( _g1h->workers()->active_workers() > 0, 1613 "Should have been previously set"); 1614 _n_workers = _g1h->workers()->active_workers(); 1615 } else { 1616 _n_workers = 1; 1617 } 1618 } 1619 1620 void work(uint worker_id) { 1621 assert(worker_id < _n_workers, "invariant"); 1622 1623 FinalCountDataUpdateClosure final_update_cl(_g1h, 1624 _actual_region_bm, 1625 _actual_card_bm); 1626 1627 if (G1CollectedHeap::use_parallel_gc_threads()) { 1628 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1629 worker_id, 1630 _n_workers, 1631 HeapRegion::FinalCountClaimValue); 1632 } else { 1633 _g1h->heap_region_iterate(&final_update_cl); 1634 } 1635 } 1636 }; 1637 1638 class G1ParNoteEndTask; 1639 1640 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1641 G1CollectedHeap* _g1; 1642 int _worker_num; 1643 size_t _max_live_bytes; 1644 uint _regions_claimed; 1645 size_t _freed_bytes; 1646 FreeRegionList* _local_cleanup_list; 1647 OldRegionSet* _old_proxy_set; 1648 HumongousRegionSet* _humongous_proxy_set; 1649 HRRSCleanupTask* _hrrs_cleanup_task; 1650 double _claimed_region_time; 1651 double _max_region_time; 1652 1653 public: 1654 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1655 int worker_num, 1656 FreeRegionList* local_cleanup_list, 1657 OldRegionSet* old_proxy_set, 1658 HumongousRegionSet* humongous_proxy_set, 1659 HRRSCleanupTask* hrrs_cleanup_task) : 1660 _g1(g1), _worker_num(worker_num), 1661 _max_live_bytes(0), _regions_claimed(0), 1662 _freed_bytes(0), 1663 _claimed_region_time(0.0), _max_region_time(0.0), 1664 _local_cleanup_list(local_cleanup_list), 1665 _old_proxy_set(old_proxy_set), 1666 _humongous_proxy_set(humongous_proxy_set), 1667 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1668 1669 size_t freed_bytes() { return _freed_bytes; } 1670 1671 bool doHeapRegion(HeapRegion *hr) { 1672 if (hr->continuesHumongous()) { 1673 return false; 1674 } 1675 // We use a claim value of zero here because all regions 1676 // were claimed with value 1 in the FinalCount task. 
1677 _g1->reset_gc_time_stamps(hr); 1678 double start = os::elapsedTime(); 1679 _regions_claimed++; 1680 hr->note_end_of_marking(); 1681 _max_live_bytes += hr->max_live_bytes(); 1682 _g1->free_region_if_empty(hr, 1683 &_freed_bytes, 1684 _local_cleanup_list, 1685 _old_proxy_set, 1686 _humongous_proxy_set, 1687 _hrrs_cleanup_task, 1688 true /* par */); 1689 double region_time = (os::elapsedTime() - start); 1690 _claimed_region_time += region_time; 1691 if (region_time > _max_region_time) { 1692 _max_region_time = region_time; 1693 } 1694 return false; 1695 } 1696 1697 size_t max_live_bytes() { return _max_live_bytes; } 1698 uint regions_claimed() { return _regions_claimed; } 1699 double claimed_region_time_sec() { return _claimed_region_time; } 1700 double max_region_time_sec() { return _max_region_time; } 1701 }; 1702 1703 class G1ParNoteEndTask: public AbstractGangTask { 1704 friend class G1NoteEndOfConcMarkClosure; 1705 1706 protected: 1707 G1CollectedHeap* _g1h; 1708 size_t _max_live_bytes; 1709 size_t _freed_bytes; 1710 FreeRegionList* _cleanup_list; 1711 1712 public: 1713 G1ParNoteEndTask(G1CollectedHeap* g1h, 1714 FreeRegionList* cleanup_list) : 1715 AbstractGangTask("G1 note end"), _g1h(g1h), 1716 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1717 1718 void work(uint worker_id) { 1719 double start = os::elapsedTime(); 1720 FreeRegionList local_cleanup_list("Local Cleanup List"); 1721 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1722 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1723 HRRSCleanupTask hrrs_cleanup_task; 1724 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1725 &old_proxy_set, 1726 &humongous_proxy_set, 1727 &hrrs_cleanup_task); 1728 if (G1CollectedHeap::use_parallel_gc_threads()) { 1729 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1730 _g1h->workers()->active_workers(), 1731 HeapRegion::NoteEndClaimValue); 1732 } else { 1733 _g1h->heap_region_iterate(&g1_note_end); 1734 } 1735 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1736 1737 // Now update the lists 1738 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1739 NULL /* free_list */, 1740 &old_proxy_set, 1741 &humongous_proxy_set, 1742 true /* par */); 1743 { 1744 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1745 _max_live_bytes += g1_note_end.max_live_bytes(); 1746 _freed_bytes += g1_note_end.freed_bytes(); 1747 1748 // If we iterate over the global cleanup list at the end of 1749 // cleanup to do this printing we will not guarantee to only 1750 // generate output for the newly-reclaimed regions (the list 1751 // might not be empty at the beginning of cleanup; we might 1752 // still be working on its previous contents). So we do the 1753 // printing here, before we append the new regions to the global 1754 // cleanup list. 
1755 1756 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1757 if (hr_printer->is_active()) { 1758 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1759 while (iter.more_available()) { 1760 HeapRegion* hr = iter.get_next(); 1761 hr_printer->cleanup(hr); 1762 } 1763 } 1764 1765 _cleanup_list->add_as_tail(&local_cleanup_list); 1766 assert(local_cleanup_list.is_empty(), "post-condition"); 1767 1768 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1769 } 1770 } 1771 size_t max_live_bytes() { return _max_live_bytes; } 1772 size_t freed_bytes() { return _freed_bytes; } 1773 }; 1774 1775 class G1ParScrubRemSetTask: public AbstractGangTask { 1776 protected: 1777 G1RemSet* _g1rs; 1778 BitMap* _region_bm; 1779 BitMap* _card_bm; 1780 public: 1781 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1782 BitMap* region_bm, BitMap* card_bm) : 1783 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1784 _region_bm(region_bm), _card_bm(card_bm) { } 1785 1786 void work(uint worker_id) { 1787 if (G1CollectedHeap::use_parallel_gc_threads()) { 1788 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1789 HeapRegion::ScrubRemSetClaimValue); 1790 } else { 1791 _g1rs->scrub(_region_bm, _card_bm); 1792 } 1793 } 1794 1795 }; 1796 1797 void ConcurrentMark::cleanup() { 1798 // world is stopped at this checkpoint 1799 assert(SafepointSynchronize::is_at_safepoint(), 1800 "world should be stopped"); 1801 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1802 1803 // If a full collection has happened, we shouldn't do this. 1804 if (has_aborted()) { 1805 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1806 return; 1807 } 1808 1809 HRSPhaseSetter x(HRSPhaseCleanup); 1810 g1h->verify_region_sets_optional(); 1811 1812 if (VerifyDuringGC) { 1813 HandleMark hm; // handle scope 1814 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1815 Universe::heap()->prepare_for_verify(); 1816 Universe::verify(/* silent */ false, 1817 /* option */ VerifyOption_G1UsePrevMarking); 1818 } 1819 1820 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1821 g1p->record_concurrent_mark_cleanup_start(); 1822 1823 double start = os::elapsedTime(); 1824 1825 HeapRegionRemSet::reset_for_cleanup_tasks(); 1826 1827 uint n_workers; 1828 1829 // Do counting once more with the world stopped for good measure. 1830 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1831 1832 if (G1CollectedHeap::use_parallel_gc_threads()) { 1833 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1834 "sanity check"); 1835 1836 g1h->set_par_threads(); 1837 n_workers = g1h->n_par_threads(); 1838 assert(g1h->n_par_threads() == n_workers, 1839 "Should not have been reset"); 1840 g1h->workers()->run_task(&g1_par_count_task); 1841 // Done with the parallel phase so reset to 0. 1842 g1h->set_par_threads(0); 1843 1844 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1845 "sanity check"); 1846 } else { 1847 n_workers = 1; 1848 g1_par_count_task.work(0); 1849 } 1850 1851 if (VerifyDuringGC) { 1852 // Verify that the counting data accumulated during marking matches 1853 // that calculated by walking the marking bitmap. 
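// The expected bitmaps are populated from scratch by the verification task below;
// any mismatch against the actual ones is counted as a failure.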
1854 1855 // Bitmaps to hold expected values 1856 BitMap expected_region_bm(_region_bm.size(), false); 1857 BitMap expected_card_bm(_card_bm.size(), false); 1858 1859 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 1860 &_region_bm, 1861 &_card_bm, 1862 &expected_region_bm, 1863 &expected_card_bm); 1864 1865 if (G1CollectedHeap::use_parallel_gc_threads()) { 1866 g1h->set_par_threads((int)n_workers); 1867 g1h->workers()->run_task(&g1_par_verify_task); 1868 // Done with the parallel phase so reset to 0. 1869 g1h->set_par_threads(0); 1870 1871 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 1872 "sanity check"); 1873 } else { 1874 g1_par_verify_task.work(0); 1875 } 1876 1877 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 1878 } 1879 1880 size_t start_used_bytes = g1h->used(); 1881 g1h->set_marking_complete(); 1882 1883 double count_end = os::elapsedTime(); 1884 double this_final_counting_time = (count_end - start); 1885 _total_counting_time += this_final_counting_time; 1886 1887 if (G1PrintRegionLivenessInfo) { 1888 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 1889 _g1h->heap_region_iterate(&cl); 1890 } 1891 1892 // Install newly created mark bitMap as "prev". 1893 swapMarkBitMaps(); 1894 1895 g1h->reset_gc_time_stamp(); 1896 1897 // Note end of marking in all heap regions. 1898 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 1899 if (G1CollectedHeap::use_parallel_gc_threads()) { 1900 g1h->set_par_threads((int)n_workers); 1901 g1h->workers()->run_task(&g1_par_note_end_task); 1902 g1h->set_par_threads(0); 1903 1904 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 1905 "sanity check"); 1906 } else { 1907 g1_par_note_end_task.work(0); 1908 } 1909 g1h->check_gc_time_stamps(); 1910 1911 if (!cleanup_list_is_empty()) { 1912 // The cleanup list is not empty, so we'll have to process it 1913 // concurrently. Notify anyone else that might be wanting free 1914 // regions that there will be more free regions coming soon. 1915 g1h->set_free_regions_coming(); 1916 } 1917 1918 // call below, since it affects the metric by which we sort the heap 1919 // regions. 1920 if (G1ScrubRemSets) { 1921 double rs_scrub_start = os::elapsedTime(); 1922 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 1923 if (G1CollectedHeap::use_parallel_gc_threads()) { 1924 g1h->set_par_threads((int)n_workers); 1925 g1h->workers()->run_task(&g1_par_scrub_rs_task); 1926 g1h->set_par_threads(0); 1927 1928 assert(g1h->check_heap_region_claim_values( 1929 HeapRegion::ScrubRemSetClaimValue), 1930 "sanity check"); 1931 } else { 1932 g1_par_scrub_rs_task.work(0); 1933 } 1934 1935 double rs_scrub_end = os::elapsedTime(); 1936 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1937 _total_rs_scrub_time += this_rs_scrub_time; 1938 } 1939 1940 // this will also free any regions totally full of garbage objects, 1941 // and sort the regions. 1942 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 1943 1944 // Statistics. 1945 double end = os::elapsedTime(); 1946 _cleanup_times.add((end - start) * 1000.0); 1947 1948 if (G1Log::fine()) { 1949 g1h->print_size_transition(gclog_or_tty, 1950 start_used_bytes, 1951 g1h->used(), 1952 g1h->capacity()); 1953 } 1954 1955 // Clean up will have freed any regions completely full of garbage. 1956 // Update the soft reference policy with the new heap occupancy. 
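// (update_heap_info_at_gc() records the heap capacity and usage figures that the
// soft reference clearing policy bases its decisions on)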
1957   Universe::update_heap_info_at_gc();
1958
1959   // We need to make this be a "collection" so any collection pause that
1960   // races with it goes around and waits for completeCleanup to finish.
1961   g1h->increment_total_collections();
1962
1963   // We reclaimed old regions so we should calculate the sizes to make
1964   // sure we update the old gen/space data.
1965   g1h->g1mm()->update_sizes();
1966
1967   if (VerifyDuringGC) {
1968     HandleMark hm;  // handle scope
1969     gclog_or_tty->print(" VerifyDuringGC:(after)");
1970     Universe::heap()->prepare_for_verify();
1971     Universe::verify(/* silent */ false,
1972                      /* option */ VerifyOption_G1UsePrevMarking);
1973   }
1974
1975   g1h->verify_region_sets_optional();
1976 }
1977
1978 void ConcurrentMark::completeCleanup() {
1979   if (has_aborted()) return;
1980
1981   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1982
1983   _cleanup_list.verify_optional();
1984   FreeRegionList tmp_free_list("Tmp Free List");
1985
1986   if (G1ConcRegionFreeingVerbose) {
1987     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1988                            "cleanup list has %u entries",
1989                            _cleanup_list.length());
1990   }
1991
1992   // No one else should be accessing the _cleanup_list at this point,
1993   // so it's not necessary to take any locks
1994   while (!_cleanup_list.is_empty()) {
1995     HeapRegion* hr = _cleanup_list.remove_head();
1996     assert(hr != NULL, "the list was not empty");
1997     hr->par_clear();
1998     tmp_free_list.add_as_tail(hr);
1999
2000     // Instead of adding one region at a time to the secondary_free_list,
2001     // we accumulate them in the local list and move them a few at a
2002     // time. This also cuts down on the number of notify_all() calls
2003     // we do during this process. We'll also append the local list when
2004     // _cleanup_list is empty (which means we just removed the last
2005     // region from the _cleanup_list).
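// In other words, regions are handed over to the secondary_free_list in batches of
// G1SecondaryFreeListAppendLength, plus a final batch for whatever remains once the
// cleanup list has been emptied.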
2006 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2007 _cleanup_list.is_empty()) { 2008 if (G1ConcRegionFreeingVerbose) { 2009 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2010 "appending %u entries to the secondary_free_list, " 2011 "cleanup list still has %u entries", 2012 tmp_free_list.length(), 2013 _cleanup_list.length()); 2014 } 2015 2016 { 2017 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2018 g1h->secondary_free_list_add_as_tail(&tmp_free_list); 2019 SecondaryFreeList_lock->notify_all(); 2020 } 2021 2022 if (G1StressConcRegionFreeing) { 2023 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2024 os::sleep(Thread::current(), (jlong) 1, false); 2025 } 2026 } 2027 } 2028 } 2029 assert(tmp_free_list.is_empty(), "post-condition"); 2030 } 2031 2032 // Support closures for reference procssing in G1 2033 2034 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2035 HeapWord* addr = (HeapWord*)obj; 2036 return addr != NULL && 2037 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2038 } 2039 2040 class G1CMKeepAliveClosure: public OopClosure { 2041 G1CollectedHeap* _g1; 2042 ConcurrentMark* _cm; 2043 public: 2044 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) : 2045 _g1(g1), _cm(cm) { 2046 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id"); 2047 } 2048 2049 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2050 virtual void do_oop( oop* p) { do_oop_work(p); } 2051 2052 template <class T> void do_oop_work(T* p) { 2053 oop obj = oopDesc::load_decode_heap_oop(p); 2054 HeapWord* addr = (HeapWord*)obj; 2055 2056 if (_cm->verbose_high()) { 2057 gclog_or_tty->print_cr("\t[0] we're looking at location " 2058 "*"PTR_FORMAT" = "PTR_FORMAT, 2059 p, (void*) obj); 2060 } 2061 2062 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { 2063 _cm->mark_and_count(obj); 2064 _cm->mark_stack_push(obj); 2065 } 2066 } 2067 }; 2068 2069 class G1CMDrainMarkingStackClosure: public VoidClosure { 2070 ConcurrentMark* _cm; 2071 CMMarkStack* _markStack; 2072 G1CMKeepAliveClosure* _oopClosure; 2073 public: 2074 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack, 2075 G1CMKeepAliveClosure* oopClosure) : 2076 _cm(cm), 2077 _markStack(markStack), 2078 _oopClosure(oopClosure) { } 2079 2080 void do_void() { 2081 _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false); 2082 } 2083 }; 2084 2085 // 'Keep Alive' closure used by parallel reference processing. 2086 // An instance of this closure is used in the parallel reference processing 2087 // code rather than an instance of G1CMKeepAliveClosure. We could have used 2088 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are 2089 // placed on to discovered ref lists once so we can mark and push with no 2090 // need to check whether the object has already been marked. Using the 2091 // G1CMKeepAliveClosure would mean, however, having all the worker threads 2092 // operating on the global mark stack. This means that an individual 2093 // worker would be doing lock-free pushes while it processes its own 2094 // discovered ref list followed by drain call. If the discovered ref lists 2095 // are unbalanced then this could cause interference with the other 2096 // workers. Using a CMTask (and its embedded local data structures) 2097 // avoids that potential interference. 
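// The closure below drains its CMTask after every G1RefProcDrainInterval keep-alive
// calls (see _ref_counter).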
2098 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2099   ConcurrentMark* _cm;
2100   CMTask* _task;
2101   int _ref_counter_limit;
2102   int _ref_counter;
2103  public:
2104   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2105     _cm(cm), _task(task),
2106     _ref_counter_limit(G1RefProcDrainInterval) {
2107     assert(_ref_counter_limit > 0, "sanity");
2108     _ref_counter = _ref_counter_limit;
2109   }
2110
2111   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2112   virtual void do_oop( oop* p) { do_oop_work(p); }
2113
2114   template <class T> void do_oop_work(T* p) {
2115     if (!_cm->has_overflown()) {
2116       oop obj = oopDesc::load_decode_heap_oop(p);
2117       if (_cm->verbose_high()) {
2118         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2119                                "*"PTR_FORMAT" = "PTR_FORMAT,
2120                                _task->task_id(), p, (void*) obj);
2121       }
2122
2123       _task->deal_with_reference(obj);
2124       _ref_counter--;
2125
2126       if (_ref_counter == 0) {
2127         // We have dealt with _ref_counter_limit references, pushing them and objects
2128         // reachable from them on to the local stack (and possibly the global stack).
2129         // Call do_marking_step() to process these entries. We call the routine in a
2130         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2131         // with the entries that we've pushed as a result of the deal_with_reference
2132         // calls above) or we overflow.
2133         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2134         // while there may still be some work to do. (See the comment at the
2135         // beginning of CMTask::do_marking_step() for those conditions - one of which
2136         // is reaching the specified time target.) It is only when
2137         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2138         // that the marking has completed.
2139         do {
2140           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2141           _task->do_marking_step(mark_step_duration_ms,
2142                                  false /* do_stealing */,
2143                                  false /* do_termination */);
2144         } while (_task->has_aborted() && !_cm->has_overflown());
2145         _ref_counter = _ref_counter_limit;
2146       }
2147     } else {
2148       if (_cm->verbose_high()) {
2149         gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2150       }
2151     }
2152   }
2153 };
2154
2155 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2156   ConcurrentMark* _cm;
2157   CMTask* _task;
2158  public:
2159   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2160     _cm(cm), _task(task) { }
2161
2162   void do_void() {
2163     do {
2164       if (_cm->verbose_high()) {
2165         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2166                                _task->task_id());
2167       }
2168
2169       // We call CMTask::do_marking_step() to completely drain the local and
2170       // global marking stacks. The routine is called in a loop, which we'll
2171       // exit if there's nothing more to do (i.e. we've completely drained the
2172       // entries that were pushed as a result of applying the
2173       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2174       // lists above) or we overflow the global marking stack.
2175       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2176       // while there may still be some work to do. (See the comment at the
2177       // beginning of CMTask::do_marking_step() for those conditions - one of which
2178       // is reaching the specified time target.) It is only when
2179       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2180       // that the marking has completed.
2181 2182 _task->do_marking_step(1000000000.0 /* something very large */, 2183 true /* do_stealing */, 2184 true /* do_termination */); 2185 } while (_task->has_aborted() && !_cm->has_overflown()); 2186 } 2187 }; 2188 2189 // Implementation of AbstractRefProcTaskExecutor for parallel 2190 // reference processing at the end of G1 concurrent marking 2191 2192 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2193 private: 2194 G1CollectedHeap* _g1h; 2195 ConcurrentMark* _cm; 2196 WorkGang* _workers; 2197 int _active_workers; 2198 2199 public: 2200 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2201 ConcurrentMark* cm, 2202 WorkGang* workers, 2203 int n_workers) : 2204 _g1h(g1h), _cm(cm), 2205 _workers(workers), _active_workers(n_workers) { } 2206 2207 // Executes the given task using concurrent marking worker threads. 2208 virtual void execute(ProcessTask& task); 2209 virtual void execute(EnqueueTask& task); 2210 }; 2211 2212 class G1CMRefProcTaskProxy: public AbstractGangTask { 2213 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2214 ProcessTask& _proc_task; 2215 G1CollectedHeap* _g1h; 2216 ConcurrentMark* _cm; 2217 2218 public: 2219 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2220 G1CollectedHeap* g1h, 2221 ConcurrentMark* cm) : 2222 AbstractGangTask("Process reference objects in parallel"), 2223 _proc_task(proc_task), _g1h(g1h), _cm(cm) { } 2224 2225 virtual void work(uint worker_id) { 2226 CMTask* marking_task = _cm->task(worker_id); 2227 G1CMIsAliveClosure g1_is_alive(_g1h); 2228 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task); 2229 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); 2230 2231 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2232 } 2233 }; 2234 2235 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2236 assert(_workers != NULL, "Need parallel worker threads."); 2237 2238 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2239 2240 // We need to reset the phase for each task execution so that 2241 // the termination protocol of CMTask::do_marking_step works. 2242 _cm->set_phase(_active_workers, false /* concurrent */); 2243 _g1h->set_par_threads(_active_workers); 2244 _workers->run_task(&proc_task_proxy); 2245 _g1h->set_par_threads(0); 2246 } 2247 2248 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2249 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2250 EnqueueTask& _enq_task; 2251 2252 public: 2253 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2254 AbstractGangTask("Enqueue reference objects in parallel"), 2255 _enq_task(enq_task) { } 2256 2257 virtual void work(uint worker_id) { 2258 _enq_task.work(worker_id); 2259 } 2260 }; 2261 2262 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2263 assert(_workers != NULL, "Need parallel worker threads."); 2264 2265 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2266 2267 _g1h->set_par_threads(_active_workers); 2268 _workers->run_task(&enq_task_proxy); 2269 _g1h->set_par_threads(0); 2270 } 2271 2272 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2273 ResourceMark rm; 2274 HandleMark hm; 2275 2276 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2277 2278 // Is alive closure. 2279 G1CMIsAliveClosure g1_is_alive(g1h); 2280 2281 // Inner scope to exclude the cleaning of the string and symbol 2282 // tables from the displayed time. 
2283 { 2284 if (G1Log::finer()) { 2285 gclog_or_tty->put(' '); 2286 } 2287 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty); 2288 2289 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2290 2291 // See the comment in G1CollectedHeap::ref_processing_init() 2292 // about how reference processing currently works in G1. 2293 2294 // Process weak references. 2295 rp->setup_policy(clear_all_soft_refs); 2296 assert(_markStack.isEmpty(), "mark stack should be empty"); 2297 2298 G1CMKeepAliveClosure g1_keep_alive(g1h, this); 2299 G1CMDrainMarkingStackClosure 2300 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive); 2301 2302 // We use the work gang from the G1CollectedHeap and we utilize all 2303 // the worker threads. 2304 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U; 2305 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U); 2306 2307 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2308 g1h->workers(), active_workers); 2309 2310 if (rp->processing_is_mt()) { 2311 // Set the degree of MT here. If the discovery is done MT, there 2312 // may have been a different number of threads doing the discovery 2313 // and a different number of discovered lists may have Ref objects. 2314 // That is OK as long as the Reference lists are balanced (see 2315 // balance_all_queues() and balance_queues()). 2316 rp->set_active_mt_degree(active_workers); 2317 2318 rp->process_discovered_references(&g1_is_alive, 2319 &g1_keep_alive, 2320 &g1_drain_mark_stack, 2321 &par_task_executor); 2322 2323 // The work routines of the parallel keep_alive and drain_marking_stack 2324 // will set the has_overflown flag if we overflow the global marking 2325 // stack. 2326 } else { 2327 rp->process_discovered_references(&g1_is_alive, 2328 &g1_keep_alive, 2329 &g1_drain_mark_stack, 2330 NULL); 2331 } 2332 2333 assert(_markStack.overflow() || _markStack.isEmpty(), 2334 "mark stack should be empty (unless it overflowed)"); 2335 if (_markStack.overflow()) { 2336 // Should have been done already when we tried to push an 2337 // entry on to the global mark stack. But let's do it again. 2338 set_has_overflown(); 2339 } 2340 2341 if (rp->processing_is_mt()) { 2342 assert(rp->num_q() == active_workers, "why not"); 2343 rp->enqueue_discovered_references(&par_task_executor); 2344 } else { 2345 rp->enqueue_discovered_references(); 2346 } 2347 2348 rp->verify_no_references_recorded(); 2349 assert(!rp->discovery_enabled(), "Post condition"); 2350 } 2351 2352 // Now clean up stale oops in StringTable 2353 StringTable::unlink(&g1_is_alive); 2354 // Clean up unreferenced symbols in symbol table. 2355 SymbolTable::unlink(); 2356 } 2357 2358 void ConcurrentMark::swapMarkBitMaps() { 2359 CMBitMapRO* temp = _prevMarkBitMap; 2360 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2361 _nextMarkBitMap = (CMBitMap*) temp; 2362 } 2363 2364 class CMRemarkTask: public AbstractGangTask { 2365 private: 2366 ConcurrentMark *_cm; 2367 2368 public: 2369 void work(uint worker_id) { 2370 // Since all available tasks are actually started, we should 2371 // only proceed if we're supposed to be actived. 2372 if (worker_id < _cm->active_tasks()) { 2373 CMTask* task = _cm->task(worker_id); 2374 task->record_start_time(); 2375 do { 2376 task->do_marking_step(1000000000.0 /* something very large */, 2377 true /* do_stealing */, 2378 true /* do_termination */); 2379 } while (task->has_aborted() && !_cm->has_overflown()); 2380 // If we overflow, then we do not want to restart. 
We instead
2381 // want to abort remark and do concurrent marking again.
2382       task->record_end_time();
2383     }
2384   }
2385
2386   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2387     AbstractGangTask("Par Remark"), _cm(cm) {
2388     _cm->terminator()->reset_for_reuse(active_workers);
2389   }
2390 };
2391
2392 void ConcurrentMark::checkpointRootsFinalWork() {
2393   ResourceMark rm;
2394   HandleMark hm;
2395   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2396
2397   g1h->ensure_parsability(false);
2398
2399   if (G1CollectedHeap::use_parallel_gc_threads()) {
2400     G1CollectedHeap::StrongRootsScope srs(g1h);
2401     // this is remark, so we'll use up all active threads
2402     uint active_workers = g1h->workers()->active_workers();
2403     if (active_workers == 0) {
2404       assert(active_workers > 0, "Should have been set earlier");
2405       active_workers = (uint) ParallelGCThreads;
2406       g1h->workers()->set_active_workers(active_workers);
2407     }
2408     set_phase(active_workers, false /* concurrent */);
2409     // Leave _parallel_marking_threads at its
2410     // value originally calculated in the ConcurrentMark
2411     // constructor and pass values of the active workers
2412     // through the gang in the task.
2413
2414     CMRemarkTask remarkTask(this, active_workers);
2415     g1h->set_par_threads(active_workers);
2416     g1h->workers()->run_task(&remarkTask);
2417     g1h->set_par_threads(0);
2418   } else {
2419     G1CollectedHeap::StrongRootsScope srs(g1h);
2420     // this is remark, so we'll use up all available threads
2421     uint active_workers = 1;
2422     set_phase(active_workers, false /* concurrent */);
2423
2424     CMRemarkTask remarkTask(this, active_workers);
2425     // We will start all available threads, even if we decide that the
2426     // active_workers will be fewer. The extra ones will just bail out
2427     // immediately.
2428 remarkTask.work(0); 2429 } 2430 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2431 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant"); 2432 2433 print_stats(); 2434 2435 #if VERIFY_OBJS_PROCESSED 2436 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2437 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2438 _scan_obj_cl.objs_processed, 2439 ThreadLocalObjQueue::objs_enqueued); 2440 guarantee(_scan_obj_cl.objs_processed == 2441 ThreadLocalObjQueue::objs_enqueued, 2442 "Different number of objs processed and enqueued."); 2443 } 2444 #endif 2445 } 2446 2447 #ifndef PRODUCT 2448 2449 class PrintReachableOopClosure: public OopClosure { 2450 private: 2451 G1CollectedHeap* _g1h; 2452 outputStream* _out; 2453 VerifyOption _vo; 2454 bool _all; 2455 2456 public: 2457 PrintReachableOopClosure(outputStream* out, 2458 VerifyOption vo, 2459 bool all) : 2460 _g1h(G1CollectedHeap::heap()), 2461 _out(out), _vo(vo), _all(all) { } 2462 2463 void do_oop(narrowOop* p) { do_oop_work(p); } 2464 void do_oop( oop* p) { do_oop_work(p); } 2465 2466 template <class T> void do_oop_work(T* p) { 2467 oop obj = oopDesc::load_decode_heap_oop(p); 2468 const char* str = NULL; 2469 const char* str2 = ""; 2470 2471 if (obj == NULL) { 2472 str = ""; 2473 } else if (!_g1h->is_in_g1_reserved(obj)) { 2474 str = " O"; 2475 } else { 2476 HeapRegion* hr = _g1h->heap_region_containing(obj); 2477 guarantee(hr != NULL, "invariant"); 2478 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2479 bool marked = _g1h->is_marked(obj, _vo); 2480 2481 if (over_tams) { 2482 str = " >"; 2483 if (marked) { 2484 str2 = " AND MARKED"; 2485 } 2486 } else if (marked) { 2487 str = " M"; 2488 } else { 2489 str = " NOT"; 2490 } 2491 } 2492 2493 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2494 p, (void*) obj, str, str2); 2495 } 2496 }; 2497 2498 class PrintReachableObjectClosure : public ObjectClosure { 2499 private: 2500 G1CollectedHeap* _g1h; 2501 outputStream* _out; 2502 VerifyOption _vo; 2503 bool _all; 2504 HeapRegion* _hr; 2505 2506 public: 2507 PrintReachableObjectClosure(outputStream* out, 2508 VerifyOption vo, 2509 bool all, 2510 HeapRegion* hr) : 2511 _g1h(G1CollectedHeap::heap()), 2512 _out(out), _vo(vo), _all(all), _hr(hr) { } 2513 2514 void do_object(oop o) { 2515 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2516 bool marked = _g1h->is_marked(o, _vo); 2517 bool print_it = _all || over_tams || marked; 2518 2519 if (print_it) { 2520 _out->print_cr(" "PTR_FORMAT"%s", 2521 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2522 PrintReachableOopClosure oopCl(_out, _vo, _all); 2523 o->oop_iterate(&oopCl); 2524 } 2525 } 2526 }; 2527 2528 class PrintReachableRegionClosure : public HeapRegionClosure { 2529 private: 2530 G1CollectedHeap* _g1h; 2531 outputStream* _out; 2532 VerifyOption _vo; 2533 bool _all; 2534 2535 public: 2536 bool doHeapRegion(HeapRegion* hr) { 2537 HeapWord* b = hr->bottom(); 2538 HeapWord* e = hr->end(); 2539 HeapWord* t = hr->top(); 2540 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2541 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2542 "TAMS: "PTR_FORMAT, b, e, t, p); 2543 _out->cr(); 2544 2545 HeapWord* from = b; 2546 HeapWord* to = t; 2547 2548 if (to > from) { 2549 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2550 _out->cr(); 2551 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2552 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2553 _out->cr(); 2554 } 2555 2556 return false; 2557 } 2558 2559 PrintReachableRegionClosure(outputStream* out, 2560 VerifyOption vo, 2561 bool all) : 2562 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2563 }; 2564 2565 void ConcurrentMark::print_reachable(const char* str, 2566 VerifyOption vo, 2567 bool all) { 2568 gclog_or_tty->cr(); 2569 gclog_or_tty->print_cr("== Doing heap dump... "); 2570 2571 if (G1PrintReachableBaseFile == NULL) { 2572 gclog_or_tty->print_cr(" #### error: no base file defined"); 2573 return; 2574 } 2575 2576 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2577 (JVM_MAXPATHLEN - 1)) { 2578 gclog_or_tty->print_cr(" #### error: file name too long"); 2579 return; 2580 } 2581 2582 char file_name[JVM_MAXPATHLEN]; 2583 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2584 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2585 2586 fileStream fout(file_name); 2587 if (!fout.is_open()) { 2588 gclog_or_tty->print_cr(" #### error: could not open file"); 2589 return; 2590 } 2591 2592 outputStream* out = &fout; 2593 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2594 out->cr(); 2595 2596 out->print_cr("--- ITERATING OVER REGIONS"); 2597 out->cr(); 2598 PrintReachableRegionClosure rcl(out, vo, all); 2599 _g1h->heap_region_iterate(&rcl); 2600 out->cr(); 2601 2602 gclog_or_tty->print_cr(" done"); 2603 gclog_or_tty->flush(); 2604 } 2605 2606 #endif // PRODUCT 2607 2608 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2609 // Note we are overriding the read-only view of the prev map here, via 2610 // the cast. 2611 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2612 } 2613 2614 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2615 _nextMarkBitMap->clearRange(mr); 2616 } 2617 2618 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2619 clearRangePrevBitmap(mr); 2620 clearRangeNextBitmap(mr); 2621 } 2622 2623 HeapRegion* 2624 ConcurrentMark::claim_region(int task_num) { 2625 // "checkpoint" the finger 2626 HeapWord* finger = _finger; 2627 2628 // _heap_end will not change underneath our feet; it only changes at 2629 // yield points. 2630 while (finger < _heap_end) { 2631 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2632 2633 // Note on how this code handles humongous regions. In the 2634 // normal case the finger will reach the start of a "starts 2635 // humongous" (SH) region. Its end will either be the end of the 2636 // last "continues humongous" (CH) region in the sequence, or the 2637 // standard end of the SH region (if the SH is the only region in 2638 // the sequence). 
That way claim_region() will skip over the CH
2639     // regions. However, there is a subtle race between a CM thread
2640     // executing this method and a mutator thread doing a humongous
2641     // object allocation. The two are not mutually exclusive as the CM
2642     // thread does not need to hold the Heap_lock when it gets
2643     // here. So there is a chance that claim_region() will come across
2644     // a free region that's in the process of becoming a SH or a CH
2645     // region. In the former case, it will either
2646     //   a) Miss the update to the region's end, in which case it will
2647     //      visit every subsequent CH region, will find their bitmaps
2648     //      empty, and do nothing, or
2649     //   b) Will observe the update of the region's end (in which case
2650     //      it will skip the subsequent CH regions).
2651     // If it comes across a region that suddenly becomes CH, the
2652     // scenario will be similar to b). So, the race between
2653     // claim_region() and a humongous object allocation might force us
2654     // to do a bit of unnecessary work (due to some unnecessary bitmap
2655     // iterations) but it should not introduce any correctness issues.
2656     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2657     HeapWord* bottom = curr_region->bottom();
2658     HeapWord* end = curr_region->end();
2659     HeapWord* limit = curr_region->next_top_at_mark_start();
2660
2661     if (verbose_low()) {
2662       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2663                              "["PTR_FORMAT", "PTR_FORMAT"), "
2664                              "limit = "PTR_FORMAT,
2665                              task_num, curr_region, bottom, end, limit);
2666     }
2667
2668     // Is the gap between reading the finger and doing the CAS too long?
2669     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2670     if (res == finger) {
2671       // we succeeded
2672
2673       // notice that _finger == end cannot be guaranteed here since
2674       // someone else might have moved the finger even further
2675       assert(_finger >= end, "the finger should have moved forward");
2676
2677       if (verbose_low()) {
2678         gclog_or_tty->print_cr("[%d] we were successful with region = "
2679                                PTR_FORMAT, task_num, curr_region);
2680       }
2681
2682       if (limit > bottom) {
2683         if (verbose_low()) {
2684           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2685                                  "returning it ", task_num, curr_region);
2686         }
2687         return curr_region;
2688       } else {
2689         assert(limit == bottom,
2690                "the region limit should be at bottom");
2691         if (verbose_low()) {
2692           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2693                                  "returning NULL", task_num, curr_region);
2694         }
2695         // we return NULL and the caller should try calling
2696         // claim_region() again.
2697 return NULL; 2698 } 2699 } else { 2700 assert(_finger > finger, "the finger should have moved forward"); 2701 if (verbose_low()) { 2702 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2703 "global finger = "PTR_FORMAT", " 2704 "our finger = "PTR_FORMAT, 2705 task_num, _finger, finger); 2706 } 2707 2708 // read it again 2709 finger = _finger; 2710 } 2711 } 2712 2713 return NULL; 2714 } 2715 2716 #ifndef PRODUCT 2717 enum VerifyNoCSetOopsPhase { 2718 VerifyNoCSetOopsStack, 2719 VerifyNoCSetOopsQueues, 2720 VerifyNoCSetOopsSATBCompleted, 2721 VerifyNoCSetOopsSATBThread 2722 }; 2723 2724 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2725 private: 2726 G1CollectedHeap* _g1h; 2727 VerifyNoCSetOopsPhase _phase; 2728 int _info; 2729 2730 const char* phase_str() { 2731 switch (_phase) { 2732 case VerifyNoCSetOopsStack: return "Stack"; 2733 case VerifyNoCSetOopsQueues: return "Queue"; 2734 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2735 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2736 default: ShouldNotReachHere(); 2737 } 2738 return NULL; 2739 } 2740 2741 void do_object_work(oop obj) { 2742 guarantee(!_g1h->obj_in_cs(obj), 2743 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2744 (void*) obj, phase_str(), _info)); 2745 } 2746 2747 public: 2748 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2749 2750 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2751 _phase = phase; 2752 _info = info; 2753 } 2754 2755 virtual void do_oop(oop* p) { 2756 oop obj = oopDesc::load_decode_heap_oop(p); 2757 do_object_work(obj); 2758 } 2759 2760 virtual void do_oop(narrowOop* p) { 2761 // We should not come across narrow oops while scanning marking 2762 // stacks and SATB buffers. 2763 ShouldNotReachHere(); 2764 } 2765 2766 virtual void do_object(oop obj) { 2767 do_object_work(obj); 2768 } 2769 }; 2770 2771 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2772 bool verify_enqueued_buffers, 2773 bool verify_thread_buffers, 2774 bool verify_fingers) { 2775 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2776 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2777 return; 2778 } 2779 2780 VerifyNoCSetOopsClosure cl; 2781 2782 if (verify_stacks) { 2783 // Verify entries on the global mark stack 2784 cl.set_phase(VerifyNoCSetOopsStack); 2785 _markStack.oops_do(&cl); 2786 2787 // Verify entries on the task queues 2788 for (int i = 0; i < (int) _max_task_num; i += 1) { 2789 cl.set_phase(VerifyNoCSetOopsQueues, i); 2790 OopTaskQueue* queue = _task_queues->queue(i); 2791 queue->oops_do(&cl); 2792 } 2793 } 2794 2795 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2796 2797 // Verify entries on the enqueued SATB buffers 2798 if (verify_enqueued_buffers) { 2799 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2800 satb_qs.iterate_completed_buffers_read_only(&cl); 2801 } 2802 2803 // Verify entries on the per-thread SATB buffers 2804 if (verify_thread_buffers) { 2805 cl.set_phase(VerifyNoCSetOopsSATBThread); 2806 satb_qs.iterate_thread_buffers_read_only(&cl); 2807 } 2808 2809 if (verify_fingers) { 2810 // Verify the global finger 2811 HeapWord* global_finger = finger(); 2812 if (global_finger != NULL && global_finger < _heap_end) { 2813 // The global finger always points to a heap region boundary. 
We 2814 // use heap_region_containing_raw() to get the containing region 2815 // given that the global finger could be pointing to a free region 2816 // which subsequently becomes continues humongous. If that 2817 // happens, heap_region_containing() will return the bottom of the 2818 // corresponding starts humongous region and the check below will 2819 // not hold any more. 2820 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2821 guarantee(global_finger == global_hr->bottom(), 2822 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2823 global_finger, HR_FORMAT_PARAMS(global_hr))); 2824 } 2825 2826 // Verify the task fingers 2827 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2828 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2829 CMTask* task = _tasks[i]; 2830 HeapWord* task_finger = task->finger(); 2831 if (task_finger != NULL && task_finger < _heap_end) { 2832 // See above note on the global finger verification. 2833 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2834 guarantee(task_finger == task_hr->bottom() || 2835 !task_hr->in_collection_set(), 2836 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2837 task_finger, HR_FORMAT_PARAMS(task_hr))); 2838 } 2839 } 2840 } 2841 } 2842 #endif // PRODUCT 2843 2844 void ConcurrentMark::clear_marking_state(bool clear_overflow) { 2845 _markStack.setEmpty(); 2846 _markStack.clear_overflow(); 2847 if (clear_overflow) { 2848 clear_has_overflown(); 2849 } else { 2850 assert(has_overflown(), "pre-condition"); 2851 } 2852 _finger = _heap_start; 2853 2854 for (int i = 0; i < (int)_max_task_num; ++i) { 2855 OopTaskQueue* queue = _task_queues->queue(i); 2856 queue->set_empty(); 2857 } 2858 } 2859 2860 // Aggregate the counting data that was constructed concurrently 2861 // with marking. 2862 class AggregateCountDataHRClosure: public HeapRegionClosure { 2863 G1CollectedHeap* _g1h; 2864 ConcurrentMark* _cm; 2865 CardTableModRefBS* _ct_bs; 2866 BitMap* _cm_card_bm; 2867 size_t _max_task_num; 2868 2869 public: 2870 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2871 BitMap* cm_card_bm, 2872 size_t max_task_num) : 2873 _g1h(g1h), _cm(g1h->concurrent_mark()), 2874 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 2875 _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { } 2876 2877 bool doHeapRegion(HeapRegion* hr) { 2878 if (hr->continuesHumongous()) { 2879 // We will ignore these here and process them when their 2880 // associated "starts humongous" region is processed. 2881 // Note that we cannot rely on their associated 2882 // "starts humongous" region to have their bit set to 1 2883 // since, due to the region chunking in the parallel region 2884 // iteration, a "continues humongous" region might be visited 2885 // before its associated "starts humongous". 2886 return false; 2887 } 2888 2889 HeapWord* start = hr->bottom(); 2890 HeapWord* limit = hr->next_top_at_mark_start(); 2891 HeapWord* end = hr->end(); 2892 2893 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2894 err_msg("Preconditions not met - " 2895 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2896 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2897 start, limit, hr->top(), hr->end())); 2898 2899 assert(hr->next_marked_bytes() == 0, "Precondition"); 2900 2901 if (start == limit) { 2902 // NTAMS of this region has not been set so nothing to do. 2903 return false; 2904 } 2905 2906 // 'start' should be in the heap. 
2907     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2908     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2909     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2910
2911     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2912     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2913     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2914
2915     // If ntams is not card aligned then we bump the card bitmap index
2916     // for limit so that we get all the cards spanned by
2917     // the object ending at ntams.
2918     // Note: if this is the last region in the heap then ntams
2919     // could actually be just beyond the end of the heap;
2920     // limit_idx will then correspond to a (non-existent) card
2921     // that is also outside the heap.
2922     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2923       limit_idx += 1;
2924     }
2925
2926     assert(limit_idx <= end_idx, "or else use atomics");
2927
2928     // Aggregate the "stripe" in the count data associated with hr.
2929     uint hrs_index = hr->hrs_index();
2930     size_t marked_bytes = 0;
2931
2932     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2933       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2934       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2935
2936       // Fetch the marked_bytes in this region for task i and
2937       // add it to the running total for this region.
2938       marked_bytes += marked_bytes_array[hrs_index];
2939
2940       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2941       // into the global card bitmap.
2942       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2943
2944       while (scan_idx < limit_idx) {
2945         assert(task_card_bm->at(scan_idx) == true, "should be");
2946         _cm_card_bm->set_bit(scan_idx);
2947         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2948
2949         // BitMap::get_next_one_offset() can handle the case when
2950         // its left_offset parameter is greater than its right_offset
2951         // parameter. It does, however, have an early exit if
2952         // left_offset == right_offset. So let's limit the value
2953         // passed in for left offset here.
2954         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2955         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2956       }
2957     }
2958
2959     // Update the marked bytes for this region.
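// (the sum of the per-task marked byte counts aggregated in the loop above)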
2960 hr->add_to_marked_bytes(marked_bytes); 2961 2962 // Next heap region 2963 return false; 2964 } 2965 }; 2966 2967 class G1AggregateCountDataTask: public AbstractGangTask { 2968 protected: 2969 G1CollectedHeap* _g1h; 2970 ConcurrentMark* _cm; 2971 BitMap* _cm_card_bm; 2972 size_t _max_task_num; 2973 int _active_workers; 2974 2975 public: 2976 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2977 ConcurrentMark* cm, 2978 BitMap* cm_card_bm, 2979 size_t max_task_num, 2980 int n_workers) : 2981 AbstractGangTask("Count Aggregation"), 2982 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2983 _max_task_num(max_task_num), 2984 _active_workers(n_workers) { } 2985 2986 void work(uint worker_id) { 2987 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num); 2988 2989 if (G1CollectedHeap::use_parallel_gc_threads()) { 2990 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 2991 _active_workers, 2992 HeapRegion::AggregateCountClaimValue); 2993 } else { 2994 _g1h->heap_region_iterate(&cl); 2995 } 2996 } 2997 }; 2998 2999 3000 void ConcurrentMark::aggregate_count_data() { 3001 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3002 _g1h->workers()->active_workers() : 3003 1); 3004 3005 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3006 _max_task_num, n_workers); 3007 3008 if (G1CollectedHeap::use_parallel_gc_threads()) { 3009 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3010 "sanity check"); 3011 _g1h->set_par_threads(n_workers); 3012 _g1h->workers()->run_task(&g1_par_agg_task); 3013 _g1h->set_par_threads(0); 3014 3015 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3016 "sanity check"); 3017 _g1h->reset_heap_region_claim_values(); 3018 } else { 3019 g1_par_agg_task.work(0); 3020 } 3021 } 3022 3023 // Clear the per-worker arrays used to store the per-region counting data 3024 void ConcurrentMark::clear_all_count_data() { 3025 // Clear the global card bitmap - it will be filled during 3026 // liveness count aggregation (during remark) and the 3027 // final counting task. 3028 _card_bm.clear(); 3029 3030 // Clear the global region bitmap - it will be filled as part 3031 // of the final counting task. 
3032 _region_bm.clear(); 3033 3034 uint max_regions = _g1h->max_regions(); 3035 assert(_max_task_num != 0, "unitialized"); 3036 3037 for (int i = 0; (size_t) i < _max_task_num; i += 1) { 3038 BitMap* task_card_bm = count_card_bitmap_for(i); 3039 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3040 3041 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3042 assert(marked_bytes_array != NULL, "uninitialized"); 3043 3044 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3045 task_card_bm->clear(); 3046 } 3047 } 3048 3049 void ConcurrentMark::print_stats() { 3050 if (verbose_stats()) { 3051 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3052 for (size_t i = 0; i < _active_tasks; ++i) { 3053 _tasks[i]->print_stats(); 3054 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3055 } 3056 } 3057 } 3058 3059 // abandon current marking iteration due to a Full GC 3060 void ConcurrentMark::abort() { 3061 // Clear all marks to force marking thread to do nothing 3062 _nextMarkBitMap->clearAll(); 3063 // Clear the liveness counting data 3064 clear_all_count_data(); 3065 // Empty mark stack 3066 clear_marking_state(); 3067 for (int i = 0; i < (int)_max_task_num; ++i) { 3068 _tasks[i]->clear_region_fields(); 3069 } 3070 _has_aborted = true; 3071 3072 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3073 satb_mq_set.abandon_partial_marking(); 3074 // This can be called either during or outside marking, we'll read 3075 // the expected_active value from the SATB queue set. 3076 satb_mq_set.set_active_all_threads( 3077 false, /* new active value */ 3078 satb_mq_set.is_active() /* expected_active */); 3079 } 3080 3081 static void print_ms_time_info(const char* prefix, const char* name, 3082 NumberSeq& ns) { 3083 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3084 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3085 if (ns.num() > 0) { 3086 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", 3087 prefix, ns.sd(), ns.maximum()); 3088 } 3089 } 3090 3091 void ConcurrentMark::print_summary_info() { 3092 gclog_or_tty->print_cr(" Concurrent marking:"); 3093 print_ms_time_info(" ", "init marks", _init_times); 3094 print_ms_time_info(" ", "remarks", _remark_times); 3095 { 3096 print_ms_time_info(" ", "final marks", _remark_mark_times); 3097 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3098 3099 } 3100 print_ms_time_info(" ", "cleanups", _cleanup_times); 3101 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3102 _total_counting_time, 3103 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3104 (double)_cleanup_times.num() 3105 : 0.0)); 3106 if (G1ScrubRemSets) { 3107 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3108 _total_rs_scrub_time, 3109 (_cleanup_times.num() > 0 ? 
_total_rs_scrub_time * 1000.0 / 3110 (double)_cleanup_times.num() 3111 : 0.0)); 3112 } 3113 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3114 (_init_times.sum() + _remark_times.sum() + 3115 _cleanup_times.sum())/1000.0); 3116 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3117 "(%8.2f s marking).", 3118 cmThread()->vtime_accum(), 3119 cmThread()->vtime_mark_accum()); 3120 } 3121 3122 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3123 _parallel_workers->print_worker_threads_on(st); 3124 } 3125 3126 // We take a break if someone is trying to stop the world. 3127 bool ConcurrentMark::do_yield_check(uint worker_id) { 3128 if (should_yield()) { 3129 if (worker_id == 0) { 3130 _g1h->g1_policy()->record_concurrent_pause(); 3131 } 3132 cmThread()->yield(); 3133 return true; 3134 } else { 3135 return false; 3136 } 3137 } 3138 3139 bool ConcurrentMark::should_yield() { 3140 return cmThread()->should_yield(); 3141 } 3142 3143 bool ConcurrentMark::containing_card_is_marked(void* p) { 3144 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3145 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3146 } 3147 3148 bool ConcurrentMark::containing_cards_are_marked(void* start, 3149 void* last) { 3150 return containing_card_is_marked(start) && 3151 containing_card_is_marked(last); 3152 } 3153 3154 #ifndef PRODUCT 3155 // for debugging purposes 3156 void ConcurrentMark::print_finger() { 3157 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3158 _heap_start, _heap_end, _finger); 3159 for (int i = 0; i < (int) _max_task_num; ++i) { 3160 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3161 } 3162 gclog_or_tty->print_cr(""); 3163 } 3164 #endif 3165 3166 void CMTask::scan_object(oop obj) { 3167 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3168 3169 if (_cm->verbose_high()) { 3170 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3171 _task_id, (void*) obj); 3172 } 3173 3174 size_t obj_size = obj->size(); 3175 _words_scanned += obj_size; 3176 3177 obj->oop_iterate(_cm_oop_closure); 3178 statsOnly( ++_objs_scanned ); 3179 check_limits(); 3180 } 3181 3182 // Closure for iteration over bitmaps 3183 class CMBitMapClosure : public BitMapClosure { 3184 private: 3185 // the bitmap that is being iterated over 3186 CMBitMap* _nextMarkBitMap; 3187 ConcurrentMark* _cm; 3188 CMTask* _task; 3189 3190 public: 3191 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3192 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3193 3194 bool do_bit(size_t offset) { 3195 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3196 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3197 assert( addr < _cm->finger(), "invariant"); 3198 3199 statsOnly( _task->increase_objs_found_on_bitmap() ); 3200 assert(addr >= _task->finger(), "invariant"); 3201 3202 // We move that task's local finger along. 3203 _task->move_finger_to(addr); 3204 3205 _task->scan_object(oop(addr)); 3206 // we only partially drain the local queue and global stack 3207 _task->drain_local_queue(true); 3208 _task->drain_global_stack(true); 3209 3210 // if the has_aborted flag has been raised, we need to bail out of 3211 // the iteration 3212 return !_task->has_aborted(); 3213 } 3214 }; 3215 3216 // Closure for iterating over objects, currently only used for 3217 // processing SATB buffers. 
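// Each object lifted out of an SATB buffer is simply handed to the owning
// CMTask via deal_with_reference().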
3218 class CMObjectClosure : public ObjectClosure { 3219 private: 3220 CMTask* _task; 3221 3222 public: 3223 void do_object(oop obj) { 3224 _task->deal_with_reference(obj); 3225 } 3226 3227 CMObjectClosure(CMTask* task) : _task(task) { } 3228 }; 3229 3230 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 3231 ConcurrentMark* cm, 3232 CMTask* task) 3233 : _g1h(g1h), _cm(cm), _task(task) { 3234 assert(_ref_processor == NULL, "should be initialized to NULL"); 3235 3236 if (G1UseConcMarkReferenceProcessing) { 3237 _ref_processor = g1h->ref_processor_cm(); 3238 assert(_ref_processor != NULL, "should not be NULL"); 3239 } 3240 } 3241 3242 void CMTask::setup_for_region(HeapRegion* hr) { 3243 // Separated the asserts so that we know which one fires. 3244 assert(hr != NULL, 3245 "claim_region() should have filtered out continues humongous regions"); 3246 assert(!hr->continuesHumongous(), 3247 "claim_region() should have filtered out continues humongous regions"); 3248 3249 if (_cm->verbose_low()) { 3250 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, 3251 _task_id, hr); 3252 } 3253 3254 _curr_region = hr; 3255 _finger = hr->bottom(); 3256 update_region_limit(); 3257 } 3258 3259 void CMTask::update_region_limit() { 3260 HeapRegion* hr = _curr_region; 3261 HeapWord* bottom = hr->bottom(); 3262 HeapWord* limit = hr->next_top_at_mark_start(); 3263 3264 if (limit == bottom) { 3265 if (_cm->verbose_low()) { 3266 gclog_or_tty->print_cr("[%d] found an empty region " 3267 "["PTR_FORMAT", "PTR_FORMAT")", 3268 _task_id, bottom, limit); 3269 } 3270 // The region was collected underneath our feet. 3271 // We set the finger to bottom to ensure that the bitmap 3272 // iteration that will follow this will not do anything. 3273 // (this is not a condition that holds when we set the region up, 3274 // as the region is not supposed to be empty in the first place) 3275 _finger = bottom; 3276 } else if (limit >= _region_limit) { 3277 assert(limit >= _finger, "peace of mind"); 3278 } else { 3279 assert(limit < _region_limit, "only way to get here"); 3280 // This can happen under some pretty unusual circumstances. An 3281 // evacuation pause empties the region underneath our feet (NTAMS 3282 // at bottom). We then do some allocation in the region (NTAMS 3283 // stays at bottom), followed by the region being used as a GC 3284 // alloc region (NTAMS will move to top() and the objects 3285 // originally below it will be grayed). All objects now marked in 3286 // the region are explicitly grayed, if below the global finger, 3287 // and we do not need in fact to scan anything else. So, we simply 3288 // set _finger to be limit to ensure that the bitmap iteration 3289 // doesn't do anything. 3290 _finger = limit; 3291 } 3292 3293 _region_limit = limit; 3294 } 3295 3296 void CMTask::giveup_current_region() { 3297 assert(_curr_region != NULL, "invariant"); 3298 if (_cm->verbose_low()) { 3299 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 3300 _task_id, _curr_region); 3301 } 3302 clear_region_fields(); 3303 } 3304 3305 void CMTask::clear_region_fields() { 3306 // Values for these three fields that indicate that we're not 3307 // holding on to a region. 
3308 _curr_region = NULL; 3309 _finger = NULL; 3310 _region_limit = NULL; 3311 } 3312 3313 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3314 if (cm_oop_closure == NULL) { 3315 assert(_cm_oop_closure != NULL, "invariant"); 3316 } else { 3317 assert(_cm_oop_closure == NULL, "invariant"); 3318 } 3319 _cm_oop_closure = cm_oop_closure; 3320 } 3321 3322 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3323 guarantee(nextMarkBitMap != NULL, "invariant"); 3324 3325 if (_cm->verbose_low()) { 3326 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3327 } 3328 3329 _nextMarkBitMap = nextMarkBitMap; 3330 clear_region_fields(); 3331 3332 _calls = 0; 3333 _elapsed_time_ms = 0.0; 3334 _termination_time_ms = 0.0; 3335 _termination_start_time_ms = 0.0; 3336 3337 #if _MARKING_STATS_ 3338 _local_pushes = 0; 3339 _local_pops = 0; 3340 _local_max_size = 0; 3341 _objs_scanned = 0; 3342 _global_pushes = 0; 3343 _global_pops = 0; 3344 _global_max_size = 0; 3345 _global_transfers_to = 0; 3346 _global_transfers_from = 0; 3347 _regions_claimed = 0; 3348 _objs_found_on_bitmap = 0; 3349 _satb_buffers_processed = 0; 3350 _steal_attempts = 0; 3351 _steals = 0; 3352 _aborted = 0; 3353 _aborted_overflow = 0; 3354 _aborted_cm_aborted = 0; 3355 _aborted_yield = 0; 3356 _aborted_timed_out = 0; 3357 _aborted_satb = 0; 3358 _aborted_termination = 0; 3359 #endif // _MARKING_STATS_ 3360 } 3361 3362 bool CMTask::should_exit_termination() { 3363 regular_clock_call(); 3364 // This is called when we are in the termination protocol. We should 3365 // quit if, for some reason, this task wants to abort or the global 3366 // stack is not empty (this means that we can get work from it). 3367 return !_cm->mark_stack_empty() || has_aborted(); 3368 } 3369 3370 void CMTask::reached_limit() { 3371 assert(_words_scanned >= _words_scanned_limit || 3372 _refs_reached >= _refs_reached_limit , 3373 "shouldn't have been called otherwise"); 3374 regular_clock_call(); 3375 } 3376 3377 void CMTask::regular_clock_call() { 3378 if (has_aborted()) return; 3379 3380 // First, we need to recalculate the words scanned and refs reached 3381 // limits for the next clock call. 3382 recalculate_limits(); 3383 3384 // During the regular clock call we do the following 3385 3386 // (1) If an overflow has been flagged, then we abort. 3387 if (_cm->has_overflown()) { 3388 set_has_aborted(); 3389 return; 3390 } 3391 3392 // If we are not concurrent (i.e. we're doing remark) we don't need 3393 // to check anything else. The other steps are only needed during 3394 // the concurrent marking phase. 3395 if (!concurrent()) return; 3396 3397 // (2) If marking has been aborted for Full GC, then we also abort. 3398 if (_cm->has_aborted()) { 3399 set_has_aborted(); 3400 statsOnly( ++_aborted_cm_aborted ); 3401 return; 3402 } 3403 3404 double curr_time_ms = os::elapsedVTime() * 1000.0; 3405 3406 // (3) If marking stats are enabled, then we update the step history. 
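// (The interval history and the clock-cause counters below are only compiled in when _MARKING_STATS_ is defined, so builds without it skip this bookkeeping entirely.)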
3407 #if _MARKING_STATS_ 3408 if (_words_scanned >= _words_scanned_limit) { 3409 ++_clock_due_to_scanning; 3410 } 3411 if (_refs_reached >= _refs_reached_limit) { 3412 ++_clock_due_to_marking; 3413 } 3414 3415 double last_interval_ms = curr_time_ms - _interval_start_time_ms; 3416 _interval_start_time_ms = curr_time_ms; 3417 _all_clock_intervals_ms.add(last_interval_ms); 3418 3419 if (_cm->verbose_medium()) { 3420 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 3421 "scanned = %d%s, refs reached = %d%s", 3422 _task_id, last_interval_ms, 3423 _words_scanned, 3424 (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 3425 _refs_reached, 3426 (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 3427 } 3428 #endif // _MARKING_STATS_ 3429 3430 // (4) We check whether we should yield. If we have to, then we abort. 3431 if (_cm->should_yield()) { 3432 // We should yield. To do this we abort the task. The caller is 3433 // responsible for yielding. 3434 set_has_aborted(); 3435 statsOnly( ++_aborted_yield ); 3436 return; 3437 } 3438 3439 // (5) We check whether we've reached our time quota. If we have, 3440 // then we abort. 3441 double elapsed_time_ms = curr_time_ms - _start_time_ms; 3442 if (elapsed_time_ms > _time_target_ms) { 3443 set_has_aborted(); 3444 _has_timed_out = true; 3445 statsOnly( ++_aborted_timed_out ); 3446 return; 3447 } 3448 3449 // (6) Finally, we check whether there are enough completed SATB 3450 // buffers available for processing. If there are, we abort. 3451 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3452 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 3453 if (_cm->verbose_low()) { 3454 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 3455 _task_id); 3456 } 3457 // we do need to process SATB buffers, we'll abort and restart 3458 // the marking task to do so 3459 set_has_aborted(); 3460 statsOnly( ++_aborted_satb ); 3461 return; 3462 } 3463 } 3464 3465 void CMTask::recalculate_limits() { 3466 _real_words_scanned_limit = _words_scanned + words_scanned_period; 3467 _words_scanned_limit = _real_words_scanned_limit; 3468 3469 _real_refs_reached_limit = _refs_reached + refs_reached_period; 3470 _refs_reached_limit = _real_refs_reached_limit; 3471 } 3472 3473 void CMTask::decrease_limits() { 3474 // This is called when we believe that we're going to do an infrequent 3475 // operation which will increase the per byte scanned cost (i.e. move 3476 // entries to/from the global stack). It basically tries to decrease the 3477 // scanning limit so that the clock is called earlier.
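// For example, if the words-scanned period is P, the adjusted limit below becomes _real_words_scanned_limit - 3P/4, so at most about a quarter of a full period of scanning remains before regular_clock_call() fires again.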
3478 3479 if (_cm->verbose_medium()) { 3480 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 3481 } 3482 3483 _words_scanned_limit = _real_words_scanned_limit - 3484 3 * words_scanned_period / 4; 3485 _refs_reached_limit = _real_refs_reached_limit - 3486 3 * refs_reached_period / 4; 3487 } 3488 3489 void CMTask::move_entries_to_global_stack() { 3490 // local array where we'll store the entries that will be popped 3491 // from the local queue 3492 oop buffer[global_stack_transfer_size]; 3493 3494 int n = 0; 3495 oop obj; 3496 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 3497 buffer[n] = obj; 3498 ++n; 3499 } 3500 3501 if (n > 0) { 3502 // we popped at least one entry from the local queue 3503 3504 statsOnly( ++_global_transfers_to; _local_pops += n ); 3505 3506 if (!_cm->mark_stack_push(buffer, n)) { 3507 if (_cm->verbose_low()) { 3508 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", 3509 _task_id); 3510 } 3511 set_has_aborted(); 3512 } else { 3513 // the transfer was successful 3514 3515 if (_cm->verbose_medium()) { 3516 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 3517 _task_id, n); 3518 } 3519 statsOnly( int tmp_size = _cm->mark_stack_size(); 3520 if (tmp_size > _global_max_size) { 3521 _global_max_size = tmp_size; 3522 } 3523 _global_pushes += n ); 3524 } 3525 } 3526 3527 // this operation was quite expensive, so decrease the limits 3528 decrease_limits(); 3529 } 3530 3531 void CMTask::get_entries_from_global_stack() { 3532 // local array where we'll store the entries that will be popped 3533 // from the global stack. 3534 oop buffer[global_stack_transfer_size]; 3535 int n; 3536 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 3537 assert(n <= global_stack_transfer_size, 3538 "we should not pop more than the given limit"); 3539 if (n > 0) { 3540 // yes, we did actually pop at least one entry 3541 3542 statsOnly( ++_global_transfers_from; _global_pops += n ); 3543 if (_cm->verbose_medium()) { 3544 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 3545 _task_id, n); 3546 } 3547 for (int i = 0; i < n; ++i) { 3548 bool success = _task_queue->push(buffer[i]); 3549 // We only call this when the local queue is empty or under a 3550 // given target limit. So, we do not expect this push to fail. 3551 assert(success, "invariant"); 3552 } 3553 3554 statsOnly( int tmp_size = _task_queue->size(); 3555 if (tmp_size > _local_max_size) { 3556 _local_max_size = tmp_size; 3557 } 3558 _local_pushes += n ); 3559 } 3560 3561 // this operation was quite expensive, so decrease the limits 3562 decrease_limits(); 3563 } 3564 3565 void CMTask::drain_local_queue(bool partially) { 3566 if (has_aborted()) return; 3567 3568 // Decide what the target size is, depending whether we're going to 3569 // drain it partially (so that other tasks can steal if they run out 3570 // of things to do) or totally (at the very end). 
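// A partial drain leaves up to MIN2(_task_queue->max_elems()/3, GCDrainStackTargetSize) entries on the queue so that other tasks still have something to steal; a total drain uses a target of zero.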
3571 size_t target_size; 3572 if (partially) { 3573 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 3574 } else { 3575 target_size = 0; 3576 } 3577 3578 if (_task_queue->size() > target_size) { 3579 if (_cm->verbose_high()) { 3580 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", 3581 _task_id, target_size); 3582 } 3583 3584 oop obj; 3585 bool ret = _task_queue->pop_local(obj); 3586 while (ret) { 3587 statsOnly( ++_local_pops ); 3588 3589 if (_cm->verbose_high()) { 3590 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 3591 (void*) obj); 3592 } 3593 3594 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 3595 assert(!_g1h->is_on_master_free_list( 3596 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 3597 3598 scan_object(obj); 3599 3600 if (_task_queue->size() <= target_size || has_aborted()) { 3601 ret = false; 3602 } else { 3603 ret = _task_queue->pop_local(obj); 3604 } 3605 } 3606 3607 if (_cm->verbose_high()) { 3608 gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 3609 _task_id, _task_queue->size()); 3610 } 3611 } 3612 } 3613 3614 void CMTask::drain_global_stack(bool partially) { 3615 if (has_aborted()) return; 3616 3617 // We have a policy to drain the local queue before we attempt to 3618 // drain the global stack. 3619 assert(partially || _task_queue->size() == 0, "invariant"); 3620 3621 // Decide what the target size is, depending whether we're going to 3622 // drain it partially (so that other tasks can steal if they run out 3623 // of things to do) or totally (at the very end). Notice that, 3624 // because we move entries from the global stack in chunks or 3625 // because another task might be doing the same, we might in fact 3626 // drop below the target. But, this is not a problem. 3627 size_t target_size; 3628 if (partially) { 3629 target_size = _cm->partial_mark_stack_size_target(); 3630 } else { 3631 target_size = 0; 3632 } 3633 3634 if (_cm->mark_stack_size() > target_size) { 3635 if (_cm->verbose_low()) { 3636 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", 3637 _task_id, target_size); 3638 } 3639 3640 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 3641 get_entries_from_global_stack(); 3642 drain_local_queue(partially); 3643 } 3644 3645 if (_cm->verbose_low()) { 3646 gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 3647 _task_id, _cm->mark_stack_size()); 3648 } 3649 } 3650 } 3651 3652 // SATB Queue has several assumptions on whether to call the par or 3653 // non-par versions of the methods. this is why some of the code is 3654 // replicated. We should really get rid of the single-threaded version 3655 // of the code to simplify things. 3656 void CMTask::drain_satb_buffers() { 3657 if (has_aborted()) return; 3658 3659 // We set this so that the regular clock knows that we're in the 3660 // middle of draining buffers and doesn't set the abort flag when it 3661 // notices that SATB buffers are available for draining. It'd be 3662 // very counter productive if it did that. :-) 3663 _draining_satb_buffers = true; 3664 3665 CMObjectClosure oc(this); 3666 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3667 if (G1CollectedHeap::use_parallel_gc_threads()) { 3668 satb_mq_set.set_par_closure(_task_id, &oc); 3669 } else { 3670 satb_mq_set.set_closure(&oc); 3671 } 3672 3673 // This keeps claiming and applying the closure to completed buffers 3674 // until we run out of buffers or we need to abort. 
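// The parallel and serial branches below do the same work and differ only in the par_* entry points they use; regular_clock_call() is invoked after each buffer so that the task can still honour its time target while draining.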
3675 if (G1CollectedHeap::use_parallel_gc_threads()) { 3676 while (!has_aborted() && 3677 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3678 if (_cm->verbose_medium()) { 3679 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3680 } 3681 statsOnly( ++_satb_buffers_processed ); 3682 regular_clock_call(); 3683 } 3684 } else { 3685 while (!has_aborted() && 3686 satb_mq_set.apply_closure_to_completed_buffer()) { 3687 if (_cm->verbose_medium()) { 3688 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3689 } 3690 statsOnly( ++_satb_buffers_processed ); 3691 regular_clock_call(); 3692 } 3693 } 3694 3695 if (!concurrent() && !has_aborted()) { 3696 // We should only do this during remark. 3697 if (G1CollectedHeap::use_parallel_gc_threads()) { 3698 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3699 } else { 3700 satb_mq_set.iterate_closure_all_threads(); 3701 } 3702 } 3703 3704 _draining_satb_buffers = false; 3705 3706 assert(has_aborted() || 3707 concurrent() || 3708 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3709 3710 if (G1CollectedHeap::use_parallel_gc_threads()) { 3711 satb_mq_set.set_par_closure(_task_id, NULL); 3712 } else { 3713 satb_mq_set.set_closure(NULL); 3714 } 3715 3716 // again, this was a potentially expensive operation, decrease the 3717 // limits to get the regular clock call early 3718 decrease_limits(); 3719 } 3720 3721 void CMTask::print_stats() { 3722 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 3723 _task_id, _calls); 3724 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3725 _elapsed_time_ms, _termination_time_ms); 3726 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3727 _step_times_ms.num(), _step_times_ms.avg(), 3728 _step_times_ms.sd()); 3729 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3730 _step_times_ms.maximum(), _step_times_ms.sum()); 3731 3732 #if _MARKING_STATS_ 3733 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3734 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3735 _all_clock_intervals_ms.sd()); 3736 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3737 _all_clock_intervals_ms.maximum(), 3738 _all_clock_intervals_ms.sum()); 3739 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3740 _clock_due_to_scanning, _clock_due_to_marking); 3741 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3742 _objs_scanned, _objs_found_on_bitmap); 3743 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3744 _local_pushes, _local_pops, _local_max_size); 3745 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3746 _global_pushes, _global_pops, _global_max_size); 3747 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3748 _global_transfers_to,_global_transfers_from); 3749 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3750 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3751 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3752 _steal_attempts, _steals); 3753 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3754 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3755 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3756 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3757 _aborted_timed_out, _aborted_satb, 
_aborted_termination); 3758 #endif // _MARKING_STATS_ 3759 } 3760 3761 /***************************************************************************** 3762 3763 The do_marking_step(time_target_ms) method is the building block 3764 of the parallel marking framework. It can be called in parallel 3765 with other invocations of do_marking_step() on different tasks 3766 (but only one per task, obviously) and concurrently with the 3767 mutator threads, or during remark, hence it eliminates the need 3768 for two versions of the code. When called during remark, it will 3769 pick up from where the task left off during the concurrent marking 3770 phase. Interestingly, tasks are also claimable during evacuation 3771 pauses, since do_marking_step() ensures that it aborts before 3772 it needs to yield. 3773 3774 The data structures that it uses to do marking work are the 3775 following: 3776 3777 (1) Marking Bitmap. If there are gray objects that appear only 3778 on the bitmap (this happens either when dealing with an overflow 3779 or when the initial marking phase has simply marked the roots 3780 and didn't push them on the stack), then tasks claim heap 3781 regions whose bitmap they then scan to find gray objects. A 3782 global finger indicates where the end of the last claimed region 3783 is. A local finger indicates how far into the region a task has 3784 scanned. The two fingers are used to determine how to gray an 3785 object (i.e. whether simply marking it is OK, as it will be 3786 visited by a task in the future, or whether it also needs to be 3787 pushed on a stack). 3788 3789 (2) Local Queue. The local queue of the task which is accessed 3790 reasonably efficiently by the task. Other tasks can steal from 3791 it when they run out of work. Throughout the marking phase, a 3792 task attempts to keep its local queue short but not totally 3793 empty, so that entries are available for stealing by other 3794 tasks. Only when there is no more work will a task totally 3795 drain its local queue. 3796 3797 (3) Global Mark Stack. This handles local queue overflow. During 3798 marking only sets of entries are moved between it and the local 3799 queues, as access to it requires a mutex and more fine-grained 3800 interaction with it might cause contention. If it 3801 overflows, then the marking phase should restart and iterate 3802 over the bitmap to identify gray objects. Throughout the marking 3803 phase, tasks attempt to keep the global mark stack at a small 3804 length but not totally empty, so that entries are available for 3805 popping by other tasks. Only when there is no more work will 3806 tasks totally drain the global mark stack. 3807 3808 (4) SATB Buffer Queue. This is where completed SATB buffers are 3809 made available. Buffers are regularly removed from this queue 3810 and scanned for roots, so that the queue doesn't get too 3811 long. During remark, all completed buffers are processed, as 3812 well as the filled-in parts of any uncompleted buffers. 3813 3814 The do_marking_step() method tries to abort when the time target 3815 has been reached. There are a few other cases when the 3816 do_marking_step() method also aborts: 3817 3818 (1) When the marking phase has been aborted (after a Full GC). 3819 3820 (2) When a global overflow (on the global stack) has been 3821 triggered. Before the task aborts, it will actually sync up with 3822 the other tasks to ensure that all the marking data structures 3823 (local queues, stacks, fingers etc.)
are re-initialised so that 3824 when do_marking_step() completes, the marking phase can 3825 immediately restart. 3826 3827 (3) When enough completed SATB buffers are available. The 3828 do_marking_step() method only tries to drain SATB buffers right 3829 at the beginning. So, if enough buffers are available, the 3830 marking step aborts and the SATB buffers are processed at 3831 the beginning of the next invocation. 3832 3833 (4) To yield. When we have to yield, we abort and do the yield 3834 right at the end of do_marking_step(). This saves us from a lot 3835 of hassle, as by yielding we might allow a Full GC. If this 3836 happens, then objects will be compacted underneath our feet, the 3837 heap might shrink, etc. We save checking for this by just 3838 aborting and doing the yield right at the end. 3839 3840 From the above it follows that the do_marking_step() method should 3841 be called in a loop (or, otherwise, regularly) until it completes. 3842 3843 If a marking step completes without its has_aborted() flag being 3844 true, it means it has completed the current marking phase (and 3845 also all other marking tasks have done so and have all synced up). 3846 3847 A method called regular_clock_call() is invoked "regularly" (in 3848 sub-ms intervals) throughout marking. It is this clock method that 3849 checks all the abort conditions which were mentioned above and 3850 decides when the task should abort. A work-based scheme is used to 3851 trigger this clock method: when the number of object words the 3852 marking phase has scanned or the number of references the marking 3853 phase has visited reaches a given limit. Additional invocations of 3854 the clock method have been planted in a few other strategic places 3855 too. The initial reason for the clock method was to avoid calling 3856 vtime too regularly, as it is quite expensive. So, once it was in 3857 place, it was natural to piggy-back all the other conditions on it 3858 too and not constantly check them throughout the code. 3859 3860 *****************************************************************************/ 3861 3862 void CMTask::do_marking_step(double time_target_ms, 3863 bool do_stealing, 3864 bool do_termination) { 3865 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 3866 assert(concurrent() == _cm->concurrent(), "they should be the same"); 3867 3868 G1CollectorPolicy* g1_policy = _g1h->g1_policy(); 3869 assert(_task_queues != NULL, "invariant"); 3870 assert(_task_queue != NULL, "invariant"); 3871 assert(_task_queues->queue(_task_id) == _task_queue, "invariant"); 3872 3873 assert(!_claimed, 3874 "only one thread should claim this task at any one time"); 3875 3876 // OK, this doesn't safeguard against all possible scenarios, as it is 3877 // possible for two threads to set the _claimed flag at the same 3878 // time. But it is only for debugging purposes anyway and it will 3879 // catch most problems.
3880 _claimed = true; 3881 3882 _start_time_ms = os::elapsedVTime() * 1000.0; 3883 statsOnly( _interval_start_time_ms = _start_time_ms ); 3884 3885 double diff_prediction_ms = 3886 g1_policy->get_new_prediction(&_marking_step_diffs_ms); 3887 _time_target_ms = time_target_ms - diff_prediction_ms; 3888 3889 // set up the variables that are used in the work-based scheme to 3890 // call the regular clock method 3891 _words_scanned = 0; 3892 _refs_reached = 0; 3893 recalculate_limits(); 3894 3895 // clear all flags 3896 clear_has_aborted(); 3897 _has_timed_out = false; 3898 _draining_satb_buffers = false; 3899 3900 ++_calls; 3901 3902 if (_cm->verbose_low()) { 3903 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " 3904 "target = %1.2lfms >>>>>>>>>>", 3905 _task_id, _calls, _time_target_ms); 3906 } 3907 3908 // Set up the bitmap and oop closures. Anything that uses them is 3909 // eventually called from this method, so it is OK to allocate these 3910 // statically. 3911 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); 3912 G1CMOopClosure cm_oop_closure(_g1h, _cm, this); 3913 set_cm_oop_closure(&cm_oop_closure); 3914 3915 if (_cm->has_overflown()) { 3916 // This can happen if the mark stack overflows during a GC pause 3917 // and this task, after a yield point, restarts. We have to abort 3918 // as we need to get into the overflow protocol which happens 3919 // right at the end of this task. 3920 set_has_aborted(); 3921 } 3922 3923 // First drain any available SATB buffers. After this, we will not 3924 // look at SATB buffers before the next invocation of this method. 3925 // If enough completed SATB buffers are queued up, the regular clock 3926 // will abort this task so that it restarts. 3927 drain_satb_buffers(); 3928 // ...then partially drain the local queue and the global stack 3929 drain_local_queue(true); 3930 drain_global_stack(true); 3931 3932 do { 3933 if (!has_aborted() && _curr_region != NULL) { 3934 // This means that we're already holding on to a region. 3935 assert(_finger != NULL, "if region is not NULL, then the finger " 3936 "should not be NULL either"); 3937 3938 // We might have restarted this task after an evacuation pause 3939 // which might have evacuated the region we're holding on to 3940 // underneath our feet. Let's read its limit again to make sure 3941 // that we do not iterate over a region of the heap that 3942 // contains garbage (update_region_limit() will also move 3943 // _finger to the start of the region if it is found empty). 3944 update_region_limit(); 3945 // We will start from _finger not from the start of the region, 3946 // as we might be restarting this task after aborting half-way 3947 // through scanning this region. In this case, _finger points to 3948 // the address where we last found a marked object. If this is a 3949 // fresh region, _finger points to start(). 3950 MemRegion mr = MemRegion(_finger, _region_limit); 3951 3952 if (_cm->verbose_low()) { 3953 gclog_or_tty->print_cr("[%d] we're scanning part " 3954 "["PTR_FORMAT", "PTR_FORMAT") " 3955 "of region "PTR_FORMAT, 3956 _task_id, _finger, _region_limit, _curr_region); 3957 } 3958 3959 // Let's iterate over the bitmap of the part of the 3960 // region that is left. 3961 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 3962 // We successfully completed iterating over the region. Now, 3963 // let's give up the region. 
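// (giveup_current_region() just clears _curr_region, _finger and _region_limit, which lets the claiming loop further down pick up a new region.)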
3964 giveup_current_region(); 3965 regular_clock_call(); 3966 } else { 3967 assert(has_aborted(), "currently the only way to do so"); 3968 // The only way to abort the bitmap iteration is to return 3969 // false from the do_bit() method. However, inside the 3970 // do_bit() method we move the _finger to point to the 3971 // object currently being looked at. So, if we bail out, we 3972 // have definitely set _finger to something non-null. 3973 assert(_finger != NULL, "invariant"); 3974 3975 // Region iteration was actually aborted. So now _finger 3976 // points to the address of the object we last scanned. If we 3977 // leave it there, when we restart this task, we will rescan 3978 // the object. It is easy to avoid this. We move the finger by 3979 // enough to point to the next possible object header (the 3980 // bitmap knows by how much we need to move it as it knows its 3981 // granularity). 3982 assert(_finger < _region_limit, "invariant"); 3983 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 3984 // Check if bitmap iteration was aborted while scanning the last object 3985 if (new_finger >= _region_limit) { 3986 giveup_current_region(); 3987 } else { 3988 move_finger_to(new_finger); 3989 } 3990 } 3991 } 3992 // At this point we have either completed iterating over the 3993 // region we were holding on to, or we have aborted. 3994 3995 // We then partially drain the local queue and the global stack. 3996 // (Do we really need this?) 3997 drain_local_queue(true); 3998 drain_global_stack(true); 3999 4000 // Read the note on the claim_region() method on why it might 4001 // return NULL with potentially more regions available for 4002 // claiming and why we have to check out_of_regions() to determine 4003 // whether we're done or not. 4004 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4005 // We are going to try to claim a new region. We should have 4006 // given up on the previous one. 4007 // Separated the asserts so that we know which one fires. 4008 assert(_curr_region == NULL, "invariant"); 4009 assert(_finger == NULL, "invariant"); 4010 assert(_region_limit == NULL, "invariant"); 4011 if (_cm->verbose_low()) { 4012 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 4013 } 4014 HeapRegion* claimed_region = _cm->claim_region(_task_id); 4015 if (claimed_region != NULL) { 4016 // Yes, we managed to claim one 4017 statsOnly( ++_regions_claimed ); 4018 4019 if (_cm->verbose_low()) { 4020 gclog_or_tty->print_cr("[%d] we successfully claimed " 4021 "region "PTR_FORMAT, 4022 _task_id, claimed_region); 4023 } 4024 4025 setup_for_region(claimed_region); 4026 assert(_curr_region == claimed_region, "invariant"); 4027 } 4028 // It is important to call the regular clock here. It might take 4029 // a while to claim a region if, for example, we hit a large 4030 // block of empty regions. So we need to call the regular clock 4031 // method once round the loop to make sure it's called 4032 // frequently enough. 4033 regular_clock_call(); 4034 } 4035 4036 if (!has_aborted() && _curr_region == NULL) { 4037 assert(_cm->out_of_regions(), 4038 "at this point we should be out of regions"); 4039 } 4040 } while ( _curr_region != NULL && !has_aborted()); 4041 4042 if (!has_aborted()) { 4043 // We cannot check whether the global stack is empty, since other 4044 // tasks might be pushing objects to it concurrently. 
4045 assert(_cm->out_of_regions(), 4046 "at this point we should be out of regions"); 4047 4048 if (_cm->verbose_low()) { 4049 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); 4050 } 4051 4052 // Try to reduce the number of available SATB buffers so that 4053 // remark has less work to do. 4054 drain_satb_buffers(); 4055 } 4056 4057 // Since we've done everything else, we can now totally drain the 4058 // local queue and global stack. 4059 drain_local_queue(false); 4060 drain_global_stack(false); 4061 4062 // Attempt at work stealing from other task's queues. 4063 if (do_stealing && !has_aborted()) { 4064 // We have not aborted. This means that we have finished all that 4065 // we could. Let's try to do some stealing... 4066 4067 // We cannot check whether the global stack is empty, since other 4068 // tasks might be pushing objects to it concurrently. 4069 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4070 "only way to reach here"); 4071 4072 if (_cm->verbose_low()) { 4073 gclog_or_tty->print_cr("[%d] starting to steal", _task_id); 4074 } 4075 4076 while (!has_aborted()) { 4077 oop obj; 4078 statsOnly( ++_steal_attempts ); 4079 4080 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { 4081 if (_cm->verbose_medium()) { 4082 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", 4083 _task_id, (void*) obj); 4084 } 4085 4086 statsOnly( ++_steals ); 4087 4088 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4089 "any stolen object should be marked"); 4090 scan_object(obj); 4091 4092 // And since we're towards the end, let's totally drain the 4093 // local queue and global stack. 4094 drain_local_queue(false); 4095 drain_global_stack(false); 4096 } else { 4097 break; 4098 } 4099 } 4100 } 4101 4102 // If we are about to wrap up and go into termination, check if we 4103 // should raise the overflow flag. 4104 if (do_termination && !has_aborted()) { 4105 if (_cm->force_overflow()->should_force()) { 4106 _cm->set_has_overflown(); 4107 regular_clock_call(); 4108 } 4109 } 4110 4111 // We still haven't aborted. Now, let's try to get into the 4112 // termination protocol. 4113 if (do_termination && !has_aborted()) { 4114 // We cannot check whether the global stack is empty, since other 4115 // tasks might be concurrently pushing objects on it. 4116 // Separated the asserts so that we know which one fires. 4117 assert(_cm->out_of_regions(), "only way to reach here"); 4118 assert(_task_queue->size() == 0, "only way to reach here"); 4119 4120 if (_cm->verbose_low()) { 4121 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); 4122 } 4123 4124 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4125 // The CMTask class also extends the TerminatorTerminator class, 4126 // hence its should_exit_termination() method will also decide 4127 // whether to exit the termination protocol or not. 4128 bool finished = _cm->terminator()->offer_termination(this); 4129 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4130 _termination_time_ms += 4131 termination_end_time_ms - _termination_start_time_ms; 4132 4133 if (finished) { 4134 // We're all done. 4135 4136 if (_task_id == 0) { 4137 // let's allow task 0 to do this 4138 if (concurrent()) { 4139 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4140 // we need to set this to false before the next 4141 // safepoint. This way we ensure that the marking phase 4142 // doesn't observe any more heap expansions. 
4143 _cm->clear_concurrent_marking_in_progress(); 4144 } 4145 } 4146 4147 // We can now guarantee that the global stack is empty, since 4148 // all other tasks have finished. We separated the guarantees so 4149 // that, if a condition is false, we can immediately find out 4150 // which one. 4151 guarantee(_cm->out_of_regions(), "only way to reach here"); 4152 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4153 guarantee(_task_queue->size() == 0, "only way to reach here"); 4154 guarantee(!_cm->has_overflown(), "only way to reach here"); 4155 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4156 4157 if (_cm->verbose_low()) { 4158 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4159 } 4160 } else { 4161 // Apparently there's more work to do. Let's abort this task. Its 4162 // caller will restart it and we can hopefully find more things to do. 4163 4164 if (_cm->verbose_low()) { 4165 gclog_or_tty->print_cr("[%d] apparently there is more work to do", 4166 _task_id); 4167 } 4168 4169 set_has_aborted(); 4170 statsOnly( ++_aborted_termination ); 4171 } 4172 } 4173 4174 // Mainly for debugging purposes to make sure that a pointer to the 4175 // closure which was statically allocated in this frame doesn't 4176 // escape it by accident. 4177 set_cm_oop_closure(NULL); 4178 double end_time_ms = os::elapsedVTime() * 1000.0; 4179 double elapsed_time_ms = end_time_ms - _start_time_ms; 4180 // Update the step history. 4181 _step_times_ms.add(elapsed_time_ms); 4182 4183 if (has_aborted()) { 4184 // The task was aborted for some reason. 4185 4186 statsOnly( ++_aborted ); 4187 4188 if (_has_timed_out) { 4189 double diff_ms = elapsed_time_ms - _time_target_ms; 4190 // Keep statistics of how well we did with respect to hitting 4191 // our target only if we actually timed out (if we aborted for 4192 // other reasons, then the results might get skewed). 4193 _marking_step_diffs_ms.add(diff_ms); 4194 } 4195 4196 if (_cm->has_overflown()) { 4197 // This is the interesting one. We aborted because a global 4198 // overflow was raised. This means we have to restart the 4199 // marking phase and start iterating over regions. However, in 4200 // order to do this we have to make sure that all tasks stop 4201 // what they are doing and re-initialise in a safe manner. We 4202 // will achieve this with the use of two barrier sync points. 4203 4204 if (_cm->verbose_low()) { 4205 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4206 } 4207 4208 _cm->enter_first_sync_barrier(_task_id); 4209 // When we exit this sync barrier we know that all tasks have 4210 // stopped doing marking work. So, it's now safe to 4211 // re-initialise our data structures. At the end of this method, 4212 // task 0 will clear the global data structures. 4213 4214 statsOnly( ++_aborted_overflow ); 4215 4216 // We clear the local state of this task... 4217 clear_region_fields(); 4218 4219 // ...and enter the second barrier. 4220 _cm->enter_second_sync_barrier(_task_id); 4221 // At this point everything has been re-initialised and we're 4222 // ready to restart.
4223 } 4224 4225 if (_cm->verbose_low()) { 4226 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4227 "elapsed = %1.2lfms <<<<<<<<<<", 4228 _task_id, _time_target_ms, elapsed_time_ms); 4229 if (_cm->has_aborted()) { 4230 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", 4231 _task_id); 4232 } 4233 } 4234 } else { 4235 if (_cm->verbose_low()) { 4236 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4237 "elapsed = %1.2lfms <<<<<<<<<<", 4238 _task_id, _time_target_ms, elapsed_time_ms); 4239 } 4240 } 4241 4242 _claimed = false; 4243 } 4244 4245 CMTask::CMTask(int task_id, 4246 ConcurrentMark* cm, 4247 size_t* marked_bytes, 4248 BitMap* card_bm, 4249 CMTaskQueue* task_queue, 4250 CMTaskQueueSet* task_queues) 4251 : _g1h(G1CollectedHeap::heap()), 4252 _task_id(task_id), _cm(cm), 4253 _claimed(false), 4254 _nextMarkBitMap(NULL), _hash_seed(17), 4255 _task_queue(task_queue), 4256 _task_queues(task_queues), 4257 _cm_oop_closure(NULL), 4258 _marked_bytes_array(marked_bytes), 4259 _card_bm(card_bm) { 4260 guarantee(task_queue != NULL, "invariant"); 4261 guarantee(task_queues != NULL, "invariant"); 4262 4263 statsOnly( _clock_due_to_scanning = 0; 4264 _clock_due_to_marking = 0 ); 4265 4266 _marking_step_diffs_ms.add(0.5); 4267 } 4268 4269 // These are formatting macros that are used below to ensure 4270 // consistent formatting. The *_H_* versions are used to format the 4271 // header for a particular value and they should be kept consistent 4272 // with the corresponding macro. Also note that most of the macros add 4273 // the necessary white space (as a prefix) which makes them a bit 4274 // easier to compose. 4275 4276 // All the output lines are prefixed with this string to be able to 4277 // identify them easily in a large log file. 4278 #define G1PPRL_LINE_PREFIX "###" 4279 4280 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4281 #ifdef _LP64 4282 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4283 #else // _LP64 4284 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4285 #endif // _LP64 4286 4287 // For per-region info 4288 #define G1PPRL_TYPE_FORMAT " %-4s" 4289 #define G1PPRL_TYPE_H_FORMAT " %4s" 4290 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4291 #define G1PPRL_BYTE_H_FORMAT " %9s" 4292 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4293 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4294 4295 // For summary info 4296 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4297 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4298 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4299 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4300 4301 G1PrintRegionLivenessInfoClosure:: 4302 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4303 : _out(out), 4304 _total_used_bytes(0), _total_capacity_bytes(0), 4305 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4306 _hum_used_bytes(0), _hum_capacity_bytes(0), 4307 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) { 4308 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4309 MemRegion g1_committed = g1h->g1_committed(); 4310 MemRegion g1_reserved = g1h->g1_reserved(); 4311 double now = os::elapsedTime(); 4312 4313 // Print the header of the output. 
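// The header is a PHASE line, a HEAP line with the committed and reserved ranges plus the region size, and two column-header lines (type, address-range, used, prev-live, next-live, gc-eff and their units), all prefixed with G1PPRL_LINE_PREFIX ("###") so they are easy to pick out of a long log.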
4314 _out->cr(); 4315 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 4316 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP" 4317 G1PPRL_SUM_ADDR_FORMAT("committed") 4318 G1PPRL_SUM_ADDR_FORMAT("reserved") 4319 G1PPRL_SUM_BYTE_FORMAT("region-size"), 4320 g1_committed.start(), g1_committed.end(), 4321 g1_reserved.start(), g1_reserved.end(), 4322 HeapRegion::GrainBytes); 4323 _out->print_cr(G1PPRL_LINE_PREFIX); 4324 _out->print_cr(G1PPRL_LINE_PREFIX 4325 G1PPRL_TYPE_H_FORMAT 4326 G1PPRL_ADDR_BASE_H_FORMAT 4327 G1PPRL_BYTE_H_FORMAT 4328 G1PPRL_BYTE_H_FORMAT 4329 G1PPRL_BYTE_H_FORMAT 4330 G1PPRL_DOUBLE_H_FORMAT, 4331 "type", "address-range", 4332 "used", "prev-live", "next-live", "gc-eff"); 4333 _out->print_cr(G1PPRL_LINE_PREFIX 4334 G1PPRL_TYPE_H_FORMAT 4335 G1PPRL_ADDR_BASE_H_FORMAT 4336 G1PPRL_BYTE_H_FORMAT 4337 G1PPRL_BYTE_H_FORMAT 4338 G1PPRL_BYTE_H_FORMAT 4339 G1PPRL_DOUBLE_H_FORMAT, 4340 "", "", 4341 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)"); 4342 } 4343 4344 // It takes as a parameter a reference to one of the _hum_* fields. It 4345 // deduces the corresponding value for a region in a humongous region 4346 // series (either the region size, or what's left if the _hum_* field 4347 // is < the region size), and updates the _hum_* field accordingly. 4348 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) { 4349 size_t bytes = 0; 4350 // The > 0 check is to deal with the prev and next live bytes which 4351 // could be 0. 4352 if (*hum_bytes > 0) { 4353 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes); 4354 *hum_bytes -= bytes; 4355 } 4356 return bytes; 4357 } 4358 4359 // It deduces the values for a region in a humongous region series 4360 // from the _hum_* fields and updates those accordingly. It assumes 4361 // that the _hum_* fields have already been set up from the "starts 4362 // humongous" region and that we visit the regions in address order. 4363 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4364 size_t* capacity_bytes, 4365 size_t* prev_live_bytes, 4366 size_t* next_live_bytes) { 4367 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4368 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4369 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4370 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4371 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4372 } 4373 4374 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4375 const char* type = ""; 4376 HeapWord* bottom = r->bottom(); 4377 HeapWord* end = r->end(); 4378 size_t capacity_bytes = r->capacity(); 4379 size_t used_bytes = r->used(); 4380 size_t prev_live_bytes = r->live_bytes(); 4381 size_t next_live_bytes = r->next_live_bytes(); 4382 double gc_eff = r->gc_efficiency(); 4383 if (r->used() == 0) { 4384 type = "FREE"; 4385 } else if (r->is_survivor()) { 4386 type = "SURV"; 4387 } else if (r->is_young()) { 4388 type = "EDEN"; 4389 } else if (r->startsHumongous()) { 4390 type = "HUMS"; 4391 4392 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4393 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4394 "they should have been zeroed after the last time we used them"); 4395 // Set up the _hum_* fields.
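// The "starts humongous" region stores the totals for the entire humongous series in the _hum_* fields; this region and the HUMC regions that follow then each consume up to one region's worth of those totals via get_hum_bytes().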
4396 _hum_capacity_bytes = capacity_bytes; 4397 _hum_used_bytes = used_bytes; 4398 _hum_prev_live_bytes = prev_live_bytes; 4399 _hum_next_live_bytes = next_live_bytes; 4400 get_hum_bytes(&used_bytes, &capacity_bytes, 4401 &prev_live_bytes, &next_live_bytes); 4402 end = bottom + HeapRegion::GrainWords; 4403 } else if (r->continuesHumongous()) { 4404 type = "HUMC"; 4405 get_hum_bytes(&used_bytes, &capacity_bytes, 4406 &prev_live_bytes, &next_live_bytes); 4407 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4408 } else { 4409 type = "OLD"; 4410 } 4411 4412 _total_used_bytes += used_bytes; 4413 _total_capacity_bytes += capacity_bytes; 4414 _total_prev_live_bytes += prev_live_bytes; 4415 _total_next_live_bytes += next_live_bytes; 4416 4417 // Print a line for this particular region. 4418 _out->print_cr(G1PPRL_LINE_PREFIX 4419 G1PPRL_TYPE_FORMAT 4420 G1PPRL_ADDR_BASE_FORMAT 4421 G1PPRL_BYTE_FORMAT 4422 G1PPRL_BYTE_FORMAT 4423 G1PPRL_BYTE_FORMAT 4424 G1PPRL_DOUBLE_FORMAT, 4425 type, bottom, end, 4426 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4427 4428 return false; 4429 } 4430 4431 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4432 // Print the footer of the output. 4433 _out->print_cr(G1PPRL_LINE_PREFIX); 4434 _out->print_cr(G1PPRL_LINE_PREFIX 4435 " SUMMARY" 4436 G1PPRL_SUM_MB_FORMAT("capacity") 4437 G1PPRL_SUM_MB_PERC_FORMAT("used") 4438 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4439 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4440 bytes_to_mb(_total_capacity_bytes), 4441 bytes_to_mb(_total_used_bytes), 4442 perc(_total_used_bytes, _total_capacity_bytes), 4443 bytes_to_mb(_total_prev_live_bytes), 4444 perc(_total_prev_live_bytes, _total_capacity_bytes), 4445 bytes_to_mb(_total_next_live_bytes), 4446 perc(_total_next_live_bytes, _total_capacity_bytes)); 4447 _out->cr(); 4448 } --- EOF ---