1 /* 2 * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/metadataOnStackMark.hpp" 27 #include "classfile/symbolTable.hpp" 28 #include "code/codeCache.hpp" 29 #include "gc/g1/concurrentMark.inline.hpp" 30 #include "gc/g1/concurrentMarkThread.inline.hpp" 31 #include "gc/g1/g1CollectedHeap.inline.hpp" 32 #include "gc/g1/g1CollectorPolicy.hpp" 33 #include "gc/g1/g1CollectorState.hpp" 34 #include "gc/g1/g1OopClosures.inline.hpp" 35 #include "gc/g1/g1StringDedup.hpp" 36 #include "gc/g1/heapRegion.inline.hpp" 37 #include "gc/g1/heapRegionRemSet.hpp" 38 #include "gc/g1/heapRegionSet.inline.hpp" 39 #include "gc/g1/suspendibleThreadSet.hpp" 40 #include "gc/shared/gcId.hpp" 41 #include "gc/shared/gcTimer.hpp" 42 #include "gc/shared/gcTrace.hpp" 43 #include "gc/shared/gcTraceTime.inline.hpp" 44 #include "gc/shared/genOopClosures.inline.hpp" 45 #include "gc/shared/referencePolicy.hpp" 46 #include "gc/shared/strongRootsScope.hpp" 47 #include "gc/shared/taskqueue.inline.hpp" 48 #include "gc/shared/vmGCOperations.hpp" 49 #include "logging/log.hpp" 50 #include "memory/allocation.hpp" 51 #include "memory/resourceArea.hpp" 52 #include "oops/oop.inline.hpp" 53 #include "runtime/atomic.inline.hpp" 54 #include "runtime/handles.inline.hpp" 55 #include "runtime/java.hpp" 56 #include "runtime/prefetch.inline.hpp" 57 #include "services/memTracker.hpp" 58 59 // Concurrent marking bit map wrapper 60 61 CMBitMapRO::CMBitMapRO(int shifter) : 62 _bm(), 63 _shifter(shifter) { 64 _bmStartWord = 0; 65 _bmWordSize = 0; 66 } 67 68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr, 69 const HeapWord* limit) const { 70 // First we must round addr *up* to a possible object boundary. 
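  // The bitmap has one bit per (HeapWordSize << _shifter) bytes, and bits are
  // only ever set at such aligned object start addresses, so rounding up
  // cannot skip over a marked object.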
71 addr = (HeapWord*)align_size_up((intptr_t)addr, 72 HeapWordSize << _shifter); 73 size_t addrOffset = heapWordToOffset(addr); 74 assert(limit != NULL, "limit must not be NULL"); 75 size_t limitOffset = heapWordToOffset(limit); 76 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 77 HeapWord* nextAddr = offsetToHeapWord(nextOffset); 78 assert(nextAddr >= addr, "get_next_one postcondition"); 79 assert(nextAddr == limit || isMarked(nextAddr), 80 "get_next_one postcondition"); 81 return nextAddr; 82 } 83 84 #ifndef PRODUCT 85 bool CMBitMapRO::covers(MemRegion heap_rs) const { 86 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 87 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize, 88 "size inconsistency"); 89 return _bmStartWord == (HeapWord*)(heap_rs.start()) && 90 _bmWordSize == heap_rs.word_size(); 91 } 92 #endif 93 94 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const { 95 _bm.print_on_error(st, prefix); 96 } 97 98 size_t CMBitMap::compute_size(size_t heap_size) { 99 return ReservedSpace::allocation_align_size_up(heap_size / mark_distance()); 100 } 101 102 size_t CMBitMap::mark_distance() { 103 return MinObjAlignmentInBytes * BitsPerByte; 104 } 105 106 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) { 107 _bmStartWord = heap.start(); 108 _bmWordSize = heap.word_size(); 109 110 _bm.set_map((BitMap::bm_word_t*) storage->reserved().start()); 111 _bm.set_size(_bmWordSize >> _shifter); 112 113 storage->set_mapping_changed_listener(&_listener); 114 } 115 116 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) { 117 if (zero_filled) { 118 return; 119 } 120 // We need to clear the bitmap on commit, removing any existing information. 121 MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords); 122 _bm->clearRange(mr); 123 } 124 125 // Closure used for clearing the given mark bitmap. 126 class ClearBitmapHRClosure : public HeapRegionClosure { 127 private: 128 ConcurrentMark* _cm; 129 CMBitMap* _bitmap; 130 bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration. 131 public: 132 ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) { 133 assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield."); 134 } 135 136 virtual bool doHeapRegion(HeapRegion* r) { 137 size_t const chunk_size_in_words = M / HeapWordSize; 138 139 HeapWord* cur = r->bottom(); 140 HeapWord* const end = r->end(); 141 142 while (cur < end) { 143 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end)); 144 _bitmap->clearRange(mr); 145 146 cur += chunk_size_in_words; 147 148 // Abort iteration if after yielding the marking has been aborted. 149 if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) { 150 return true; 151 } 152 // Repeat the asserts from before the start of the closure. We will do them 153 // as asserts here to minimize their overhead on the product. However, we 154 // will have them as guarantees at the beginning / end of the bitmap 155 // clearing to get some checking in the product. 
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  HeapRegionClaimer _hrclaimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure* cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  uint n_workers = g1h->workers()->active_workers();
  ParClearNextMarkBitmapTask task(&cl, n_workers, false);
  g1h->workers()->run_task(&task);
  guarantee(cl.complete(), "Must have completed iteration.");
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflowed the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    // Failed to double capacity; continue with the current stack.
    log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
                  _capacity / K, new_capacity / K);
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            "saved index: %d index: %d", _saved_index, _index);
  _saved_index = -1;
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
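  // Claiming uses a double-checked pattern: read _next_survivor without the
  // lock first, and only take RootRegionScan_lock (re-reading the field) when
  // it looks like there is still a region left to claim.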
351 HeapRegion* res = _next_survivor; 352 if (res != NULL) { 353 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 354 // Read it again in case it changed while we were waiting for the lock. 355 res = _next_survivor; 356 if (res != NULL) { 357 if (res == _young_list->last_survivor_region()) { 358 // We just claimed the last survivor so store NULL to indicate 359 // that we're done. 360 _next_survivor = NULL; 361 } else { 362 _next_survivor = res->get_next_young_region(); 363 } 364 } else { 365 // Someone else claimed the last survivor while we were trying 366 // to take the lock so nothing else to do. 367 } 368 } 369 assert(res == NULL || res->is_survivor(), "post-condition"); 370 371 return res; 372 } 373 374 void CMRootRegions::scan_finished() { 375 assert(scan_in_progress(), "pre-condition"); 376 377 // Currently, only survivors can be root regions. 378 if (!_should_abort) { 379 assert(_next_survivor == NULL, "we should have claimed all survivors"); 380 } 381 _next_survivor = NULL; 382 383 { 384 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 385 _scan_in_progress = false; 386 RootRegionScan_lock->notify_all(); 387 } 388 } 389 390 bool CMRootRegions::wait_until_scan_finished() { 391 if (!scan_in_progress()) return false; 392 393 { 394 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag); 395 while (scan_in_progress()) { 396 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag); 397 } 398 } 399 return true; 400 } 401 402 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) { 403 return MAX2((n_par_threads + 2) / 4, 1U); 404 } 405 406 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) : 407 _g1h(g1h), 408 _markBitMap1(), 409 _markBitMap2(), 410 _parallel_marking_threads(0), 411 _max_parallel_marking_threads(0), 412 _sleep_factor(0.0), 413 _marking_task_overhead(1.0), 414 _cleanup_list("Cleanup List"), 415 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/), 416 _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >> 417 CardTableModRefBS::card_shift, 418 false /* in_resource_area*/), 419 420 _prevMarkBitMap(&_markBitMap1), 421 _nextMarkBitMap(&_markBitMap2), 422 423 _markStack(this), 424 // _finger set in set_non_marking_state 425 426 _max_worker_id(ParallelGCThreads), 427 // _active_tasks set in set_non_marking_state 428 // _tasks set inside the constructor 429 _task_queues(new CMTaskQueueSet((int) _max_worker_id)), 430 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)), 431 432 _has_overflown(false), 433 _concurrent(false), 434 _has_aborted(false), 435 _restart_for_overflow(false), 436 _concurrent_marking_in_progress(false), 437 _concurrent_phase_started(false), 438 439 // _verbose_level set below 440 441 _init_times(), 442 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 443 _cleanup_times(), 444 _total_counting_time(0.0), 445 _total_rs_scrub_time(0.0), 446 447 _parallel_workers(NULL), 448 449 _count_card_bitmaps(NULL), 450 _count_marked_bytes(NULL), 451 _completed_initialization(false) { 452 453 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage); 454 _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage); 455 456 // Create & start a ConcurrentMark thread. 
457 _cmThread = new ConcurrentMarkThread(this); 458 assert(cmThread() != NULL, "CM Thread should have been created"); 459 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 460 if (_cmThread->osthread() == NULL) { 461 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread"); 462 } 463 464 assert(CGC_lock != NULL, "Where's the CGC_lock?"); 465 assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency"); 466 assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency"); 467 468 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 469 satb_qs.set_buffer_size(G1SATBBufferSize); 470 471 _root_regions.init(_g1h, this); 472 473 if (ConcGCThreads > ParallelGCThreads) { 474 warning("Can't have more ConcGCThreads (%u) " 475 "than ParallelGCThreads (%u).", 476 ConcGCThreads, ParallelGCThreads); 477 return; 478 } 479 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) { 480 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent 481 // if both are set 482 _sleep_factor = 0.0; 483 _marking_task_overhead = 1.0; 484 } else if (G1MarkingOverheadPercent > 0) { 485 // We will calculate the number of parallel marking threads based 486 // on a target overhead with respect to the soft real-time goal 487 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0; 488 double overall_cm_overhead = 489 (double) MaxGCPauseMillis * marking_overhead / 490 (double) GCPauseIntervalMillis; 491 double cpu_ratio = 1.0 / (double) os::processor_count(); 492 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 493 double marking_task_overhead = 494 overall_cm_overhead / marking_thread_num * 495 (double) os::processor_count(); 496 double sleep_factor = 497 (1.0 - marking_task_overhead) / marking_task_overhead; 498 499 FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num); 500 _sleep_factor = sleep_factor; 501 _marking_task_overhead = marking_task_overhead; 502 } else { 503 // Calculate the number of parallel marking threads by scaling 504 // the number of parallel GC threads. 505 uint marking_thread_num = scale_parallel_threads(ParallelGCThreads); 506 FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num); 507 _sleep_factor = 0.0; 508 _marking_task_overhead = 1.0; 509 } 510 511 assert(ConcGCThreads > 0, "Should have been set"); 512 _parallel_marking_threads = ConcGCThreads; 513 _max_parallel_marking_threads = _parallel_marking_threads; 514 515 _parallel_workers = new WorkGang("G1 Marker", 516 _max_parallel_marking_threads, false, true); 517 if (_parallel_workers == NULL) { 518 vm_exit_during_initialization("Failed necessary allocation."); 519 } else { 520 _parallel_workers->initialize_workers(); 521 } 522 523 if (FLAG_IS_DEFAULT(MarkStackSize)) { 524 size_t mark_stack_size = 525 MIN2(MarkStackSizeMax, 526 MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE))); 527 // Verify that the calculated value for MarkStackSize is in range. 528 // It would be nice to use the private utility routine from Arguments. 529 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) { 530 warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): " 531 "must be between 1 and " SIZE_FORMAT, 532 mark_stack_size, MarkStackSizeMax); 533 return; 534 } 535 FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size); 536 } else { 537 // Verify MarkStackSize is in range. 
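    // The value was set explicitly, so only re-validate it when it (and,
    // possibly, MarkStackSizeMax) actually came from the command line.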
538 if (FLAG_IS_CMDLINE(MarkStackSize)) { 539 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) { 540 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 541 warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): " 542 "must be between 1 and " SIZE_FORMAT, 543 MarkStackSize, MarkStackSizeMax); 544 return; 545 } 546 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) { 547 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) { 548 warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")" 549 " or for MarkStackSizeMax (" SIZE_FORMAT ")", 550 MarkStackSize, MarkStackSizeMax); 551 return; 552 } 553 } 554 } 555 } 556 557 if (!_markStack.allocate(MarkStackSize)) { 558 warning("Failed to allocate CM marking stack"); 559 return; 560 } 561 562 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC); 563 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); 564 565 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC); 566 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC); 567 568 BitMap::idx_t card_bm_size = _card_bm.size(); 569 570 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 571 _active_tasks = _max_worker_id; 572 573 uint max_regions = _g1h->max_regions(); 574 for (uint i = 0; i < _max_worker_id; ++i) { 575 CMTaskQueue* task_queue = new CMTaskQueue(); 576 task_queue->initialize(); 577 _task_queues->register_queue(i, task_queue); 578 579 _count_card_bitmaps[i] = BitMap(card_bm_size, false); 580 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); 581 582 _tasks[i] = new CMTask(i, this, 583 _count_marked_bytes[i], 584 &_count_card_bitmaps[i], 585 task_queue, _task_queues); 586 587 _accum_task_vtime[i] = 0.0; 588 } 589 590 // Calculate the card number for the bottom of the heap. Used 591 // in biasing indexes into the accounting card bitmaps. 592 _heap_bottom_card_num = 593 intptr_t(uintptr_t(_g1h->reserved_region().start()) >> 594 CardTableModRefBS::card_shift); 595 596 // Clear all the liveness counting data 597 clear_all_count_data(); 598 599 // so that the call below can read a sensible value 600 _heap_start = g1h->reserved_region().start(); 601 set_non_marking_state(); 602 _completed_initialization = true; 603 } 604 605 void ConcurrentMark::reset() { 606 // Starting values for these two. This should be called in a STW 607 // phase. 608 MemRegion reserved = _g1h->g1_reserved(); 609 _heap_start = reserved.start(); 610 _heap_end = reserved.end(); 611 612 // Separated the asserts so that we know which one fires. 613 assert(_heap_start != NULL, "heap bounds should look ok"); 614 assert(_heap_end != NULL, "heap bounds should look ok"); 615 assert(_heap_start < _heap_end, "heap bounds should look ok"); 616 617 // Reset all the marking data structures and any necessary flags 618 reset_marking_state(); 619 620 // We do reset all of them, since different phases will use 621 // different number of active threads. So, it's easiest to have all 622 // of them ready. 
623 for (uint i = 0; i < _max_worker_id; ++i) { 624 _tasks[i]->reset(_nextMarkBitMap); 625 } 626 627 // we need this to make sure that the flag is on during the evac 628 // pause with initial mark piggy-backed 629 set_concurrent_marking_in_progress(); 630 } 631 632 633 void ConcurrentMark::reset_marking_state(bool clear_overflow) { 634 _markStack.set_should_expand(); 635 _markStack.setEmpty(); // Also clears the _markStack overflow flag 636 if (clear_overflow) { 637 clear_has_overflown(); 638 } else { 639 assert(has_overflown(), "pre-condition"); 640 } 641 _finger = _heap_start; 642 643 for (uint i = 0; i < _max_worker_id; ++i) { 644 CMTaskQueue* queue = _task_queues->queue(i); 645 queue->set_empty(); 646 } 647 } 648 649 void ConcurrentMark::set_concurrency(uint active_tasks) { 650 assert(active_tasks <= _max_worker_id, "we should not have more"); 651 652 _active_tasks = active_tasks; 653 // Need to update the three data structures below according to the 654 // number of active threads for this phase. 655 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 656 _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 657 _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 658 } 659 660 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) { 661 set_concurrency(active_tasks); 662 663 _concurrent = concurrent; 664 // We propagate this to all tasks, not just the active ones. 665 for (uint i = 0; i < _max_worker_id; ++i) 666 _tasks[i]->set_concurrent(concurrent); 667 668 if (concurrent) { 669 set_concurrent_marking_in_progress(); 670 } else { 671 // We currently assume that the concurrent flag has been set to 672 // false before we start remark. At this point we should also be 673 // in a STW phase. 674 assert(!concurrent_marking_in_progress(), "invariant"); 675 assert(out_of_regions(), 676 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT, 677 p2i(_finger), p2i(_heap_end)); 678 } 679 } 680 681 void ConcurrentMark::set_non_marking_state() { 682 // We set the global marking state to some default values when we're 683 // not doing marking. 684 reset_marking_state(); 685 _active_tasks = 0; 686 clear_concurrent_marking_in_progress(); 687 } 688 689 ConcurrentMark::~ConcurrentMark() { 690 // The ConcurrentMark instance is never freed. 691 ShouldNotReachHere(); 692 } 693 694 void ConcurrentMark::clearNextBitmap() { 695 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 696 697 // Make sure that the concurrent mark thread looks to still be in 698 // the current cycle. 699 guarantee(cmThread()->during_cycle(), "invariant"); 700 701 // We are finishing up the current cycle by clearing the next 702 // marking bitmap and getting it ready for the next cycle. During 703 // this time no other cycle can start. So, let's make sure that this 704 // is the case. 705 guarantee(!g1h->collector_state()->mark_in_progress(), "invariant"); 706 707 ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */); 708 ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true); 709 _parallel_workers->run_task(&task); 710 711 // Clear the liveness counting data. If the marking has been aborted, the abort() 712 // call already did that. 713 if (cl.complete()) { 714 clear_all_count_data(); 715 } 716 717 // Repeat the asserts from above. 
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs.
 * This is actually safe, since entering the sync barrier is one of
 * the last things do_marking_step() does, and it doesn't manipulate
 * any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);

      log_info(gc)("Concurrent Mark reset for overflow");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");
      CMTask* the_task = _cm->task(worker_id);
      the_task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          double start_vtime_sec = os::elapsedVTime();
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

          the_task->do_marking_step(mark_step_duration_ms,
                                    true  /* do_termination */,
                                    false /* is_serial */);

          double end_vtime_sec = os::elapsedVTime();
          double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
          _cm->clear_has_overflown();

          _cm->do_yield_check(worker_id);

          jlong sleep_time_ms;
          if (!_cm->has_aborted() && the_task->has_aborted()) {
            sleep_time_ms =
              (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
            {
              SuspendibleThreadSetLeaver sts_leave;
              os::sleep(Thread::current(), sleep_time_ms, false);
            }
          }
        } while (!_cm->has_aborted() && the_task->has_aborted());
      }
      the_task->record_end_time();
      guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
} 911 912 CMConcurrentMarkingTask(ConcurrentMark* cm, 913 ConcurrentMarkThread* cmt) : 914 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 915 916 ~CMConcurrentMarkingTask() { } 917 }; 918 919 // Calculates the number of active workers for a concurrent 920 // phase. 921 uint ConcurrentMark::calc_parallel_marking_threads() { 922 uint n_conc_workers = 0; 923 if (!UseDynamicNumberOfGCThreads || 924 (!FLAG_IS_DEFAULT(ConcGCThreads) && 925 !ForceDynamicNumberOfGCThreads)) { 926 n_conc_workers = max_parallel_marking_threads(); 927 } else { 928 n_conc_workers = 929 AdaptiveSizePolicy::calc_default_active_workers( 930 max_parallel_marking_threads(), 931 1, /* Minimum workers */ 932 parallel_marking_threads(), 933 Threads::number_of_non_daemon_threads()); 934 // Don't scale down "n_conc_workers" by scale_parallel_threads() because 935 // that scaling has already gone into "_max_parallel_marking_threads". 936 } 937 assert(n_conc_workers > 0, "Always need at least 1"); 938 return n_conc_workers; 939 } 940 941 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) { 942 // Currently, only survivors can be root regions. 943 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant"); 944 G1RootRegionScanClosure cl(_g1h, this, worker_id); 945 946 const uintx interval = PrefetchScanIntervalInBytes; 947 HeapWord* curr = hr->bottom(); 948 const HeapWord* end = hr->top(); 949 while (curr < end) { 950 Prefetch::read(curr, interval); 951 oop obj = oop(curr); 952 int size = obj->oop_iterate_size(&cl); 953 assert(size == obj->size(), "sanity"); 954 curr += size; 955 } 956 } 957 958 class CMRootRegionScanTask : public AbstractGangTask { 959 private: 960 ConcurrentMark* _cm; 961 962 public: 963 CMRootRegionScanTask(ConcurrentMark* cm) : 964 AbstractGangTask("Root Region Scan"), _cm(cm) { } 965 966 void work(uint worker_id) { 967 assert(Thread::current()->is_ConcurrentGC_thread(), 968 "this should only be done by a conc GC thread"); 969 970 CMRootRegions* root_regions = _cm->root_regions(); 971 HeapRegion* hr = root_regions->claim_next(); 972 while (hr != NULL) { 973 _cm->scanRootRegion(hr, worker_id); 974 hr = root_regions->claim_next(); 975 } 976 } 977 }; 978 979 void ConcurrentMark::scanRootRegions() { 980 // Start of concurrent marking. 981 ClassLoaderDataGraph::clear_claimed_marks(); 982 983 // scan_in_progress() will have been set to true only if there was 984 // at least one root region to scan. So, if it's false, we 985 // should not attempt to do any further work. 986 if (root_regions()->scan_in_progress()) { 987 GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan"); 988 989 _parallel_marking_threads = calc_parallel_marking_threads(); 990 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 991 "Maximum number of marking threads exceeded"); 992 uint active_workers = MAX2(1U, parallel_marking_threads()); 993 994 CMRootRegionScanTask task(this); 995 _parallel_workers->set_active_workers(active_workers); 996 _parallel_workers->run_task(&task); 997 998 // It's possible that has_aborted() is true here without actually 999 // aborting the survivor scan earlier. This is OK as it's 1000 // mainly used for sanity checking. 
1001 root_regions()->scan_finished(); 1002 } 1003 } 1004 1005 void ConcurrentMark::register_concurrent_phase_start(const char* title) { 1006 assert(!_concurrent_phase_started, "Sanity"); 1007 _concurrent_phase_started = true; 1008 _g1h->gc_timer_cm()->register_gc_concurrent_start(title); 1009 } 1010 1011 void ConcurrentMark::register_concurrent_phase_end() { 1012 if (_concurrent_phase_started) { 1013 _concurrent_phase_started = false; 1014 _g1h->gc_timer_cm()->register_gc_concurrent_end(); 1015 } 1016 } 1017 1018 void ConcurrentMark::markFromRoots() { 1019 // we might be tempted to assert that: 1020 // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1021 // "inconsistent argument?"); 1022 // However that wouldn't be right, because it's possible that 1023 // a safepoint is indeed in progress as a younger generation 1024 // stop-the-world GC happens even as we mark in this generation. 1025 1026 _restart_for_overflow = false; 1027 1028 // _g1h has _n_par_threads 1029 _parallel_marking_threads = calc_parallel_marking_threads(); 1030 assert(parallel_marking_threads() <= max_parallel_marking_threads(), 1031 "Maximum number of marking threads exceeded"); 1032 1033 uint active_workers = MAX2(1U, parallel_marking_threads()); 1034 assert(active_workers > 0, "Should have been set"); 1035 1036 // Parallel task terminator is set in "set_concurrency_and_phase()" 1037 set_concurrency_and_phase(active_workers, true /* concurrent */); 1038 1039 CMConcurrentMarkingTask markingTask(this, cmThread()); 1040 _parallel_workers->set_active_workers(active_workers); 1041 _parallel_workers->run_task(&markingTask); 1042 print_stats(); 1043 } 1044 1045 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1046 // world is stopped at this checkpoint 1047 assert(SafepointSynchronize::is_at_safepoint(), 1048 "world should be stopped"); 1049 1050 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1051 1052 // If a full collection has happened, we shouldn't do this. 1053 if (has_aborted()) { 1054 g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused 1055 return; 1056 } 1057 1058 SvcGCMarker sgcm(SvcGCMarker::OTHER); 1059 1060 if (VerifyDuringGC) { 1061 HandleMark hm; // handle scope 1062 g1h->prepare_for_verify(); 1063 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)"); 1064 } 1065 g1h->verifier()->check_bitmaps("Remark Start"); 1066 1067 G1CollectorPolicy* g1p = g1h->g1_policy(); 1068 g1p->record_concurrent_mark_remark_start(); 1069 1070 double start = os::elapsedTime(); 1071 1072 checkpointRootsFinalWork(); 1073 1074 double mark_work_end = os::elapsedTime(); 1075 1076 weakRefsWork(clear_all_soft_refs); 1077 1078 if (has_overflown()) { 1079 // Oops. We overflowed. Restart concurrent marking. 1080 _restart_for_overflow = true; 1081 log_develop_trace(gc)("Remark led to restart for overflow."); 1082 1083 // Verify the heap w.r.t. the previous marking bitmap. 1084 if (VerifyDuringGC) { 1085 HandleMark hm; // handle scope 1086 g1h->prepare_for_verify(); 1087 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)"); 1088 } 1089 1090 // Clear the marking state because we will be restarting 1091 // marking due to overflowing the global mark stack. 1092 reset_marking_state(); 1093 } else { 1094 { 1095 GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm()); 1096 1097 // Aggregate the per-task counting data that we have accumulated 1098 // while marking. 
      aggregate_count_data();
    }

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      g1h->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
    }
    g1h->verifier()->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1.
  void set_bit_for_region(HeapRegion* hr) {
    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    _region_bm->par_at_put(index, true);
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           "Preconditions not met - "
           "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
           p2i(start), p2i(ntams), p2i(hr->end()));

    // Find the first marked object at or after "start".
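    // The loop below walks each marked object in [start, ntams), adding its
    // size to marked_bytes and setting the card bitmap bits for the cards
    // the object spans.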
1186 start = _bm->getNextMarkedWordAddress(start, ntams); 1187 1188 size_t marked_bytes = 0; 1189 1190 while (start < ntams) { 1191 oop obj = oop(start); 1192 int obj_sz = obj->size(); 1193 HeapWord* obj_end = start + obj_sz; 1194 1195 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 1196 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end); 1197 1198 // Note: if we're looking at the last region in heap - obj_end 1199 // could be actually just beyond the end of the heap; end_idx 1200 // will then correspond to a (non-existent) card that is also 1201 // just beyond the heap. 1202 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) { 1203 // end of object is not card aligned - increment to cover 1204 // all the cards spanned by the object 1205 end_idx += 1; 1206 } 1207 1208 // Set the bits in the card BM for the cards spanned by this object. 1209 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1210 1211 // Add the size of this object to the number of marked bytes. 1212 marked_bytes += (size_t)obj_sz * HeapWordSize; 1213 1214 // This will happen if we are handling a humongous object that spans 1215 // several heap regions. 1216 if (obj_end > hr->end()) { 1217 break; 1218 } 1219 // Find the next marked object after this one. 1220 start = _bm->getNextMarkedWordAddress(obj_end, ntams); 1221 } 1222 1223 // Mark the allocated-since-marking portion... 1224 HeapWord* top = hr->top(); 1225 if (ntams < top) { 1226 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1227 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1228 1229 // Note: if we're looking at the last region in heap - top 1230 // could be actually just beyond the end of the heap; end_idx 1231 // will then correspond to a (non-existent) card that is also 1232 // just beyond the heap. 1233 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1234 // end of object is not card aligned - increment to cover 1235 // all the cards spanned by the object 1236 end_idx += 1; 1237 } 1238 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1239 1240 // This definitely means the region has live objects. 1241 set_bit_for_region(hr); 1242 } 1243 1244 // Update the live region bitmap. 1245 if (marked_bytes > 0) { 1246 set_bit_for_region(hr); 1247 } 1248 1249 // Set the marked bytes for the current region so that 1250 // it can be queried by a calling verification routine 1251 _region_marked_bytes = marked_bytes; 1252 1253 return false; 1254 } 1255 1256 size_t region_marked_bytes() const { return _region_marked_bytes; } 1257 }; 1258 1259 // Heap region closure used for verifying the counting data 1260 // that was accumulated concurrently and aggregated during 1261 // the remark pause. This closure is applied to the heap 1262 // regions during the STW cleanup pause. 
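// For each region it recomputes the expected liveness data using
// CalcLiveObjectsClosure and counts a failure whenever a marked-bytes value,
// region bit or card bit that is set in the expected data is missing from
// the actual (concurrently accumulated) data.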
1263 1264 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { 1265 G1CollectedHeap* _g1h; 1266 ConcurrentMark* _cm; 1267 CalcLiveObjectsClosure _calc_cl; 1268 BitMap* _region_bm; // Region BM to be verified 1269 BitMap* _card_bm; // Card BM to be verified 1270 1271 BitMap* _exp_region_bm; // Expected Region BM values 1272 BitMap* _exp_card_bm; // Expected card BM values 1273 1274 int _failures; 1275 1276 public: 1277 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h, 1278 BitMap* region_bm, 1279 BitMap* card_bm, 1280 BitMap* exp_region_bm, 1281 BitMap* exp_card_bm) : 1282 _g1h(g1h), _cm(g1h->concurrent_mark()), 1283 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm), 1284 _region_bm(region_bm), _card_bm(card_bm), 1285 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), 1286 _failures(0) { } 1287 1288 int failures() const { return _failures; } 1289 1290 bool doHeapRegion(HeapRegion* hr) { 1291 int failures = 0; 1292 1293 // Call the CalcLiveObjectsClosure to walk the marking bitmap for 1294 // this region and set the corresponding bits in the expected region 1295 // and card bitmaps. 1296 bool res = _calc_cl.doHeapRegion(hr); 1297 assert(res == false, "should be continuing"); 1298 1299 // Verify the marked bytes for this region. 1300 size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); 1301 size_t act_marked_bytes = hr->next_marked_bytes(); 1302 1303 if (exp_marked_bytes > act_marked_bytes) { 1304 if (hr->is_starts_humongous()) { 1305 // For start_humongous regions, the size of the whole object will be 1306 // in exp_marked_bytes. 1307 HeapRegion* region = hr; 1308 int num_regions; 1309 for (num_regions = 0; region != NULL; num_regions++) { 1310 region = _g1h->next_region_in_humongous(region); 1311 } 1312 if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) { 1313 failures += 1; 1314 } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) { 1315 failures += 1; 1316 } 1317 } else { 1318 // We're not OK if expected marked bytes > actual marked bytes. It means 1319 // we have missed accounting some objects during the actual marking. 1320 failures += 1; 1321 } 1322 } 1323 1324 // Verify the bit, for this region, in the actual and expected 1325 // (which was just calculated) region bit maps. 1326 // We're not OK if the bit in the calculated expected region 1327 // bitmap is set and the bit in the actual region bitmap is not. 1328 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index(); 1329 1330 bool expected = _exp_region_bm->at(index); 1331 bool actual = _region_bm->at(index); 1332 if (expected && !actual) { 1333 failures += 1; 1334 } 1335 1336 // Verify that the card bit maps for the cards spanned by the current 1337 // region match. We have an error if we have a set bit in the expected 1338 // bit map and the corresponding bit in the actual bitmap is not set. 1339 1340 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1341 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1342 1343 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1344 expected = _exp_card_bm->at(i); 1345 actual = _card_bm->at(i); 1346 1347 if (expected && !actual) { 1348 failures += 1; 1349 } 1350 } 1351 1352 _failures += failures; 1353 1354 // We could stop iteration over the heap when we 1355 // find the first violating region by returning true. 
1356 return false; 1357 } 1358 }; 1359 1360 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1361 protected: 1362 G1CollectedHeap* _g1h; 1363 ConcurrentMark* _cm; 1364 BitMap* _actual_region_bm; 1365 BitMap* _actual_card_bm; 1366 1367 uint _n_workers; 1368 1369 BitMap* _expected_region_bm; 1370 BitMap* _expected_card_bm; 1371 1372 int _failures; 1373 1374 HeapRegionClaimer _hrclaimer; 1375 1376 public: 1377 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1378 BitMap* region_bm, BitMap* card_bm, 1379 BitMap* expected_region_bm, BitMap* expected_card_bm) 1380 : AbstractGangTask("G1 verify final counting"), 1381 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1382 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1383 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1384 _failures(0), 1385 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1386 assert(VerifyDuringGC, "don't call this otherwise"); 1387 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1388 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1389 } 1390 1391 void work(uint worker_id) { 1392 assert(worker_id < _n_workers, "invariant"); 1393 1394 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1395 _actual_region_bm, _actual_card_bm, 1396 _expected_region_bm, 1397 _expected_card_bm); 1398 1399 _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer); 1400 1401 Atomic::add(verify_cl.failures(), &_failures); 1402 } 1403 1404 int failures() const { return _failures; } 1405 }; 1406 1407 // Closure that finalizes the liveness counting data. 1408 // Used during the cleanup pause. 1409 // Sets the bits corresponding to the interval [NTAMS, top] 1410 // (which contains the implicitly live objects) in the 1411 // card liveness bitmap. Also sets the bit for each region, 1412 // containing live data, in the region liveness bitmap. 1413 1414 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1415 public: 1416 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1417 BitMap* region_bm, 1418 BitMap* card_bm) : 1419 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1420 1421 bool doHeapRegion(HeapRegion* hr) { 1422 HeapWord* ntams = hr->next_top_at_mark_start(); 1423 HeapWord* top = hr->top(); 1424 1425 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1426 1427 // Mark the allocated-since-marking portion... 1428 if (ntams < top) { 1429 // This definitely means the region has live objects. 1430 set_bit_for_region(hr); 1431 1432 // Now set the bits in the card bitmap for [ntams, top) 1433 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1434 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1435 1436 // Note: if we're looking at the last region in heap - top 1437 // could be actually just beyond the end of the heap; end_idx 1438 // will then correspond to a (non-existent) card that is also 1439 // just beyond the heap. 
1440 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1441 // end of object is not card aligned - increment to cover 1442 // all the cards spanned by the object 1443 end_idx += 1; 1444 } 1445 1446 assert(end_idx <= _card_bm->size(), 1447 "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1448 end_idx, _card_bm->size()); 1449 assert(start_idx < _card_bm->size(), 1450 "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, 1451 start_idx, _card_bm->size()); 1452 1453 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1454 } 1455 1456 // Set the bit for the region if it contains live data 1457 if (hr->next_marked_bytes() > 0) { 1458 set_bit_for_region(hr); 1459 } 1460 1461 return false; 1462 } 1463 }; 1464 1465 class G1ParFinalCountTask: public AbstractGangTask { 1466 protected: 1467 G1CollectedHeap* _g1h; 1468 ConcurrentMark* _cm; 1469 BitMap* _actual_region_bm; 1470 BitMap* _actual_card_bm; 1471 1472 uint _n_workers; 1473 HeapRegionClaimer _hrclaimer; 1474 1475 public: 1476 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1477 : AbstractGangTask("G1 final counting"), 1478 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1479 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1480 _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { 1481 } 1482 1483 void work(uint worker_id) { 1484 assert(worker_id < _n_workers, "invariant"); 1485 1486 FinalCountDataUpdateClosure final_update_cl(_g1h, 1487 _actual_region_bm, 1488 _actual_card_bm); 1489 1490 _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer); 1491 } 1492 }; 1493 1494 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1495 G1CollectedHeap* _g1; 1496 size_t _freed_bytes; 1497 FreeRegionList* _local_cleanup_list; 1498 uint _old_regions_removed; 1499 uint _humongous_regions_removed; 1500 HRRSCleanupTask* _hrrs_cleanup_task; 1501 1502 public: 1503 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1504 FreeRegionList* local_cleanup_list, 1505 HRRSCleanupTask* hrrs_cleanup_task) : 1506 _g1(g1), 1507 _freed_bytes(0), 1508 _local_cleanup_list(local_cleanup_list), 1509 _old_regions_removed(0), 1510 _humongous_regions_removed(0), 1511 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1512 1513 size_t freed_bytes() { return _freed_bytes; } 1514 const uint old_regions_removed() { return _old_regions_removed; } 1515 const uint humongous_regions_removed() { return _humongous_regions_removed; } 1516 1517 bool doHeapRegion(HeapRegion *hr) { 1518 if (hr->is_archive()) { 1519 return false; 1520 } 1521 // We use a claim value of zero here because all regions 1522 // were claimed with value 1 in the FinalCount task. 
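    // Regions that are used but contain no live data (and are not young or
    // archive) are freed to the local cleanup list below; all other regions
    // only get their remembered-set cleanup work recorded.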
1523 _g1->reset_gc_time_stamps(hr); 1524 hr->note_end_of_marking(); 1525 1526 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1527 _freed_bytes += hr->used(); 1528 hr->set_containing_set(NULL); 1529 if (hr->is_humongous()) { 1530 _humongous_regions_removed++; 1531 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1532 } else { 1533 _old_regions_removed++; 1534 _g1->free_region(hr, _local_cleanup_list, true); 1535 } 1536 } else { 1537 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1538 } 1539 1540 return false; 1541 } 1542 }; 1543 1544 class G1ParNoteEndTask: public AbstractGangTask { 1545 friend class G1NoteEndOfConcMarkClosure; 1546 1547 protected: 1548 G1CollectedHeap* _g1h; 1549 FreeRegionList* _cleanup_list; 1550 HeapRegionClaimer _hrclaimer; 1551 1552 public: 1553 G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) : 1554 AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) { 1555 } 1556 1557 void work(uint worker_id) { 1558 FreeRegionList local_cleanup_list("Local Cleanup List"); 1559 HRRSCleanupTask hrrs_cleanup_task; 1560 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1561 &hrrs_cleanup_task); 1562 _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer); 1563 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1564 1565 // Now update the lists 1566 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1567 { 1568 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1569 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1570 1571 // If we iterate over the global cleanup list at the end of 1572 // cleanup to do this printing we will not guarantee to only 1573 // generate output for the newly-reclaimed regions (the list 1574 // might not be empty at the beginning of cleanup; we might 1575 // still be working on its previous contents). So we do the 1576 // printing here, before we append the new regions to the global 1577 // cleanup list. 1578 1579 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1580 if (hr_printer->is_active()) { 1581 FreeRegionListIterator iter(&local_cleanup_list); 1582 while (iter.more_available()) { 1583 HeapRegion* hr = iter.get_next(); 1584 hr_printer->cleanup(hr); 1585 } 1586 } 1587 1588 _cleanup_list->add_ordered(&local_cleanup_list); 1589 assert(local_cleanup_list.is_empty(), "post-condition"); 1590 1591 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1592 } 1593 } 1594 }; 1595 1596 void ConcurrentMark::cleanup() { 1597 // world is stopped at this checkpoint 1598 assert(SafepointSynchronize::is_at_safepoint(), 1599 "world should be stopped"); 1600 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1601 1602 // If a full collection has happened, we shouldn't do this. 
1603   if (has_aborted()) {
1604     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1605     return;
1606   }
1607
1608   g1h->verifier()->verify_region_sets_optional();
1609
1610   if (VerifyDuringGC) {
1611     HandleMark hm;  // handle scope
1612     g1h->prepare_for_verify();
1613     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1614   }
1615   g1h->verifier()->check_bitmaps("Cleanup Start");
1616
1617   G1CollectorPolicy* g1p = g1h->g1_policy();
1618   g1p->record_concurrent_mark_cleanup_start();
1619
1620   double start = os::elapsedTime();
1621
1622   HeapRegionRemSet::reset_for_cleanup_tasks();
1623
1624   // Do counting once more with the world stopped for good measure.
1625   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1626
1627   g1h->workers()->run_task(&g1_par_count_task);
1628
1629   if (VerifyDuringGC) {
1630     // Verify that the counting data accumulated during marking matches
1631     // that calculated by walking the marking bitmap.
1632
1633     // Bitmaps to hold expected values
1634     BitMap expected_region_bm(_region_bm.size(), true);
1635     BitMap expected_card_bm(_card_bm.size(), true);
1636
1637     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1638                                                  &_region_bm,
1639                                                  &_card_bm,
1640                                                  &expected_region_bm,
1641                                                  &expected_card_bm);
1642
1643     g1h->workers()->run_task(&g1_par_verify_task);
1644
1645     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1646   }
1647
1648   size_t start_used_bytes = g1h->used();
1649   g1h->collector_state()->set_mark_in_progress(false);
1650
1651   double count_end = os::elapsedTime();
1652   double this_final_counting_time = (count_end - start);
1653   _total_counting_time += this_final_counting_time;
1654
1655   if (log_is_enabled(Trace, gc, liveness)) {
1656     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1657     _g1h->heap_region_iterate(&cl);
1658   }
1659
1660   // Install newly created mark bitmap as "prev".
1661   swapMarkBitMaps();
1662
1663   g1h->reset_gc_time_stamp();
1664
1665   uint n_workers = _g1h->workers()->active_workers();
1666
1667   // Note end of marking in all heap regions.
1668   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1669   g1h->workers()->run_task(&g1_par_note_end_task);
1670   g1h->check_gc_time_stamps();
1671
1672   if (!cleanup_list_is_empty()) {
1673     // The cleanup list is not empty, so we'll have to process it
1674     // concurrently. Notify anyone else that might be wanting free
1675     // regions that there will be more free regions coming soon.
1676     g1h->set_free_regions_coming();
1677   }
1678
1679   // The remembered set scrubbing must happen before the record_concurrent_mark_cleanup_end()
1680   // call below, since it affects the metric by which we sort the heap regions.
1681   if (G1ScrubRemSets) {
1682     double rs_scrub_start = os::elapsedTime();
1683     g1h->scrub_rem_set(&_region_bm, &_card_bm);
1684     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1685   }
1686
1687   // this will also free any regions totally full of garbage objects,
1688   // and sort the regions.
1689   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1690
1691   // Statistics.
1692   double end = os::elapsedTime();
1693   _cleanup_times.add((end - start) * 1000.0);
1694
1695   // Clean up will have freed any regions completely full of garbage.
1696   // Update the soft reference policy with the new heap occupancy.
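  // (Universe::update_heap_info_at_gc() records the heap capacity and
  // occupancy figures that the SoftReference clearing policy bases its
  // decisions on, so it should see the post-cleanup numbers.)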
1697 Universe::update_heap_info_at_gc(); 1698 1699 if (VerifyDuringGC) { 1700 HandleMark hm; // handle scope 1701 g1h->prepare_for_verify(); 1702 Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)"); 1703 } 1704 1705 g1h->verifier()->check_bitmaps("Cleanup End"); 1706 1707 g1h->verifier()->verify_region_sets_optional(); 1708 1709 // We need to make this be a "collection" so any collection pause that 1710 // races with it goes around and waits for completeCleanup to finish. 1711 g1h->increment_total_collections(); 1712 1713 // Clean out dead classes and update Metaspace sizes. 1714 if (ClassUnloadingWithConcurrentMark) { 1715 ClassLoaderDataGraph::purge(); 1716 } 1717 MetaspaceGC::compute_new_size(); 1718 1719 // We reclaimed old regions so we should calculate the sizes to make 1720 // sure we update the old gen/space data. 1721 g1h->g1mm()->update_sizes(); 1722 g1h->allocation_context_stats().update_after_mark(); 1723 1724 g1h->trace_heap_after_concurrent_cycle(); 1725 } 1726 1727 void ConcurrentMark::completeCleanup() { 1728 if (has_aborted()) return; 1729 1730 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1731 1732 _cleanup_list.verify_optional(); 1733 FreeRegionList tmp_free_list("Tmp Free List"); 1734 1735 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1736 "cleanup list has %u entries", 1737 _cleanup_list.length()); 1738 1739 // No one else should be accessing the _cleanup_list at this point, 1740 // so it is not necessary to take any locks 1741 while (!_cleanup_list.is_empty()) { 1742 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 1743 assert(hr != NULL, "Got NULL from a non-empty list"); 1744 hr->par_clear(); 1745 tmp_free_list.add_ordered(hr); 1746 1747 // Instead of adding one region at a time to the secondary_free_list, 1748 // we accumulate them in the local list and move them a few at a 1749 // time. This also cuts down on the number of notify_all() calls 1750 // we do during this process. We'll also append the local list when 1751 // _cleanup_list is empty (which means we just removed the last 1752 // region from the _cleanup_list). 1753 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 1754 _cleanup_list.is_empty()) { 1755 log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : " 1756 "appending %u entries to the secondary_free_list, " 1757 "cleanup list still has %u entries", 1758 tmp_free_list.length(), 1759 _cleanup_list.length()); 1760 1761 { 1762 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 1763 g1h->secondary_free_list_add(&tmp_free_list); 1764 SecondaryFreeList_lock->notify_all(); 1765 } 1766 #ifndef PRODUCT 1767 if (G1StressConcRegionFreeing) { 1768 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 1769 os::sleep(Thread::current(), (jlong) 1, false); 1770 } 1771 } 1772 #endif 1773 } 1774 } 1775 assert(tmp_free_list.is_empty(), "post-condition"); 1776 } 1777 1778 // Supporting Object and Oop closures for reference discovery 1779 // and processing in during marking 1780 1781 bool G1CMIsAliveClosure::do_object_b(oop obj) { 1782 HeapWord* addr = (HeapWord*)obj; 1783 return addr != NULL && 1784 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 1785 } 1786 1787 // 'Keep Alive' oop closure used by both serial parallel reference processing. 
1788 // Uses the CMTask associated with a worker thread (for serial reference 1789 // processing the CMTask for worker 0 is used) to preserve (mark) and 1790 // trace referent objects. 1791 // 1792 // Using the CMTask and embedded local queues avoids having the worker 1793 // threads operating on the global mark stack. This reduces the risk 1794 // of overflowing the stack - which we would rather avoid at this late 1795 // state. Also using the tasks' local queues removes the potential 1796 // of the workers interfering with each other that could occur if 1797 // operating on the global stack. 1798 1799 class G1CMKeepAliveAndDrainClosure: public OopClosure { 1800 ConcurrentMark* _cm; 1801 CMTask* _task; 1802 int _ref_counter_limit; 1803 int _ref_counter; 1804 bool _is_serial; 1805 public: 1806 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 1807 _cm(cm), _task(task), _is_serial(is_serial), 1808 _ref_counter_limit(G1RefProcDrainInterval) { 1809 assert(_ref_counter_limit > 0, "sanity"); 1810 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1811 _ref_counter = _ref_counter_limit; 1812 } 1813 1814 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 1815 virtual void do_oop( oop* p) { do_oop_work(p); } 1816 1817 template <class T> void do_oop_work(T* p) { 1818 if (!_cm->has_overflown()) { 1819 oop obj = oopDesc::load_decode_heap_oop(p); 1820 _task->deal_with_reference(obj); 1821 _ref_counter--; 1822 1823 if (_ref_counter == 0) { 1824 // We have dealt with _ref_counter_limit references, pushing them 1825 // and objects reachable from them on to the local stack (and 1826 // possibly the global stack). Call CMTask::do_marking_step() to 1827 // process these entries. 1828 // 1829 // We call CMTask::do_marking_step() in a loop, which we'll exit if 1830 // there's nothing more to do (i.e. we're done with the entries that 1831 // were pushed as a result of the CMTask::deal_with_reference() calls 1832 // above) or we overflow. 1833 // 1834 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 1835 // flag while there may still be some work to do. (See the comment at 1836 // the beginning of CMTask::do_marking_step() for those conditions - 1837 // one of which is reaching the specified time target.) It is only 1838 // when CMTask::do_marking_step() returns without setting the 1839 // has_aborted() flag that the marking step has completed. 1840 do { 1841 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 1842 _task->do_marking_step(mark_step_duration_ms, 1843 false /* do_termination */, 1844 _is_serial); 1845 } while (_task->has_aborted() && !_cm->has_overflown()); 1846 _ref_counter = _ref_counter_limit; 1847 } 1848 } 1849 } 1850 }; 1851 1852 // 'Drain' oop closure used by both serial and parallel reference processing. 1853 // Uses the CMTask associated with a given worker thread (for serial 1854 // reference processing the CMtask for worker 0 is used). Calls the 1855 // do_marking_step routine, with an unbelievably large timeout value, 1856 // to drain the marking data structures of the remaining entries 1857 // added by the 'keep alive' oop closure above. 
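// Informally, reference processing wires these closures together as
// follows (see ConcurrentMark::weakRefsWork() below): G1CMIsAliveClosure
// decides whether a referent is still reachable; the 'keep alive'
// closure above greys referents that must be kept, periodically calling
// do_marking_step() without termination; and this 'drain' closure is
// passed as the complete-GC closure, draining whatever the keep-alive
// work pushed by calling do_marking_step() with termination.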
1858 1859 class G1CMDrainMarkingStackClosure: public VoidClosure { 1860 ConcurrentMark* _cm; 1861 CMTask* _task; 1862 bool _is_serial; 1863 public: 1864 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 1865 _cm(cm), _task(task), _is_serial(is_serial) { 1866 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 1867 } 1868 1869 void do_void() { 1870 do { 1871 // We call CMTask::do_marking_step() to completely drain the local 1872 // and global marking stacks of entries pushed by the 'keep alive' 1873 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 1874 // 1875 // CMTask::do_marking_step() is called in a loop, which we'll exit 1876 // if there's nothing more to do (i.e. we've completely drained the 1877 // entries that were pushed as a a result of applying the 'keep alive' 1878 // closure to the entries on the discovered ref lists) or we overflow 1879 // the global marking stack. 1880 // 1881 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 1882 // flag while there may still be some work to do. (See the comment at 1883 // the beginning of CMTask::do_marking_step() for those conditions - 1884 // one of which is reaching the specified time target.) It is only 1885 // when CMTask::do_marking_step() returns without setting the 1886 // has_aborted() flag that the marking step has completed. 1887 1888 _task->do_marking_step(1000000000.0 /* something very large */, 1889 true /* do_termination */, 1890 _is_serial); 1891 } while (_task->has_aborted() && !_cm->has_overflown()); 1892 } 1893 }; 1894 1895 // Implementation of AbstractRefProcTaskExecutor for parallel 1896 // reference processing at the end of G1 concurrent marking 1897 1898 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 1899 private: 1900 G1CollectedHeap* _g1h; 1901 ConcurrentMark* _cm; 1902 WorkGang* _workers; 1903 uint _active_workers; 1904 1905 public: 1906 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 1907 ConcurrentMark* cm, 1908 WorkGang* workers, 1909 uint n_workers) : 1910 _g1h(g1h), _cm(cm), 1911 _workers(workers), _active_workers(n_workers) { } 1912 1913 // Executes the given task using concurrent marking worker threads. 
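  // (ProcessTask wraps the discovered-reference processing work and
  // EnqueueTask the pending-list enqueueing; both are run on the G1
  // work gang via the proxy AbstractGangTasks defined below.)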
1914 virtual void execute(ProcessTask& task); 1915 virtual void execute(EnqueueTask& task); 1916 }; 1917 1918 class G1CMRefProcTaskProxy: public AbstractGangTask { 1919 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1920 ProcessTask& _proc_task; 1921 G1CollectedHeap* _g1h; 1922 ConcurrentMark* _cm; 1923 1924 public: 1925 G1CMRefProcTaskProxy(ProcessTask& proc_task, 1926 G1CollectedHeap* g1h, 1927 ConcurrentMark* cm) : 1928 AbstractGangTask("Process reference objects in parallel"), 1929 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 1930 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 1931 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 1932 } 1933 1934 virtual void work(uint worker_id) { 1935 ResourceMark rm; 1936 HandleMark hm; 1937 CMTask* task = _cm->task(worker_id); 1938 G1CMIsAliveClosure g1_is_alive(_g1h); 1939 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 1940 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 1941 1942 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 1943 } 1944 }; 1945 1946 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 1947 assert(_workers != NULL, "Need parallel worker threads."); 1948 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1949 1950 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 1951 1952 // We need to reset the concurrency level before each 1953 // proxy task execution, so that the termination protocol 1954 // and overflow handling in CMTask::do_marking_step() knows 1955 // how many workers to wait for. 1956 _cm->set_concurrency(_active_workers); 1957 _workers->run_task(&proc_task_proxy); 1958 } 1959 1960 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 1961 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 1962 EnqueueTask& _enq_task; 1963 1964 public: 1965 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 1966 AbstractGangTask("Enqueue reference objects in parallel"), 1967 _enq_task(enq_task) { } 1968 1969 virtual void work(uint worker_id) { 1970 _enq_task.work(worker_id); 1971 } 1972 }; 1973 1974 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 1975 assert(_workers != NULL, "Need parallel worker threads."); 1976 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 1977 1978 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 1979 1980 // Not strictly necessary but... 1981 // 1982 // We need to reset the concurrency level before each 1983 // proxy task execution, so that the termination protocol 1984 // and overflow handling in CMTask::do_marking_step() knows 1985 // how many workers to wait for. 1986 _cm->set_concurrency(_active_workers); 1987 _workers->run_task(&enq_task_proxy); 1988 } 1989 1990 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 1991 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 1992 } 1993 1994 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 1995 if (has_overflown()) { 1996 // Skip processing the discovered references if we have 1997 // overflown the global marking stack. Reference objects 1998 // only get discovered once so it is OK to not 1999 // de-populate the discovered reference lists. We could have, 2000 // but the only benefit would be that, when marking restarts, 2001 // less reference objects are discovered. 
2002 return; 2003 } 2004 2005 ResourceMark rm; 2006 HandleMark hm; 2007 2008 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2009 2010 // Is alive closure. 2011 G1CMIsAliveClosure g1_is_alive(g1h); 2012 2013 // Inner scope to exclude the cleaning of the string and symbol 2014 // tables from the displayed time. 2015 { 2016 GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm()); 2017 2018 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2019 2020 // See the comment in G1CollectedHeap::ref_processing_init() 2021 // about how reference processing currently works in G1. 2022 2023 // Set the soft reference policy 2024 rp->setup_policy(clear_all_soft_refs); 2025 assert(_markStack.isEmpty(), "mark stack should be empty"); 2026 2027 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2028 // in serial reference processing. Note these closures are also 2029 // used for serially processing (by the the current thread) the 2030 // JNI references during parallel reference processing. 2031 // 2032 // These closures do not need to synchronize with the worker 2033 // threads involved in parallel reference processing as these 2034 // instances are executed serially by the current thread (e.g. 2035 // reference processing is not multi-threaded and is thus 2036 // performed by the current thread instead of a gang worker). 2037 // 2038 // The gang tasks involved in parallel reference processing create 2039 // their own instances of these closures, which do their own 2040 // synchronization among themselves. 2041 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2042 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2043 2044 // We need at least one active thread. If reference processing 2045 // is not multi-threaded we use the current (VMThread) thread, 2046 // otherwise we use the work gang from the G1CollectedHeap and 2047 // we utilize all the worker threads we can. 2048 bool processing_is_mt = rp->processing_is_mt(); 2049 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2050 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2051 2052 // Parallel processing task executor. 2053 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2054 g1h->workers(), active_workers); 2055 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2056 2057 // Set the concurrency level. The phase was already set prior to 2058 // executing the remark task. 2059 set_concurrency(active_workers); 2060 2061 // Set the degree of MT processing here. If the discovery was done MT, 2062 // the number of threads involved during discovery could differ from 2063 // the number of active workers. This is OK as long as the discovered 2064 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2065 rp->set_active_mt_degree(active_workers); 2066 2067 // Process the weak references. 2068 const ReferenceProcessorStats& stats = 2069 rp->process_discovered_references(&g1_is_alive, 2070 &g1_keep_alive, 2071 &g1_drain_mark_stack, 2072 executor, 2073 g1h->gc_timer_cm()); 2074 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2075 2076 // The do_oop work routines of the keep_alive and drain_marking_stack 2077 // oop closures will set the has_overflown flag if we overflow the 2078 // global marking stack. 
2079 2080 assert(_markStack.overflow() || _markStack.isEmpty(), 2081 "mark stack should be empty (unless it overflowed)"); 2082 2083 if (_markStack.overflow()) { 2084 // This should have been done already when we tried to push an 2085 // entry on to the global mark stack. But let's do it again. 2086 set_has_overflown(); 2087 } 2088 2089 assert(rp->num_q() == active_workers, "why not"); 2090 2091 rp->enqueue_discovered_references(executor); 2092 2093 rp->verify_no_references_recorded(); 2094 assert(!rp->discovery_enabled(), "Post condition"); 2095 } 2096 2097 if (has_overflown()) { 2098 // We can not trust g1_is_alive if the marking stack overflowed 2099 return; 2100 } 2101 2102 assert(_markStack.isEmpty(), "Marking should have completed"); 2103 2104 // Unload Klasses, String, Symbols, Code Cache, etc. 2105 { 2106 GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm()); 2107 2108 if (ClassUnloadingWithConcurrentMark) { 2109 bool purged_classes; 2110 2111 { 2112 GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm()); 2113 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */); 2114 } 2115 2116 { 2117 GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm()); 2118 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2119 } 2120 } 2121 2122 if (G1StringDedup::is_enabled()) { 2123 GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm()); 2124 G1StringDedup::unlink(&g1_is_alive); 2125 } 2126 } 2127 } 2128 2129 void ConcurrentMark::swapMarkBitMaps() { 2130 CMBitMapRO* temp = _prevMarkBitMap; 2131 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2132 _nextMarkBitMap = (CMBitMap*) temp; 2133 } 2134 2135 // Closure for marking entries in SATB buffers. 2136 class CMSATBBufferClosure : public SATBBufferClosure { 2137 private: 2138 CMTask* _task; 2139 G1CollectedHeap* _g1h; 2140 2141 // This is very similar to CMTask::deal_with_reference, but with 2142 // more relaxed requirements for the argument, so this must be more 2143 // circumspect about treating the argument as an object. 2144 void do_entry(void* entry) const { 2145 _task->increment_refs_reached(); 2146 HeapRegion* hr = _g1h->heap_region_containing(entry); 2147 if (entry < hr->next_top_at_mark_start()) { 2148 // Until we get here, we don't know whether entry refers to a valid 2149 // object; it could instead have been a stale reference. 
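      // Anything at or above NTAMS is either an object allocated since
      // marking started (and therefore implicitly live) or a stale entry;
      // either way it is safe to skip, which is why only entries below
      // NTAMS are treated as objects here.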
2150       oop obj = static_cast<oop>(entry);
2151       assert(obj->is_oop(true /* ignore mark word */),
2152              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2153       _task->make_reference_grey(obj, hr);
2154     }
2155   }
2156
2157  public:
2158   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2159     : _task(task), _g1h(g1h) { }
2160
2161   virtual void do_buffer(void** buffer, size_t size) {
2162     for (size_t i = 0; i < size; ++i) {
2163       do_entry(buffer[i]);
2164     }
2165   }
2166 };
2167
2168 class G1RemarkThreadsClosure : public ThreadClosure {
2169   CMSATBBufferClosure _cm_satb_cl;
2170   G1CMOopClosure _cm_cl;
2171   MarkingCodeBlobClosure _code_cl;
2172   int _thread_parity;
2173
2174  public:
2175   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2176     _cm_satb_cl(task, g1h),
2177     _cm_cl(g1h, g1h->concurrent_mark(), task),
2178     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2179     _thread_parity(Threads::thread_claim_parity()) {}
2180
2181   void do_thread(Thread* thread) {
2182     if (thread->is_Java_thread()) {
2183       if (thread->claim_oops_do(true, _thread_parity)) {
2184         JavaThread* jt = (JavaThread*)thread;
2185
2186         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking.
2187         // However, the oops reachable from nmethods have very complex lifecycles:
2188         // * Alive if on the stack of an executing method
2189         // * Weakly reachable otherwise
2190         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2191         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2192         jt->nmethods_do(&_code_cl);
2193
2194         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2195       }
2196     } else if (thread->is_VM_thread()) {
2197       if (thread->claim_oops_do(true, _thread_parity)) {
2198         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2199       }
2200     }
2201   }
2202 };
2203
2204 class CMRemarkTask: public AbstractGangTask {
2205 private:
2206   ConcurrentMark* _cm;
2207 public:
2208   void work(uint worker_id) {
2209     // Since all available tasks are actually started, we should
2210     // only proceed if we're supposed to be active.
2211     if (worker_id < _cm->active_tasks()) {
2212       CMTask* task = _cm->task(worker_id);
2213       task->record_start_time();
2214       {
2215         ResourceMark rm;
2216         HandleMark hm;
2217
2218         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2219         Threads::threads_do(&threads_f);
2220       }
2221
2222       do {
2223         task->do_marking_step(1000000000.0 /* something very large */,
2224                               true /* do_termination */,
2225                               false /* is_serial */);
2226       } while (task->has_aborted() && !_cm->has_overflown());
2227       // If we overflow, then we do not want to restart. We instead
2228       // want to abort remark and do concurrent marking again.
2229 task->record_end_time(); 2230 } 2231 } 2232 2233 CMRemarkTask(ConcurrentMark* cm, uint active_workers) : 2234 AbstractGangTask("Par Remark"), _cm(cm) { 2235 _cm->terminator()->reset_for_reuse(active_workers); 2236 } 2237 }; 2238 2239 void ConcurrentMark::checkpointRootsFinalWork() { 2240 ResourceMark rm; 2241 HandleMark hm; 2242 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2243 2244 GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm()); 2245 2246 g1h->ensure_parsability(false); 2247 2248 // this is remark, so we'll use up all active threads 2249 uint active_workers = g1h->workers()->active_workers(); 2250 set_concurrency_and_phase(active_workers, false /* concurrent */); 2251 // Leave _parallel_marking_threads at it's 2252 // value originally calculated in the ConcurrentMark 2253 // constructor and pass values of the active workers 2254 // through the gang in the task. 2255 2256 { 2257 StrongRootsScope srs(active_workers); 2258 2259 CMRemarkTask remarkTask(this, active_workers); 2260 // We will start all available threads, even if we decide that the 2261 // active_workers will be fewer. The extra ones will just bail out 2262 // immediately. 2263 g1h->workers()->run_task(&remarkTask); 2264 } 2265 2266 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2267 guarantee(has_overflown() || 2268 satb_mq_set.completed_buffers_num() == 0, 2269 "Invariant: has_overflown = %s, num buffers = %d", 2270 BOOL_TO_STR(has_overflown()), 2271 satb_mq_set.completed_buffers_num()); 2272 2273 print_stats(); 2274 } 2275 2276 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2277 // Note we are overriding the read-only view of the prev map here, via 2278 // the cast. 2279 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2280 } 2281 2282 HeapRegion* 2283 ConcurrentMark::claim_region(uint worker_id) { 2284 // "checkpoint" the finger 2285 HeapWord* finger = _finger; 2286 2287 // _heap_end will not change underneath our feet; it only changes at 2288 // yield points. 2289 while (finger < _heap_end) { 2290 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2291 2292 HeapRegion* curr_region = _g1h->heap_region_containing(finger); 2293 2294 // Above heap_region_containing may return NULL as we always scan claim 2295 // until the end of the heap. In this case, just jump to the next region. 2296 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 2297 2298 // Is the gap between reading the finger and doing the CAS too long? 2299 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 2300 if (res == finger && curr_region != NULL) { 2301 // we succeeded 2302 HeapWord* bottom = curr_region->bottom(); 2303 HeapWord* limit = curr_region->next_top_at_mark_start(); 2304 2305 // notice that _finger == end cannot be guaranteed here since, 2306 // someone else might have moved the finger even further 2307 assert(_finger >= end, "the finger should have moved forward"); 2308 2309 if (limit > bottom) { 2310 return curr_region; 2311 } else { 2312 assert(limit == bottom, 2313 "the region limit should be at bottom"); 2314 // we return NULL and the caller should try calling 2315 // claim_region() again. 
2316 return NULL; 2317 } 2318 } else { 2319 assert(_finger > finger, "the finger should have moved forward"); 2320 // read it again 2321 finger = _finger; 2322 } 2323 } 2324 2325 return NULL; 2326 } 2327 2328 #ifndef PRODUCT 2329 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC { 2330 private: 2331 G1CollectedHeap* _g1h; 2332 const char* _phase; 2333 int _info; 2334 2335 public: 2336 VerifyNoCSetOops(const char* phase, int info = -1) : 2337 _g1h(G1CollectedHeap::heap()), 2338 _phase(phase), 2339 _info(info) 2340 { } 2341 2342 void operator()(oop obj) const { 2343 guarantee(obj->is_oop(), 2344 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 2345 p2i(obj), _phase, _info); 2346 guarantee(!_g1h->obj_in_cs(obj), 2347 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 2348 p2i(obj), _phase, _info); 2349 } 2350 }; 2351 2352 void ConcurrentMark::verify_no_cset_oops() { 2353 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2354 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) { 2355 return; 2356 } 2357 2358 // Verify entries on the global mark stack 2359 _markStack.iterate(VerifyNoCSetOops("Stack")); 2360 2361 // Verify entries on the task queues 2362 for (uint i = 0; i < _max_worker_id; ++i) { 2363 CMTaskQueue* queue = _task_queues->queue(i); 2364 queue->iterate(VerifyNoCSetOops("Queue", i)); 2365 } 2366 2367 // Verify the global finger 2368 HeapWord* global_finger = finger(); 2369 if (global_finger != NULL && global_finger < _heap_end) { 2370 // Since we always iterate over all regions, we might get a NULL HeapRegion 2371 // here. 2372 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 2373 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 2374 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 2375 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 2376 } 2377 2378 // Verify the task fingers 2379 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 2380 for (uint i = 0; i < parallel_marking_threads(); ++i) { 2381 CMTask* task = _tasks[i]; 2382 HeapWord* task_finger = task->finger(); 2383 if (task_finger != NULL && task_finger < _heap_end) { 2384 // See above note on the global finger verification. 2385 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 2386 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 2387 !task_hr->in_collection_set(), 2388 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 2389 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 2390 } 2391 } 2392 } 2393 #endif // PRODUCT 2394 2395 // Aggregate the counting data that was constructed concurrently 2396 // with marking. 
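// While marking runs, each worker keeps its liveness data private: a
// marked-bytes array indexed by region and its own card bitmap. The
// closure below folds those per-worker structures into the global card
// bitmap and into each region's marked-bytes total. As a rough sketch
// of the card indexing (the exact expression lives in
// card_bitmap_index_for() and the card table code):
//
//   idx ~= (addr - heap_start) / card_size   // one bit per card
//
// so a region's [bottom, ntams) interval maps to the index range
// [card_bitmap_index_for(bottom), card_bitmap_index_for(ntams)).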
2397 class AggregateCountDataHRClosure: public HeapRegionClosure {
2398   G1CollectedHeap* _g1h;
2399   ConcurrentMark* _cm;
2400   CardTableModRefBS* _ct_bs;
2401   BitMap* _cm_card_bm;
2402   uint _max_worker_id;
2403
2404  public:
2405   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2406                               BitMap* cm_card_bm,
2407                               uint max_worker_id) :
2408     _g1h(g1h), _cm(g1h->concurrent_mark()),
2409     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2410     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2411
2412   bool doHeapRegion(HeapRegion* hr) {
2413     HeapWord* start = hr->bottom();
2414     HeapWord* limit = hr->next_top_at_mark_start();
2415     HeapWord* end = hr->end();
2416
2417     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2418            "Preconditions not met - "
2419            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2420            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2421            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2422
2423     assert(hr->next_marked_bytes() == 0, "Precondition");
2424
2425     if (start == limit) {
2426       // NTAMS of this region has not been set so nothing to do.
2427       return false;
2428     }
2429
2430     // 'start' should be in the heap.
2431     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2432     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2433     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2434
2435     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2436     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2437     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2438
2439     // If ntams is not card aligned then we bump the card bitmap index
2440     // for limit so that we get all the cards spanned by
2441     // the object ending at ntams.
2442     // Note: if this is the last region in the heap then ntams
2443     // could be actually just beyond the end of the heap;
2444     // limit_idx will then correspond to a (non-existent) card
2445     // that is also outside the heap.
2446     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2447       limit_idx += 1;
2448     }
2449
2450     assert(limit_idx <= end_idx, "or else use atomics");
2451
2452     // Aggregate the "stripe" in the count data associated with hr.
2453     uint hrm_index = hr->hrm_index();
2454     size_t marked_bytes = 0;
2455
2456     for (uint i = 0; i < _max_worker_id; i += 1) {
2457       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2458       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2459
2460       // Fetch the marked_bytes in this region for task i and
2461       // add it to the running total for this region.
2462       marked_bytes += marked_bytes_array[hrm_index];
2463
2464       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2465       // into the global card bitmap.
2466       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2467
2468       while (scan_idx < limit_idx) {
2469         assert(task_card_bm->at(scan_idx) == true, "should be");
2470         _cm_card_bm->set_bit(scan_idx);
2471         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2472
2473         // BitMap::get_next_one_offset() can handle the case when
2474         // its left_offset parameter is greater than its right_offset
2475         // parameter. It does, however, have an early exit if
2476         // left_offset == right_offset. So let's limit the value
2477         // passed in for left offset here.
2478 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 2479 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 2480 } 2481 } 2482 2483 // Update the marked bytes for this region. 2484 hr->add_to_marked_bytes(marked_bytes); 2485 2486 // Next heap region 2487 return false; 2488 } 2489 }; 2490 2491 class G1AggregateCountDataTask: public AbstractGangTask { 2492 protected: 2493 G1CollectedHeap* _g1h; 2494 ConcurrentMark* _cm; 2495 BitMap* _cm_card_bm; 2496 uint _max_worker_id; 2497 uint _active_workers; 2498 HeapRegionClaimer _hrclaimer; 2499 2500 public: 2501 G1AggregateCountDataTask(G1CollectedHeap* g1h, 2502 ConcurrentMark* cm, 2503 BitMap* cm_card_bm, 2504 uint max_worker_id, 2505 uint n_workers) : 2506 AbstractGangTask("Count Aggregation"), 2507 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 2508 _max_worker_id(max_worker_id), 2509 _active_workers(n_workers), 2510 _hrclaimer(_active_workers) { 2511 } 2512 2513 void work(uint worker_id) { 2514 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 2515 2516 _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer); 2517 } 2518 }; 2519 2520 2521 void ConcurrentMark::aggregate_count_data() { 2522 uint n_workers = _g1h->workers()->active_workers(); 2523 2524 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 2525 _max_worker_id, n_workers); 2526 2527 _g1h->workers()->run_task(&g1_par_agg_task); 2528 } 2529 2530 // Clear the per-worker arrays used to store the per-region counting data 2531 void ConcurrentMark::clear_all_count_data() { 2532 // Clear the global card bitmap - it will be filled during 2533 // liveness count aggregation (during remark) and the 2534 // final counting task. 2535 _card_bm.clear(); 2536 2537 // Clear the global region bitmap - it will be filled as part 2538 // of the final counting task. 2539 _region_bm.clear(); 2540 2541 uint max_regions = _g1h->max_regions(); 2542 assert(_max_worker_id > 0, "uninitialized"); 2543 2544 for (uint i = 0; i < _max_worker_id; i += 1) { 2545 BitMap* task_card_bm = count_card_bitmap_for(i); 2546 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 2547 2548 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 2549 assert(marked_bytes_array != NULL, "uninitialized"); 2550 2551 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 2552 task_card_bm->clear(); 2553 } 2554 } 2555 2556 void ConcurrentMark::print_stats() { 2557 if (!log_is_enabled(Debug, gc, stats)) { 2558 return; 2559 } 2560 log_debug(gc, stats)("---------------------------------------------------------------------"); 2561 for (size_t i = 0; i < _active_tasks; ++i) { 2562 _tasks[i]->print_stats(); 2563 log_debug(gc, stats)("---------------------------------------------------------------------"); 2564 } 2565 } 2566 2567 // abandon current marking iteration due to a Full GC 2568 void ConcurrentMark::abort() { 2569 if (!cmThread()->during_cycle() || _has_aborted) { 2570 // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. 2571 return; 2572 } 2573 2574 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 2575 // concurrent bitmap clearing. 2576 _nextMarkBitMap->clearAll(); 2577 2578 // Note we cannot clear the previous marking bitmap here 2579 // since VerifyDuringGC verifies the objects marked during 2580 // a full GC against the previous bitmap. 
2581 2582 // Clear the liveness counting data 2583 clear_all_count_data(); 2584 // Empty mark stack 2585 reset_marking_state(); 2586 for (uint i = 0; i < _max_worker_id; ++i) { 2587 _tasks[i]->clear_region_fields(); 2588 } 2589 _first_overflow_barrier_sync.abort(); 2590 _second_overflow_barrier_sync.abort(); 2591 _has_aborted = true; 2592 2593 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2594 satb_mq_set.abandon_partial_marking(); 2595 // This can be called either during or outside marking, we'll read 2596 // the expected_active value from the SATB queue set. 2597 satb_mq_set.set_active_all_threads( 2598 false, /* new active value */ 2599 satb_mq_set.is_active() /* expected_active */); 2600 2601 _g1h->trace_heap_after_concurrent_cycle(); 2602 2603 // Close any open concurrent phase timing 2604 register_concurrent_phase_end(); 2605 2606 _g1h->register_concurrent_cycle_end(); 2607 } 2608 2609 static void print_ms_time_info(const char* prefix, const char* name, 2610 NumberSeq& ns) { 2611 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2612 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2613 if (ns.num() > 0) { 2614 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2615 prefix, ns.sd(), ns.maximum()); 2616 } 2617 } 2618 2619 void ConcurrentMark::print_summary_info() { 2620 LogHandle(gc, marking) log; 2621 if (!log.is_trace()) { 2622 return; 2623 } 2624 2625 log.trace(" Concurrent marking:"); 2626 print_ms_time_info(" ", "init marks", _init_times); 2627 print_ms_time_info(" ", "remarks", _remark_times); 2628 { 2629 print_ms_time_info(" ", "final marks", _remark_mark_times); 2630 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2631 2632 } 2633 print_ms_time_info(" ", "cleanups", _cleanup_times); 2634 log.trace(" Final counting total time = %8.2f s (avg = %8.2f ms).", 2635 _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2636 if (G1ScrubRemSets) { 2637 log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 2638 _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2639 } 2640 log.trace(" Total stop_world time = %8.2f s.", 2641 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2642 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2643 cmThread()->vtime_accum(), cmThread()->vtime_mark_accum()); 2644 } 2645 2646 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2647 _parallel_workers->print_worker_threads_on(st); 2648 } 2649 2650 void ConcurrentMark::print_on_error(outputStream* st) const { 2651 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2652 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 2653 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 2654 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 2655 } 2656 2657 // We take a break if someone is trying to stop the world. 
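// (The concurrent marking threads are part of the suspendible thread
// set; yielding here is what allows a pending safepoint - for example a
// young collection pause - to proceed while marking is in progress.)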
2658 bool ConcurrentMark::do_yield_check(uint worker_id) { 2659 if (SuspendibleThreadSet::should_yield()) { 2660 if (worker_id == 0) { 2661 _g1h->g1_policy()->record_concurrent_pause(); 2662 } 2663 SuspendibleThreadSet::yield(); 2664 return true; 2665 } else { 2666 return false; 2667 } 2668 } 2669 2670 // Closure for iteration over bitmaps 2671 class CMBitMapClosure : public BitMapClosure { 2672 private: 2673 // the bitmap that is being iterated over 2674 CMBitMap* _nextMarkBitMap; 2675 ConcurrentMark* _cm; 2676 CMTask* _task; 2677 2678 public: 2679 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 2680 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 2681 2682 bool do_bit(size_t offset) { 2683 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 2684 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 2685 assert( addr < _cm->finger(), "invariant"); 2686 assert(addr >= _task->finger(), "invariant"); 2687 2688 // We move that task's local finger along. 2689 _task->move_finger_to(addr); 2690 2691 _task->scan_object(oop(addr)); 2692 // we only partially drain the local queue and global stack 2693 _task->drain_local_queue(true); 2694 _task->drain_global_stack(true); 2695 2696 // if the has_aborted flag has been raised, we need to bail out of 2697 // the iteration 2698 return !_task->has_aborted(); 2699 } 2700 }; 2701 2702 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2703 ReferenceProcessor* result = NULL; 2704 if (G1UseConcMarkReferenceProcessing) { 2705 result = g1h->ref_processor_cm(); 2706 assert(result != NULL, "should not be NULL"); 2707 } 2708 return result; 2709 } 2710 2711 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2712 ConcurrentMark* cm, 2713 CMTask* task) 2714 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2715 _g1h(g1h), _cm(cm), _task(task) 2716 { } 2717 2718 void CMTask::setup_for_region(HeapRegion* hr) { 2719 assert(hr != NULL, 2720 "claim_region() should have filtered out NULL regions"); 2721 _curr_region = hr; 2722 _finger = hr->bottom(); 2723 update_region_limit(); 2724 } 2725 2726 void CMTask::update_region_limit() { 2727 HeapRegion* hr = _curr_region; 2728 HeapWord* bottom = hr->bottom(); 2729 HeapWord* limit = hr->next_top_at_mark_start(); 2730 2731 if (limit == bottom) { 2732 // The region was collected underneath our feet. 2733 // We set the finger to bottom to ensure that the bitmap 2734 // iteration that will follow this will not do anything. 2735 // (this is not a condition that holds when we set the region up, 2736 // as the region is not supposed to be empty in the first place) 2737 _finger = bottom; 2738 } else if (limit >= _region_limit) { 2739 assert(limit >= _finger, "peace of mind"); 2740 } else { 2741 assert(limit < _region_limit, "only way to get here"); 2742 // This can happen under some pretty unusual circumstances. An 2743 // evacuation pause empties the region underneath our feet (NTAMS 2744 // at bottom). We then do some allocation in the region (NTAMS 2745 // stays at bottom), followed by the region being used as a GC 2746 // alloc region (NTAMS will move to top() and the objects 2747 // originally below it will be grayed). All objects now marked in 2748 // the region are explicitly grayed, if below the global finger, 2749 // and we do not need in fact to scan anything else. So, we simply 2750 // set _finger to be limit to ensure that the bitmap iteration 2751 // doesn't do anything. 
2752     _finger = limit;
2753   }
2754
2755   _region_limit = limit;
2756 }
2757
2758 void CMTask::giveup_current_region() {
2759   assert(_curr_region != NULL, "invariant");
2760   clear_region_fields();
2761 }
2762
2763 void CMTask::clear_region_fields() {
2764   // Values for these three fields that indicate that we're not
2765   // holding on to a region.
2766   _curr_region = NULL;
2767   _finger = NULL;
2768   _region_limit = NULL;
2769 }
2770
2771 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2772   if (cm_oop_closure == NULL) {
2773     assert(_cm_oop_closure != NULL, "invariant");
2774   } else {
2775     assert(_cm_oop_closure == NULL, "invariant");
2776   }
2777   _cm_oop_closure = cm_oop_closure;
2778 }
2779
2780 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2781   guarantee(nextMarkBitMap != NULL, "invariant");
2782   _nextMarkBitMap = nextMarkBitMap;
2783   clear_region_fields();
2784
2785   _calls = 0;
2786   _elapsed_time_ms = 0.0;
2787   _termination_time_ms = 0.0;
2788   _termination_start_time_ms = 0.0;
2789 }
2790
2791 bool CMTask::should_exit_termination() {
2792   regular_clock_call();
2793   // This is called when we are in the termination protocol. We should
2794   // quit if, for some reason, this task wants to abort or the global
2795   // stack is not empty (this means that we can get work from it).
2796   return !_cm->mark_stack_empty() || has_aborted();
2797 }
2798
2799 void CMTask::reached_limit() {
2800   assert(_words_scanned >= _words_scanned_limit ||
2801          _refs_reached >= _refs_reached_limit,
2802          "shouldn't have been called otherwise");
2803   regular_clock_call();
2804 }
2805
2806 void CMTask::regular_clock_call() {
2807   if (has_aborted()) return;
2808
2809   // First, we need to recalculate the words scanned and refs reached
2810   // limits for the next clock call.
2811   recalculate_limits();
2812
2813   // During the regular clock call we do the following:
2814
2815   // (1) If an overflow has been flagged, then we abort.
2816   if (_cm->has_overflown()) {
2817     set_has_aborted();
2818     return;
2819   }
2820
2821   // If we are not concurrent (i.e. we're doing remark) we don't need
2822   // to check anything else. The other steps are only needed during
2823   // the concurrent marking phase.
2824   if (!concurrent()) return;
2825
2826   // (2) If marking has been aborted for a Full GC, then we also abort.
2827   if (_cm->has_aborted()) {
2828     set_has_aborted();
2829     return;
2830   }
2831
2832   double curr_time_ms = os::elapsedVTime() * 1000.0;
2833
2834   // (3) We check whether we should yield. If we have to, then we abort.
2835   if (SuspendibleThreadSet::should_yield()) {
2836     // We should yield. To do this we abort the task. The caller is
2837     // responsible for yielding.
2838     set_has_aborted();
2839     return;
2840   }
2841
2842   // (4) We check whether we've reached our time quota. If we have,
2843   // then we abort.
2844   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2845   if (elapsed_time_ms > _time_target_ms) {
2846     set_has_aborted();
2847     _has_timed_out = true;
2848     return;
2849   }
2850
2851   // (5) Finally, we check whether there are enough completed SATB
2852   // buffers available for processing. If there are, we abort.
2853 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2854 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2855 // we do need to process SATB buffers, we'll abort and restart 2856 // the marking task to do so 2857 set_has_aborted(); 2858 return; 2859 } 2860 } 2861 2862 void CMTask::recalculate_limits() { 2863 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2864 _words_scanned_limit = _real_words_scanned_limit; 2865 2866 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2867 _refs_reached_limit = _real_refs_reached_limit; 2868 } 2869 2870 void CMTask::decrease_limits() { 2871 // This is called when we believe that we're going to do an infrequent 2872 // operation which will increase the per byte scanned cost (i.e. move 2873 // entries to/from the global stack). It basically tries to decrease the 2874 // scanning limit so that the clock is called earlier. 2875 2876 _words_scanned_limit = _real_words_scanned_limit - 2877 3 * words_scanned_period / 4; 2878 _refs_reached_limit = _real_refs_reached_limit - 2879 3 * refs_reached_period / 4; 2880 } 2881 2882 void CMTask::move_entries_to_global_stack() { 2883 // local array where we'll store the entries that will be popped 2884 // from the local queue 2885 oop buffer[global_stack_transfer_size]; 2886 2887 int n = 0; 2888 oop obj; 2889 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 2890 buffer[n] = obj; 2891 ++n; 2892 } 2893 2894 if (n > 0) { 2895 // we popped at least one entry from the local queue 2896 2897 if (!_cm->mark_stack_push(buffer, n)) { 2898 set_has_aborted(); 2899 } 2900 } 2901 2902 // this operation was quite expensive, so decrease the limits 2903 decrease_limits(); 2904 } 2905 2906 void CMTask::get_entries_from_global_stack() { 2907 // local array where we'll store the entries that will be popped 2908 // from the global stack. 2909 oop buffer[global_stack_transfer_size]; 2910 int n; 2911 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 2912 assert(n <= global_stack_transfer_size, 2913 "we should not pop more than the given limit"); 2914 if (n > 0) { 2915 // yes, we did actually pop at least one entry 2916 for (int i = 0; i < n; ++i) { 2917 bool success = _task_queue->push(buffer[i]); 2918 // We only call this when the local queue is empty or under a 2919 // given target limit. So, we do not expect this push to fail. 2920 assert(success, "invariant"); 2921 } 2922 } 2923 2924 // this operation was quite expensive, so decrease the limits 2925 decrease_limits(); 2926 } 2927 2928 void CMTask::drain_local_queue(bool partially) { 2929 if (has_aborted()) return; 2930 2931 // Decide what the target size is, depending whether we're going to 2932 // drain it partially (so that other tasks can steal if they run out 2933 // of things to do) or totally (at the very end). 
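  // A partial drain leaves up to roughly a third of the queue's capacity
  // (capped by GCDrainStackTargetSize) untouched so that there is still
  // something left for other tasks to steal; a total drain empties the
  // queue completely.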
2934 size_t target_size; 2935 if (partially) { 2936 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 2937 } else { 2938 target_size = 0; 2939 } 2940 2941 if (_task_queue->size() > target_size) { 2942 oop obj; 2943 bool ret = _task_queue->pop_local(obj); 2944 while (ret) { 2945 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); 2946 assert(!_g1h->is_on_master_free_list( 2947 _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); 2948 2949 scan_object(obj); 2950 2951 if (_task_queue->size() <= target_size || has_aborted()) { 2952 ret = false; 2953 } else { 2954 ret = _task_queue->pop_local(obj); 2955 } 2956 } 2957 } 2958 } 2959 2960 void CMTask::drain_global_stack(bool partially) { 2961 if (has_aborted()) return; 2962 2963 // We have a policy to drain the local queue before we attempt to 2964 // drain the global stack. 2965 assert(partially || _task_queue->size() == 0, "invariant"); 2966 2967 // Decide what the target size is, depending whether we're going to 2968 // drain it partially (so that other tasks can steal if they run out 2969 // of things to do) or totally (at the very end). Notice that, 2970 // because we move entries from the global stack in chunks or 2971 // because another task might be doing the same, we might in fact 2972 // drop below the target. But, this is not a problem. 2973 size_t target_size; 2974 if (partially) { 2975 target_size = _cm->partial_mark_stack_size_target(); 2976 } else { 2977 target_size = 0; 2978 } 2979 2980 if (_cm->mark_stack_size() > target_size) { 2981 while (!has_aborted() && _cm->mark_stack_size() > target_size) { 2982 get_entries_from_global_stack(); 2983 drain_local_queue(partially); 2984 } 2985 } 2986 } 2987 2988 // SATB Queue has several assumptions on whether to call the par or 2989 // non-par versions of the methods. this is why some of the code is 2990 // replicated. We should really get rid of the single-threaded version 2991 // of the code to simplify things. 2992 void CMTask::drain_satb_buffers() { 2993 if (has_aborted()) return; 2994 2995 // We set this so that the regular clock knows that we're in the 2996 // middle of draining buffers and doesn't set the abort flag when it 2997 // notices that SATB buffers are available for draining. It'd be 2998 // very counter productive if it did that. :-) 2999 _draining_satb_buffers = true; 3000 3001 CMSATBBufferClosure satb_cl(this, _g1h); 3002 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3003 3004 // This keeps claiming and applying the closure to completed buffers 3005 // until we run out of buffers or we need to abort. 
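  // Each successful call claims one completed SATB buffer and applies the
  // closure to its entries; the regular_clock_call() in between keeps this
  // task responsive to overflow, yield and time-target checks.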
3006 while (!has_aborted() && 3007 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 3008 regular_clock_call(); 3009 } 3010 3011 _draining_satb_buffers = false; 3012 3013 assert(has_aborted() || 3014 concurrent() || 3015 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3016 3017 // again, this was a potentially expensive operation, decrease the 3018 // limits to get the regular clock call early 3019 decrease_limits(); 3020 } 3021 3022 void CMTask::print_stats() { 3023 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d", 3024 _worker_id, _calls); 3025 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3026 _elapsed_time_ms, _termination_time_ms); 3027 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3028 _step_times_ms.num(), _step_times_ms.avg(), 3029 _step_times_ms.sd()); 3030 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms", 3031 _step_times_ms.maximum(), _step_times_ms.sum()); 3032 } 3033 3034 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) { 3035 return _task_queues->steal(worker_id, hash_seed, obj); 3036 } 3037 3038 /***************************************************************************** 3039 3040 The do_marking_step(time_target_ms, ...) method is the building 3041 block of the parallel marking framework. It can be called in parallel 3042 with other invocations of do_marking_step() on different tasks 3043 (but only one per task, obviously) and concurrently with the 3044 mutator threads, or during remark, hence it eliminates the need 3045 for two versions of the code. When called during remark, it will 3046 pick up from where the task left off during the concurrent marking 3047 phase. Interestingly, tasks are also claimable during evacuation 3048 pauses too, since do_marking_step() ensures that it aborts before 3049 it needs to yield. 3050 3051 The data structures that it uses to do marking work are the 3052 following: 3053 3054 (1) Marking Bitmap. If there are gray objects that appear only 3055 on the bitmap (this happens either when dealing with an overflow 3056 or when the initial marking phase has simply marked the roots 3057 and didn't push them on the stack), then tasks claim heap 3058 regions whose bitmap they then scan to find gray objects. A 3059 global finger indicates where the end of the last claimed region 3060 is. A local finger indicates how far into the region a task has 3061 scanned. The two fingers are used to determine how to gray an 3062 object (i.e. whether simply marking it is OK, as it will be 3063 visited by a task in the future, or whether it needs to be also 3064 pushed on a stack). 3065 3066 (2) Local Queue. The local queue of the task which is accessed 3067 reasonably efficiently by the task. Other tasks can steal from 3068 it when they run out of work. Throughout the marking phase, a 3069 task attempts to keep its local queue short but not totally 3070 empty, so that entries are available for stealing by other 3071 tasks. Only when there is no more work, a task will totally 3072 drain its local queue. 3073 3074 (3) Global Mark Stack. This handles local queue overflow. During 3075 marking only sets of entries are moved between it and the local 3076 queues, as access to it requires a mutex and more fine-grain 3077 interaction with it which might cause contention. If it 3078 overflows, then the marking phase should restart and iterate 3079 over the bitmap to identify gray objects. 
Throughout the marking 3080 phase, tasks attempt to keep the global mark stack at a small 3081 length but not totally empty, so that entries are available for 3082 popping by other tasks. Only when there is no more work, tasks 3083 will totally drain the global mark stack. 3084 3085 (4) SATB Buffer Queue. This is where completed SATB buffers are 3086 made available. Buffers are regularly removed from this queue 3087 and scanned for roots, so that the queue doesn't get too 3088 long. During remark, all completed buffers are processed, as 3089 well as the filled in parts of any uncompleted buffers. 3090 3091 The do_marking_step() method tries to abort when the time target 3092 has been reached. There are a few other cases when the 3093 do_marking_step() method also aborts: 3094 3095 (1) When the marking phase has been aborted (after a Full GC). 3096 3097 (2) When a global overflow (on the global stack) has been 3098 triggered. Before the task aborts, it will actually sync up with 3099 the other tasks to ensure that all the marking data structures 3100 (local queues, stacks, fingers etc.) are re-initialized so that 3101 when do_marking_step() completes, the marking phase can 3102 immediately restart. 3103 3104 (3) When enough completed SATB buffers are available. The 3105 do_marking_step() method only tries to drain SATB buffers right 3106 at the beginning. So, if enough buffers are available, the 3107 marking step aborts and the SATB buffers are processed at 3108 the beginning of the next invocation. 3109 3110 (4) To yield. when we have to yield then we abort and yield 3111 right at the end of do_marking_step(). This saves us from a lot 3112 of hassle as, by yielding we might allow a Full GC. If this 3113 happens then objects will be compacted underneath our feet, the 3114 heap might shrink, etc. We save checking for this by just 3115 aborting and doing the yield right at the end. 3116 3117 From the above it follows that the do_marking_step() method should 3118 be called in a loop (or, otherwise, regularly) until it completes. 3119 3120 If a marking step completes without its has_aborted() flag being 3121 true, it means it has completed the current marking phase (and 3122 also all other marking tasks have done so and have all synced up). 3123 3124 A method called regular_clock_call() is invoked "regularly" (in 3125 sub ms intervals) throughout marking. It is this clock method that 3126 checks all the abort conditions which were mentioned above and 3127 decides when the task should abort. A work-based scheme is used to 3128 trigger this clock method: when the number of object words the 3129 marking phase has scanned or the number of references the marking 3130 phase has visited reach a given limit. Additional invocations to 3131 the method clock have been planted in a few other strategic places 3132 too. The initial reason for the clock method was to avoid calling 3133 vtime too regularly, as it is quite expensive. So, once it was in 3134 place, it was natural to piggy-back all the other conditions on it 3135 too and not constantly check them throughout the code. 3136 3137 If do_termination is true then do_marking_step will enter its 3138 termination protocol. 3139 3140 The value of is_serial must be true when do_marking_step is being 3141 called serially (i.e. by the VMThread) and do_marking_step should 3142 skip any synchronization in the termination and overflow code. 3143 Examples include the serial remark code and the serial reference 3144 processing closures. 
    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.
    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region  == NULL, "invariant");
      assert(_finger       == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");
    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt work stealing from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");
    while (!has_aborted()) {
      oop obj;
      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
    } else {
      // Apparently there's more work to do. Let's abort this task. The
      // caller will restart it and we can hopefully find more things to do.
      set_has_aborted();
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      // We clear the local state of this task...
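      // (clear_region_fields() is expected to reset the per-region
      // scanning state - _curr_region, _finger and _region_limit - so
      // that the restarted step claims regions afresh; see the asserts
      // in the region-claiming loop above.)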
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're during the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }

  _claimed = false;
}

CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(const char* phase_name)
  : _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
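  // For reference, the G1PPRL_* macros compose by plain string-literal
  // concatenation. For example, G1PPRL_SUM_ADDR_FORMAT("reserved"), used
  // just below, expands to " " "reserved" ":" " " PTR_FORMAT "-" PTR_FORMAT,
  // i.e. a fragment such as " reserved: 0x...-0x...".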
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "(bytes)");
}

// Takes a pointer to one of the _hum_* fields, deduces the
// corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field is
// less than the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// Deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that the regions are visited in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->is_starts_humongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
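    // For illustration (example numbers only): with HeapRegion::GrainBytes
    // of 1M and a three-region humongous series whose starts-humongous
    // region reports 2.5M used, the printout attributes 1M to this region,
    // 1M to the first continues-humongous region and 0.5M to the second,
    // because each get_hum_bytes() call hands out at most
    // HeapRegion::GrainBytes and subtracts it from the _hum_* field.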
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->is_continues_humongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, strong_code_roots_bytes);

  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // add static memory usages to remembered set sizes
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          perc(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_prev_live_bytes),
                          perc(_total_prev_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_next_live_bytes),
                          perc(_total_next_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_strong_code_roots_bytes));
}
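// For orientation, the liveness output produced by this closure has roughly
// the following shape (placeholder values; exact spacing and column widths
// are determined by the G1PPRL_* format macros above):
//
//   ### PHASE <phase_name> @ <time>
//   ### HEAP reserved: <start>-<end> region-size: <bytes>
//   ###
//   ### type address-range used prev-live next-live gc-eff remset code-roots
//   ###      <one line per heap region>
//   ###
//   ### SUMMARY capacity: <n> MB used: <n> MB / <p> % prev-live: <n> MB / <p> %
//       next-live: <n> MB / <p> % remset: <n> MB code-roots: <n> MB
//       (printed as a single line)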