/*
 * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/metadataOnStackMark.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectorState.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
#include "gc/shared/adaptiveSizePolicy.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
#include "gc/shared/weakProcessor.hpp"
#include "include/jvm.h"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "oops/access.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"
#include "utilities/align.hpp"
#include "utilities/growableArray.hpp"

bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}

G1CMMarkStack::G1CMMarkStack() :
  _max_chunk_capacity(0),
  _base(NULL),
  _chunk_capacity(0) {
  set_empty();
}

bool G1CMMarkStack::resize(size_t new_capacity) {
  assert(is_empty(), "Only resize when stack is empty.");
  assert(new_capacity <= _max_chunk_capacity,
         "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);

  TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);

  if (new_base == NULL) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  // Release old mapping.
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }

  _base = new_base;
  _chunk_capacity = new_capacity;
  set_empty();

  return true;
}

size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}

bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  guarantee(initial_chunk_capacity <= _max_chunk_capacity,
            "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
            _max_chunk_capacity,
            initial_chunk_capacity);

  log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
                initial_chunk_capacity, _max_chunk_capacity);

  return resize(initial_chunk_capacity);
}

void G1CMMarkStack::expand() {
  if (_chunk_capacity == _max_chunk_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
    return;
  }
  size_t old_capacity = _chunk_capacity;
  // Double capacity if possible
  size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);

  if (resize(new_capacity)) {
    log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                  old_capacity, new_capacity);
  } else {
    log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
                    old_capacity, new_capacity);
  }
}

G1CMMarkStack::~G1CMMarkStack() {
  if (_base != NULL) {
    MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
  }
}

void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
  elem->next = *list;
  *list = elem;
}

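// The overflow mark stack hands out entries in whole TaskQueueEntryChunks
// carved out of the mmap'ed backing array: _chunk_list links the chunks that
// currently hold entries, while _free_list links chunks that have been popped
// and can be reused before new ones are carved out via _hwm. Each list is
// guarded by its own lock (MarkStackChunkList_lock / MarkStackFreeList_lock),
// so pushing or popping a full chunk only contends on a single pointer update
// plus, for the chunk list, a counter.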
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
  TaskQueueEntryChunk* result = *list;
  if (result != NULL) {
    *list = (*list)->next;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != NULL) {
    _chunks_in_chunk_list--;
  }
  return result;
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}

G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
  // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
  // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
  // wraparound of _hwm.
  if (_hwm >= _chunk_capacity) {
    return NULL;
  }

  size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
  if (cur_idx >= _chunk_capacity) {
    return NULL;
  }

  TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
  result->next = NULL;
  return result;
}

bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == NULL) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = allocate_new_chunk();

    if (new_chunk == NULL) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}

bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == NULL) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}

void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _hwm = 0;
  _chunk_list = NULL;
  _free_list = NULL;
}
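
// Root regions are the survivor regions of the initial-mark pause: they are
// scanned by the concurrent workers before the actual marking phase starts,
// and (via wait_until_scan_finished()) have to be completely scanned before
// the next evacuation pause can proceed. claim_next() hands out each survivor
// region to exactly one worker by atomically bumping _claimed_survivor_index;
// setting _should_abort makes every later claim return NULL so the scan can
// be cut short.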

G1CMRootRegions::G1CMRootRegions() :
  _survivors(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _claimed_survivor_index(0) { }

void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
  _survivors = survivors;
  _cm = cm;
}

void G1CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  _claimed_survivor_index = 0;
  _scan_in_progress = _survivors->regions()->is_nonempty();
  _should_abort = false;
}

HeapRegion* G1CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();

  int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
  if (claimed_index < survivor_regions->length()) {
    return survivor_regions->at(claimed_index);
  }
  return NULL;
}

uint G1CMRootRegions::num_root_regions() const {
  return (uint)_survivors->regions()->length();
}

void G1CMRootRegions::notify_scan_done() {
  MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress = false;
  RootRegionScan_lock->notify_all();
}

void G1CMRootRegions::cancel_scan() {
  notify_scan_done();
}

void G1CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
    assert((uint)_claimed_survivor_index >= _survivors->length(),
           "we should have claimed all survivors, claimed index = %u, length = %u",
           (uint)_claimed_survivor_index, _survivors->length());
  }

  notify_scan_done();
}

bool G1CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

// Returns the maximum number of workers to be used in a concurrent
// phase based on the number of GC workers being used in a STW
// phase.
static uint scale_concurrent_worker_threads(uint num_gc_workers) {
  return MAX2((num_gc_workers + 2) / 4, 1U);
}
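
// For example, with ParallelGCThreads == 8 this works out to (8 + 2) / 4 = 2
// concurrent marking threads, while any value below 6 falls back to the
// minimum of one worker. This is how ConcGCThreads is derived ergonomically
// in the constructor below when it has not been set on the command line.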

G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* prev_bitmap_storage,
                                   G1RegionToSpaceMapper* next_bitmap_storage) :
  // _cm_thread set inside the constructor
  _g1h(g1h),
  _completed_initialization(false),

  _mark_bitmap_1(),
  _mark_bitmap_2(),
  _prev_mark_bitmap(&_mark_bitmap_1),
  _next_mark_bitmap(&_mark_bitmap_2),

  _heap(_g1h->reserved_region()),

  _root_regions(),

  _global_mark_stack(),

  // _finger set in set_non_marking_state

  _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
  _max_num_tasks(ParallelGCThreads),
  // _num_active_tasks set in set_non_marking_state()
  // _tasks set inside the constructor

  _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
  _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
  _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),

  // _verbose_level set below

  _init_times(),
  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),
  _total_cleanup_time(0.0),

  _accum_task_vtime(NULL),

  _concurrent_workers(NULL),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
{
  _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "CGC_lock must be initialized");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h->survivor(), this);

  if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
    // Calculate the number of concurrent worker threads by scaling
    // the number of parallel GC threads.
    uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
    FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
  }

  assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
  if (ConcGCThreads > ParallelGCThreads) {
    log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
                    ConcGCThreads, ParallelGCThreads);
    return;
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _num_concurrent_workers = ConcGCThreads;
  _max_concurrent_workers = _num_concurrent_workers;

  _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
  _concurrent_workers->initialize_workers();

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    size_t mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
                      "must be between 1 and " SIZE_FORMAT,
                      mark_stack_size, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
                          "must be between 1 and " SIZE_FORMAT,
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
                          " or for MarkStackSizeMax (" SIZE_FORMAT ")",
                          MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());

    _accum_task_vtime[i] = 0.0;
  }

  reset_at_marking_complete();
  _completed_initialization = true;
}

void G1ConcurrentMark::reset() {
  _has_aborted = false;

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(_next_mark_bitmap);
  }

  uint max_regions = _g1h->max_regions();
  for (uint i = 0; i < max_regions; i++) {
    _top_at_rebuild_starts[i] = NULL;
    _region_mark_stats[i].clear();
  }
}

void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx] = NULL;
  _region_mark_stats[region_idx].clear();
}

void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
  uint const region_idx = r->hrm_index();
  if (r->is_humongous()) {
    assert(r->is_starts_humongous(), "Got humongous continues region here");
    uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
    for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
      clear_statistics_in_region(j);
    }
  } else {
    clear_statistics_in_region(region_idx);
  }
}

static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) {
  if (bitmap->is_marked(addr)) {
    bitmap->clear(addr);
  }
}

void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
  assert_at_safepoint_on_vm_thread();

  // Need to clear all mark bits of the humongous object.
  clear_mark_if_set(_prev_mark_bitmap, r->bottom());
  clear_mark_if_set(_next_mark_bitmap, r->bottom());

  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  clear_statistics(r);
}

void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    uint max_regions = _g1h->max_regions();
    for (uint i = 0; i < max_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  _finger = _heap.start();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(_finger), p2i(_heap.end()));
  }
}

void G1ConcurrentMark::reset_at_marking_complete() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}

G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

class G1ClearBitMapTask : public AbstractGangTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the given mark bitmap.
  class G1ClearBitmapHRClosure : public HeapRegionClosure {
  private:
    G1CMBitMap* _bitmap;
    G1ConcurrentMark* _cm;
  public:
    G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
    }

    virtual bool do_heap_region(HeapRegion* r) {
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      HeapWord* cur = r->bottom();
      HeapWord* const end = r->end();

      while (cur < end) {
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Abort iteration if after yielding the marking has been aborted.
        if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
          return true;
        }
        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
        assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
      }
      assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    AbstractGangTask("G1 Clear Bitmap"),
    _cl(bitmap, suspendible ? cm : NULL),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }

  bool is_complete() {
    return _cl.is_complete();
  }
};

void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}
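
// Rough sizing sketch for the work split above (assuming the usual 64-bit
// layout where G1CMBitMap::heap_map_factor() is 64, i.e. one mark bit per
// heap word): a 1 GB heap consisting of 1024 one-MB regions has
// 1 GB / 64 = 16 MB of bitmap to clear, which at the 1 MB
// G1ClearBitMapTask::chunk_size() becomes 16 work units spread across the
// requested workers.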

void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cm_thread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");

  clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);

  // Repeat the asserts from above.
  guarantee(cm_thread()->during_cycle(), "invariant");
  guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
}

void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
  assert_at_safepoint_on_vm_thread();
  clear_bitmap(_prev_mark_bitmap, workers, false);
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  G1CMBitMap* _bitmap;
public:
  CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool do_heap_region(HeapRegion* r) {
    // This closure can be called concurrently to the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as limit to detect any found bits.
    // end never changes in G1.
    HeapWord* end = r->end();
    return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
  }
};

bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
  CheckBitmapClearHRClosure cl(_next_mark_bitmap);
  _g1h->heap_region_iterate(&cl);
  return cl.is_complete();
}

class NoteStartOfMarkHRClosure : public HeapRegionClosure {
public:
  bool do_heap_region(HeapRegion* r) {
    r->note_start_of_marking();
    return false;
  }
};

void G1ConcurrentMark::pre_initial_mark() {
  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  _g1h->heap_region_iterate(&startcl);
}


void G1ConcurrentMark::post_initial_mark() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery();
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */

void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}

void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}

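// The gang task below drives the concurrent marking phase itself: each worker
// joins the suspendible thread set and repeatedly calls do_marking_step() on
// its G1CMTask with a G1ConcMarkStepDurationMillis time slice, yielding
// between steps, until either the task finishes or the whole marking cycle
// has been aborted.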
class G1CMConcurrentMarkingTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    {
      SuspendibleThreadSetJoiner sts_join;

      assert(worker_id < _cm->active_tasks(), "invariant");

      G1CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      if (!_cm->has_aborted()) {
        do {
          task->do_marking_step(G1ConcMarkStepDurationMillis,
                                true  /* do_termination */,
                                false /* is_serial*/);

          _cm->do_yield_check();
        } while (!_cm->has_aborted() && task->has_aborted());
      }
      task->record_end_time();
      guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
    }

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
    AbstractGangTask("Concurrent Mark"), _cm(cm) { }

  ~G1CMConcurrentMarkingTask() { }
};

uint G1ConcurrentMark::calc_active_marking_workers() {
  uint result = 0;
  if (!UseDynamicNumberOfGCThreads ||
      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
       !ForceDynamicNumberOfGCThreads)) {
    result = _max_concurrent_workers;
  } else {
    result =
      AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
                                                      1, /* Minimum workers */
                                                      _num_concurrent_workers,
                                                      Threads::number_of_non_daemon_threads());
    // Don't scale the result down by scale_concurrent_worker_threads() because
    // that scaling has already gone into "_max_concurrent_workers".
  }
  assert(result > 0 && result <= _max_concurrent_workers,
         "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
         _max_concurrent_workers, result);
  return result;
}

void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class G1CMRootRegionScanTask : public AbstractGangTask {
  G1ConcurrentMark* _cm;
public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
    AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    G1CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scan_root_region(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    _num_concurrent_workers = MIN2(calc_active_marking_workers(),
                                   // We distribute work on a per-region basis, so starting
                                   // more threads than that is useless.
                                   root_regions()->num_root_regions());
    assert(_num_concurrent_workers <= _max_concurrent_workers,
           "Maximum number of marking threads exceeded");

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, _num_concurrent_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}

void G1ConcurrentMark::concurrent_cycle_end() {
  _g1h->collector_state()->set_clearing_next_bitmap(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}

void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow = false;

  _num_concurrent_workers = calc_active_marking_workers();

  uint active_workers = MAX2(1U, _num_concurrent_workers);

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->update_active_workers(active_workers);
  log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}

void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  if (VerifyDuringGC) {
    GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(type, vo, buffer);
  }

  verifier->check_bitmaps(caller);
}

class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

  G1PrintRegionLivenessInfoClosure _cl;

  uint _num_regions_selected_for_rebuild; // The number of regions actually selected for rebuild.

  void update_remset_before_rebuild(HeapRegion * hr) {
    G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();

    size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
    bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
    if (selected_for_rebuild) {
      _num_regions_selected_for_rebuild++;
    }
    _cm->update_top_at_rebuild_start(hr);
  }

  void distribute_marked_bytes(HeapRegion* hr, size_t marked_words) {
    uint const region_idx = hr->hrm_index();
    uint num_regions_in_humongous = (uint)G1CollectedHeap::humongous_obj_size_in_regions(marked_words);

    for (uint i = region_idx; i < (region_idx + num_regions_in_humongous); i++) {
      HeapRegion* const r = _g1h->region_at(i);
      size_t const words_to_add = MIN2(HeapRegion::GrainWords, marked_words);
      assert(words_to_add > 0, "Out of space to distribute before end of humongous object in region %u (starts %u)", i, region_idx);

      log_trace(gc, marking)("Adding " SIZE_FORMAT " words to humongous region %u (%s)",
                             words_to_add, i, r->get_type_str());
      r->add_to_marked_bytes(words_to_add * HeapWordSize);
      marked_words -= words_to_add;
    }
    assert(marked_words == 0,
           SIZE_FORMAT " words left after distributing space across %u regions",
           marked_words, num_regions_in_humongous);
  }
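
  // For example, a live humongous object spanning two and a half regions has
  // all of its words attributed to the starts-humongous region by marking;
  // distribute_marked_bytes() then adds GrainWords to each of the first two
  // regions and the remaining half region's worth to the third, so per-region
  // marked bytes stay consistent with the single recorded liveness figure.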

  void update_marked_bytes(HeapRegion* hr) {
    uint const region_idx = hr->hrm_index();
    size_t marked_words = _cm->liveness(region_idx);
    // The marking attributes the object's size completely to the humongous starts
    // region. We need to distribute this value across the entire set of regions a
    // humongous object spans.
    if (hr->is_humongous()) {
      assert(hr->is_starts_humongous() || marked_words == 0,
             "Should not have marked words " SIZE_FORMAT " in non-starts humongous region %u (%s)",
             marked_words, region_idx, hr->get_type_str());

      if (marked_words > 0) {
        distribute_marked_bytes(hr, marked_words);
      }
    } else {
      log_trace(gc, marking)("Adding " SIZE_FORMAT " words to region %u (%s)", marked_words, region_idx, hr->get_type_str());
      hr->add_to_marked_bytes(marked_words * HeapWordSize);
    }
  }

public:
  G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
    _g1h(g1h), _cm(cm), _cl("Post-Marking"), _num_regions_selected_for_rebuild(0) { }

  virtual bool do_heap_region(HeapRegion* r) {
    update_remset_before_rebuild(r);
    update_marked_bytes(r);
    if (log_is_enabled(Trace, gc, liveness)) {
      _cl.do_heap_region(r);
    }
    r->note_end_of_marking();
    return false;
  }

  uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
};

class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
  G1CollectedHeap* _g1h;
public:
  G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  virtual bool do_heap_region(HeapRegion* r) {
    _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
    return false;
  }
};

void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");

  {
    GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work(false /* clear_all_soft_refs */);

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases)("Flush Task Caches");
      flush_all_task_caches();
    }

    // Install newly created mark bitmap as "prev".
    swap_mark_bitmaps();
    {
      GCTraceTime(Debug, gc, phases)("Update Remembered Set Tracking Before Rebuild");
      G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
      _g1h->heap_region_iterate(&cl);
      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_regions(), cl.num_selected_for_rebuild());
    }

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark after");

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    reset_at_marking_complete();
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  {
    GCTraceTime(Debug, gc, phases)("Report Object Count");
    report_object_count(mark_finished);
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

class G1CleanupTask : public AbstractGangTask {
  // Per-region work during the Cleanup pause.
  class G1CleanupRegionsClosure : public HeapRegionClosure {
    G1CollectedHeap* _g1h;
    size_t _freed_bytes;
    FreeRegionList* _local_cleanup_list;
    uint _old_regions_removed;
    uint _humongous_regions_removed;
    HRRSCleanupTask* _hrrs_cleanup_task;

  public:
    G1CleanupRegionsClosure(G1CollectedHeap* g1,
                            FreeRegionList* local_cleanup_list,
                            HRRSCleanupTask* hrrs_cleanup_task) :
      _g1h(g1),
      _freed_bytes(0),
      _local_cleanup_list(local_cleanup_list),
      _old_regions_removed(0),
      _humongous_regions_removed(0),
      _hrrs_cleanup_task(hrrs_cleanup_task) { }

    size_t freed_bytes() { return _freed_bytes; }
    const uint old_regions_removed() { return _old_regions_removed; }
    const uint humongous_regions_removed() { return _humongous_regions_removed; }

    bool do_heap_region(HeapRegion *hr) {
      if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
        _freed_bytes += hr->used();
        hr->set_containing_set(NULL);
        if (hr->is_humongous()) {
          _humongous_regions_removed++;
          _g1h->free_humongous_region(hr, _local_cleanup_list);
        } else {
          _old_regions_removed++;
          _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
        }
        hr->clear_cardtable();
        _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
        log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
      } else {
        hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
      }

      return false;
    }
  };

  G1CollectedHeap* _g1h;
  FreeRegionList* _cleanup_list;
  HeapRegionClaimer _hrclaimer;

public:
  G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
    AbstractGangTask("G1 Cleanup"),
    _g1h(g1h),
    _cleanup_list(cleanup_list),
    _hrclaimer(n_workers) {

    HeapRegionRemSet::reset_for_cleanup_tasks();
  }

  void work(uint worker_id) {
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HRRSCleanupTask hrrs_cleanup_task;
    G1CleanupRegionsClosure cl(_g1h,
                               &local_cleanup_list,
                               &hrrs_cleanup_task);
    _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
    assert(cl.is_complete(), "Shouldn't have aborted!");

    // Now update the old/humongous region sets
    _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _g1h->decrement_summary_bytes(cl.freed_bytes());

      _cleanup_list->add_ordered(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
};

void G1ConcurrentMark::reclaim_empty_regions() {
  WorkGang* workers = _g1h->workers();
  FreeRegionList empty_regions_list("Empty Regions After Mark List");

  G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers());
  workers->run_task(&cl);

  if (!empty_regions_list.is_empty()) {
    log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
    // Now print the empty regions list.
    G1HRPrinter* hrp = _g1h->hr_printer();
    if (hrp->is_active()) {
      FreeRegionListIterator iter(&empty_regions_list);
      while (iter.more_available()) {
        HeapRegion* hr = iter.get_next();
        hrp->cleanup(hr);
      }
    }
    // And actually make them available.
    _g1h->prepend_to_freelist(&empty_regions_list);
  }
}

void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* g1p = _g1h->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup before");

  {
    GCTraceTime(Debug, gc, phases)("Update Remembered Set Tracking After Rebuild");
    G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  }

  if (log_is_enabled(Trace, gc, liveness)) {
    G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
    _g1h->heap_region_iterate(&cl);
  }

  {
    GCTraceTime(Debug, gc, phases)("Reclaim Empty Regions");
    reclaim_empty_regions();
  }

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // Clean out dead classes and update Metaspace sizes.
  if (ClassUnloadingWithConcurrentMark) {
    GCTraceTime(Debug, gc, phases)("Purge Metaspace");
    ClassLoaderDataGraph::purge();
  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->g1mm()->update_sizes();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for Cleanup to finish.
  _g1h->increment_total_collections();

  // Local statistics
  double recent_cleanup_time = (os::elapsedTime() - start);
  _total_cleanup_time += recent_cleanup_time;
  _cleanup_times.add(recent_cleanup_time);

  {
    GCTraceTime(Debug, gc, phases)("Finalize Concurrent Mark Cleanup");
    _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
  }
}

// Supporting Object and Oop closures for reference discovery
// and processing during marking

bool G1CMIsAliveClosure::do_object_b(oop obj) {
  HeapWord* addr = (HeapWord*)obj;
  return addr != NULL &&
         (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
}

// 'Keep Alive' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a worker thread (for serial reference
// processing the G1CMTask for worker 0 is used) to preserve (mark) and
// trace referent objects.
//
// Using the G1CMTask and embedded local queues avoids having the worker
// threads operating on the global mark stack. This reduces the risk
// of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
// of the workers interfering with each other that could occur if
// operating on the global stack.

class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  int _ref_counter_limit;
  int _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial),
    _ref_counter_limit(G1RefProcDrainInterval) {
    assert(_ref_counter_limit > 0, "sanity");
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
    _ref_counter = _ref_counter_limit;
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    if (!_cm->has_overflown()) {
      _task->deal_with_reference(p);
      _ref_counter--;

      if (_ref_counter == 0) {
        // We have dealt with _ref_counter_limit references, pushing them
        // and objects reachable from them on to the local stack (and
        // possibly the global stack). Call G1CMTask::do_marking_step() to
        // process these entries.
        //
        // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
        // there's nothing more to do (i.e. we're done with the entries that
        // were pushed as a result of the G1CMTask::deal_with_reference() calls
        // above) or we overflow.
        //
        // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
        // flag while there may still be some work to do. (See the comment at
        // the beginning of G1CMTask::do_marking_step() for those conditions -
        // one of which is reaching the specified time target.) It is only
        // when G1CMTask::do_marking_step() returns without setting the
        // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
                                 false /* do_termination */,
                                 _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    }
  }
};

// 'Drain' oop closure used by both serial and parallel reference processing.
// Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
// do_marking_step routine, with an unbelievably large timeout value,
// to drain the marking data structures of the remaining entries
// added by the 'keep alive' oop closure above.

class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
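
// In the serial case a single pair of the closures above is created, bound to
// the G1CMTask of worker 0, and passed directly to the reference processor;
// in the parallel case each gang worker in G1CMRefProcTaskProxy::work() below
// creates its own pair bound to its own G1CMTask, so the closure instances
// never need to synchronize with each other.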

// Implementation of AbstractRefProcTaskExecutor for parallel
// reference processing at the end of G1 concurrent marking

class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;
  WorkGang* _workers;
  uint _active_workers;

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          G1ConcurrentMark* cm,
                          WorkGang* workers,
                          uint n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};

class G1CMRefProcTaskProxy : public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask& _proc_task;
  G1CollectedHeap* _g1h;
  G1ConcurrentMark* _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       G1ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    ResourceMark rm;
    HandleMark hm;
    G1CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};

void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&proc_task_proxy);
}

class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
  EnqueueTask& _enq_task;

public:
  G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
    AbstractGangTask("Enqueue reference objects in parallel"),
    _enq_task(enq_task) { }

  virtual void work(uint worker_id) {
    _enq_task.work(worker_id);
  }
};

void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in G1CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _workers->run_task(&enq_task_proxy);
}

void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark hm;

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(_g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.
1571
1572 // Set the soft reference policy
1573 rp->setup_policy(clear_all_soft_refs);
1574 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1575
1576 // Instances of the 'Keep Alive' and 'Complete GC' closures used
1577 // in serial reference processing. Note these closures are also
1578 // used for serially processing (by the current thread) the
1579 // JNI references during parallel reference processing.
1580 //
1581 // These closures do not need to synchronize with the worker
1582 // threads involved in parallel reference processing as these
1583 // instances are executed serially by the current thread (i.e.
1584 // reference processing is not multi-threaded and is thus
1585 // performed by the current thread instead of a gang worker).
1586 //
1587 // The gang tasks involved in parallel reference processing create
1588 // their own instances of these closures, which do their own
1589 // synchronization among themselves.
1590 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1591 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1592
1593 // We need at least one active thread. If reference processing
1594 // is not multi-threaded we use the current (VMThread) thread,
1595 // otherwise we use the work gang from the G1CollectedHeap and
1596 // we utilize all the worker threads we can.
1597 bool processing_is_mt = rp->processing_is_mt();
1598 uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
1599 active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1600
1601 // Parallel processing task executor.
1602 G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1603 _g1h->workers(), active_workers);
1604 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1605
1606 // Set the concurrency level. The phase was already set prior to
1607 // executing the remark task.
1608 set_concurrency(active_workers);
1609
1610 // Set the degree of MT processing here. If the discovery was done MT,
1611 // the number of threads involved during discovery could differ from
1612 // the number of active workers. This is OK as long as the discovered
1613 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1614 rp->set_active_mt_degree(active_workers);
1615
1616 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1617
1618 // Process the weak references.
1619 const ReferenceProcessorStats& stats =
1620 rp->process_discovered_references(&g1_is_alive,
1621 &g1_keep_alive,
1622 &g1_drain_mark_stack,
1623 executor,
1624 &pt);
1625 _gc_tracer_cm->report_gc_reference_stats(stats);
1626 pt.print_all_references();
1627
1628 // The do_oop work routines of the keep_alive and drain_marking_stack
1629 // oop closures will set the has_overflown flag if we overflow the
1630 // global marking stack.
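// If that happens, marking is restarted after this pause, so the results of
// reference processing below are only trusted when has_overflown() is false
// (see the early return after the weak processing phase).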
1631
1632 assert(has_overflown() || _global_mark_stack.is_empty(),
1633 "Mark stack should be empty (unless it has overflown)");
1634
1635 assert(rp->num_q() == active_workers, "why not");
1636
1637 rp->enqueue_discovered_references(executor, &pt);
1638
1639 rp->verify_no_references_recorded();
1640
1641 pt.print_enqueue_phase();
1642
1643 assert(!rp->discovery_enabled(), "Post condition");
1644 }
1645
1646 assert(has_overflown() || _global_mark_stack.is_empty(),
1647 "Mark stack should be empty (unless it has overflown)");
1648
1649 {
1650 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1651 WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1652 }
1653
1654 if (has_overflown()) {
1655 // We cannot trust g1_is_alive if the marking stack overflowed
1656 return;
1657 }
1658
1659 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1660
1661 // Unload Klasses, Strings, Symbols, Code Cache, etc.
1662 if (ClassUnloadingWithConcurrentMark) {
1663 GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1664 bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1665 _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1666 } else {
1667 GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1668 // No need to clean string table and symbol table as they are treated as strong roots when
1669 // class unloading is disabled.
1670 _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1671 }
1672 }
1673
1674 // When sampling object counts, we already swapped the mark bitmaps, so we need to use
1675 // the prev bitmap to determine liveness.
1676 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1677 G1CollectedHeap* _g1;
1678 public:
1679 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) { }
1680
1681 bool do_object_b(oop obj) {
1682 HeapWord* addr = (HeapWord*)obj;
1683 return addr != NULL &&
1684 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_dead(obj));
1685 }
1686 };
1687
1688 void G1ConcurrentMark::report_object_count(bool mark_completed) {
1689 // Depending on whether marking completed, liveness needs to be determined
1690 // using either the next or the prev bitmap.
1691 if (mark_completed) {
1692 G1ObjectCountIsAliveClosure is_alive(_g1h);
1693 _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1694 } else {
1695 G1CMIsAliveClosure is_alive(_g1h);
1696 _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1697 }
1698 }
1699
1700
1701 void G1ConcurrentMark::swap_mark_bitmaps() {
1702 G1CMBitMap* temp = _prev_mark_bitmap;
1703 _prev_mark_bitmap = _next_mark_bitmap;
1704 _next_mark_bitmap = temp;
1705 _g1h->collector_state()->set_clearing_next_bitmap(true);
1706 }
1707
1708 // Closure for marking entries in SATB buffers.
1709 class G1CMSATBBufferClosure : public SATBBufferClosure {
1710 private:
1711 G1CMTask* _task;
1712 G1CollectedHeap* _g1h;
1713
1714 // This is very similar to G1CMTask::deal_with_reference, but with
1715 // more relaxed requirements for the argument, so this must be more
1716 // circumspect about treating the argument as an object.
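// SATB entries are enqueued by the pre-barrier and filtered before the
// completed buffers are handed to this closure, so each entry here is
// expected to be a non-NULL reference into the heap.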
1717 void do_entry(void* entry) const {
1718 _task->increment_refs_reached();
1719 oop const obj = static_cast<oop>(entry);
1720 _task->make_reference_grey(obj);
1721 }
1722
1723 public:
1724 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1725 : _task(task), _g1h(g1h) { }
1726
1727 virtual void do_buffer(void** buffer, size_t size) {
1728 for (size_t i = 0; i < size; ++i) {
1729 do_entry(buffer[i]);
1730 }
1731 }
1732 };
1733
1734 class G1RemarkThreadsClosure : public ThreadClosure {
1735 G1CMSATBBufferClosure _cm_satb_cl;
1736 G1CMOopClosure _cm_cl;
1737 MarkingCodeBlobClosure _code_cl;
1738 int _thread_parity;
1739
1740 public:
1741 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1742 _cm_satb_cl(task, g1h),
1743 _cm_cl(g1h, task),
1744 _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1745 _thread_parity(Threads::thread_claim_parity()) {}
1746
1747 void do_thread(Thread* thread) {
1748 if (thread->is_Java_thread()) {
1749 if (thread->claim_oops_do(true, _thread_parity)) {
1750 JavaThread* jt = (JavaThread*)thread;
1751
1752 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking.
1753 // However, oops reachable from nmethods have very complex lifecycles:
1754 // * Alive if on the stack of an executing method
1755 // * Weakly reachable otherwise
1756 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1757 // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1758 jt->nmethods_do(&_code_cl);
1759
1760 jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1761 }
1762 } else if (thread->is_VM_thread()) {
1763 if (thread->claim_oops_do(true, _thread_parity)) {
1764 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1765 }
1766 }
1767 }
1768 };
1769
1770 class G1CMRemarkTask : public AbstractGangTask {
1771 G1ConcurrentMark* _cm;
1772 public:
1773 void work(uint worker_id) {
1774 G1CMTask* task = _cm->task(worker_id);
1775 task->record_start_time();
1776 {
1777 ResourceMark rm;
1778 HandleMark hm;
1779
1780 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1781 Threads::threads_do(&threads_f);
1782 }
1783
1784 do {
1785 task->do_marking_step(1000000000.0 /* something very large */,
1786 true /* do_termination */,
1787 false /* is_serial */);
1788 } while (task->has_aborted() && !_cm->has_overflown());
1789 // If we overflow, then we do not want to restart. We instead
1790 // want to abort remark and do concurrent marking again.
1791 task->record_end_time();
1792 }
1793
1794 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1795 AbstractGangTask("Par Remark"), _cm(cm) {
1796 _cm->terminator()->reset_for_reuse(active_workers);
1797 }
1798 };
1799
1800 void G1ConcurrentMark::finalize_marking() {
1801 ResourceMark rm;
1802 HandleMark hm;
1803
1804 _g1h->ensure_parsability(false);
1805
1806 // This is remark, so we'll use up all active threads.
1807 uint active_workers = _g1h->workers()->active_workers();
1808 set_concurrency_and_phase(active_workers, false /* concurrent */);
1809 // Leave _parallel_marking_threads at its
1810 // value originally calculated in the G1ConcurrentMark
1811 // constructor and pass values of the active workers
1812 // through the gang in the task.
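// Run the remark task on the work gang: each worker first scans the thread
// roots it claims (SATB buffers and nmethods, see G1RemarkThreadsClosure
// above) and then finishes marking via do_marking_step().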
1813
1814 {
1815 StrongRootsScope srs(active_workers);
1816
1817 G1CMRemarkTask remarkTask(this, active_workers);
1818 // We will start all available threads, even if we decide that the
1819 // active_workers will be fewer. The extra ones will just bail out
1820 // immediately.
1821 _g1h->workers()->run_task(&remarkTask);
1822 }
1823
1824 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1825 guarantee(has_overflown() ||
1826 satb_mq_set.completed_buffers_num() == 0,
1827 "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1828 BOOL_TO_STR(has_overflown()),
1829 satb_mq_set.completed_buffers_num());
1830
1831 print_stats();
1832 }
1833
1834 void G1ConcurrentMark::flush_all_task_caches() {
1835 size_t hits = 0;
1836 size_t misses = 0;
1837 for (uint i = 0; i < _max_num_tasks; i++) {
1838 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1839 hits += stats.first;
1840 misses += stats.second;
1841 }
1842 size_t sum = hits + misses;
1843 log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1844 hits, misses, percent_of(hits, sum));
1845 }
1846
1847 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1848 _prev_mark_bitmap->clear_range(mr);
1849 }
1850
1851 HeapRegion*
1852 G1ConcurrentMark::claim_region(uint worker_id) {
1853 // "checkpoint" the finger
1854 HeapWord* finger = _finger;
1855
1856 while (finger < _heap.end()) {
1857 assert(_g1h->is_in_g1_reserved(finger), "invariant");
1858
1859 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1860 // Make sure that the reads below do not float before loading curr_region.
1861 OrderAccess::loadload();
1862 // The heap_region_containing() call above may return NULL, as we always scan
1863 // and claim up to the end of the heap. In this case, just jump to the next region.
1864 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1865
1866 // Is the gap between reading the finger and doing the CAS too long?
1867 HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1868 if (res == finger && curr_region != NULL) {
1869 // we succeeded
1870 HeapWord* bottom = curr_region->bottom();
1871 HeapWord* limit = curr_region->next_top_at_mark_start();
1872
1873 // Notice that _finger == end cannot be guaranteed here, since
1874 // someone else might have moved the finger even further.
1875 assert(_finger >= end, "the finger should have moved forward");
1876
1877 if (limit > bottom) {
1878 return curr_region;
1879 } else {
1880 assert(limit == bottom,
1881 "the region limit should be at bottom");
1882 // we return NULL and the caller should try calling
1883 // claim_region() again.
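// (The CAS above has already advanced the global finger past this empty
// region, so no work is lost by returning NULL here.)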
1884 return NULL; 1885 } 1886 } else { 1887 assert(_finger > finger, "the finger should have moved forward"); 1888 // read it again 1889 finger = _finger; 1890 } 1891 } 1892 1893 return NULL; 1894 } 1895 1896 #ifndef PRODUCT 1897 class VerifyNoCSetOops { 1898 G1CollectedHeap* _g1h; 1899 const char* _phase; 1900 int _info; 1901 1902 public: 1903 VerifyNoCSetOops(const char* phase, int info = -1) : 1904 _g1h(G1CollectedHeap::heap()), 1905 _phase(phase), 1906 _info(info) 1907 { } 1908 1909 void operator()(G1TaskQueueEntry task_entry) const { 1910 if (task_entry.is_array_slice()) { 1911 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); 1912 return; 1913 } 1914 guarantee(oopDesc::is_oop(task_entry.obj()), 1915 "Non-oop " PTR_FORMAT ", phase: %s, info: %d", 1916 p2i(task_entry.obj()), _phase, _info); 1917 guarantee(!_g1h->is_in_cset(task_entry.obj()), 1918 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", 1919 p2i(task_entry.obj()), _phase, _info); 1920 } 1921 }; 1922 1923 void G1ConcurrentMark::verify_no_cset_oops() { 1924 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1925 if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) { 1926 return; 1927 } 1928 1929 // Verify entries on the global mark stack 1930 _global_mark_stack.iterate(VerifyNoCSetOops("Stack")); 1931 1932 // Verify entries on the task queues 1933 for (uint i = 0; i < _max_num_tasks; ++i) { 1934 G1CMTaskQueue* queue = _task_queues->queue(i); 1935 queue->iterate(VerifyNoCSetOops("Queue", i)); 1936 } 1937 1938 // Verify the global finger 1939 HeapWord* global_finger = finger(); 1940 if (global_finger != NULL && global_finger < _heap.end()) { 1941 // Since we always iterate over all regions, we might get a NULL HeapRegion 1942 // here. 1943 HeapRegion* global_hr = _g1h->heap_region_containing(global_finger); 1944 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 1945 "global finger: " PTR_FORMAT " region: " HR_FORMAT, 1946 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)); 1947 } 1948 1949 // Verify the task fingers 1950 assert(_num_concurrent_workers <= _max_num_tasks, "sanity"); 1951 for (uint i = 0; i < _num_concurrent_workers; ++i) { 1952 G1CMTask* task = _tasks[i]; 1953 HeapWord* task_finger = task->finger(); 1954 if (task_finger != NULL && task_finger < _heap.end()) { 1955 // See above note on the global finger verification. 1956 HeapRegion* task_hr = _g1h->heap_region_containing(task_finger); 1957 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 1958 !task_hr->in_collection_set(), 1959 "task finger: " PTR_FORMAT " region: " HR_FORMAT, 1960 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)); 1961 } 1962 } 1963 } 1964 #endif // PRODUCT 1965 1966 void G1ConcurrentMark::rebuild_rem_set_concurrently() { 1967 _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset); 1968 } 1969 1970 void G1ConcurrentMark::print_stats() { 1971 if (!log_is_enabled(Debug, gc, stats)) { 1972 return; 1973 } 1974 log_debug(gc, stats)("---------------------------------------------------------------------"); 1975 for (size_t i = 0; i < _num_active_tasks; ++i) { 1976 _tasks[i]->print_stats(); 1977 log_debug(gc, stats)("---------------------------------------------------------------------"); 1978 } 1979 } 1980 1981 void G1ConcurrentMark::concurrent_cycle_abort() { 1982 if (!cm_thread()->during_cycle() || _has_aborted) { 1983 // We haven't started a concurrent cycle or we have already aborted it. 
No need to do anything. 1984 return; 1985 } 1986 1987 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 1988 // concurrent bitmap clearing. 1989 { 1990 GCTraceTime(Debug, gc)("Clear Next Bitmap"); 1991 clear_bitmap(_next_mark_bitmap, _g1h->workers(), false); 1992 } 1993 // Note we cannot clear the previous marking bitmap here 1994 // since VerifyDuringGC verifies the objects marked during 1995 // a full GC against the previous bitmap. 1996 1997 // Empty mark stack 1998 reset_marking_for_restart(); 1999 for (uint i = 0; i < _max_num_tasks; ++i) { 2000 _tasks[i]->clear_region_fields(); 2001 } 2002 _first_overflow_barrier_sync.abort(); 2003 _second_overflow_barrier_sync.abort(); 2004 _has_aborted = true; 2005 2006 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2007 satb_mq_set.abandon_partial_marking(); 2008 // This can be called either during or outside marking, we'll read 2009 // the expected_active value from the SATB queue set. 2010 satb_mq_set.set_active_all_threads( 2011 false, /* new active value */ 2012 satb_mq_set.is_active() /* expected_active */); 2013 } 2014 2015 static void print_ms_time_info(const char* prefix, const char* name, 2016 NumberSeq& ns) { 2017 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 2018 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 2019 if (ns.num() > 0) { 2020 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]", 2021 prefix, ns.sd(), ns.maximum()); 2022 } 2023 } 2024 2025 void G1ConcurrentMark::print_summary_info() { 2026 Log(gc, marking) log; 2027 if (!log.is_trace()) { 2028 return; 2029 } 2030 2031 log.trace(" Concurrent marking:"); 2032 print_ms_time_info(" ", "init marks", _init_times); 2033 print_ms_time_info(" ", "remarks", _remark_times); 2034 { 2035 print_ms_time_info(" ", "final marks", _remark_mark_times); 2036 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 2037 2038 } 2039 print_ms_time_info(" ", "cleanups", _cleanup_times); 2040 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", 2041 _total_cleanup_time, (_cleanup_times.num() > 0 ? 
_total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); 2042 log.trace(" Total stop_world time = %8.2f s.", 2043 (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); 2044 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).", 2045 cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum()); 2046 } 2047 2048 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const { 2049 _concurrent_workers->print_worker_threads_on(st); 2050 } 2051 2052 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const { 2053 _concurrent_workers->threads_do(tc); 2054 } 2055 2056 void G1ConcurrentMark::print_on_error(outputStream* st) const { 2057 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 2058 p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap)); 2059 _prev_mark_bitmap->print_on_error(st, " Prev Bits: "); 2060 _next_mark_bitmap->print_on_error(st, " Next Bits: "); 2061 } 2062 2063 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) { 2064 ReferenceProcessor* result = g1h->ref_processor_cm(); 2065 assert(result != NULL, "CM reference processor should not be NULL"); 2066 return result; 2067 } 2068 2069 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h, 2070 G1CMTask* task) 2071 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)), 2072 _g1h(g1h), _task(task) 2073 { } 2074 2075 void G1CMTask::setup_for_region(HeapRegion* hr) { 2076 assert(hr != NULL, 2077 "claim_region() should have filtered out NULL regions"); 2078 _curr_region = hr; 2079 _finger = hr->bottom(); 2080 update_region_limit(); 2081 } 2082 2083 void G1CMTask::update_region_limit() { 2084 HeapRegion* hr = _curr_region; 2085 HeapWord* bottom = hr->bottom(); 2086 HeapWord* limit = hr->next_top_at_mark_start(); 2087 2088 if (limit == bottom) { 2089 // The region was collected underneath our feet. 2090 // We set the finger to bottom to ensure that the bitmap 2091 // iteration that will follow this will not do anything. 2092 // (this is not a condition that holds when we set the region up, 2093 // as the region is not supposed to be empty in the first place) 2094 _finger = bottom; 2095 } else if (limit >= _region_limit) { 2096 assert(limit >= _finger, "peace of mind"); 2097 } else { 2098 assert(limit < _region_limit, "only way to get here"); 2099 // This can happen under some pretty unusual circumstances. An 2100 // evacuation pause empties the region underneath our feet (NTAMS 2101 // at bottom). We then do some allocation in the region (NTAMS 2102 // stays at bottom), followed by the region being used as a GC 2103 // alloc region (NTAMS will move to top() and the objects 2104 // originally below it will be grayed). All objects now marked in 2105 // the region are explicitly grayed, if below the global finger, 2106 // and we do not need in fact to scan anything else. So, we simply 2107 // set _finger to be limit to ensure that the bitmap iteration 2108 // doesn't do anything. 2109 _finger = limit; 2110 } 2111 2112 _region_limit = limit; 2113 } 2114 2115 void G1CMTask::giveup_current_region() { 2116 assert(_curr_region != NULL, "invariant"); 2117 clear_region_fields(); 2118 } 2119 2120 void G1CMTask::clear_region_fields() { 2121 // Values for these three fields that indicate that we're not 2122 // holding on to a region. 
2123 _curr_region = NULL;
2124 _finger = NULL;
2125 _region_limit = NULL;
2126 }
2127
2128 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2129 if (cm_oop_closure == NULL) {
2130 assert(_cm_oop_closure != NULL, "invariant");
2131 } else {
2132 assert(_cm_oop_closure == NULL, "invariant");
2133 }
2134 _cm_oop_closure = cm_oop_closure;
2135 }
2136
2137 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2138 guarantee(next_mark_bitmap != NULL, "invariant");
2139 _next_mark_bitmap = next_mark_bitmap;
2140 clear_region_fields();
2141
2142 _calls = 0;
2143 _elapsed_time_ms = 0.0;
2144 _termination_time_ms = 0.0;
2145 _termination_start_time_ms = 0.0;
2146
2147 _mark_stats_cache.reset();
2148 }
2149
2150 bool G1CMTask::should_exit_termination() {
2151 regular_clock_call();
2152 // This is called when we are in the termination protocol. We should
2153 // quit if, for some reason, this task wants to abort or the global
2154 // stack is not empty (this means that we can get work from it).
2155 return !_cm->mark_stack_empty() || has_aborted();
2156 }
2157
2158 void G1CMTask::reached_limit() {
2159 assert(_words_scanned >= _words_scanned_limit ||
2160 _refs_reached >= _refs_reached_limit,
2161 "shouldn't have been called otherwise");
2162 regular_clock_call();
2163 }
2164
2165 void G1CMTask::regular_clock_call() {
2166 if (has_aborted()) {
2167 return;
2168 }
2169
2170 // First, we need to recalculate the words scanned and refs reached
2171 // limits for the next clock call.
2172 recalculate_limits();
2173
2174 // During the regular clock call we do the following:
2175
2176 // (1) If an overflow has been flagged, then we abort.
2177 if (_cm->has_overflown()) {
2178 set_has_aborted();
2179 return;
2180 }
2181
2182 // If we are not concurrent (i.e. we're doing remark) we don't need
2183 // to check anything else. The other steps are only needed during
2184 // the concurrent marking phase.
2185 if (!_cm->concurrent()) {
2186 return;
2187 }
2188
2189 // (2) If marking has been aborted for Full GC, then we also abort.
2190 if (_cm->has_aborted()) {
2191 set_has_aborted();
2192 return;
2193 }
2194
2195 double curr_time_ms = os::elapsedVTime() * 1000.0;
2196
2197 // (3) We check whether we should yield. If we have to, then we abort.
2198 if (SuspendibleThreadSet::should_yield()) {
2199 // We should yield. To do this we abort the task. The caller is
2200 // responsible for yielding.
2201 set_has_aborted();
2202 return;
2203 }
2204
2205 // (4) We check whether we've reached our time quota. If we have,
2206 // then we abort.
2207 double elapsed_time_ms = curr_time_ms - _start_time_ms;
2208 if (elapsed_time_ms > _time_target_ms) {
2209 set_has_aborted();
2210 _has_timed_out = true;
2211 return;
2212 }
2213
2214 // (5) Finally, we check whether there are enough completed SATB
2215 // buffers available for processing. If there are, we abort.
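// Processing SATB buffers promptly keeps the completed-buffer queue short,
// which in turn keeps the amount of work left for the remark pause small.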
2216 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2217 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 2218 // we do need to process SATB buffers, we'll abort and restart 2219 // the marking task to do so 2220 set_has_aborted(); 2221 return; 2222 } 2223 } 2224 2225 void G1CMTask::recalculate_limits() { 2226 _real_words_scanned_limit = _words_scanned + words_scanned_period; 2227 _words_scanned_limit = _real_words_scanned_limit; 2228 2229 _real_refs_reached_limit = _refs_reached + refs_reached_period; 2230 _refs_reached_limit = _real_refs_reached_limit; 2231 } 2232 2233 void G1CMTask::decrease_limits() { 2234 // This is called when we believe that we're going to do an infrequent 2235 // operation which will increase the per byte scanned cost (i.e. move 2236 // entries to/from the global stack). It basically tries to decrease the 2237 // scanning limit so that the clock is called earlier. 2238 2239 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4; 2240 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4; 2241 } 2242 2243 void G1CMTask::move_entries_to_global_stack() { 2244 // Local array where we'll store the entries that will be popped 2245 // from the local queue. 2246 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2247 2248 size_t n = 0; 2249 G1TaskQueueEntry task_entry; 2250 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) { 2251 buffer[n] = task_entry; 2252 ++n; 2253 } 2254 if (n < G1CMMarkStack::EntriesPerChunk) { 2255 buffer[n] = G1TaskQueueEntry(); 2256 } 2257 2258 if (n > 0) { 2259 if (!_cm->mark_stack_push(buffer)) { 2260 set_has_aborted(); 2261 } 2262 } 2263 2264 // This operation was quite expensive, so decrease the limits. 2265 decrease_limits(); 2266 } 2267 2268 bool G1CMTask::get_entries_from_global_stack() { 2269 // Local array where we'll store the entries that will be popped 2270 // from the global stack. 2271 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk]; 2272 2273 if (!_cm->mark_stack_pop(buffer)) { 2274 return false; 2275 } 2276 2277 // We did actually pop at least one entry. 2278 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) { 2279 G1TaskQueueEntry task_entry = buffer[i]; 2280 if (task_entry.is_null()) { 2281 break; 2282 } 2283 assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj())); 2284 bool success = _task_queue->push(task_entry); 2285 // We only call this when the local queue is empty or under a 2286 // given target limit. So, we do not expect this push to fail. 2287 assert(success, "invariant"); 2288 } 2289 2290 // This operation was quite expensive, so decrease the limits 2291 decrease_limits(); 2292 return true; 2293 } 2294 2295 void G1CMTask::drain_local_queue(bool partially) { 2296 if (has_aborted()) { 2297 return; 2298 } 2299 2300 // Decide what the target size is, depending whether we're going to 2301 // drain it partially (so that other tasks can steal if they run out 2302 // of things to do) or totally (at the very end). 
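// For a partial drain we keep roughly a third of the queue, capped by
// GCDrainStackTargetSize (64 entries by default), so that other tasks still
// have something to steal.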
2303 size_t target_size;
2304 if (partially) {
2305 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2306 } else {
2307 target_size = 0;
2308 }
2309
2310 if (_task_queue->size() > target_size) {
2311 G1TaskQueueEntry entry;
2312 bool ret = _task_queue->pop_local(entry);
2313 while (ret) {
2314 scan_task_entry(entry);
2315 if (_task_queue->size() <= target_size || has_aborted()) {
2316 ret = false;
2317 } else {
2318 ret = _task_queue->pop_local(entry);
2319 }
2320 }
2321 }
2322 }
2323
2324 void G1CMTask::drain_global_stack(bool partially) {
2325 if (has_aborted()) {
2326 return;
2327 }
2328
2329 // We have a policy to drain the local queue before we attempt to
2330 // drain the global stack.
2331 assert(partially || _task_queue->size() == 0, "invariant");
2332
2333 // Decide what the target size is, depending on whether we're going to
2334 // drain it partially (so that other tasks can steal if they run out
2335 // of things to do) or totally (at the very end).
2336 // Notice that when draining the global mark stack partially, due to the raciness
2337 // of the mark stack size update we might in fact drop below the target. But,
2338 // this is not a problem.
2339 // In case of total draining, we simply process until the global mark stack is
2340 // totally empty, disregarding the size counter.
2341 if (partially) {
2342 size_t const target_size = _cm->partial_mark_stack_size_target();
2343 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2344 if (get_entries_from_global_stack()) {
2345 drain_local_queue(partially);
2346 }
2347 }
2348 } else {
2349 while (!has_aborted() && get_entries_from_global_stack()) {
2350 drain_local_queue(partially);
2351 }
2352 }
2353 }
2354
2355 // The SATB queue has several assumptions on whether to call the par or
2356 // non-par versions of the methods. This is why some of the code is
2357 // replicated. We should really get rid of the single-threaded version
2358 // of the code to simplify things.
2359 void G1CMTask::drain_satb_buffers() {
2360 if (has_aborted()) {
2361 return;
2362 }
2363
2364 // We set this so that the regular clock knows that we're in the
2365 // middle of draining buffers and doesn't set the abort flag when it
2366 // notices that SATB buffers are available for draining. It'd be
2367 // very counterproductive if it did that. :-)
2368 _draining_satb_buffers = true;
2369
2370 G1CMSATBBufferClosure satb_cl(this, _g1h);
2371 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2372
2373 // This keeps claiming and applying the closure to completed buffers
2374 // until we run out of buffers or we need to abort.
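// regular_clock_call() is invoked after each buffer below, so the remaining
// abort conditions (overflow, Full GC abort, yield request, time quota) are
// still honored while we drain.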
2375 while (!has_aborted() && 2376 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { 2377 regular_clock_call(); 2378 } 2379 2380 _draining_satb_buffers = false; 2381 2382 assert(has_aborted() || 2383 _cm->concurrent() || 2384 satb_mq_set.completed_buffers_num() == 0, "invariant"); 2385 2386 // again, this was a potentially expensive operation, decrease the 2387 // limits to get the regular clock call early 2388 decrease_limits(); 2389 } 2390 2391 void G1CMTask::clear_mark_stats_cache(uint region_idx) { 2392 _mark_stats_cache.reset(region_idx); 2393 } 2394 2395 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() { 2396 return _mark_stats_cache.evict_all(); 2397 } 2398 2399 void G1CMTask::print_stats() { 2400 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls); 2401 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 2402 _elapsed_time_ms, _termination_time_ms); 2403 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms", 2404 _step_times_ms.num(), 2405 _step_times_ms.avg(), 2406 _step_times_ms.sd(), 2407 _step_times_ms.maximum(), 2408 _step_times_ms.sum()); 2409 size_t const hits = _mark_stats_cache.hits(); 2410 size_t const misses = _mark_stats_cache.misses(); 2411 log_debug(gc, stats)(" Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f", 2412 hits, misses, percent_of(hits, hits + misses)); 2413 } 2414 2415 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) { 2416 return _task_queues->steal(worker_id, hash_seed, task_entry); 2417 } 2418 2419 /***************************************************************************** 2420 2421 The do_marking_step(time_target_ms, ...) method is the building 2422 block of the parallel marking framework. It can be called in parallel 2423 with other invocations of do_marking_step() on different tasks 2424 (but only one per task, obviously) and concurrently with the 2425 mutator threads, or during remark, hence it eliminates the need 2426 for two versions of the code. When called during remark, it will 2427 pick up from where the task left off during the concurrent marking 2428 phase. Interestingly, tasks are also claimable during evacuation 2429 pauses too, since do_marking_step() ensures that it aborts before 2430 it needs to yield. 2431 2432 The data structures that it uses to do marking work are the 2433 following: 2434 2435 (1) Marking Bitmap. If there are gray objects that appear only 2436 on the bitmap (this happens either when dealing with an overflow 2437 or when the initial marking phase has simply marked the roots 2438 and didn't push them on the stack), then tasks claim heap 2439 regions whose bitmap they then scan to find gray objects. A 2440 global finger indicates where the end of the last claimed region 2441 is. A local finger indicates how far into the region a task has 2442 scanned. The two fingers are used to determine how to gray an 2443 object (i.e. whether simply marking it is OK, as it will be 2444 visited by a task in the future, or whether it needs to be also 2445 pushed on a stack). 2446 2447 (2) Local Queue. The local queue of the task which is accessed 2448 reasonably efficiently by the task. Other tasks can steal from 2449 it when they run out of work. Throughout the marking phase, a 2450 task attempts to keep its local queue short but not totally 2451 empty, so that entries are available for stealing by other 2452 tasks. 
Only when there is no more work, a task will totally 2453 drain its local queue. 2454 2455 (3) Global Mark Stack. This handles local queue overflow. During 2456 marking only sets of entries are moved between it and the local 2457 queues, as access to it requires a mutex and more fine-grain 2458 interaction with it which might cause contention. If it 2459 overflows, then the marking phase should restart and iterate 2460 over the bitmap to identify gray objects. Throughout the marking 2461 phase, tasks attempt to keep the global mark stack at a small 2462 length but not totally empty, so that entries are available for 2463 popping by other tasks. Only when there is no more work, tasks 2464 will totally drain the global mark stack. 2465 2466 (4) SATB Buffer Queue. This is where completed SATB buffers are 2467 made available. Buffers are regularly removed from this queue 2468 and scanned for roots, so that the queue doesn't get too 2469 long. During remark, all completed buffers are processed, as 2470 well as the filled in parts of any uncompleted buffers. 2471 2472 The do_marking_step() method tries to abort when the time target 2473 has been reached. There are a few other cases when the 2474 do_marking_step() method also aborts: 2475 2476 (1) When the marking phase has been aborted (after a Full GC). 2477 2478 (2) When a global overflow (on the global stack) has been 2479 triggered. Before the task aborts, it will actually sync up with 2480 the other tasks to ensure that all the marking data structures 2481 (local queues, stacks, fingers etc.) are re-initialized so that 2482 when do_marking_step() completes, the marking phase can 2483 immediately restart. 2484 2485 (3) When enough completed SATB buffers are available. The 2486 do_marking_step() method only tries to drain SATB buffers right 2487 at the beginning. So, if enough buffers are available, the 2488 marking step aborts and the SATB buffers are processed at 2489 the beginning of the next invocation. 2490 2491 (4) To yield. when we have to yield then we abort and yield 2492 right at the end of do_marking_step(). This saves us from a lot 2493 of hassle as, by yielding we might allow a Full GC. If this 2494 happens then objects will be compacted underneath our feet, the 2495 heap might shrink, etc. We save checking for this by just 2496 aborting and doing the yield right at the end. 2497 2498 From the above it follows that the do_marking_step() method should 2499 be called in a loop (or, otherwise, regularly) until it completes. 2500 2501 If a marking step completes without its has_aborted() flag being 2502 true, it means it has completed the current marking phase (and 2503 also all other marking tasks have done so and have all synced up). 2504 2505 A method called regular_clock_call() is invoked "regularly" (in 2506 sub ms intervals) throughout marking. It is this clock method that 2507 checks all the abort conditions which were mentioned above and 2508 decides when the task should abort. A work-based scheme is used to 2509 trigger this clock method: when the number of object words the 2510 marking phase has scanned or the number of references the marking 2511 phase has visited reach a given limit. Additional invocations to 2512 the method clock have been planted in a few other strategic places 2513 too. The initial reason for the clock method was to avoid calling 2514 vtime too regularly, as it is quite expensive. 
So, once it was in 2515 place, it was natural to piggy-back all the other conditions on it 2516 too and not constantly check them throughout the code. 2517 2518 If do_termination is true then do_marking_step will enter its 2519 termination protocol. 2520 2521 The value of is_serial must be true when do_marking_step is being 2522 called serially (i.e. by the VMThread) and do_marking_step should 2523 skip any synchronization in the termination and overflow code. 2524 Examples include the serial remark code and the serial reference 2525 processing closures. 2526 2527 The value of is_serial must be false when do_marking_step is 2528 being called by any of the worker threads in a work gang. 2529 Examples include the concurrent marking code (CMMarkingTask), 2530 the MT remark code, and the MT reference processing closures. 2531 2532 *****************************************************************************/ 2533 2534 void G1CMTask::do_marking_step(double time_target_ms, 2535 bool do_termination, 2536 bool is_serial) { 2537 assert(time_target_ms >= 1.0, "minimum granularity is 1ms"); 2538 2539 _start_time_ms = os::elapsedVTime() * 1000.0; 2540 2541 // If do_stealing is true then do_marking_step will attempt to 2542 // steal work from the other G1CMTasks. It only makes sense to 2543 // enable stealing when the termination protocol is enabled 2544 // and do_marking_step() is not being called serially. 2545 bool do_stealing = do_termination && !is_serial; 2546 2547 double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms); 2548 _time_target_ms = time_target_ms - diff_prediction_ms; 2549 2550 // set up the variables that are used in the work-based scheme to 2551 // call the regular clock method 2552 _words_scanned = 0; 2553 _refs_reached = 0; 2554 recalculate_limits(); 2555 2556 // clear all flags 2557 clear_has_aborted(); 2558 _has_timed_out = false; 2559 _draining_satb_buffers = false; 2560 2561 ++_calls; 2562 2563 // Set up the bitmap and oop closures. Anything that uses them is 2564 // eventually called from this method, so it is OK to allocate these 2565 // statically. 2566 G1CMBitMapClosure bitmap_closure(this, _cm); 2567 G1CMOopClosure cm_oop_closure(_g1h, this); 2568 set_cm_oop_closure(&cm_oop_closure); 2569 2570 if (_cm->has_overflown()) { 2571 // This can happen if the mark stack overflows during a GC pause 2572 // and this task, after a yield point, restarts. We have to abort 2573 // as we need to get into the overflow protocol which happens 2574 // right at the end of this task. 2575 set_has_aborted(); 2576 } 2577 2578 // First drain any available SATB buffers. After this, we will not 2579 // look at SATB buffers before the next invocation of this method. 2580 // If enough completed SATB buffers are queued up, the regular clock 2581 // will abort this task so that it restarts. 2582 drain_satb_buffers(); 2583 // ...then partially drain the local queue and the global stack 2584 drain_local_queue(true); 2585 drain_global_stack(true); 2586 2587 do { 2588 if (!has_aborted() && _curr_region != NULL) { 2589 // This means that we're already holding on to a region. 2590 assert(_finger != NULL, "if region is not NULL, then the finger " 2591 "should not be NULL either"); 2592 2593 // We might have restarted this task after an evacuation pause 2594 // which might have evacuated the region we're holding on to 2595 // underneath our feet. 
Let's read its limit again to make sure 2596 // that we do not iterate over a region of the heap that 2597 // contains garbage (update_region_limit() will also move 2598 // _finger to the start of the region if it is found empty). 2599 update_region_limit(); 2600 // We will start from _finger not from the start of the region, 2601 // as we might be restarting this task after aborting half-way 2602 // through scanning this region. In this case, _finger points to 2603 // the address where we last found a marked object. If this is a 2604 // fresh region, _finger points to start(). 2605 MemRegion mr = MemRegion(_finger, _region_limit); 2606 2607 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(), 2608 "humongous regions should go around loop once only"); 2609 2610 // Some special cases: 2611 // If the memory region is empty, we can just give up the region. 2612 // If the current region is humongous then we only need to check 2613 // the bitmap for the bit associated with the start of the object, 2614 // scan the object if it's live, and give up the region. 2615 // Otherwise, let's iterate over the bitmap of the part of the region 2616 // that is left. 2617 // If the iteration is successful, give up the region. 2618 if (mr.is_empty()) { 2619 giveup_current_region(); 2620 regular_clock_call(); 2621 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { 2622 if (_next_mark_bitmap->is_marked(mr.start())) { 2623 // The object is marked - apply the closure 2624 bitmap_closure.do_addr(mr.start()); 2625 } 2626 // Even if this task aborted while scanning the humongous object 2627 // we can (and should) give up the current region. 2628 giveup_current_region(); 2629 regular_clock_call(); 2630 } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) { 2631 giveup_current_region(); 2632 regular_clock_call(); 2633 } else { 2634 assert(has_aborted(), "currently the only way to do so"); 2635 // The only way to abort the bitmap iteration is to return 2636 // false from the do_bit() method. However, inside the 2637 // do_bit() method we move the _finger to point to the 2638 // object currently being looked at. So, if we bail out, we 2639 // have definitely set _finger to something non-null. 2640 assert(_finger != NULL, "invariant"); 2641 2642 // Region iteration was actually aborted. So now _finger 2643 // points to the address of the object we last scanned. If we 2644 // leave it there, when we restart this task, we will rescan 2645 // the object. It is easy to avoid this. We move the finger by 2646 // enough to point to the next possible object header. 2647 assert(_finger < _region_limit, "invariant"); 2648 HeapWord* const new_finger = _finger + ((oop)_finger)->size(); 2649 // Check if bitmap iteration was aborted while scanning the last object 2650 if (new_finger >= _region_limit) { 2651 giveup_current_region(); 2652 } else { 2653 move_finger_to(new_finger); 2654 } 2655 } 2656 } 2657 // At this point we have either completed iterating over the 2658 // region we were holding on to, or we have aborted. 2659 2660 // We then partially drain the local queue and the global stack. 2661 // (Do we really need this?) 2662 drain_local_queue(true); 2663 drain_global_stack(true); 2664 2665 // Read the note on the claim_region() method on why it might 2666 // return NULL with potentially more regions available for 2667 // claiming and why we have to check out_of_regions() to determine 2668 // whether we're done or not. 
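// In short: a NULL result from claim_region() may simply mean the claimed
// region was empty, so only out_of_regions() tells us that we are really done.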
2669 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 2670 // We are going to try to claim a new region. We should have 2671 // given up on the previous one. 2672 // Separated the asserts so that we know which one fires. 2673 assert(_curr_region == NULL, "invariant"); 2674 assert(_finger == NULL, "invariant"); 2675 assert(_region_limit == NULL, "invariant"); 2676 HeapRegion* claimed_region = _cm->claim_region(_worker_id); 2677 if (claimed_region != NULL) { 2678 // Yes, we managed to claim one 2679 setup_for_region(claimed_region); 2680 assert(_curr_region == claimed_region, "invariant"); 2681 } 2682 // It is important to call the regular clock here. It might take 2683 // a while to claim a region if, for example, we hit a large 2684 // block of empty regions. So we need to call the regular clock 2685 // method once round the loop to make sure it's called 2686 // frequently enough. 2687 regular_clock_call(); 2688 } 2689 2690 if (!has_aborted() && _curr_region == NULL) { 2691 assert(_cm->out_of_regions(), 2692 "at this point we should be out of regions"); 2693 } 2694 } while ( _curr_region != NULL && !has_aborted()); 2695 2696 if (!has_aborted()) { 2697 // We cannot check whether the global stack is empty, since other 2698 // tasks might be pushing objects to it concurrently. 2699 assert(_cm->out_of_regions(), 2700 "at this point we should be out of regions"); 2701 // Try to reduce the number of available SATB buffers so that 2702 // remark has less work to do. 2703 drain_satb_buffers(); 2704 } 2705 2706 // Since we've done everything else, we can now totally drain the 2707 // local queue and global stack. 2708 drain_local_queue(false); 2709 drain_global_stack(false); 2710 2711 // Attempt at work stealing from other task's queues. 2712 if (do_stealing && !has_aborted()) { 2713 // We have not aborted. This means that we have finished all that 2714 // we could. Let's try to do some stealing... 2715 2716 // We cannot check whether the global stack is empty, since other 2717 // tasks might be pushing objects to it concurrently. 2718 assert(_cm->out_of_regions() && _task_queue->size() == 0, 2719 "only way to reach here"); 2720 while (!has_aborted()) { 2721 G1TaskQueueEntry entry; 2722 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { 2723 scan_task_entry(entry); 2724 2725 // And since we're towards the end, let's totally drain the 2726 // local queue and global stack. 2727 drain_local_queue(false); 2728 drain_global_stack(false); 2729 } else { 2730 break; 2731 } 2732 } 2733 } 2734 2735 // We still haven't aborted. Now, let's try to get into the 2736 // termination protocol. 2737 if (do_termination && !has_aborted()) { 2738 // We cannot check whether the global stack is empty, since other 2739 // tasks might be concurrently pushing objects on it. 2740 // Separated the asserts so that we know which one fires. 2741 assert(_cm->out_of_regions(), "only way to reach here"); 2742 assert(_task_queue->size() == 0, "only way to reach here"); 2743 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 2744 2745 // The G1CMTask class also extends the TerminatorTerminator class, 2746 // hence its should_exit_termination() method will also decide 2747 // whether to exit the termination protocol or not. 
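// In the serial case there are no other workers to coordinate with, so we
// are finished as soon as we get here; otherwise offer_termination() only
// returns true once all workers have offered termination.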
2748 bool finished = (is_serial || 2749 _cm->terminator()->offer_termination(this)); 2750 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 2751 _termination_time_ms += 2752 termination_end_time_ms - _termination_start_time_ms; 2753 2754 if (finished) { 2755 // We're all done. 2756 2757 // We can now guarantee that the global stack is empty, since 2758 // all other tasks have finished. We separated the guarantees so 2759 // that, if a condition is false, we can immediately find out 2760 // which one. 2761 guarantee(_cm->out_of_regions(), "only way to reach here"); 2762 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 2763 guarantee(_task_queue->size() == 0, "only way to reach here"); 2764 guarantee(!_cm->has_overflown(), "only way to reach here"); 2765 } else { 2766 // Apparently there's more work to do. Let's abort this task. It 2767 // will restart it and we can hopefully find more things to do. 2768 set_has_aborted(); 2769 } 2770 } 2771 2772 // Mainly for debugging purposes to make sure that a pointer to the 2773 // closure which was statically allocated in this frame doesn't 2774 // escape it by accident. 2775 set_cm_oop_closure(NULL); 2776 double end_time_ms = os::elapsedVTime() * 1000.0; 2777 double elapsed_time_ms = end_time_ms - _start_time_ms; 2778 // Update the step history. 2779 _step_times_ms.add(elapsed_time_ms); 2780 2781 if (has_aborted()) { 2782 // The task was aborted for some reason. 2783 if (_has_timed_out) { 2784 double diff_ms = elapsed_time_ms - _time_target_ms; 2785 // Keep statistics of how well we did with respect to hitting 2786 // our target only if we actually timed out (if we aborted for 2787 // other reasons, then the results might get skewed). 2788 _marking_step_diffs_ms.add(diff_ms); 2789 } 2790 2791 if (_cm->has_overflown()) { 2792 // This is the interesting one. We aborted because a global 2793 // overflow was raised. This means we have to restart the 2794 // marking phase and start iterating over regions. However, in 2795 // order to do this we have to make sure that all tasks stop 2796 // what they are doing and re-initialize in a safe manner. We 2797 // will achieve this with the use of two barrier sync points. 2798 2799 if (!is_serial) { 2800 // We only need to enter the sync barrier if being called 2801 // from a parallel context 2802 _cm->enter_first_sync_barrier(_worker_id); 2803 2804 // When we exit this sync barrier we know that all tasks have 2805 // stopped doing marking work. So, it's now safe to 2806 // re-initialize our data structures. 2807 } 2808 2809 clear_region_fields(); 2810 flush_mark_stats_cache(); 2811 2812 if (!is_serial) { 2813 // If we're executing the concurrent phase of marking, reset the marking 2814 // state; otherwise the marking state is reset after reference processing, 2815 // during the remark pause. 2816 // If we reset here as a result of an overflow during the remark we will 2817 // see assertion failures from any subsequent set_concurrency_and_phase() 2818 // calls. 2819 if (_cm->concurrent() && _worker_id == 0) { 2820 // Worker 0 is responsible for clearing the global data structures because 2821 // of an overflow. During STW we should not clear the overflow flag (in 2822 // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit 2823 // method to abort the pause and restart concurrent marking. 2824 _cm->reset_marking_for_restart(); 2825 2826 log_info(gc, marking)("Concurrent Mark reset for overflow"); 2827 } 2828 2829 // ...and enter the second barrier. 
2830 _cm->enter_second_sync_barrier(_worker_id); 2831 } 2832 // At this point, if we're during the concurrent phase of 2833 // marking, everything has been re-initialized and we're 2834 // ready to restart. 2835 } 2836 } 2837 } 2838 2839 G1CMTask::G1CMTask(uint worker_id, 2840 G1ConcurrentMark* cm, 2841 G1CMTaskQueue* task_queue, 2842 G1RegionMarkStats* mark_stats, 2843 uint max_regions) : 2844 _objArray_processor(this), 2845 _worker_id(worker_id), 2846 _g1h(G1CollectedHeap::heap()), 2847 _cm(cm), 2848 _next_mark_bitmap(NULL), 2849 _task_queue(task_queue), 2850 _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize), 2851 _calls(0), 2852 _time_target_ms(0.0), 2853 _start_time_ms(0.0), 2854 _cm_oop_closure(NULL), 2855 _curr_region(NULL), 2856 _finger(NULL), 2857 _region_limit(NULL), 2858 _words_scanned(0), 2859 _words_scanned_limit(0), 2860 _real_words_scanned_limit(0), 2861 _refs_reached(0), 2862 _refs_reached_limit(0), 2863 _real_refs_reached_limit(0), 2864 _hash_seed(17), 2865 _has_aborted(false), 2866 _has_timed_out(false), 2867 _draining_satb_buffers(false), 2868 _step_times_ms(), 2869 _elapsed_time_ms(0.0), 2870 _termination_time_ms(0.0), 2871 _termination_start_time_ms(0.0), 2872 _marking_step_diffs_ms() 2873 { 2874 guarantee(task_queue != NULL, "invariant"); 2875 2876 _marking_step_diffs_ms.add(0.5); 2877 } 2878 2879 // These are formatting macros that are used below to ensure 2880 // consistent formatting. The *_H_* versions are used to format the 2881 // header for a particular value and they should be kept consistent 2882 // with the corresponding macro. Also note that most of the macros add 2883 // the necessary white space (as a prefix) which makes them a bit 2884 // easier to compose. 2885 2886 // All the output lines are prefixed with this string to be able to 2887 // identify them easily in a large log file. 2888 #define G1PPRL_LINE_PREFIX "###" 2889 2890 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT 2891 #ifdef _LP64 2892 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 2893 #else // _LP64 2894 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 2895 #endif // _LP64 2896 2897 // For per-region info 2898 #define G1PPRL_TYPE_FORMAT " %-4s" 2899 #define G1PPRL_TYPE_H_FORMAT " %4s" 2900 #define G1PPRL_STATE_FORMAT " %-5s" 2901 #define G1PPRL_STATE_H_FORMAT " %5s" 2902 #define G1PPRL_BYTE_FORMAT " " SIZE_FORMAT_W(9) 2903 #define G1PPRL_BYTE_H_FORMAT " %9s" 2904 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 2905 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 2906 2907 // For summary info 2908 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT 2909 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": " SIZE_FORMAT 2910 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB" 2911 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%" 2912 2913 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) : 2914 _total_used_bytes(0), _total_capacity_bytes(0), 2915 _total_prev_live_bytes(0), _total_next_live_bytes(0), 2916 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) 2917 { 2918 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2919 MemRegion g1_reserved = g1h->g1_reserved(); 2920 double now = os::elapsedTime(); 2921 2922 // Print the header of the output. 
2923 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now); 2924 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP" 2925 G1PPRL_SUM_ADDR_FORMAT("reserved") 2926 G1PPRL_SUM_BYTE_FORMAT("region-size"), 2927 p2i(g1_reserved.start()), p2i(g1_reserved.end()), 2928 HeapRegion::GrainBytes); 2929 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 2930 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2931 G1PPRL_TYPE_H_FORMAT 2932 G1PPRL_ADDR_BASE_H_FORMAT 2933 G1PPRL_BYTE_H_FORMAT 2934 G1PPRL_BYTE_H_FORMAT 2935 G1PPRL_BYTE_H_FORMAT 2936 G1PPRL_DOUBLE_H_FORMAT 2937 G1PPRL_BYTE_H_FORMAT 2938 G1PPRL_STATE_H_FORMAT 2939 G1PPRL_BYTE_H_FORMAT, 2940 "type", "address-range", 2941 "used", "prev-live", "next-live", "gc-eff", 2942 "remset", "state", "code-roots"); 2943 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2944 G1PPRL_TYPE_H_FORMAT 2945 G1PPRL_ADDR_BASE_H_FORMAT 2946 G1PPRL_BYTE_H_FORMAT 2947 G1PPRL_BYTE_H_FORMAT 2948 G1PPRL_BYTE_H_FORMAT 2949 G1PPRL_DOUBLE_H_FORMAT 2950 G1PPRL_BYTE_H_FORMAT 2951 G1PPRL_STATE_H_FORMAT 2952 G1PPRL_BYTE_H_FORMAT, 2953 "", "", 2954 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", 2955 "(bytes)", "", "(bytes)"); 2956 } 2957 2958 bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) { 2959 const char* type = r->get_type_str(); 2960 HeapWord* bottom = r->bottom(); 2961 HeapWord* end = r->end(); 2962 size_t capacity_bytes = r->capacity(); 2963 size_t used_bytes = r->used(); 2964 size_t prev_live_bytes = r->live_bytes(); 2965 size_t next_live_bytes = r->next_live_bytes(); 2966 double gc_eff = r->gc_efficiency(); 2967 size_t remset_bytes = r->rem_set()->mem_size(); 2968 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); 2969 const char* remset_type = r->rem_set()->get_short_state_str(); 2970 2971 _total_used_bytes += used_bytes; 2972 _total_capacity_bytes += capacity_bytes; 2973 _total_prev_live_bytes += prev_live_bytes; 2974 _total_next_live_bytes += next_live_bytes; 2975 _total_remset_bytes += remset_bytes; 2976 _total_strong_code_roots_bytes += strong_code_roots_bytes; 2977 2978 // Print a line for this particular region. 2979 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 2980 G1PPRL_TYPE_FORMAT 2981 G1PPRL_ADDR_BASE_FORMAT 2982 G1PPRL_BYTE_FORMAT 2983 G1PPRL_BYTE_FORMAT 2984 G1PPRL_BYTE_FORMAT 2985 G1PPRL_DOUBLE_FORMAT 2986 G1PPRL_BYTE_FORMAT 2987 G1PPRL_STATE_FORMAT 2988 G1PPRL_BYTE_FORMAT, 2989 type, p2i(bottom), p2i(end), 2990 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 2991 remset_bytes, remset_type, strong_code_roots_bytes); 2992 2993 return false; 2994 } 2995 2996 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 2997 // add static memory usages to remembered set sizes 2998 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 2999 // Print the footer of the output. 
3000 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX); 3001 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX 3002 " SUMMARY" 3003 G1PPRL_SUM_MB_FORMAT("capacity") 3004 G1PPRL_SUM_MB_PERC_FORMAT("used") 3005 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 3006 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 3007 G1PPRL_SUM_MB_FORMAT("remset") 3008 G1PPRL_SUM_MB_FORMAT("code-roots"), 3009 bytes_to_mb(_total_capacity_bytes), 3010 bytes_to_mb(_total_used_bytes), 3011 percent_of(_total_used_bytes, _total_capacity_bytes), 3012 bytes_to_mb(_total_prev_live_bytes), 3013 percent_of(_total_prev_live_bytes, _total_capacity_bytes), 3014 bytes_to_mb(_total_next_live_bytes), 3015 percent_of(_total_next_live_bytes, _total_capacity_bytes), 3016 bytes_to_mb(_total_remset_bytes), 3017 bytes_to_mb(_total_strong_code_roots_bytes)); 3018 }