1 /*
   2  * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc/g1/g1CollectorState.hpp"
  31 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  32 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  33 #include "gc/g1/g1HeapVerifier.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1Policy.hpp"
  36 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/shared/adaptiveSizePolicy.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/suspendibleThreadSet.hpp"
  50 #include "gc/shared/taskqueue.inline.hpp"
  51 #include "gc/shared/vmGCOperations.hpp"
  52 #include "gc/shared/weakProcessor.hpp"
  53 #include "include/jvm.h"
  54 #include "logging/log.hpp"
  55 #include "memory/allocation.hpp"
  56 #include "memory/resourceArea.hpp"
  57 #include "oops/access.inline.hpp"
  58 #include "oops/oop.inline.hpp"
  59 #include "runtime/atomic.hpp"
  60 #include "runtime/handles.inline.hpp"
  61 #include "runtime/java.hpp"
  62 #include "runtime/prefetch.inline.hpp"
  63 #include "services/memTracker.hpp"
  64 #include "utilities/align.hpp"
  65 #include "utilities/growableArray.hpp"
  66 
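     // Called for each address whose bit is set in the mark bitmap, between the
     // task's local finger and the global finger. Scans the object at 'addr',
     // partially drains the task's local queue and the global mark stack, and
     // returns false to stop the bitmap iteration once the task has aborted.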
  67 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  68   assert(addr < _cm->finger(), "invariant");
  69   assert(addr >= _task->finger(), "invariant");
  70 
  71   // We move that task's local finger along.
  72   _task->move_finger_to(addr);
  73 
  74   _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
  75   // we only partially drain the local queue and global stack
  76   _task->drain_local_queue(true);
  77   _task->drain_global_stack(true);
  78 
  79   // if the has_aborted flag has been raised, we need to bail out of
  80   // the iteration
  81   return !_task->has_aborted();
  82 }
  83 
  84 G1CMMarkStack::G1CMMarkStack() :
  85   _max_chunk_capacity(0),
  86   _base(NULL),
  87   _chunk_capacity(0) {
  88   set_empty();
  89 }
  90 
  91 bool G1CMMarkStack::resize(size_t new_capacity) {
  92   assert(is_empty(), "Only resize when stack is empty.");
  93   assert(new_capacity <= _max_chunk_capacity,
  94          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
  95 
  96   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(new_capacity, mtGC);
  97 
  98   if (new_base == NULL) {
  99     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 100     return false;
 101   }
 102   // Release old mapping.
 103   if (_base != NULL) {
 104     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 105   }
 106 
 107   _base = new_base;
 108   _chunk_capacity = new_capacity;
 109   set_empty();
 110 
 111   return true;
 112 }
 113 
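     // Returns the required alignment of the mark stack capacity, expressed as a
     // number of G1TaskQueueEntry elements: the least common multiple of the OS
     // allocation granularity and the chunk size, divided by the entry size.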
 114 size_t G1CMMarkStack::capacity_alignment() {
 115   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 116 }
 117 
 118 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 119   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 120 
 121   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 122 
 123   _max_chunk_capacity = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 124   size_t initial_chunk_capacity = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 125 
 126   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 127             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 128             _max_chunk_capacity,
 129             initial_chunk_capacity);
 130 
 131   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 132                 initial_chunk_capacity, _max_chunk_capacity);
 133 
 134   return resize(initial_chunk_capacity);
 135 }
 136 
 137 void G1CMMarkStack::expand() {
 138   if (_chunk_capacity == _max_chunk_capacity) {
 139     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 140     return;
 141   }
 142   size_t old_capacity = _chunk_capacity;
 143   // Double capacity if possible
 144   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 145 
 146   if (resize(new_capacity)) {
 147     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 148                   old_capacity, new_capacity);
 149   } else {
 150     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 151                     old_capacity, new_capacity);
 152   }
 153 }
 154 
 155 G1CMMarkStack::~G1CMMarkStack() {
 156   if (_base != NULL) {
 157     MmapArrayAllocator<TaskQueueEntryChunk>::free(_base, _chunk_capacity);
 158   }
 159 }
 160 
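     // The chunks of the overflow mark stack live on two singly linked LIFO
     // lists: _chunk_list holds chunks filled with entries, _free_list holds
     // chunks available for reuse. The raw helpers below do no locking; callers
     // must hold the corresponding MarkStackChunkList/MarkStackFreeList lock.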
 161 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 162   elem->next = *list;
 163   *list = elem;
 164 }
 165 
 166 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 167   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 168   add_chunk_to_list(&_chunk_list, elem);
 169   _chunks_in_chunk_list++;
 170 }
 171 
 172 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 173   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 174   add_chunk_to_list(&_free_list, elem);
 175 }
 176 
 177 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 178   TaskQueueEntryChunk* result = *list;
 179   if (result != NULL) {
 180     *list = (*list)->next;
 181   }
 182   return result;
 183 }
 184 
 185 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 186   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 187   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 188   if (result != NULL) {
 189     _chunks_in_chunk_list--;
 190   }
 191   return result;
 192 }
 193 
 194 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 195   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 196   return remove_chunk_from_list(&_free_list);
 197 }
 198 
 199 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 200   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 201   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 202   // wraparound of _hwm.
 203   if (_hwm >= _chunk_capacity) {
 204     return NULL;
 205   }
 206 
 207   size_t cur_idx = Atomic::add(1u, &_hwm) - 1;
 208   if (cur_idx >= _chunk_capacity) {
 209     return NULL;
 210   }
 211 
 212   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 213   result->next = NULL;
 214   return result;
 215 }
 216 
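     // Copies EntriesPerChunk entries from ptr_arr into a chunk taken from the
     // free list or, failing that, newly carved out of the backing memory, and
     // links it onto the chunk list. Returns false if no chunk could be obtained,
     // i.e. the overflow mark stack is full.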
 217 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 218   // Get a new chunk.
 219   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 220 
 221   if (new_chunk == NULL) {
 222     // Did not get a chunk from the free list. Allocate from backing memory.
 223     new_chunk = allocate_new_chunk();
 224 
 225     if (new_chunk == NULL) {
 226       return false;
 227     }
 228   }
 229 
 230   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 231 
 232   add_chunk_to_chunk_list(new_chunk);
 233 
 234   return true;
 235 }
 236 
 237 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 238   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 239 
 240   if (cur == NULL) {
 241     return false;
 242   }
 243 
 244   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 245 
 246   add_chunk_to_free_list(cur);
 247   return true;
 248 }
 249 
 250 void G1CMMarkStack::set_empty() {
 251   _chunks_in_chunk_list = 0;
 252   _hwm = 0;
 253   _chunk_list = NULL;
 254   _free_list = NULL;
 255 }
 256 
 257 G1CMRootRegions::G1CMRootRegions() :
 258   _survivors(NULL), _cm(NULL), _scan_in_progress(false),
 259   _should_abort(false), _claimed_survivor_index(0) { }
 260 
 261 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 262   _survivors = survivors;
 263   _cm = cm;
 264 }
 265 
 266 void G1CMRootRegions::prepare_for_scan() {
 267   assert(!scan_in_progress(), "pre-condition");
 268 
 269   // Currently, only survivors can be root regions.
 270   _claimed_survivor_index = 0;
 271   _scan_in_progress = _survivors->regions()->is_nonempty();
 272   _should_abort = false;
 273 }
 274 
 275 HeapRegion* G1CMRootRegions::claim_next() {
 276   if (_should_abort) {
 277     // If someone has set the should_abort flag, we return NULL to
 278     // force the caller to bail out of their loop.
 279     return NULL;
 280   }
 281 
 282   // Currently, only survivors can be root regions.
 283   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 284 
 285   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 286   if (claimed_index < survivor_regions->length()) {
 287     return survivor_regions->at(claimed_index);
 288   }
 289   return NULL;
 290 }
 291 
 292 uint G1CMRootRegions::num_root_regions() const {
 293   return (uint)_survivors->regions()->length();
 294 }
 295 
 296 void G1CMRootRegions::notify_scan_done() {
 297   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 298   _scan_in_progress = false;
 299   RootRegionScan_lock->notify_all();
 300 }
 301 
 302 void G1CMRootRegions::cancel_scan() {
 303   notify_scan_done();
 304 }
 305 
 306 void G1CMRootRegions::scan_finished() {
 307   assert(scan_in_progress(), "pre-condition");
 308 
 309   // Currently, only survivors can be root regions.
 310   if (!_should_abort) {
 311     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 312     assert((uint)_claimed_survivor_index >= _survivors->length(),
 313            "we should have claimed all survivors, claimed index = %u, length = %u",
 314            (uint)_claimed_survivor_index, _survivors->length());
 315   }
 316 
 317   notify_scan_done();
 318 }
 319 
 320 bool G1CMRootRegions::wait_until_scan_finished() {
 321   if (!scan_in_progress()) {
 322     return false;
 323   }
 324 
 325   {
 326     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 327     while (scan_in_progress()) {
 328       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 329     }
 330   }
 331   return true;
 332 }
 333 
 334 // Returns the maximum number of workers to be used in a concurrent
 335 // phase based on the number of GC workers being used in a STW
 336 // phase.
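     // For example, ParallelGCThreads = 8 yields (8 + 2) / 4 = 2 concurrent
     // workers; the result is never less than one.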
 337 static uint scale_concurrent_worker_threads(uint num_gc_workers) {
 338   return MAX2((num_gc_workers + 2) / 4, 1U);
 339 }
 340 
 341 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 342                                    G1RegionToSpaceMapper* prev_bitmap_storage,
 343                                    G1RegionToSpaceMapper* next_bitmap_storage) :
 344   // _cm_thread set inside the constructor
 345   _g1h(g1h),
 346   _completed_initialization(false),
 347 
 348   _mark_bitmap_1(),
 349   _mark_bitmap_2(),
 350   _prev_mark_bitmap(&_mark_bitmap_1),
 351   _next_mark_bitmap(&_mark_bitmap_2),
 352 
 353   _heap(_g1h->reserved_region()),
 354 
 355   _root_regions(),
 356 
 357   _global_mark_stack(),
 358 
 359   // _finger set in set_non_marking_state
 360 
 361   _worker_id_offset(DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
 362   _max_num_tasks(ParallelGCThreads),
 363   // _num_active_tasks set in set_non_marking_state()
 364   // _tasks set inside the constructor
 365 
 366   _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
 367   _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),
 368 
 369   _first_overflow_barrier_sync(),
 370   _second_overflow_barrier_sync(),
 371 
 372   _has_overflown(false),
 373   _concurrent(false),
 374   _has_aborted(false),
 375   _restart_for_overflow(false),
 376   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 377   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 378 
 379   // _verbose_level set below
 380 
 381   _init_times(),
 382   _remark_times(),
 383   _remark_mark_times(),
 384   _remark_weak_ref_times(),
 385   _cleanup_times(),
 386   _total_cleanup_time(0.0),
 387 
 388   _accum_task_vtime(NULL),
 389 
 390   _concurrent_workers(NULL),
 391   _num_concurrent_workers(0),
 392   _max_concurrent_workers(0),
 393 
 394   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_regions(), mtGC)),
 395   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_regions(), mtGC))
 396 {
 397   _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 398   _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);
 399 
 400   // Create & start ConcurrentMark thread.
 401   _cm_thread = new G1ConcurrentMarkThread(this);
 402   if (_cm_thread->osthread() == NULL) {
 403     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 404   }
 405 
 406   assert(CGC_lock != NULL, "CGC_lock must be initialized");
 407 
 408   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 409   satb_qs.set_buffer_size(G1SATBBufferSize);
 410 
 411   _root_regions.init(_g1h->survivor(), this);
 412 
 413   if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
 414     // Calculate the number of concurrent worker threads by scaling
 415     // the number of parallel GC threads.
 416     uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
 417     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 418   }
 419 
 420   assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
 421   if (ConcGCThreads > ParallelGCThreads) {
 422     log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
 423                     ConcGCThreads, ParallelGCThreads);
 424     return;
 425   }
 426 
 427   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 428   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 429 
 430   _num_concurrent_workers = ConcGCThreads;
 431   _max_concurrent_workers = _num_concurrent_workers;
 432 
 433   _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
 434   _concurrent_workers->initialize_workers();
 435 
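       // If MarkStackSize was not set explicitly, size the global mark stack
       // ergonomically: at least the default MarkStackSize and at least one local
       // task queue's worth of entries (TASKQUEUE_SIZE) per concurrent worker,
       // capped at MarkStackSizeMax.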
 436   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 437     size_t mark_stack_size =
 438       MIN2(MarkStackSizeMax,
 439           MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
 440     // Verify that the calculated value for MarkStackSize is in range.
 441     // It would be nice to use the private utility routine from Arguments.
 442     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 443       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 444                       "must be between 1 and " SIZE_FORMAT,
 445                       mark_stack_size, MarkStackSizeMax);
 446       return;
 447     }
 448     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 449   } else {
 450     // Verify MarkStackSize is in range.
 451     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 452       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 453         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 454           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 455                           "must be between 1 and " SIZE_FORMAT,
 456                           MarkStackSize, MarkStackSizeMax);
 457           return;
 458         }
 459       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 460         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 461           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 462                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 463                           MarkStackSize, MarkStackSizeMax);
 464           return;
 465         }
 466       }
 467     }
 468   }
 469 
 470   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 471     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 472   }
 473 
 474   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 475   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);
 476 
 477   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 478   _num_active_tasks = _max_num_tasks;
 479 
 480   for (uint i = 0; i < _max_num_tasks; ++i) {
 481     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 482     task_queue->initialize();
 483     _task_queues->register_queue(i, task_queue);
 484 
 485     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats, _g1h->max_regions());
 486 
 487     _accum_task_vtime[i] = 0.0;
 488   }
 489 
 490   reset_at_marking_complete();
 491   _completed_initialization = true;
 492 }
 493 
 494 void G1ConcurrentMark::reset() {
 495   _has_aborted = false;
 496 
 497   reset_marking_for_restart();
 498 
 499   // Reset all tasks, since different phases will use different numbers of active
 500   // threads. So, it's easiest to have all of them ready.
 501   for (uint i = 0; i < _max_num_tasks; ++i) {
 502     _tasks[i]->reset(_next_mark_bitmap);
 503   }
 504 
 505   uint max_regions = _g1h->max_regions();
 506   for (uint i = 0; i < max_regions; i++) {
 507     _top_at_rebuild_starts[i] = NULL;
 508     _region_mark_stats[i].clear();
 509   }
 510 }
 511 
 512 void G1ConcurrentMark::clear_statistics_in_region(uint region_idx) {
 513   for (uint j = 0; j < _max_num_tasks; ++j) {
 514     _tasks[j]->clear_mark_stats_cache(region_idx);
 515   }
 516   _top_at_rebuild_starts[region_idx] = NULL;
 517   _region_mark_stats[region_idx].clear();
 518 }
 519 
 520 void G1ConcurrentMark::clear_statistics(HeapRegion* r) {
 521   uint const region_idx = r->hrm_index();
 522   if (r->is_humongous()) {
 523     assert(r->is_starts_humongous(), "Should not get a continues humongous region here");
 524     uint const size_in_regions = (uint)_g1h->humongous_obj_size_in_regions(oop(r->humongous_start_region()->bottom())->size());
 525     for (uint j = region_idx; j < (region_idx + size_in_regions); j++) {
 526       clear_statistics_in_region(j);
 527     }
 528   } else {
 529     clear_statistics_in_region(region_idx);
 530   }
 531 }
 532 
 533 static void clear_mark_if_set(G1CMBitMap* bitmap, HeapWord* addr) {
 534   if (bitmap->is_marked(addr)) {
 535     bitmap->clear(addr);
 536   }
 537 }
 538 
 539 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(HeapRegion* r) {
 540   assert_at_safepoint_on_vm_thread();
 541 
 542   // Need to clear all mark bits of the humongous object.
 543   clear_mark_if_set(_prev_mark_bitmap, r->bottom());
 544   clear_mark_if_set(_next_mark_bitmap, r->bottom());
 545 
 546   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
 547     return;
 548   }
 549 
 550   // Clear any statistics about the region gathered so far.
 551   clear_statistics(r);
 552 }
 553 
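     // Resets the global marking data structures: empties the global mark stack,
     // expanding it and clearing the per-region liveness statistics if it had
     // overflowed, and then resets the global finger and all local task queues.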
 554 void G1ConcurrentMark::reset_marking_for_restart() {
 555   _global_mark_stack.set_empty();
 556 
 557   // Expand the marking stack, if we have to and if we can.
 558   if (has_overflown()) {
 559     _global_mark_stack.expand();
 560 
 561     uint max_regions = _g1h->max_regions();
 562     for (uint i = 0; i < max_regions; i++) {
 563       _region_mark_stats[i].clear_during_overflow();
 564     }
 565   }
 566 
 567   clear_has_overflown();
 568   _finger = _heap.start();
 569 
 570   for (uint i = 0; i < _max_num_tasks; ++i) {
 571     G1CMTaskQueue* queue = _task_queues->queue(i);
 572     queue->set_empty();
 573   }
 574 }
 575 
 576 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 577   assert(active_tasks <= _max_num_tasks, "we should not have more");
 578 
 579   _num_active_tasks = active_tasks;
 580   // Need to update the three data structures below according to the
 581   // number of active threads for this phase.
 582   _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
 583   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 584   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 585 }
 586 
 587 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 588   set_concurrency(active_tasks);
 589 
 590   _concurrent = concurrent;
 591 
 592   if (!concurrent) {
 593     // At this point we should be in a STW phase, with marking completed.
 594     assert_at_safepoint_on_vm_thread();
 595     assert(out_of_regions(),
 596            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 597            p2i(_finger), p2i(_heap.end()));
 598   }
 599 }
 600 
 601 void G1ConcurrentMark::reset_at_marking_complete() {
 602   // We set the global marking state to some default values when we're
 603   // not doing marking.
 604   reset_marking_for_restart();
 605   _num_active_tasks = 0;
 606 }
 607 
 608 G1ConcurrentMark::~G1ConcurrentMark() {
 609   FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
 610   FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
 611   // The G1ConcurrentMark instance is never freed.
 612   ShouldNotReachHere();
 613 }
 614 
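     // Gang task that clears a mark bitmap in chunks of chunk_size() bytes per
     // heap region. If 'suspendible', the workers join the suspendible thread set
     // and may yield between chunks; in that mode clearing can terminate early
     // when the concurrent cycle itself has been aborted.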
 615 class G1ClearBitMapTask : public AbstractGangTask {
 616 public:
 617   static size_t chunk_size() { return M; }
 618 
 619 private:
 620   // Heap region closure used for clearing the given mark bitmap.
 621   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 622   private:
 623     G1CMBitMap* _bitmap;
 624     G1ConcurrentMark* _cm;
 625   public:
 626     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _bitmap(bitmap), _cm(cm) {
 627     }
 628 
 629     virtual bool do_heap_region(HeapRegion* r) {
 630       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 631 
 632       HeapWord* cur = r->bottom();
 633       HeapWord* const end = r->end();
 634 
 635       while (cur < end) {
 636         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 637         _bitmap->clear_range(mr);
 638 
 639         cur += chunk_size_in_words;
 640 
 641         // Abort iteration if after yielding the marking has been aborted.
 642         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 643           return true;
 644         }
 645         // Repeat the asserts from before the start of the closure. We will do them
 646         // as asserts here to minimize their overhead on the product. However, we
 647         // will have them as guarantees at the beginning / end of the bitmap
 648         // clearing to get some checking in the product.
 649         assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
 650         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 651       }
 652       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 653 
 654       return false;
 655     }
 656   };
 657 
 658   G1ClearBitmapHRClosure _cl;
 659   HeapRegionClaimer _hr_claimer;
 660   bool _suspendible; // If the task is suspendible, workers must join the STS.
 661 
 662 public:
 663   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 664     AbstractGangTask("G1 Clear Bitmap"),
 665     _cl(bitmap, suspendible ? cm : NULL),
 666     _hr_claimer(n_workers),
 667     _suspendible(suspendible)
 668   { }
 669 
 670   void work(uint worker_id) {
 671     SuspendibleThreadSetJoiner sts_join(_suspendible);
 672     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 673   }
 674 
 675   bool is_complete() {
 676     return _cl.is_complete();
 677   }
 678 };
 679 
 680 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 681   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 682 
 683   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 684   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 685 
 686   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 687 
 688   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 689 
 690   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 691   workers->run_task(&cl, num_workers);
 692   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 693 }
 694 
 695 void G1ConcurrentMark::cleanup_for_next_mark() {
 696   // Make sure that the concurrent mark thread looks to still be in
 697   // the current cycle.
 698   guarantee(cm_thread()->during_cycle(), "invariant");
 699 
 700   // We are finishing up the current cycle by clearing the next
 701   // marking bitmap and getting it ready for the next cycle. During
 702   // this time no other cycle can start. So, let's make sure that this
 703   // is the case.
 704   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 705 
 706   clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);
 707 
 708   // Repeat the asserts from above.
 709   guarantee(cm_thread()->during_cycle(), "invariant");
 710   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 711 }
 712 
 713 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 714   assert_at_safepoint_on_vm_thread();
 715   clear_bitmap(_prev_mark_bitmap, workers, false);
 716 }
 717 
 718 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 719   G1CMBitMap* _bitmap;
 720  public:
 721   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 722   }
 723 
 724   virtual bool do_heap_region(HeapRegion* r) {
 725     // This closure can be called concurrently with the mutator, so we must make sure
 726     // that the result of the get_next_marked_addr() call is compared to the
 727     // limit passed to it in order to detect any marked bits.
 728     // end never changes in G1.
 729     HeapWord* end = r->end();
 730     return _bitmap->get_next_marked_addr(r->bottom(), end) != end;
 731   }
 732 };
 733 
 734 bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
 735   CheckBitmapClearHRClosure cl(_next_mark_bitmap);
 736   _g1h->heap_region_iterate(&cl);
 737   return cl.is_complete();
 738 }
 739 
 740 class NoteStartOfMarkHRClosure : public HeapRegionClosure {
 741 public:
 742   bool do_heap_region(HeapRegion* r) {
 743     r->note_start_of_marking();
 744     return false;
 745   }
 746 };
 747 
 748 void G1ConcurrentMark::pre_initial_mark() {
 749   // Initialize marking structures. This has to be done in a STW phase.
 750   reset();
 751 
 752   // For each region note start of marking.
 753   NoteStartOfMarkHRClosure startcl;
 754   _g1h->heap_region_iterate(&startcl);
 755 }
 756 
 757 
 758 void G1ConcurrentMark::post_initial_mark() {
 759   // Start Concurrent Marking weak-reference discovery.
 760   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 761   // enable ("weak") refs discovery
 762   rp->enable_discovery();
 763   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 764 
 765   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 766   // This is the start of the marking cycle; we expect all
 767   // threads to have SATB queues with active set to false.
 768   satb_mq_set.set_active_all_threads(true, /* new active value */
 769                                      false /* expected_active */);
 770 
 771   _root_regions.prepare_for_scan();
 772 
 773   // update_g1_committed() will be called at the end of an evac pause
 774   // when marking is on. So, it's also called at the end of the
 775   // initial-mark pause to update the heap end, if the heap expands
 776   // during it. No need to call it here.
 777 }
 778 
 779 /*
 780  * Notice that in the next two methods, we actually leave the STS
 781  * during the barrier sync and join it immediately afterwards. If we
 782  * do not do this, the following deadlock can occur: one thread could
 783  * be in the barrier sync code, waiting for the other thread to also
 784  * sync up, whereas another one could be trying to yield, while also
 785  * waiting for the other threads to sync up too.
 786  *
 787  * Note, however, that this code is also used during remark and in
 788  * this case we should not attempt to leave / enter the STS, otherwise
 789  * we'll either hit an assert (debug / fastdebug) or deadlock
 790  * (product). So we should only leave / enter the STS if we are
 791  * operating concurrently.
 792  *
 793  * Because the thread that does the sync barrier has left the STS, it is
 794  * possible for it to be suspended for a Full GC or for an evacuation pause
 795  * to occur while it waits. This is actually safe, since entering the sync
 796  * barrier is one of the last things do_marking_step() does, and it doesn't
 797  * manipulate any data structures afterwards.
 798  */
 799 
 800 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 801   bool barrier_aborted;
 802   {
 803     SuspendibleThreadSetLeaver sts_leave(concurrent());
 804     barrier_aborted = !_first_overflow_barrier_sync.enter();
 805   }
 806 
 807   // at this point everyone should have synced up and not be doing any
 808   // more work
 809 
 810   if (barrier_aborted) {
 811     // If the barrier aborted we ignore the overflow condition and
 812     // just abort the whole marking phase as quickly as possible.
 813     return;
 814   }
 815 }
 816 
 817 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 818   SuspendibleThreadSetLeaver sts_leave(concurrent());
 819   _second_overflow_barrier_sync.enter();
 820 
 821   // at this point everything should be re-initialized and ready to go
 822 }
 823 
 824 class G1CMConcurrentMarkingTask : public AbstractGangTask {
 825   G1ConcurrentMark*     _cm;
 826 
 827 public:
 828   void work(uint worker_id) {
 829     assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
 830     ResourceMark rm;
 831 
 832     double start_vtime = os::elapsedVTime();
 833 
 834     {
 835       SuspendibleThreadSetJoiner sts_join;
 836 
 837       assert(worker_id < _cm->active_tasks(), "invariant");
 838 
 839       G1CMTask* task = _cm->task(worker_id);
 840       task->record_start_time();
 841       if (!_cm->has_aborted()) {
 842         do {
 843           task->do_marking_step(G1ConcMarkStepDurationMillis,
 844                                 true  /* do_termination */,
 845                                 false /* is_serial*/);
 846 
 847           _cm->do_yield_check();
 848         } while (!_cm->has_aborted() && task->has_aborted());
 849       }
 850       task->record_end_time();
 851       guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
 852     }
 853 
 854     double end_vtime = os::elapsedVTime();
 855     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 856   }
 857 
 858   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
 859       AbstractGangTask("Concurrent Mark"), _cm(cm) { }
 860 
 861   ~G1CMConcurrentMarkingTask() { }
 862 };
 863 
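     // Returns the number of concurrent marking workers to activate: the full
     // _max_concurrent_workers unless dynamic GC thread counts are in effect, in
     // which case AdaptiveSizePolicy chooses a value between 1 and that maximum.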
 864 uint G1ConcurrentMark::calc_active_marking_workers() {
 865   uint result = 0;
 866   if (!UseDynamicNumberOfGCThreads ||
 867       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 868        !ForceDynamicNumberOfGCThreads)) {
 869     result = _max_concurrent_workers;
 870   } else {
 871     result =
 872       AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
 873                                                       1, /* Minimum workers */
 874                                                       _num_concurrent_workers,
 875                                                       Threads::number_of_non_daemon_threads());
 876     // Don't scale the result down by scale_concurrent_worker_threads() because
 877     // that scaling has already gone into "_max_concurrent_workers".
 878   }
 879   assert(result > 0 && result <= _max_concurrent_workers,
 880          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
 881          _max_concurrent_workers, result);
 882   return result;
 883 }
 884 
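     // Scans a single root region (currently always a survivor region) from
     // bottom() to top(), applying G1RootRegionScanClosure to every reference in
     // every object, marking the objects these references point to.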
 885 void G1ConcurrentMark::scan_root_region(HeapRegion* hr, uint worker_id) {
 886   // Currently, only survivors can be root regions.
 887   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 888   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 889 
 890   const uintx interval = PrefetchScanIntervalInBytes;
 891   HeapWord* curr = hr->bottom();
 892   const HeapWord* end = hr->top();
 893   while (curr < end) {
 894     Prefetch::read(curr, interval);
 895     oop obj = oop(curr);
 896     int size = obj->oop_iterate_size(&cl);
 897     assert(size == obj->size(), "sanity");
 898     curr += size;
 899   }
 900 }
 901 
 902 class G1CMRootRegionScanTask : public AbstractGangTask {
 903   G1ConcurrentMark* _cm;
 904 public:
 905   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 906     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
 907 
 908   void work(uint worker_id) {
 909     assert(Thread::current()->is_ConcurrentGC_thread(),
 910            "this should only be done by a conc GC thread");
 911 
 912     G1CMRootRegions* root_regions = _cm->root_regions();
 913     HeapRegion* hr = root_regions->claim_next();
 914     while (hr != NULL) {
 915       _cm->scan_root_region(hr, worker_id);
 916       hr = root_regions->claim_next();
 917     }
 918   }
 919 };
 920 
 921 void G1ConcurrentMark::scan_root_regions() {
 922   // scan_in_progress() will have been set to true only if there was
 923   // at least one root region to scan. So, if it's false, we
 924   // should not attempt to do any further work.
 925   if (root_regions()->scan_in_progress()) {
 926     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 927 
 928     _num_concurrent_workers = MIN2(calc_active_marking_workers(),
 929                                    // We distribute work on a per-region basis, so starting
 930                                    // more threads than that is useless.
 931                                    root_regions()->num_root_regions());
 932     assert(_num_concurrent_workers <= _max_concurrent_workers,
 933            "Maximum number of marking threads exceeded");
 934 
 935     G1CMRootRegionScanTask task(this);
 936     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
 937                         task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
 938     _concurrent_workers->run_task(&task, _num_concurrent_workers);
 939 
 940     // It's possible that has_aborted() is true here without actually
 941     // aborting the survivor scan earlier. This is OK as it's
 942     // mainly used for sanity checking.
 943     root_regions()->scan_finished();
 944   }
 945 }
 946 
 947 void G1ConcurrentMark::concurrent_cycle_start() {
 948   _gc_timer_cm->register_gc_start();
 949 
 950   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
 951 
 952   _g1h->trace_heap_before_gc(_gc_tracer_cm);
 953 }
 954 
 955 void G1ConcurrentMark::concurrent_cycle_end() {
 956   _g1h->collector_state()->set_clearing_next_bitmap(false);
 957 
 958   _g1h->trace_heap_after_gc(_gc_tracer_cm);
 959 
 960   if (has_aborted()) {
 961     log_info(gc, marking)("Concurrent Mark Abort");
 962     _gc_tracer_cm->report_concurrent_mode_failure();
 963   }
 964 
 965   _gc_timer_cm->register_gc_end();
 966 
 967   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
 968 }
 969 
 970 void G1ConcurrentMark::mark_from_roots() {
 971   _restart_for_overflow = false;
 972 
 973   _num_concurrent_workers = calc_active_marking_workers();
 974 
 975   uint active_workers = MAX2(1U, _num_concurrent_workers);
 976 
 977   // Setting active workers is not guaranteed since fewer
 978   // worker threads may currently exist and more may not be
 979   // available.
 980   active_workers = _concurrent_workers->update_active_workers(active_workers);
 981   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());
 982 
 983   // Parallel task terminator is set in "set_concurrency_and_phase()"
 984   set_concurrency_and_phase(active_workers, true /* concurrent */);
 985 
 986   G1CMConcurrentMarkingTask marking_task(this);
 987   _concurrent_workers->run_task(&marking_task);
 988   print_stats();
 989 }
 990 
 991 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) {
 992   G1HeapVerifier* verifier = _g1h->verifier();
 993 
 994   verifier->verify_region_sets_optional();
 995 
 996   if (VerifyDuringGC) {
 997     GCTraceTime(Debug, gc, phases) trace(caller, _gc_timer_cm);
 998 
 999     size_t const BufLen = 512;
1000     char buffer[BufLen];
1001 
1002     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1003     verifier->verify(type, vo, buffer);
1004   }
1005 
1006   verifier->check_bitmaps(caller);
1007 }
1008 
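     // Closure applied to every region during the Remark pause: using the
     // marking liveness of the region it asks the remembered set tracking policy
     // whether the region's remembered set should be rebuilt, counts the regions
     // selected, and records the region's top-at-rebuild-start value.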
1009 class G1UpdateRemSetTrackingBeforeRebuild : public HeapRegionClosure {
1010   G1CollectedHeap* _g1h;
1011   G1ConcurrentMark* _cm;
1012 
1013   uint _num_regions_selected_for_rebuild;  // The number of regions actually selected for rebuild.
1014 
1015   void update_remset_before_rebuild(HeapRegion* hr) {
1016     G1RemSetTrackingPolicy* tracking_policy = _g1h->g1_policy()->remset_tracker();
1017 
1018     size_t live_bytes = _cm->liveness(hr->hrm_index()) * HeapWordSize;
1019     bool selected_for_rebuild = tracking_policy->update_before_rebuild(hr, live_bytes);
1020     if (selected_for_rebuild) {
1021       _num_regions_selected_for_rebuild++;
1022     }
1023     _cm->update_top_at_rebuild_start(hr);
1024   }
1025 
1026 public:
1027   G1UpdateRemSetTrackingBeforeRebuild(G1CollectedHeap* g1h, G1ConcurrentMark* cm) :
1028     _g1h(g1h), _cm(cm), _num_regions_selected_for_rebuild(0) { }
1029 
1030   virtual bool do_heap_region(HeapRegion* r) {
1031     update_remset_before_rebuild(r);
1032     return false;
1033   }
1034 
1035   uint num_selected_for_rebuild() const { return _num_regions_selected_for_rebuild; }
1036 };
1037 
1038 class G1UpdateRemSetTrackingAfterRebuild : public HeapRegionClosure {
1039   G1CollectedHeap* _g1h;
1040 public:
1041   G1UpdateRemSetTrackingAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1042 
1043   virtual bool do_heap_region(HeapRegion* r) {
1044     _g1h->g1_policy()->remset_tracker()->update_after_rebuild(r);
1045     return false;
1046   }
1047 };
1048 
1049 void G1ConcurrentMark::remark() {
1050   assert_at_safepoint_on_vm_thread();
1051 
1052   // If a full collection has happened, we should not continue. However we might
1053   // have ended up here as the Remark VM operation has been scheduled already.
1054   if (has_aborted()) {
1055     return;
1056   }
1057 
1058   G1Policy* g1p = _g1h->g1_policy();
1059   g1p->record_concurrent_mark_remark_start();
1060 
1061   double start = os::elapsedTime();
1062 
1063   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark before");
1064 
1065   {
1066     GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1067     finalize_marking();
1068   }
1069 
1070   double mark_work_end = os::elapsedTime();
1071 
1072   bool const mark_finished = !has_overflown();
1073   if (mark_finished) {
1074     weak_refs_work(false /* clear_all_soft_refs */);
1075 
1076     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1077     // We're done with marking.
1078     // This is the end of the marking cycle; we expect all
1079     // threads to have SATB queues with active set to true.
1080     satb_mq_set.set_active_all_threads(false, /* new active value */
1081                                        true /* expected_active */);
1082 
1083     {
1084       GCTraceTime(Debug, gc, phases) trace("Flush Task Caches");
1085       flush_all_task_caches();
1086     }
1087 
1088     {
1089       GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking Before Rebuild");
1090       G1UpdateRemSetTrackingBeforeRebuild cl(_g1h, this);
1091       _g1h->heap_region_iterate(&cl);
1092       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1093                                       _g1h->num_regions(), cl.num_selected_for_rebuild());
1094     }
1095 
1096     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UseNextMarking, "Remark after");
1097 
1098     assert(!restart_for_overflow(), "sanity");
1099     // Completely reset the marking state since marking completed
1100     reset_at_marking_complete();
1101   } else {
1102     // We overflowed.  Restart concurrent marking.
1103     _restart_for_overflow = true;
1104 
1105     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyOption_G1UsePrevMarking, "Remark overflow");
1106 
1107     // Clear the marking state because we will be restarting
1108     // marking due to overflowing the global mark stack.
1109     reset_marking_for_restart();
1110   }
1111 
1112   {
1113     GCTraceTime(Debug, gc, phases) trace("Report Object Count");
1114     report_object_count();
1115   }
1116 
1117   // Statistics
1118   double now = os::elapsedTime();
1119   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1120   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1121   _remark_times.add((now - start) * 1000.0);
1122 
1123   g1p->record_concurrent_mark_remark_end();
1124 }
1125 
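     // Gang task for the Cleanup pause: frees regions that marking found to be
     // completely empty (no live data), and performs per-region remembered set
     // cleanup work for the remaining regions.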
1126 class G1CleanupTask : public AbstractGangTask {
1127   // Per-region work during the Cleanup pause.
1128   class G1CleanupRegionsClosure : public HeapRegionClosure {
1129     G1CollectedHeap* _g1h;
1130     size_t _freed_bytes;
1131     FreeRegionList* _local_cleanup_list;
1132     uint _old_regions_removed;
1133     uint _humongous_regions_removed;
1134     HRRSCleanupTask* _hrrs_cleanup_task;
1135 
1136   public:
1137     G1CleanupRegionsClosure(G1CollectedHeap* g1,
1138                             FreeRegionList* local_cleanup_list,
1139                             HRRSCleanupTask* hrrs_cleanup_task) :
1140       _g1h(g1),
1141       _freed_bytes(0),
1142       _local_cleanup_list(local_cleanup_list),
1143       _old_regions_removed(0),
1144       _humongous_regions_removed(0),
1145       _hrrs_cleanup_task(hrrs_cleanup_task) { }
1146 
1147     size_t freed_bytes() { return _freed_bytes; }
1148     const uint old_regions_removed() { return _old_regions_removed; }
1149     const uint humongous_regions_removed() { return _humongous_regions_removed; }
1150 
1151     bool do_heap_region(HeapRegion *hr) {
1152       hr->note_end_of_marking();
1153 
1154       if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young() && !hr->is_archive()) {
1155         _freed_bytes += hr->used();
1156         hr->set_containing_set(NULL);
1157         if (hr->is_humongous()) {
1158           _humongous_regions_removed++;
1159           _g1h->free_humongous_region(hr, _local_cleanup_list);
1160         } else {
1161           _old_regions_removed++;
1162           _g1h->free_region(hr, _local_cleanup_list, false /* skip_remset */, false /* skip_hcc */, true /* locked */);
1163         }
1164         hr->clear_cardtable();
1165         _g1h->concurrent_mark()->clear_statistics_in_region(hr->hrm_index());
1166         log_trace(gc)("Reclaimed empty region %u (%s) bot " PTR_FORMAT, hr->hrm_index(), hr->get_short_type_str(), p2i(hr->bottom()));
1167       } else {
1168         hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1169       }
1170 
1171       return false;
1172     }
1173   };
1174 
1175   G1CollectedHeap* _g1h;
1176   FreeRegionList* _cleanup_list;
1177   HeapRegionClaimer _hrclaimer;
1178 
1179 public:
1180   G1CleanupTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1181     AbstractGangTask("G1 Cleanup"),
1182     _g1h(g1h),
1183     _cleanup_list(cleanup_list),
1184     _hrclaimer(n_workers) {
1185 
1186     HeapRegionRemSet::reset_for_cleanup_tasks();
1187   }
1188 
1189   void work(uint worker_id) {
1190     FreeRegionList local_cleanup_list("Local Cleanup List");
1191     HRRSCleanupTask hrrs_cleanup_task;
1192     G1CleanupRegionsClosure cl(_g1h,
1193                                &local_cleanup_list,
1194                                &hrrs_cleanup_task);
1195     _g1h->heap_region_par_iterate_from_worker_offset(&cl, &_hrclaimer, worker_id);
1196     assert(cl.is_complete(), "Shouldn't have aborted!");
1197 
1198     // Now update the old/humongous region sets
1199     _g1h->remove_from_old_sets(cl.old_regions_removed(), cl.humongous_regions_removed());
1200     {
1201       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1202       _g1h->decrement_summary_bytes(cl.freed_bytes());
1203 
1204       _cleanup_list->add_ordered(&local_cleanup_list);
1205       assert(local_cleanup_list.is_empty(), "post-condition");
1206 
1207       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1208     }
1209   }
1210 };
1211 
1212 void G1ConcurrentMark::reclaim_empty_regions() {
1213   WorkGang* workers = _g1h->workers();
1214   FreeRegionList empty_regions_list("Empty Regions After Mark List");
1215 
1216   G1CleanupTask cl(_g1h, &empty_regions_list, workers->active_workers());
1217   workers->run_task(&cl);
1218 
1219   if (!empty_regions_list.is_empty()) {
1220     log_debug(gc)("Reclaimed %u empty regions", empty_regions_list.length());
1221     // Now print the empty regions list.
1222     G1HRPrinter* hrp = _g1h->hr_printer();
1223     if (hrp->is_active()) {
1224       FreeRegionListIterator iter(&empty_regions_list);
1225       while (iter.more_available()) {
1226         HeapRegion* hr = iter.get_next();
1227         hrp->cleanup(hr);
1228       }
1229     }
1230     // And actually make them available.
1231     _g1h->prepend_to_freelist(&empty_regions_list);
1232   }
1233 }
1234 
1235 void G1ConcurrentMark::cleanup() {
1236   assert_at_safepoint_on_vm_thread();
1237 
1238   // If a full collection has happened, we shouldn't do this.
1239   if (has_aborted()) {
1240     return;
1241   }
1242 
1243   G1Policy* g1p = _g1h->g1_policy();
1244   g1p->record_concurrent_mark_cleanup_start();
1245 
1246   double start = os::elapsedTime();
1247 
1248   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UseNextMarking, "Cleanup before");
1249 
1250   {
1251     GCTraceTime(Debug, gc, phases) trace("Update Remembered Set Tracking After Rebuild");
1252     G1UpdateRemSetTrackingAfterRebuild cl(_g1h);
1253     _g1h->heap_region_iterate(&cl);
1254   }
1255 
1256   if (log_is_enabled(Trace, gc, liveness)) {
1257     G1PrintRegionLivenessInfoClosure cl("Post-Cleanup");
1258     _g1h->heap_region_iterate(&cl);
1259   }
1260 
1261   // Install newly created mark bitmap as "prev".
1262   swap_mark_bitmaps();
1263   {
1264     GCTraceTime(Debug, gc, phases) trace("Reclaim Empty Regions");
1265     reclaim_empty_regions();
1266   }
1267 
1268   // Cleanup will have freed any regions completely full of garbage.
1269   // Update the soft reference policy with the new heap occupancy.
1270   Universe::update_heap_info_at_gc();
1271 
1272   // Clean out dead classes and update Metaspace sizes.
1273   if (ClassUnloadingWithConcurrentMark) {
1274     GCTraceTime(Debug, gc, phases) trace("Purge Metaspace");
1275     ClassLoaderDataGraph::purge();
1276   }
1277   MetaspaceGC::compute_new_size();
1278 
1279   // We reclaimed old regions so we should calculate the sizes to make
1280   // sure we update the old gen/space data.
1281   _g1h->g1mm()->update_sizes();
1282 
1283   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyOption_G1UsePrevMarking, "Cleanup after");
1284 
1285   // We need to make this be a "collection" so any collection pause that
1286   // races with it goes around and waits for Cleanup to finish.
1287   _g1h->increment_total_collections();
1288 
1289   // Local statistics
1290   double recent_cleanup_time = (os::elapsedTime() - start);
1291   _total_cleanup_time += recent_cleanup_time;
1292   _cleanup_times.add(recent_cleanup_time);
1293 
1294   {
1295     GCTraceTime(Debug, gc, phases) trace("Finalize Concurrent Mark Cleanup");
1296     _g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1297   }
1298 }
1299 
1300 // Supporting Object and Oop closures for reference discovery
1301 // and processing during marking
1302 
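     // An object is considered alive if it lies outside the G1 reserved heap or
     // if it has not been determined dead by the current marking (is_obj_ill()).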
1303 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1304   HeapWord* addr = (HeapWord*)obj;
1305   return addr != NULL &&
1306          (!_g1h->is_in_g1_reserved(addr) || !_g1h->is_obj_ill(obj));
1307 }
1308 
1309 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1310 // Uses the G1CMTask associated with a worker thread (for serial reference
1311 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1312 // trace referent objects.
1313 //
1314 // Using the G1CMTask and embedded local queues avoids having the worker
1315 // threads operating on the global mark stack. This reduces the risk
1316 // of overflowing the stack - which we would rather avoid at this late
1317 // stage. Also using the tasks' local queues removes the potential
1318 // of the workers interfering with each other that could occur if
1319 // operating on the global stack.
1320 
1321 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1322   G1ConcurrentMark* _cm;
1323   G1CMTask*         _task;
1324   int               _ref_counter_limit;
1325   int               _ref_counter;
1326   bool              _is_serial;
1327 public:
1328   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1329     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1330     _is_serial(is_serial) {
1331     assert(_ref_counter_limit > 0, "sanity");
1332     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1333     _ref_counter = _ref_counter_limit;
1334   }
1335 
1336   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1337   virtual void do_oop(      oop* p) { do_oop_work(p); }
1338 
1339   template <class T> void do_oop_work(T* p) {
1340     if (!_cm->has_overflown()) {
1341       _task->deal_with_reference(p);
1342       _ref_counter--;
1343 
1344       if (_ref_counter == 0) {
1345         // We have dealt with _ref_counter_limit references, pushing them
1346         // and objects reachable from them on to the local stack (and
1347         // possibly the global stack). Call G1CMTask::do_marking_step() to
1348         // process these entries.
1349         //
1350         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1351         // there's nothing more to do (i.e. we're done with the entries that
1352         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1353         // above) or we overflow.
1354         //
1355         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1356         // flag while there may still be some work to do. (See the comment at
1357         // the beginning of G1CMTask::do_marking_step() for those conditions -
1358         // one of which is reaching the specified time target.) It is only
1359         // when G1CMTask::do_marking_step() returns without setting the
1360         // has_aborted() flag that the marking step has completed.
1361         do {
1362           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1363           _task->do_marking_step(mark_step_duration_ms,
1364                                  false      /* do_termination */,
1365                                  _is_serial);
1366         } while (_task->has_aborted() && !_cm->has_overflown());
1367         _ref_counter = _ref_counter_limit;
1368       }
1369     }
1370   }
1371 };
1372 
1373 // 'Drain' oop closure used by both serial and parallel reference processing.
1374 // Uses the G1CMTask associated with a given worker thread (for serial
1375 // reference processing the G1CMTask for worker 0 is used). Calls the
1376 // do_marking_step routine, with an unbelievably large timeout value,
1377 // to drain the marking data structures of the remaining entries
1378 // added by the 'keep alive' oop closure above.
1379 
1380 class G1CMDrainMarkingStackClosure : public VoidClosure {
1381   G1ConcurrentMark* _cm;
1382   G1CMTask*         _task;
1383   bool              _is_serial;
1384  public:
1385   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1386     _cm(cm), _task(task), _is_serial(is_serial) {
1387     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1388   }
1389 
1390   void do_void() {
1391     do {
1392       // We call G1CMTask::do_marking_step() to completely drain the local
1393       // and global marking stacks of entries pushed by the 'keep alive'
1394       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1395       //
1396       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1397       // if there's nothing more to do (i.e. we've completely drained the
1398       // entries that were pushed as a result of applying the 'keep alive'
1399       // closure to the entries on the discovered ref lists) or we overflow
1400       // the global marking stack.
1401       //
1402       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1403       // flag while there may still be some work to do. (See the comment at
1404       // the beginning of G1CMTask::do_marking_step() for those conditions -
1405       // one of which is reaching the specified time target.) It is only
1406       // when G1CMTask::do_marking_step() returns without setting the
1407       // has_aborted() flag that the marking step has completed.
1408 
1409       _task->do_marking_step(1000000000.0 /* something very large */,
1410                              true         /* do_termination */,
1411                              _is_serial);
1412     } while (_task->has_aborted() && !_cm->has_overflown());
1413   }
1414 };
1415 
1416 // Implementation of AbstractRefProcTaskExecutor for parallel
1417 // reference processing at the end of G1 concurrent marking
1418 
1419 class G1CMRefProcTaskExecutor : public AbstractRefProcTaskExecutor {
1420 private:
1421   G1CollectedHeap*  _g1h;
1422   G1ConcurrentMark* _cm;
1423   WorkGang*         _workers;
1424   uint              _active_workers;
1425 
1426 public:
1427   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1428                           G1ConcurrentMark* cm,
1429                           WorkGang* workers,
1430                           uint n_workers) :
1431     _g1h(g1h), _cm(cm),
1432     _workers(workers), _active_workers(n_workers) { }
1433 
1434   // Executes the given task using concurrent marking worker threads.
1435   virtual void execute(ProcessTask& task);
1436   virtual void execute(EnqueueTask& task);
1437 };
1438 
1439 class G1CMRefProcTaskProxy : public AbstractGangTask {
1440   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1441   ProcessTask&      _proc_task;
1442   G1CollectedHeap*  _g1h;
1443   G1ConcurrentMark* _cm;
1444 
1445 public:
1446   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1447                        G1CollectedHeap* g1h,
1448                        G1ConcurrentMark* cm) :
1449     AbstractGangTask("Process reference objects in parallel"),
1450     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1451     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1452     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1453   }
1454 
1455   virtual void work(uint worker_id) {
1456     ResourceMark rm;
1457     HandleMark hm;
1458     G1CMTask* task = _cm->task(worker_id);
1459     G1CMIsAliveClosure g1_is_alive(_g1h);
1460     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1461     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1462 
1463     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1464   }
1465 };
1466 
1467 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1468   assert(_workers != NULL, "Need parallel worker threads.");
1469   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1470 
1471   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1472 
1473   // We need to reset the concurrency level before each
1474   // proxy task execution, so that the termination protocol
1475   // and overflow handling in G1CMTask::do_marking_step() know
1476   // how many workers to wait for.
1477   _cm->set_concurrency(_active_workers);
1478   _workers->run_task(&proc_task_proxy);
1479 }
1480 
1481 class G1CMRefEnqueueTaskProxy : public AbstractGangTask {
1482   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1483   EnqueueTask& _enq_task;
1484 
1485 public:
1486   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1487     AbstractGangTask("Enqueue reference objects in parallel"),
1488     _enq_task(enq_task) { }
1489 
1490   virtual void work(uint worker_id) {
1491     _enq_task.work(worker_id);
1492   }
1493 };
1494 
1495 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1496   assert(_workers != NULL, "Need parallel worker threads.");
1497   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1498 
1499   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1500 
1501   // Not strictly necessary but...
1502   //
1503   // We need to reset the concurrency level before each
1504   // proxy task execution, so that the termination protocol
1505   // and overflow handling in G1CMTask::do_marking_step() know
1506   // how many workers to wait for.
1507   _cm->set_concurrency(_active_workers);
1508   _workers->run_task(&enq_task_proxy);
1509 }
1510 
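     // Weak reference processing done during the Remark pause: process the
     // discovered References (serially or via the executor built below), run
     // the WeakProcessor on the remaining weak roots, and finally either
     // unload classes or do the cheaper partial cleaning, depending on
     // ClassUnloadingWithConcurrentMark.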
1511 void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
1512   ResourceMark rm;
1513   HandleMark   hm;
1514 
1515   // Is alive closure.
1516   G1CMIsAliveClosure g1_is_alive(_g1h);
1517 
1518   // Inner scope to exclude the cleaning of the string and symbol
1519   // tables from the displayed time.
1520   {
1521     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1522 
1523     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1524 
1525     // See the comment in G1CollectedHeap::ref_processing_init()
1526     // about how reference processing currently works in G1.
1527 
1528     // Set the soft reference policy
1529     rp->setup_policy(clear_all_soft_refs);
1530     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1531 
1532     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1533     // in serial reference processing. Note these closures are also
1534     // used for serially processing (by the current thread) the
1535     // JNI references during parallel reference processing.
1536     //
1537     // These closures do not need to synchronize with the worker
1538     // threads involved in parallel reference processing as these
1539     // instances are executed serially by the current thread (i.e.
1540     // reference processing is not multi-threaded and is thus
1541     // performed by the current thread instead of a gang worker).
1542     //
1543     // The gang tasks involved in parallel reference processing create
1544     // their own instances of these closures, which do their own
1545     // synchronization among themselves.
1546     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1547     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1548 
1549     // We need at least one active thread. If reference processing
1550     // is not multi-threaded we use the current (VMThread) thread;
1551     // otherwise we use the work gang from the G1CollectedHeap and
1552     // we utilize all the worker threads we can.
1553     bool processing_is_mt = rp->processing_is_mt();
1554     uint active_workers = (processing_is_mt ? _g1h->workers()->active_workers() : 1U);
1555     active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
1556 
1557     // Parallel processing task executor.
1558     G1CMRefProcTaskExecutor par_task_executor(_g1h, this,
1559                                               _g1h->workers(), active_workers);
1560     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1561 
1562     // Set the concurrency level. The phase was already set prior to
1563     // executing the remark task.
1564     set_concurrency(active_workers);
1565 
1566     // Set the degree of MT processing here.  If the discovery was done MT,
1567     // the number of threads involved during discovery could differ from
1568     // the number of active workers.  This is OK as long as the discovered
1569     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1570     rp->set_active_mt_degree(active_workers);
1571 
1572     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
1573 
1574     // Process the weak references.
1575     const ReferenceProcessorStats& stats =
1576         rp->process_discovered_references(&g1_is_alive,
1577                                           &g1_keep_alive,
1578                                           &g1_drain_mark_stack,
1579                                           executor,
1580                                           &pt);
1581     _gc_tracer_cm->report_gc_reference_stats(stats);
1582     pt.print_all_references();
1583 
1584     // The do_oop work routines of the keep_alive and drain_marking_stack
1585     // oop closures will set the has_overflown flag if we overflow the
1586     // global marking stack.
1587 
1588     assert(has_overflown() || _global_mark_stack.is_empty(),
1589            "Mark stack should be empty (unless it has overflown)");
1590 
1591     assert(rp->num_q() == active_workers, "Number of reference queues should match the number of active workers");
1592 
1593     rp->enqueue_discovered_references(executor, &pt);
1594 
1595     rp->verify_no_references_recorded();
1596 
1597     pt.print_enqueue_phase();
1598 
1599     assert(!rp->discovery_enabled(), "Post condition");
1600   }
1601 
1602   assert(has_overflown() || _global_mark_stack.is_empty(),
1603          "Mark stack should be empty (unless it has overflown)");
1604 
1605   {
1606     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1607     WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
1608   }
1609 
1610   if (has_overflown()) {
1611     // We cannot trust g1_is_alive if the marking stack overflowed.
1612     return;
1613   }
1614 
1615   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1616 
1617   // Unload Klasses, String, Symbols, Code Cache, etc.
1618   if (ClassUnloadingWithConcurrentMark) {
1619     GCTraceTime(Debug, gc, phases) debug("Class Unloading", _gc_timer_cm);
1620     bool purged_classes = SystemDictionary::do_unloading(&g1_is_alive, _gc_timer_cm, false /* Defer cleaning */);
1621     _g1h->complete_cleaning(&g1_is_alive, purged_classes);
1622   } else {
1623     GCTraceTime(Debug, gc, phases) debug("Cleanup", _gc_timer_cm);
1624     // No need to clean string table and symbol table as they are treated as strong roots when
1625     // class unloading is disabled.
1626     _g1h->partial_cleaning(&g1_is_alive, false, false, G1StringDedup::is_enabled());
1627   }
1628 }
1629 
1630 void G1ConcurrentMark::report_object_count() {
1631   G1CMIsAliveClosure is_alive(_g1h);
1632   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1633 }
1634 
1635 void G1ConcurrentMark::swap_mark_bitmaps() {
1636   G1CMBitMap* temp = _prev_mark_bitmap;
1637   _prev_mark_bitmap = _next_mark_bitmap;
1638   _next_mark_bitmap = temp;
1639   _g1h->collector_state()->set_clearing_next_bitmap(true);
1640 }
1641 
1642 // Closure for marking entries in SATB buffers.
1643 class G1CMSATBBufferClosure : public SATBBufferClosure {
1644 private:
1645   G1CMTask* _task;
1646   G1CollectedHeap* _g1h;
1647 
1648   // This is very similar to G1CMTask::deal_with_reference, but with
1649   // more relaxed requirements for the argument, so this must be more
1650   // circumspect about treating the argument as an object.
1651   void do_entry(void* entry) const {
1652     _task->increment_refs_reached();
1653     oop const obj = static_cast<oop>(entry);
1654     _task->make_reference_grey(obj);
1655   }
1656 
1657 public:
1658   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1659     : _task(task), _g1h(g1h) { }
1660 
1661   virtual void do_buffer(void** buffer, size_t size) {
1662     for (size_t i = 0; i < size; ++i) {
1663       do_entry(buffer[i]);
1664     }
1665   }
1666 };
1667 
1668 class G1RemarkThreadsClosure : public ThreadClosure {
1669   G1CMSATBBufferClosure _cm_satb_cl;
1670   G1CMOopClosure _cm_cl;
1671   MarkingCodeBlobClosure _code_cl;
1672   int _thread_parity;
1673 
1674  public:
1675   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1676     _cm_satb_cl(task, g1h),
1677     _cm_cl(g1h, task),
1678     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1679     _thread_parity(Threads::thread_claim_parity()) {}
1680 
1681   void do_thread(Thread* thread) {
1682     if (thread->is_Java_thread()) {
1683       if (thread->claim_oops_do(true, _thread_parity)) {
1684         JavaThread* jt = (JavaThread*)thread;
1685 
1686         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1687         // however, oops reachable from nmethods have very complex lifecycles:
1688         // * Alive if on the stack of an executing method
1689         // * Weakly reachable otherwise
1690         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1691         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1692         jt->nmethods_do(&_code_cl);
1693 
1694         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1695       }
1696     } else if (thread->is_VM_thread()) {
1697       if (thread->claim_oops_do(true, _thread_parity)) {
1698         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1699       }
1700     }
1701   }
1702 };
1703 
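     // Gang task for the Remark pause: each worker first walks its share of the
     // threads (flushing SATB buffers and scanning nmethods via
     // G1RemarkThreadsClosure) and then calls do_marking_step() repeatedly until
     // marking completes or the global mark stack overflows.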
1704 class G1CMRemarkTask : public AbstractGangTask {
1705   G1ConcurrentMark* _cm;
1706 public:
1707   void work(uint worker_id) {
1708     G1CMTask* task = _cm->task(worker_id);
1709     task->record_start_time();
1710     {
1711       ResourceMark rm;
1712       HandleMark hm;
1713 
1714       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1715       Threads::threads_do(&threads_f);
1716     }
1717 
1718     do {
1719       task->do_marking_step(1000000000.0 /* something very large */,
1720                             true         /* do_termination       */,
1721                             false        /* is_serial            */);
1722     } while (task->has_aborted() && !_cm->has_overflown());
1723     // If we overflow, then we do not want to restart. We instead
1724     // want to abort remark and do concurrent marking again.
1725     task->record_end_time();
1726   }
1727 
1728   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1729     AbstractGangTask("Par Remark"), _cm(cm) {
1730     _cm->terminator()->reset_for_reuse(active_workers);
1731   }
1732 };
1733 
1734 void G1ConcurrentMark::finalize_marking() {
1735   ResourceMark rm;
1736   HandleMark   hm;
1737 
1738   _g1h->ensure_parsability(false);
1739 
1740   // this is remark, so we'll use up all active threads
1741   uint active_workers = _g1h->workers()->active_workers();
1742   set_concurrency_and_phase(active_workers, false /* concurrent */);
1743   // Leave _parallel_marking_threads at its
1744   // value originally calculated in the G1ConcurrentMark
1745   // constructor and pass values of the active workers
1746   // through the gang in the task.
1747 
1748   {
1749     StrongRootsScope srs(active_workers);
1750 
1751     G1CMRemarkTask remarkTask(this, active_workers);
1752     // We will start all available threads, even if we decide that the
1753     // active_workers will be fewer. The extra ones will just bail out
1754     // immediately.
1755     _g1h->workers()->run_task(&remarkTask);
1756   }
1757 
1758   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1759   guarantee(has_overflown() ||
1760             satb_mq_set.completed_buffers_num() == 0,
1761             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1762             BOOL_TO_STR(has_overflown()),
1763             satb_mq_set.completed_buffers_num());
1764 
1765   print_stats();
1766 }
1767 
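     // Flushes every task's region mark statistics cache into the global
     // statistics and logs the aggregate cache hit/miss ratio.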
1768 void G1ConcurrentMark::flush_all_task_caches() {
1769   size_t hits = 0;
1770   size_t misses = 0;
1771   for (uint i = 0; i < _max_num_tasks; i++) {
1772     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1773     hits += stats.first;
1774     misses += stats.second;
1775   }
1776   size_t sum = hits + misses;
1777   log_debug(gc, stats)("Mark stats cache hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %1.3lf",
1778                        hits, misses, percent_of(hits, sum));
1779 }
1780 
1781 void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
1782   _prev_mark_bitmap->clear_range(mr);
1783 }
1784 
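     // Claims a region for marking by atomically advancing the global finger to
     // the end of the region it currently points into. Returns NULL either when
     // the claimed region turns out to be empty (NTAMS at bottom), in which case
     // the caller should simply retry, or when the finger has reached the end of
     // the heap and there is nothing left to claim.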
1785 HeapRegion*
1786 G1ConcurrentMark::claim_region(uint worker_id) {
1787   // "checkpoint" the finger
1788   HeapWord* finger = _finger;
1789 
1790   while (finger < _heap.end()) {
1791     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1792 
1793     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1794     // Make sure that the reads below do not float before loading curr_region.
1795     OrderAccess::loadload();
1796     // heap_region_containing() above may return NULL as we always scan and
1797     // claim regions until the end of the heap. In this case, just jump to the next region.
1798     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1799 
1800     // Is the gap between reading the finger and doing the CAS too long?
1801     HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
1802     if (res == finger && curr_region != NULL) {
1803       // we succeeded
1804       HeapWord*   bottom        = curr_region->bottom();
1805       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1806 
1807       // Notice that _finger == end cannot be guaranteed here since
1808       // someone else might have moved the finger even further.
1809       assert(_finger >= end, "the finger should have moved forward");
1810 
1811       if (limit > bottom) {
1812         return curr_region;
1813       } else {
1814         assert(limit == bottom,
1815                "the region limit should be at bottom");
1816         // we return NULL and the caller should try calling
1817         // claim_region() again.
1818         return NULL;
1819       }
1820     } else {
1821       assert(_finger > finger, "the finger should have moved forward");
1822       // read it again
1823       finger = _finger;
1824     }
1825   }
1826 
1827   return NULL;
1828 }
1829 
1830 #ifndef PRODUCT
1831 class VerifyNoCSetOops {
1832   G1CollectedHeap* _g1h;
1833   const char* _phase;
1834   int _info;
1835 
1836 public:
1837   VerifyNoCSetOops(const char* phase, int info = -1) :
1838     _g1h(G1CollectedHeap::heap()),
1839     _phase(phase),
1840     _info(info)
1841   { }
1842 
1843   void operator()(G1TaskQueueEntry task_entry) const {
1844     if (task_entry.is_array_slice()) {
1845       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
1846       return;
1847     }
1848     guarantee(oopDesc::is_oop(task_entry.obj()),
1849               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1850               p2i(task_entry.obj()), _phase, _info);
1851     guarantee(!_g1h->is_in_cset(task_entry.obj()),
1852               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
1853               p2i(task_entry.obj()), _phase, _info);
1854   }
1855 };
1856 
1857 void G1ConcurrentMark::verify_no_cset_oops() {
1858   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
1859   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
1860     return;
1861   }
1862 
1863   // Verify entries on the global mark stack
1864   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1865 
1866   // Verify entries on the task queues
1867   for (uint i = 0; i < _max_num_tasks; ++i) {
1868     G1CMTaskQueue* queue = _task_queues->queue(i);
1869     queue->iterate(VerifyNoCSetOops("Queue", i));
1870   }
1871 
1872   // Verify the global finger
1873   HeapWord* global_finger = finger();
1874   if (global_finger != NULL && global_finger < _heap.end()) {
1875     // Since we always iterate over all regions, we might get a NULL HeapRegion
1876     // here.
1877     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
1878     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
1879               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1880               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1881   }
1882 
1883   // Verify the task fingers
1884   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1885   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1886     G1CMTask* task = _tasks[i];
1887     HeapWord* task_finger = task->finger();
1888     if (task_finger != NULL && task_finger < _heap.end()) {
1889       // See above note on the global finger verification.
1890       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
1891       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
1892                 !task_hr->in_collection_set(),
1893                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1894                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
1895     }
1896   }
1897 }
1898 #endif // PRODUCT
1899 
1900 void G1ConcurrentMark::rebuild_rem_set_concurrently() {
1901   _g1h->g1_rem_set()->rebuild_rem_set(this, _concurrent_workers, _worker_id_offset);
1902 }
1903 
1904 void G1ConcurrentMark::print_stats() {
1905   if (!log_is_enabled(Debug, gc, stats)) {
1906     return;
1907   }
1908   log_debug(gc, stats)("---------------------------------------------------------------------");
1909   for (size_t i = 0; i < _num_active_tasks; ++i) {
1910     _tasks[i]->print_stats();
1911     log_debug(gc, stats)("---------------------------------------------------------------------");
1912   }
1913 }
1914 
1915 void G1ConcurrentMark::concurrent_cycle_abort() {
1916   if (!cm_thread()->during_cycle() || _has_aborted) {
1917     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
1918     return;
1919   }
1920 
1921   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
1922   // concurrent bitmap clearing.
1923   {
1924     GCTraceTime(Debug, gc) debug("Clear Next Bitmap");
1925     clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
1926   }
1927   // Note we cannot clear the previous marking bitmap here
1928   // since VerifyDuringGC verifies the objects marked during
1929   // a full GC against the previous bitmap.
1930 
1931   // Empty mark stack
1932   reset_marking_for_restart();
1933   for (uint i = 0; i < _max_num_tasks; ++i) {
1934     _tasks[i]->clear_region_fields();
1935   }
1936   _first_overflow_barrier_sync.abort();
1937   _second_overflow_barrier_sync.abort();
1938   _has_aborted = true;
1939 
1940   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1941   satb_mq_set.abandon_partial_marking();
1942   // This can be called either during or outside marking; we'll read
1943   // the expected_active value from the SATB queue set.
1944   satb_mq_set.set_active_all_threads(
1945                                  false, /* new active value */
1946                                  satb_mq_set.is_active() /* expected_active */);
1947 }
1948 
1949 static void print_ms_time_info(const char* prefix, const char* name,
1950                                NumberSeq& ns) {
1951   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1952                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
1953   if (ns.num() > 0) {
1954     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
1955                            prefix, ns.sd(), ns.maximum());
1956   }
1957 }
1958 
1959 void G1ConcurrentMark::print_summary_info() {
1960   Log(gc, marking) log;
1961   if (!log.is_trace()) {
1962     return;
1963   }
1964 
1965   log.trace(" Concurrent marking:");
1966   print_ms_time_info("  ", "init marks", _init_times);
1967   print_ms_time_info("  ", "remarks", _remark_times);
1968   {
1969     print_ms_time_info("     ", "final marks", _remark_mark_times);
1970     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
1971 
1972   }
1973   print_ms_time_info("  ", "cleanups", _cleanup_times);
1974   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
1975             _total_cleanup_time, (_cleanup_times.num() > 0 ? _total_cleanup_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
1976   log.trace("  Total stop_world time = %8.2f s.",
1977             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
1978   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
1979             cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
1980 }
1981 
1982 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
1983   _concurrent_workers->print_worker_threads_on(st);
1984 }
1985 
1986 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
1987   _concurrent_workers->threads_do(tc);
1988 }
1989 
1990 void G1ConcurrentMark::print_on_error(outputStream* st) const {
1991   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
1992                p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
1993   _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
1994   _next_mark_bitmap->print_on_error(st, " Next Bits: ");
1995 }
1996 
1997 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
1998   ReferenceProcessor* result = g1h->ref_processor_cm();
1999   assert(result != NULL, "CM reference processor should not be NULL");
2000   return result;
2001 }
2002 
2003 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2004                                G1CMTask* task)
2005   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2006     _g1h(g1h), _task(task)
2007 { }
2008 
2009 void G1CMTask::setup_for_region(HeapRegion* hr) {
2010   assert(hr != NULL,
2011         "claim_region() should have filtered out NULL regions");
2012   _curr_region  = hr;
2013   _finger       = hr->bottom();
2014   update_region_limit();
2015 }
2016 
2017 void G1CMTask::update_region_limit() {
2018   HeapRegion* hr            = _curr_region;
2019   HeapWord* bottom          = hr->bottom();
2020   HeapWord* limit           = hr->next_top_at_mark_start();
2021 
2022   if (limit == bottom) {
2023     // The region was collected underneath our feet.
2024     // We set the finger to bottom to ensure that the bitmap
2025     // iteration that follows does not do anything.
2026     // (This is not a condition that holds when we set the region up,
2027     // as the region is not supposed to be empty in the first place.)
2028     _finger = bottom;
2029   } else if (limit >= _region_limit) {
2030     assert(limit >= _finger, "peace of mind");
2031   } else {
2032     assert(limit < _region_limit, "only way to get here");
2033     // This can happen under some pretty unusual circumstances.  An
2034     // evacuation pause empties the region underneath our feet (NTAMS
2035     // at bottom). We then do some allocation in the region (NTAMS
2036     // stays at bottom), followed by the region being used as a GC
2037     // alloc region (NTAMS will move to top() and the objects
2038     // originally below it will be grayed). All objects now marked in
2039     // the region are explicitly grayed, if below the global finger,
2040     // and in fact we do not need to scan anything else. So, we simply
2041     // set _finger to be limit to ensure that the bitmap iteration
2042     // doesn't do anything.
2043     _finger = limit;
2044   }
2045 
2046   _region_limit = limit;
2047 }
2048 
2049 void G1CMTask::giveup_current_region() {
2050   assert(_curr_region != NULL, "invariant");
2051   clear_region_fields();
2052 }
2053 
2054 void G1CMTask::clear_region_fields() {
2055   // Values for these three fields that indicate that we're not
2056   // holding on to a region.
2057   _curr_region   = NULL;
2058   _finger        = NULL;
2059   _region_limit  = NULL;
2060 }
2061 
2062 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2063   if (cm_oop_closure == NULL) {
2064     assert(_cm_oop_closure != NULL, "invariant");
2065   } else {
2066     assert(_cm_oop_closure == NULL, "invariant");
2067   }
2068   _cm_oop_closure = cm_oop_closure;
2069 }
2070 
2071 void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
2072   guarantee(next_mark_bitmap != NULL, "invariant");
2073   _next_mark_bitmap              = next_mark_bitmap;
2074   clear_region_fields();
2075 
2076   _calls                         = 0;
2077   _elapsed_time_ms               = 0.0;
2078   _termination_time_ms           = 0.0;
2079   _termination_start_time_ms     = 0.0;
2080 
2081   _mark_stats_cache.reset();
2082 }
2083 
2084 bool G1CMTask::should_exit_termination() {
2085   regular_clock_call();
2086   // This is called when we are in the termination protocol. We should
2087   // quit if, for some reason, this task wants to abort or the global
2088   // stack is not empty (this means that we can get work from it).
2089   return !_cm->mark_stack_empty() || has_aborted();
2090 }
2091 
2092 void G1CMTask::reached_limit() {
2093   assert(_words_scanned >= _words_scanned_limit ||
2094          _refs_reached >= _refs_reached_limit,
2095          "shouldn't have been called otherwise");
2096   regular_clock_call();
2097 }
2098 
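     // The "clock" called at regular intervals from the marking code, driven by
     // the words-scanned and refs-reached limits. It recalculates those limits
     // and checks the abort conditions listed below, setting the has_aborted
     // flag when this task needs to bail out.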
2099 void G1CMTask::regular_clock_call() {
2100   if (has_aborted()) {
2101     return;
2102   }
2103 
2104   // First, we need to recalculate the words scanned and refs reached
2105   // limits for the next clock call.
2106   recalculate_limits();
2107 
2108   // During the regular clock call we do the following:
2109 
2110   // (1) If an overflow has been flagged, then we abort.
2111   if (_cm->has_overflown()) {
2112     set_has_aborted();
2113     return;
2114   }
2115 
2116   // If we are not concurrent (i.e. we're doing remark) we don't need
2117   // to check anything else. The other steps are only needed during
2118   // the concurrent marking phase.
2119   if (!_cm->concurrent()) {
2120     return;
2121   }
2122 
2123   // (2) If marking has been aborted for Full GC, then we also abort.
2124   if (_cm->has_aborted()) {
2125     set_has_aborted();
2126     return;
2127   }
2128 
2129   double curr_time_ms = os::elapsedVTime() * 1000.0;
2130 
2131   // (3) We check whether we should yield. If we have to, then we abort.
2132   if (SuspendibleThreadSet::should_yield()) {
2133     // We should yield. To do this we abort the task. The caller is
2134     // responsible for yielding.
2135     set_has_aborted();
2136     return;
2137   }
2138 
2139   // (4) We check whether we've reached our time quota. If we have,
2140   // then we abort.
2141   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2142   if (elapsed_time_ms > _time_target_ms) {
2143     set_has_aborted();
2144     _has_timed_out = true;
2145     return;
2146   }
2147 
2148   // (5) Finally, we check whether there are enough completed SATB
2149   // buffers available for processing. If there are, we abort.
2150   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2151   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2152     // We do need to process SATB buffers, so we'll abort and restart
2153     // the marking task to do so.
2154     set_has_aborted();
2155     return;
2156   }
2157 }
2158 
2159 void G1CMTask::recalculate_limits() {
2160   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2161   _words_scanned_limit      = _real_words_scanned_limit;
2162 
2163   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2164   _refs_reached_limit       = _real_refs_reached_limit;
2165 }
2166 
2167 void G1CMTask::decrease_limits() {
2168   // This is called when we believe that we're going to do an infrequent
2169   // operation which will increase the per byte scanned cost (i.e. move
2170   // entries to/from the global stack). It basically tries to decrease the
2171   // scanning limit so that the clock is called earlier.
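       //
       // With the 3/4 reduction below the effective limit ends up only a quarter
       // of a period past the point where the limits were last recalculated, so
       // the next regular clock call comes roughly four times sooner than usual.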
2172 
2173   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2174   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2175 }
2176 
2177 void G1CMTask::move_entries_to_global_stack() {
2178   // Local array where we'll store the entries that will be popped
2179   // from the local queue.
2180   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2181 
2182   size_t n = 0;
2183   G1TaskQueueEntry task_entry;
2184   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2185     buffer[n] = task_entry;
2186     ++n;
2187   }
2188   if (n < G1CMMarkStack::EntriesPerChunk) {
2189     buffer[n] = G1TaskQueueEntry();
2190   }
2191 
2192   if (n > 0) {
2193     if (!_cm->mark_stack_push(buffer)) {
2194       set_has_aborted();
2195     }
2196   }
2197 
2198   // This operation was quite expensive, so decrease the limits.
2199   decrease_limits();
2200 }
2201 
2202 bool G1CMTask::get_entries_from_global_stack() {
2203   // Local array where we'll store the entries that will be popped
2204   // from the global stack.
2205   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2206 
2207   if (!_cm->mark_stack_pop(buffer)) {
2208     return false;
2209   }
2210 
2211   // We did actually pop at least one entry.
2212   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2213     G1TaskQueueEntry task_entry = buffer[i];
2214     if (task_entry.is_null()) {
2215       break;
2216     }
2217     assert(task_entry.is_array_slice() || oopDesc::is_oop(task_entry.obj()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2218     bool success = _task_queue->push(task_entry);
2219     // We only call this when the local queue is empty or under a
2220     // given target limit. So, we do not expect this push to fail.
2221     assert(success, "invariant");
2222   }
2223 
2224   // This operation was quite expensive, so decrease the limits
2225   decrease_limits();
2226   return true;
2227 }
2228 
2229 void G1CMTask::drain_local_queue(bool partially) {
2230   if (has_aborted()) {
2231     return;
2232   }
2233 
2234   // Decide what the target size is, depending on whether we're going to
2235   // drain it partially (so that other tasks can steal if they run out
2236   // of things to do) or totally (at the very end).
2237   size_t target_size;
2238   if (partially) {
2239     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2240   } else {
2241     target_size = 0;
2242   }
2243 
2244   if (_task_queue->size() > target_size) {
2245     G1TaskQueueEntry entry;
2246     bool ret = _task_queue->pop_local(entry);
2247     while (ret) {
2248       scan_task_entry(entry);
2249       if (_task_queue->size() <= target_size || has_aborted()) {
2250         ret = false;
2251       } else {
2252         ret = _task_queue->pop_local(entry);
2253       }
2254     }
2255   }
2256 }
2257 
2258 void G1CMTask::drain_global_stack(bool partially) {
2259   if (has_aborted()) {
2260     return;
2261   }
2262 
2263   // We have a policy to drain the local queue before we attempt to
2264   // drain the global stack.
2265   assert(partially || _task_queue->size() == 0, "invariant");
2266 
2267   // Decide what the target size is, depending on whether we're going to
2268   // drain it partially (so that other tasks can steal if they run out
2269   // of things to do) or totally (at the very end).
2270   // Notice that when draining the global mark stack partially, due to the raciness
2271   // of the mark stack size update we might in fact drop below the target. But,
2272   // this is not a problem.
2273   // In case of total draining, we simply process until the global mark stack is
2274   // totally empty, disregarding the size counter.
2275   if (partially) {
2276     size_t const target_size = _cm->partial_mark_stack_size_target();
2277     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2278       if (get_entries_from_global_stack()) {
2279         drain_local_queue(partially);
2280       }
2281     }
2282   } else {
2283     while (!has_aborted() && get_entries_from_global_stack()) {
2284       drain_local_queue(partially);
2285     }
2286   }
2287 }
2288 
2289 // SATB Queue has several assumptions on whether to call the par or
2290 // non-par versions of the methods. This is why some of the code is
2291 // replicated. We should really get rid of the single-threaded version
2292 // of the code to simplify things.
2293 void G1CMTask::drain_satb_buffers() {
2294   if (has_aborted()) {
2295     return;
2296   }
2297 
2298   // We set this so that the regular clock knows that we're in the
2299   // middle of draining buffers and doesn't set the abort flag when it
2300   // notices that SATB buffers are available for draining. It'd be
2301   // very counterproductive if it did that. :-)
2302   _draining_satb_buffers = true;
2303 
2304   G1CMSATBBufferClosure satb_cl(this, _g1h);
2305   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2306 
2307   // This keeps claiming and applying the closure to completed buffers
2308   // until we run out of buffers or we need to abort.
2309   while (!has_aborted() &&
2310          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2311     regular_clock_call();
2312   }
2313 
2314   _draining_satb_buffers = false;
2315 
2316   assert(has_aborted() ||
2317          _cm->concurrent() ||
2318          satb_mq_set.completed_buffers_num() == 0, "invariant");
2319 
2320   // Again, this was a potentially expensive operation; decrease the
2321   // limits to get the regular clock call early.
2322   decrease_limits();
2323 }
2324 
2325 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2326   _mark_stats_cache.reset(region_idx);
2327 }
2328 
2329 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2330   return _mark_stats_cache.evict_all();
2331 }
2332 
2333 void G1CMTask::print_stats() {
2334   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2335   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2336                        _elapsed_time_ms, _termination_time_ms);
2337   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2338                        _step_times_ms.num(),
2339                        _step_times_ms.avg(),
2340                        _step_times_ms.sd(),
2341                        _step_times_ms.maximum(),
2342                        _step_times_ms.sum());
2343   size_t const hits = _mark_stats_cache.hits();
2344   size_t const misses = _mark_stats_cache.misses();
2345   log_debug(gc, stats)("  Mark Stats Cache: hits " SIZE_FORMAT " misses " SIZE_FORMAT " ratio %.3f",
2346                        hits, misses, percent_of(hits, hits + misses));
2347 }
2348 
2349 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2350   return _task_queues->steal(worker_id, hash_seed, task_entry);
2351 }
2352 
2353 /*****************************************************************************
2354 
2355     The do_marking_step(time_target_ms, ...) method is the building
2356     block of the parallel marking framework. It can be called in parallel
2357     with other invocations of do_marking_step() on different tasks
2358     (but only one per task, obviously) and concurrently with the
2359     mutator threads, or during remark, hence it eliminates the need
2360     for two versions of the code. When called during remark, it will
2361     pick up from where the task left off during the concurrent marking
2362     phase. Interestingly, tasks are also claimable during evacuation
2363     pauses, since do_marking_step() ensures that it aborts before
2364     it needs to yield.
2365 
2366     The data structures that it uses to do marking work are the
2367     following:
2368 
2369       (1) Marking Bitmap. If there are gray objects that appear only
2370       on the bitmap (this happens either when dealing with an overflow
2371       or when the initial marking phase has simply marked the roots
2372       and didn't push them on the stack), then tasks claim heap
2373       regions whose bitmap they then scan to find gray objects. A
2374       global finger indicates where the end of the last claimed region
2375       is. A local finger indicates how far into the region a task has
2376       scanned. The two fingers are used to determine how to gray an
2377       object (i.e. whether simply marking it is OK, as it will be
2378       visited by a task in the future, or whether it needs to be also
2379       pushed on a stack).
2380 
2381       (2) Local Queue. The local queue of the task which is accessed
2382       reasonably efficiently by the task. Other tasks can steal from
2383       it when they run out of work. Throughout the marking phase, a
2384       task attempts to keep its local queue short but not totally
2385       empty, so that entries are available for stealing by other
2386       tasks. Only when there is no more work will a task totally
2387       drain its local queue.
2388 
2389       (3) Global Mark Stack. This handles local queue overflow. During
2390       marking only sets of entries are moved between it and the local
2391       queues, as access to it requires a mutex and more fine-grained
2392       interaction with it might cause contention. If it
2393       overflows, then the marking phase should restart and iterate
2394       over the bitmap to identify gray objects. Throughout the marking
2395       phase, tasks attempt to keep the global mark stack at a small
2396       length but not totally empty, so that entries are available for
2397       popping by other tasks. Only when there is no more work will
2398       tasks totally drain the global mark stack.
2399 
2400       (4) SATB Buffer Queue. This is where completed SATB buffers are
2401       made available. Buffers are regularly removed from this queue
2402       and scanned for roots, so that the queue doesn't get too
2403       long. During remark, all completed buffers are processed, as
2404       well as the filled in parts of any uncompleted buffers.
2405 
2406     The do_marking_step() method tries to abort when the time target
2407     has been reached. There are a few other cases when the
2408     do_marking_step() method also aborts:
2409 
2410       (1) When the marking phase has been aborted (after a Full GC).
2411 
2412       (2) When a global overflow (on the global stack) has been
2413       triggered. Before the task aborts, it will actually sync up with
2414       the other tasks to ensure that all the marking data structures
2415       (local queues, stacks, fingers etc.)  are re-initialized so that
2416       when do_marking_step() completes, the marking phase can
2417       immediately restart.
2418 
2419       (3) When enough completed SATB buffers are available. The
2420       do_marking_step() method only tries to drain SATB buffers right
2421       at the beginning. So, if enough buffers are available, the
2422       marking step aborts and the SATB buffers are processed at
2423       the beginning of the next invocation.
2424 
2425       (4) To yield. When we have to yield, we abort and do the yield
2426       right at the end of do_marking_step(). This saves us from a lot
2427       of hassle as, by yielding, we might allow a Full GC. If this
2428       happens then objects will be compacted underneath our feet, the
2429       heap might shrink, etc. We avoid checking for all this during
2430       marking by simply aborting and doing the yield right at the end.
2431 
2432     From the above it follows that the do_marking_step() method should
2433     be called in a loop (or, otherwise, regularly) until it completes.
2434 
2435     If a marking step completes without its has_aborted() flag being
2436     true, it means it has completed the current marking phase (and
2437     also all other marking tasks have done so and have all synced up).
2438 
2439     A method called regular_clock_call() is invoked "regularly" (in
2440     sub ms intervals) throughout marking. It is this clock method that
2441     checks all the abort conditions which were mentioned above and
2442     decides when the task should abort. A work-based scheme is used to
2443     trigger this clock method: when the number of object words the
2444     marking phase has scanned or the number of references the marking
2445     phase has visited reach a given limit. Additional invocations to
2446     the clock method have been planted in a few other strategic places
2447     too. The initial reason for the clock method was to avoid calling
2448     vtime too regularly, as it is quite expensive. So, once it was in
2449     place, it was natural to piggy-back all the other conditions on it
2450     too and not constantly check them throughout the code.
2451 
2452     If do_termination is true then do_marking_step will enter its
2453     termination protocol.
2454 
2455     The value of is_serial must be true when do_marking_step is being
2456     called serially (i.e. by the VMThread) and do_marking_step should
2457     skip any synchronization in the termination and overflow code.
2458     Examples include the serial remark code and the serial reference
2459     processing closures.
2460 
2461     The value of is_serial must be false when do_marking_step is
2462     being called by any of the worker threads in a work gang.
2463     Examples include the concurrent marking code (CMMarkingTask),
2464     the MT remark code, and the MT reference processing closures.
2465 
2466  *****************************************************************************/
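     // A minimal sketch (not part of the marking code itself) of the retry loop
     // the callers in this file wrap around do_marking_step(); here `target_ms`,
     // `task` and `cm` stand for the caller's time target, its G1CMTask and the
     // G1ConcurrentMark instance:
     //
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //   } while (task->has_aborted() && !cm->has_overflown());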
2467 
2468 void G1CMTask::do_marking_step(double time_target_ms,
2469                                bool do_termination,
2470                                bool is_serial) {
2471   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2472 
2473   _start_time_ms = os::elapsedVTime() * 1000.0;
2474 
2475   // If do_stealing is true then do_marking_step will attempt to
2476   // steal work from the other G1CMTasks. It only makes sense to
2477   // enable stealing when the termination protocol is enabled
2478   // and do_marking_step() is not being called serially.
2479   bool do_stealing = do_termination && !is_serial;
2480 
2481   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2482   _time_target_ms = time_target_ms - diff_prediction_ms;
2483 
2484   // set up the variables that are used in the work-based scheme to
2485   // call the regular clock method
2486   _words_scanned = 0;
2487   _refs_reached  = 0;
2488   recalculate_limits();
2489 
2490   // clear all flags
2491   clear_has_aborted();
2492   _has_timed_out = false;
2493   _draining_satb_buffers = false;
2494 
2495   ++_calls;
2496 
2497   // Set up the bitmap and oop closures. Anything that uses them is
2498   // eventually called from this method, so it is OK to allocate these
2499   // on the stack.
2500   G1CMBitMapClosure bitmap_closure(this, _cm);
2501   G1CMOopClosure cm_oop_closure(_g1h, this);
2502   set_cm_oop_closure(&cm_oop_closure);
2503 
2504   if (_cm->has_overflown()) {
2505     // This can happen if the mark stack overflows during a GC pause
2506     // and this task, after a yield point, restarts. We have to abort
2507     // as we need to get into the overflow protocol which happens
2508     // right at the end of this task.
2509     set_has_aborted();
2510   }
2511 
2512   // First drain any available SATB buffers. After this, we will not
2513   // look at SATB buffers before the next invocation of this method.
2514   // If enough completed SATB buffers are queued up, the regular clock
2515   // will abort this task so that it restarts.
2516   drain_satb_buffers();
2517   // ...then partially drain the local queue and the global stack
2518   drain_local_queue(true);
2519   drain_global_stack(true);
2520 
2521   do {
2522     if (!has_aborted() && _curr_region != NULL) {
2523       // This means that we're already holding on to a region.
2524       assert(_finger != NULL, "if region is not NULL, then the finger "
2525              "should not be NULL either");
2526 
2527       // We might have restarted this task after an evacuation pause
2528       // which might have evacuated the region we're holding on to
2529       // underneath our feet. Let's read its limit again to make sure
2530       // that we do not iterate over a region of the heap that
2531       // contains garbage (update_region_limit() will also move
2532       // _finger to the start of the region if it is found empty).
2533       update_region_limit();
2534       // We will start from _finger not from the start of the region,
2535       // as we might be restarting this task after aborting half-way
2536       // through scanning this region. In this case, _finger points to
2537       // the address where we last found a marked object. If this is a
2538       // fresh region, _finger points to start().
2539       MemRegion mr = MemRegion(_finger, _region_limit);
2540 
2541       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2542              "humongous regions should go around loop once only");
2543 
2544       // Some special cases:
2545       // If the memory region is empty, we can just give up the region.
2546       // If the current region is humongous then we only need to check
2547       // the bitmap for the bit associated with the start of the object,
2548       // scan the object if it's live, and give up the region.
2549       // Otherwise, let's iterate over the bitmap of the part of the region
2550       // that is left.
2551       // If the iteration is successful, give up the region.
2552       if (mr.is_empty()) {
2553         giveup_current_region();
2554         regular_clock_call();
2555       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2556         if (_next_mark_bitmap->is_marked(mr.start())) {
2557           // The object is marked - apply the closure
2558           bitmap_closure.do_addr(mr.start());
2559         }
2560         // Even if this task aborted while scanning the humongous object
2561         // we can (and should) give up the current region.
2562         giveup_current_region();
2563         regular_clock_call();
2564       } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
2565         giveup_current_region();
2566         regular_clock_call();
2567       } else {
2568         assert(has_aborted(), "currently the only way to do so");
2569         // The only way to abort the bitmap iteration is to return
2570         // false from the do_addr() method. However, inside the
2571         // do_addr() method we move the _finger to point to the
2572         // object currently being looked at. So, if we bail out, we
2573         // have definitely set _finger to something non-null.
2574         assert(_finger != NULL, "invariant");
2575 
2576         // Region iteration was actually aborted. So now _finger
2577         // points to the address of the object we last scanned. If we
2578         // leave it there, when we restart this task, we will rescan
2579         // the object. It is easy to avoid this. We move the finger by
2580         // enough to point to the next possible object header.
2581         assert(_finger < _region_limit, "invariant");
2582         HeapWord* const new_finger = _finger + ((oop)_finger)->size();
2583         // Check if bitmap iteration was aborted while scanning the last object
2584         if (new_finger >= _region_limit) {
2585           giveup_current_region();
2586         } else {
2587           move_finger_to(new_finger);
2588         }
2589       }
2590     }
2591     // At this point we have either completed iterating over the
2592     // region we were holding on to, or we have aborted.
2593 
2594     // We then partially drain the local queue and the global stack.
2595     // (Do we really need this?)
2596     drain_local_queue(true);
2597     drain_global_stack(true);
2598 
2599     // Read the note on the claim_region() method on why it might
2600     // return NULL with potentially more regions available for
2601     // claiming and why we have to check out_of_regions() to determine
2602     // whether we're done or not.
2603     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2604       // We are going to try to claim a new region. We should have
2605       // given up on the previous one.
2606       // Separated the asserts so that we know which one fires.
2607       assert(_curr_region  == NULL, "invariant");
2608       assert(_finger       == NULL, "invariant");
2609       assert(_region_limit == NULL, "invariant");
2610       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2611       if (claimed_region != NULL) {
2612         // Yes, we managed to claim one
2613         setup_for_region(claimed_region);
2614         assert(_curr_region == claimed_region, "invariant");
2615       }
2616       // It is important to call the regular clock here. It might take
2617       // a while to claim a region if, for example, we hit a large
2618       // block of empty regions. So we need to call the regular clock
2619       // method once round the loop to make sure it's called
2620       // frequently enough.
2621       regular_clock_call();
2622     }
2623 
2624     if (!has_aborted() && _curr_region == NULL) {
2625       assert(_cm->out_of_regions(),
2626              "at this point we should be out of regions");
2627     }
2628   } while (_curr_region != NULL && !has_aborted());
2629 
2630   if (!has_aborted()) {
2631     // We cannot check whether the global stack is empty, since other
2632     // tasks might be pushing objects to it concurrently.
2633     assert(_cm->out_of_regions(),
2634            "at this point we should be out of regions");
2635     // Try to reduce the number of available SATB buffers so that
2636     // remark has less work to do.
2637     drain_satb_buffers();
2638   }
2639 
2640   // Since we've done everything else, we can now totally drain the
2641   // local queue and global stack.
2642   drain_local_queue(false);
2643   drain_global_stack(false);
2644 
2645   // Attempt at work stealing from other tasks' queues.
2646   if (do_stealing && !has_aborted()) {
2647     // We have not aborted. This means that we have finished all that
2648     // we could. Let's try to do some stealing...
2649 
2650     // We cannot check whether the global stack is empty, since other
2651     // tasks might be pushing objects to it concurrently.
2652     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2653            "only way to reach here");
2654     while (!has_aborted()) {
2655       G1TaskQueueEntry entry;
2656       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2657         scan_task_entry(entry);
2658 
2659         // And since we're towards the end, let's totally drain the
2660         // local queue and global stack.
2661         drain_local_queue(false);
2662         drain_global_stack(false);
2663       } else {
2664         break;
2665       }
2666     }
2667   }
2668 
2669   // We still haven't aborted. Now, let's try to get into the
2670   // termination protocol.
2671   if (do_termination && !has_aborted()) {
2672     // We cannot check whether the global stack is empty, since other
2673     // tasks might be concurrently pushing objects on it.
2674     // Separated the asserts so that we know which one fires.
2675     assert(_cm->out_of_regions(), "only way to reach here");
2676     assert(_task_queue->size() == 0, "only way to reach here");
2677     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2678 
2679     // The G1CMTask class also extends the TerminatorTerminator class,
2680     // hence its should_exit_termination() method will also decide
2681     // whether to exit the termination protocol or not.
2682     bool finished = (is_serial ||
2683                      _cm->terminator()->offer_termination(this));
2684     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2685     _termination_time_ms +=
2686       termination_end_time_ms - _termination_start_time_ms;
2687 
2688     if (finished) {
2689       // We're all done.
2690 
2691       // We can now guarantee that the global stack is empty, since
2692       // all other tasks have finished. We separated the guarantees so
2693       // that, if a condition is false, we can immediately find out
2694       // which one.
2695       guarantee(_cm->out_of_regions(), "only way to reach here");
2696       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2697       guarantee(_task_queue->size() == 0, "only way to reach here");
2698       guarantee(!_cm->has_overflown(), "only way to reach here");
2699     } else {
2700       // Apparently there's more work to do. Let's abort this task. Its
2701       // caller will restart it and we can hopefully find more things to do.
2702       set_has_aborted();
2703     }
2704   }
2705 
2706   // Mainly for debugging purposes to make sure that a pointer to the
2707   // closure which was stack-allocated in this frame doesn't
2708   // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialize in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (!is_serial) {
        // We only need to enter the sync barrier if we are called
        // from a parallel context.
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialize our data structures.
      }

      clear_region_fields();
      flush_mark_stats_cache();

      if (!is_serial) {
        // If we're executing the concurrent phase of marking, reset the marking
        // state; otherwise the marking state is reset after reference processing,
        // during the remark pause.
        // If we reset here as a result of an overflow during the remark we will
        // see assertion failures from any subsequent set_concurrency_and_phase()
        // calls.
        if (_cm->concurrent() && _worker_id == 0) {
          // Worker 0 is responsible for clearing the global data structures after
          // an overflow. During STW we should not clear the overflow flag (in
          // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it
          // being true when we exit this method to abort the pause and restart
          // concurrent marking.
          _cm->reset_marking_for_restart();

          log_info(gc, marking)("Concurrent Mark reset for overflow");
        }

        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're in the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  }
}

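// Set up a per-worker marking task: wire up its task queue and the per-region
// mark statistics cache, and start with the per-task marking state cleared.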
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats,
                   uint max_regions) :
  _objArray_processor(this),
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _next_mark_bitmap(NULL),
  _task_queue(task_queue),
  _mark_stats_cache(mark_stats, max_regions, RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_time_ms(0.0),
  _cm_oop_closure(NULL),
  _curr_region(NULL),
  _finger(NULL),
  _region_limit(NULL),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _hash_seed(17),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _termination_start_time_ms(0.0),
  _marking_step_diffs_ms()
{
  guarantee(task_queue != NULL, "invariant");

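  // Seed the step time diff statistics with a single 0.5 ms entry, presumably
  // so that a sample is available before the first real marking step completes.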
  _marking_step_diffs_ms.add(0.5);
}

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_STATE_FORMAT           "   %-5s"
#define G1PPRL_STATE_H_FORMAT         "   %5s"
#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

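// The constructor prints the per-phase header, do_heap_region() prints one
// line per region, and the destructor prints the summary footer.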
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0), _total_capacity_bytes(0),
  _total_prev_live_bytes(0), _total_next_live_bytes(0),
  _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
{
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                          HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "type", "address-range",
                          "used", "prev-live", "next-live", "gc-eff",
                          "remset", "state", "code-roots");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_DOUBLE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                          "(bytes)", "", "(bytes)");
}

bool G1PrintRegionLivenessInfoClosure::do_heap_region(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
  const char* remset_type = r->rem_set()->get_short_state_str();

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_DOUBLE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_STATE_FORMAT
                          G1PPRL_BYTE_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                          remset_bytes, remset_type, strong_code_roots_bytes);

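  // Returning false keeps the heap region iteration going.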
  return false;
}

G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usage to the remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                         " SUMMARY"
                         G1PPRL_SUM_MB_FORMAT("capacity")
                         G1PPRL_SUM_MB_PERC_FORMAT("used")
                         G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                         G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                         G1PPRL_SUM_MB_FORMAT("remset")
                         G1PPRL_SUM_MB_FORMAT("code-roots"),
                         bytes_to_mb(_total_capacity_bytes),
                         bytes_to_mb(_total_used_bytes),
                         percent_of(_total_used_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_prev_live_bytes),
                         percent_of(_total_prev_live_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_next_live_bytes),
                         percent_of(_total_next_live_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_remset_bytes),
                         bytes_to_mb(_total_strong_code_roots_bytes));
}