src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp

rev 2896 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the number of marked bytes, and the cards spanned by marked objects, in marking task / worker thread local data structures that are updated as individual objects are marked.
Reviewed-by: brutisso
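
The change threads a worker identifier (worker_i) through the marking and
evacuation paths so that each worker updates its own liveness counts,
lock-free, at the point where it marks an object, instead of in a separate
counting phase after marking. A minimal standalone sketch of that
per-worker accounting, with illustrative names and simplified card
handling (not the actual ConcurrentMark data structures):

    #include <cstddef>
    #include <vector>

    // One instance per GC worker (indexed by worker_i); no locking is
    // needed because each worker only ever touches its own counts.
    struct WorkerLivenessCounts {
      std::vector<size_t> marked_bytes; // live bytes, indexed by heap region
      std::vector<bool>   card_bitmap;  // one bit per card in the heap

      WorkerLivenessCounts(size_t num_regions, size_t num_cards)
        : marked_bytes(num_regions, 0), card_bitmap(num_cards, false) {}

      // Called as each object is marked, piggy-backing the accounting
      // on the marking step itself.
      void count_object(size_t region_index, size_t first_card,
                        size_t last_card, size_t byte_size) {
        marked_bytes[region_index] += byte_size;
        for (size_t c = first_card; c <= last_card; ++c) {
          card_bitmap[c] = true; // cards spanned by the marked object
        }
      }
    };

At the end of marking the per-worker totals are merged into the final
liveness figures, which is why every mark/gray call site below now has to
know which worker it is running on.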

@@ -4052,11 +4052,11 @@
       assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
       _cm->markPrev(obj);
       assert(_cm->isPrevMarked(obj), "Should be marked!");
       _prev_marked_bytes += (obj_size * HeapWordSize);
       if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
-        _cm->markAndGrayObjectIfNecessary(obj);
+        _cm->markAndGrayObjectIfNecessary(obj, 0 /* worker_i */);
       }
       obj->set_mark(markOopDesc::prototype());
       // While we were processing RSet buffers during the
       // collection, we actually didn't scan any cards on the
      // collection set, since we didn't want to update remembered

@@ -4171,11 +4171,12 @@
 }
 
 oop
 G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
                                                oop old,
-                                               bool should_mark_root) {
+                                               bool should_mark_root,
+                                               int worker_i) {
   assert(obj_in_cs(old),
          err_msg("obj: "PTR_FORMAT" should still be in the CSet",
                  (HeapWord*) old));
   markOop m = old->mark();
   oop forward_ptr = old->forward_to_atomic(old);

@@ -4186,11 +4187,11 @@
     // from a root scanning closure during an initial mark pause.
     // In this case the thread that succeeds in self-forwarding the
     // object is also responsible for marking the object.
     if (should_mark_root) {
       assert(!oopDesc::is_null(old), "shouldn't be");
-      _cm->grayRoot(old);
+      _cm->grayRoot(old, worker_i);
     }
 
     if (_evac_failure_closure != cl) {
       MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
       assert(!_drain_in_progress,

@@ -4292,14 +4293,49 @@
   guarantee(_cm->isMarked(oop(addr)), "it should be!");
   return true;
 }
 #endif // PRODUCT
 
-G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
+void GCLabBitMap::retire(int worker_i) {
+  guarantee(use_local_bitmaps, "invariant");
+  assert(fields_well_formed(), "invariant");
+
+  if (_start_word != NULL) {
+    CMBitMap* mark_bitmap = _cm->nextMarkBitMap();
+
+    // this means that the bitmap was set up for the GCLab
+    assert(_real_start_word != NULL && _real_end_word != NULL, "invariant");
+
+    mark_bitmap->mostly_disjoint_range_union(this,
+                              0, // always start from the start of the bitmap
+                              _start_word,
+                              gclab_real_word_size());
+
+    // Note: Even though not all objects copied into the LAB will
+    // have their bit set in the LAB bitmap (the LAB bitmap is used to
+    // propagate marks), we can just add the entire LAB and its bitmap
+    // to the count of the marked data. It's OK (but inaccurate) to
+    // count a dead object, but we must not miss counting a live one.
+    MemRegion lab_region(_real_start_word, _real_end_word);
+    _cm->count_region(lab_region, worker_i);
+    _cm->grayRegionIfNecessary(lab_region);
+
+#ifndef PRODUCT
+    if (use_local_bitmaps && verify_local_bitmaps) {
+      verify();
+    }
+#endif // PRODUCT
+  } else {
+    assert(_real_start_word == NULL && _real_end_word == NULL, "invariant");
+  }
+}
+
+G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size, int worker_i) :
   ParGCAllocBuffer(gclab_word_size),
   _should_mark_objects(false),
   _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
+  _worker_i(worker_i),
   _retired(false)
 {
  // _should_mark_objects is set to true when G1ParCopyHelper needs to
   // mark the forwarded location of an evacuated object.
   // We set _should_mark_objects to true if marking is active, i.e. when we

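Because the LAB bitmap is only used to propagate marks, not every live
object copied into the LAB has its bit set in it, so retire() counts the
entire LAB region rather than individual objects. A hedged sketch of that
trade-off, reusing the illustrative WorkerLivenessCounts type from the
sketch above (not the real count_region):

    // Counting a whole retired LAB: dead objects inside it may be
    // overcounted, but no live object can be missed, which is the
    // invariant the accounting depends on.
    void count_lab_region(WorkerLivenessCounts& counts, size_t region_index,
                          size_t first_card, size_t last_card,
                          size_t lab_bytes) {
      counts.marked_bytes[region_index] += lab_bytes;
      for (size_t c = first_card; c <= last_card; ++c) {
        counts.card_bitmap[c] = true;
      }
    }
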
@@ -4317,12 +4353,12 @@
     _dcq(&g1h->dirty_card_queue_set()),
     _ct_bs((CardTableModRefBS*)_g1h->barrier_set()),
     _g1_rem(g1h->g1_rem_set()),
     _hash_seed(17), _queue_num(queue_num),
     _term_attempts(0),
-    _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
-    _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
+    _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived), queue_num),
+    _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured), queue_num),
     _age_table(false),
     _strong_roots_time(0), _term_time(0),
     _alloc_buffer_waste(0), _undo_waste(0)
 {
   // we allocate G1YoungSurvRateNumRegions plus one entries, since

@@ -4430,10 +4466,11 @@
 }
 
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state),
+  _worker_i(par_scan_state->queue_num()),
   _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
   _mark_in_progress(_g1->mark_in_progress()) { }
 
 template <class T> void G1ParCopyHelper::mark_object(T* p) {
   // This is called from do_oop_work for objects that are not

@@ -4443,11 +4480,11 @@
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop(heap_oop);
     HeapWord* addr = (HeapWord*)obj;
     if (_g1->is_in_g1_reserved(addr)) {
-      _cm->grayRoot(oop(addr));
+      _cm->grayRoot(oop(addr), _worker_i);
     }
   }
 }
 
 oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_root,

@@ -4469,11 +4506,11 @@
 
   if (obj_ptr == NULL) {
     // This will either forward-to-self, or detect that someone else has
     // installed a forwarding pointer.
     OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
-    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root);
+    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root, _worker_i);
   }
 
   // We're going to allocate linearly, so might as well prefetch ahead.
   Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
 

@@ -4512,17 +4549,33 @@
       if (!use_local_bitmaps ||
           !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
         // if we couldn't mark it on the local bitmap (this happens when
         // the object was not allocated in the GCLab), we have to bite
         // the bullet and do the standard parallel mark
-        _cm->markAndGrayObjectIfNecessary(obj);
+        _cm->markAndGrayObjectIfNecessary(obj, _worker_i);
       }
 
       if (_g1->isMarkedNext(old)) {
         // Unmark the object's old location so that marking
         // doesn't think the old object is alive.
         _cm->nextMarkBitMap()->parClear((HeapWord*)old);
+
+        // We could clear the count data for the old object here but
+        // currently we do not. Why don't we do this? The thread/task
+        // that marks a newly copied object is likely _not_ the thread/task
+        // that originally marked the old object. So, to clear the count
+        // data for the old object, we would have to scan the count
+        // data for all of the tasks (and clear the data for the old object
+        // in parallel with other threads adding to the count data). Even
+        // then we could clear a bit incorrectly (e.g. if the old object
+        // does not start or end on a card boundary). It's more important
+        // not to miss bits that should have been set than to avoid
+        // setting a few extra bits.
+        //
+        // As a result the accumulated count data could be a superset
+        // of the data that is/would have been calculated by walking
+        // the marking bitmap.
       }
     }
 
     size_t* surv_young_words = _par_scan_state->surviving_young_words();
     surv_young_words[young_index] += word_sz;

@@ -4605,11 +4658,12 @@
                                                  should_mark_copy);
       oopDesc::encode_store_heap_oop(p, copy_oop);
     }
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
-      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+      assert(_worker_i == _par_scan_state->queue_num(), "sanity");
+      _par_scan_state->update_rs(_from, p, _worker_i);
     }
   } else {
     // The object is not in collection set. If we're a root scanning
     // closure during an initial mark pause (i.e. do_mark_object will
     // be true) then attempt to mark the object.

@@ -4617,11 +4671,12 @@
       mark_object(p);
     }
   }
 
   if (barrier == G1BarrierEvac && obj != NULL) {
-    _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+    assert(_worker_i == _par_scan_state->queue_num(), "sanity");
+    _par_scan_state->update_rs(_from, p, _worker_i);
   }
 
   if (do_gen_barrier && obj != NULL) {
     par_do_barrier(p);
   }

@@ -5841,12 +5896,13 @@
       // even during an initial mark pause, until the set_marking_started()
       // call which takes place later in the pause.
       if (mark_in_progress()) {
         assert(!g1_policy()->during_initial_mark_pause(), "sanity");
         _cm->nextMarkBitMap()->clearRange(used_mr);
+        // Clear this region's entries from the marking count data.
+        _cm->clear_count_data_for_heap_region(cur);
       }
-
       free_region(cur, &pre_used, &local_free_list, false /* par */);
     } else {
       cur->uninstall_surv_rate_group();
       if (cur->is_young()) {
         cur->set_young_index_in_cset(-1);
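
clear_count_data_for_heap_region() has to undo the accounting for a
region that is freed while marking is in progress. Unlike the per-object
case discussed in copy_to_survivor_space above, a whole heap region is
card-aligned, so its cards can be cleared from every worker's data
without disturbing neighbouring regions. A rough sketch under the same
illustrative structures as above (hypothetical names; the real method
lives in ConcurrentMark and runs inside an evacuation pause, when
concurrent marking is suspended):

    // Remove a freed region's contribution from every worker's counts.
    void clear_count_data_for_region(
        std::vector<WorkerLivenessCounts>& per_worker,
        size_t region_index, size_t first_card, size_t last_card) {
      for (size_t w = 0; w < per_worker.size(); ++w) {
        per_worker[w].marked_bytes[region_index] = 0;
        for (size_t c = first_card; c <= last_card; ++c) {
          per_worker[w].card_bitmap[c] = false;
        }
      }
    }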