rev 2896 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by: brutisso
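The hunks below thread a worker id through the marking and evacuation paths so that each worker can update its own liveness counts as it marks. As a minimal sketch of the underlying idea, assuming 512-byte cards and a flat card bitmap (all names here are illustrative, not taken from the patch):

  #include <cstddef>
  #include <vector>

  // Illustrative per-worker liveness counts; the patch keeps analogous
  // data in marking task / worker thread local structures.
  class WorkerLivenessCounts {
    std::vector<size_t> _marked_bytes;  // live bytes per heap region
    std::vector<bool>   _card_bm;       // one bit per card spanned by live data
    static const size_t CardShift = 9;  // assumed: 512-byte cards

  public:
    WorkerLivenessCounts(size_t num_regions, size_t num_cards)
      : _marked_bytes(num_regions, 0), _card_bm(num_cards, false) {}

    // Called as each object is marked, instead of in a separate
    // counting phase after marking completes.
    void count_object(size_t region_index,
                      size_t obj_start_byte,   // offset from heap base
                      size_t obj_size_bytes) {
      _marked_bytes[region_index] += obj_size_bytes;
      size_t last_byte = obj_start_byte + obj_size_bytes - 1;
      for (size_t c = obj_start_byte >> CardShift;
           c <= (last_byte >> CardShift); ++c) {
        _card_bm[c] = true;  // set every card the object spans
      }
    }
  };

At remark, summing each region's entry across all workers yields the region's live bytes without rescanning the mark bitmap.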

          --- old/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
          +++ new/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
[... 4046 lines elided ...]
4047 4047      assert(_hr->is_in(obj_addr), "sanity");
4048 4048      size_t obj_size = obj->size();
4049 4049      _hr->update_bot_for_object(obj_addr, obj_size);
4050 4050      if (obj->is_forwarded() && obj->forwardee() == obj) {
4051 4051        // The object failed to move.
4052 4052        assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
4053 4053        _cm->markPrev(obj);
4054 4054        assert(_cm->isPrevMarked(obj), "Should be marked!");
4055 4055        _prev_marked_bytes += (obj_size * HeapWordSize);
4056 4056        if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
4057      -        _cm->markAndGrayObjectIfNecessary(obj);
     4057 +        _cm->markAndGrayObjectIfNecessary(obj, 0 /* worker_i */);
4058 4058        }
4059 4059        obj->set_mark(markOopDesc::prototype());
4060 4060        // While we were processing RSet buffers during the
4061 4061        // collection, we actually didn't scan any cards on the
4062 4062        // collection set, since we didn't want to update remembered
4063 4063        // sets with entries that point into the collection set, given
4064 4064        // that live objects from the collection set are about to move
4065 4065        // and such entries will be stale very soon. This change also
4066 4066        // dealt with a reliability issue which involved scanning a
4067 4067        // card in the collection set and coming across an array that
[... 98 lines elided ...]
4166 4166    while (_evac_failure_scan_stack->length() > 0) {
4167 4167       oop obj = _evac_failure_scan_stack->pop();
4168 4168       _evac_failure_closure->set_region(heap_region_containing(obj));
4169 4169       obj->oop_iterate_backwards(_evac_failure_closure);
4170 4170    }
4171 4171  }
4172 4172  
4173 4173  oop
4174 4174  G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
4175 4175                                                 oop old,
4176      -                                               bool should_mark_root) {
     4176 +                                               bool should_mark_root,
     4177 +                                               int worker_i) {
4177 4178    assert(obj_in_cs(old),
4178 4179           err_msg("obj: "PTR_FORMAT" should still be in the CSet",
4179 4180                   (HeapWord*) old));
4180 4181    markOop m = old->mark();
4181 4182    oop forward_ptr = old->forward_to_atomic(old);
4182 4183    if (forward_ptr == NULL) {
4183 4184      // Forward-to-self succeeded.
4184 4185  
4185 4186      // should_mark_root will be true when this routine is called
4186 4187      // from a root scanning closure during an initial mark pause.
4187 4188      // In this case the thread that succeeds in self-forwarding the
4188 4189      // object is also responsible for marking the object.
4189 4190      if (should_mark_root) {
4190 4191        assert(!oopDesc::is_null(old), "shouldn't be");
4191      -      _cm->grayRoot(old);
     4192 +      _cm->grayRoot(old, worker_i);
4192 4193      }
4193 4194  
4194 4195      if (_evac_failure_closure != cl) {
4195 4196        MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
4196 4197        assert(!_drain_in_progress,
4197 4198               "Should only be true while someone holds the lock.");
4198 4199        // Set the global evac-failure closure to the current thread's.
4199 4200        assert(_evac_failure_closure == NULL, "Or locking has failed.");
4200 4201        set_evac_failure_closure(cl);
4201 4202        // Now do the common part.
[... 85 lines elided ...]
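handle_evacuation_failure_par() above depends on exactly one thread winning the self-forwarding race; that winner is also the one that grays the root. A minimal sketch of the claim step, modeling the mark-word CAS done by forward_to_atomic() with a plain atomic (types are illustrative):

  #include <atomic>

  struct Obj {
    std::atomic<Obj*> forwardee{nullptr};
  };

  // Returns true for exactly one of the racing threads; that thread is
  // then responsible for the equivalent of grayRoot(obj, worker_i).
  bool try_self_forward(Obj* obj) {
    Obj* expected = nullptr;
    return obj->forwardee.compare_exchange_strong(expected, obj);
  }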
4287 4288  }
4288 4289  
4289 4290  #ifndef PRODUCT
4290 4291  bool GCLabBitMapClosure::do_bit(size_t offset) {
4291 4292    HeapWord* addr = _bitmap->offsetToHeapWord(offset);
4292 4293    guarantee(_cm->isMarked(oop(addr)), "it should be!");
4293 4294    return true;
4294 4295  }
4295 4296  #endif // PRODUCT
4296 4297  
4297      -G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
     4298 +void GCLabBitMap::retire(int worker_i) {
     4299 +  guarantee(use_local_bitmaps, "invariant");
     4300 +  assert(fields_well_formed(), "invariant");
     4301 +
     4302 +  if (_start_word != NULL) {
     4303 +    CMBitMap* mark_bitmap = _cm->nextMarkBitMap();
     4304 +
     4305 +    // this means that the bitmap was set up for the GCLab
     4306 +    assert(_real_start_word != NULL && _real_end_word != NULL, "invariant");
     4307 +
     4308 +    mark_bitmap->mostly_disjoint_range_union(this,
     4309 +                              0, // always start from the start of the bitmap
     4310 +                              _start_word,
     4311 +                              gclab_real_word_size());
     4312 +
     4313 +    // Note: Even though not all objects copied into the LAB will
     4314 +    // have their bit set in the LAB bitmap (the LAB bitmap is used to
     4315 +    // propagate marks), we can just add the entire lab and its bitmap
     4316 +    // to the count of the marked data. It's OK (but inaccurate) to
     4317 +    // count a dead object but we can't miss counting a live object.
     4318 +    MemRegion lab_region(_real_start_word, _real_end_word);
     4319 +    _cm->count_region(lab_region, worker_i);
     4320 +    _cm->grayRegionIfNecessary(lab_region);
     4321 +
     4322 +#ifndef PRODUCT
     4323 +    if (use_local_bitmaps && verify_local_bitmaps) {
     4324 +      verify();
     4325 +    }
     4326 +#endif // PRODUCT
     4327 +  } else {
     4328 +    assert(_real_start_word == NULL && _real_end_word == NULL, "invariant");
     4329 +  }
     4330 +}
     4331 +
     4332 +G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size, int worker_i) :
4298 4333    ParGCAllocBuffer(gclab_word_size),
4299 4334    _should_mark_objects(false),
4300 4335    _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
     4336 +  _worker_i(worker_i),
4301 4337    _retired(false)
4302 4338  {
4303 4339    //_should_mark_objects is set to true when G1ParCopyHelper needs to
4304 4340    // mark the forwarded location of an evacuated object.
4305 4341    // We set _should_mark_objects to true if marking is active, i.e. when we
4306 4342    // need to propagate a mark, or during an initial mark pause, i.e. when we
4307 4343    // need to mark objects immediately reachable by the roots.
4308 4344    if (G1CollectedHeap::heap()->mark_in_progress() ||
4309 4345        G1CollectedHeap::heap()->g1_policy()->during_initial_mark_pause()) {
4310 4346      _should_mark_objects = true;
[... 1 line elided ...]
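GCLabBitMap::retire() above first folds the LAB's local mark bitmap into the global next-mark bitmap and then counts the entire LAB, dead objects included, as a safe over-approximation. A simplified sketch of the union step, bit by bit rather than the word-at-a-time OR a real bitmap union would use (names assumed):

  #include <cstddef>
  #include <vector>

  // Simplified sketch: OR a GCLab's local mark bits into the global
  // next-mark bitmap at the LAB's position in the heap. The real
  // mostly_disjoint_range_union() works on whole bitmap words.
  void union_lab_bits(std::vector<bool>& global_bm,
                      size_t lab_start_bit,  // LAB offset in global bitmap
                      const std::vector<bool>& lab_bm) {
    for (size_t i = 0; i < lab_bm.size(); ++i) {
      if (lab_bm[i]) {
        global_bm[lab_start_bit + i] = true;
      }
    }
  }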
4312 4348  }
4313 4349  
4314 4350  G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
4315 4351    : _g1h(g1h),
4316 4352      _refs(g1h->task_queue(queue_num)),
4317 4353      _dcq(&g1h->dirty_card_queue_set()),
4318 4354      _ct_bs((CardTableModRefBS*)_g1h->barrier_set()),
4319 4355      _g1_rem(g1h->g1_rem_set()),
4320 4356      _hash_seed(17), _queue_num(queue_num),
4321 4357      _term_attempts(0),
4322      -    _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
4323      -    _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
     4358 +    _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived), queue_num),
     4359 +    _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured), queue_num),
4324 4360      _age_table(false),
4325 4361      _strong_roots_time(0), _term_time(0),
4326 4362      _alloc_buffer_waste(0), _undo_waste(0)
4327 4363  {
4328 4364    // we allocate G1YoungSurvRateNumRegions plus one entries, since
4329 4365    // we "sacrifice" entry 0 to keep track of surviving bytes for
4330 4366    // non-young regions (where the age is -1)
4331 4367    // We also add a few elements at the beginning and at the end in
4332 4368    // an attempt to eliminate cache contention
4333 4369    size_t real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
[... 91 lines elided ...]
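The G1ParScanThreadState constructor above sizes the surviving-young-words array as young_cset_region_length() + 1, so that age -1 (non-young regions) maps to entry 0 after the +1 bias, and pads both ends to keep worker threads' arrays off shared cache lines. A sketch of that layout, with an assumed padding size:

  #include <cstddef>
  #include <cstring>

  const size_t PADDING = 8;  // assumed; the real constant derives from
                             // cache-line geometry elsewhere in the file

  // Illustrative: allocate a zeroed, padded per-thread array whose usable
  // part is indexed 0..young_length (entry 0 = non-young regions, age -1).
  size_t* alloc_surviving_words(size_t young_length) {
    size_t real_length  = young_length + 1;  // +1 for entry 0
    size_t total_length = PADDING + real_length + PADDING;
    size_t* base = new size_t[total_length];
    std::memset(base, 0, total_length * sizeof(size_t));
    return base + PADDING;  // callers never touch the padding elements
  }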
4425 4461  
4426 4462      while (refs()->pop_local(ref)) {
4427 4463        deal_with_reference(ref);
4428 4464      }
4429 4465    } while (!refs()->is_empty());
4430 4466  }
4431 4467  
4432 4468  G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
4433 4469    _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
4434 4470    _par_scan_state(par_scan_state),
     4471 +  _worker_i(par_scan_state->queue_num()),
4435 4472    _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
4436 4473    _mark_in_progress(_g1->mark_in_progress()) { }
4437 4474  
4438 4475  template <class T> void G1ParCopyHelper::mark_object(T* p) {
4439 4476    // This is called from do_oop_work for objects that are not
4440 4477    // in the collection set. Objects in the collection set
4441 4478    // are marked after they have been evacuated.
4442 4479  
4443 4480    T heap_oop = oopDesc::load_heap_oop(p);
4444 4481    if (!oopDesc::is_null(heap_oop)) {
4445 4482      oop obj = oopDesc::decode_heap_oop(heap_oop);
4446 4483      HeapWord* addr = (HeapWord*)obj;
4447 4484      if (_g1->is_in_g1_reserved(addr)) {
4448      -      _cm->grayRoot(oop(addr));
     4485 +      _cm->grayRoot(oop(addr), _worker_i);
4449 4486      }
4450 4487    }
4451 4488  }
4452 4489  
4453 4490  oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_root,
4454 4491                                                       bool should_mark_copy) {
4455 4492    size_t    word_sz = old->size();
4456 4493    HeapRegion* from_region = _g1->heap_region_containing_raw(old);
4457 4494    // +1 to make the -1 indexes valid...
4458 4495    int       young_index = from_region->young_index_in_cset()+1;
[... 5 lines elided ...]
4464 4501                                             : m->age();
4465 4502    GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
4466 4503                                                               word_sz);
4467 4504    HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
4468 4505    oop       obj     = oop(obj_ptr);
4469 4506  
4470 4507    if (obj_ptr == NULL) {
4471 4508      // This will either forward-to-self, or detect that someone else has
4472 4509      // installed a forwarding pointer.
4473 4510      OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
4474      -    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root);
     4511 +    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root, _worker_i);
4475 4512    }
4476 4513  
4477 4514    // We're going to allocate linearly, so might as well prefetch ahead.
4478 4515    Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
4479 4516  
4480 4517    oop forward_ptr = old->forward_to_atomic(obj);
4481 4518    if (forward_ptr == NULL) {
4482 4519      Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
4483 4520      if (g1p->track_object_age(alloc_purpose)) {
4484 4521        // We could simply do obj->incr_age(). However, this causes a
[... 22 lines elided ...]
4507 4544        obj->set_mark(m);
4508 4545      }
4509 4546  
4510 4547      // Mark the evacuated object or propagate "next" mark bit
4511 4548      if (should_mark_copy) {
4512 4549        if (!use_local_bitmaps ||
4513 4550            !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
4514 4551          // if we couldn't mark it on the local bitmap (this happens when
4515 4552          // the object was not allocated in the GCLab), we have to bite
4516 4553          // the bullet and do the standard parallel mark
4517      -        _cm->markAndGrayObjectIfNecessary(obj);
     4554 +        _cm->markAndGrayObjectIfNecessary(obj, _worker_i);
4518 4555        }
4519 4556  
4520 4557        if (_g1->isMarkedNext(old)) {
4521 4558          // Unmark the object's old location so that marking
4522 4559          // doesn't think the old object is alive.
4523 4560          _cm->nextMarkBitMap()->parClear((HeapWord*)old);
     4561 +
     4562 +        // We could clear the count data for the old object here but
     4563 +        // currently we do not. Why don't we do this? The thread/task
     4564 +        // that marks a newly copied object is likely _not_ the thread/task
     4565 +        // that originally marked the old object. So, to clear the count
     4566 +        // data for the old object, we would have to scan the count
     4567 +        // data for all of the tasks (and clear the data for the old object
     4568 +        // in parallel with other threads adding to the count data). Even
     4569 +        // then we could clear a bit incorrectly (e.g. if the old object
     4570 +        // does not start or end on a card boundary). It's more important
     4571 +        // that we don't miss bits that should have been set than that
     4572 +        // we avoid setting extra bits.
     4573 +        //
     4574 +        // As a result the accumulated count data could be a superset
     4575 +        // of the data that is/would have been calculated by walking
     4576 +        // the marking bitmap.
4524 4577        }
4525 4578      }
4526 4579  
4527 4580      size_t* surv_young_words = _par_scan_state->surviving_young_words();
4528 4581      surv_young_words[young_index] += word_sz;
4529 4582  
4530 4583      if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
4531 4584        arrayOop(old)->set_length(0);
4532 4585        oop* old_p = set_partial_array_mask(old);
4533 4586        _par_scan_state->push_on_queue(old_p);
[... 66 lines elided ...]
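The long comment above explains why count data for the old copy of an evacuated object is left in place: objects need not start or end on card boundaries, so clearing "its" cards could clear bits belonging to a live neighbor. A small worked illustration, assuming 512-byte cards:

  #include <cstddef>
  #include <cstdio>

  const size_t kCardSize = 512;  // assumed G1 card size

  int main() {
    // Object A spans bytes [1400, 1560); object B spans [1560, 1700).
    size_t a_last_card  = (1560 - 1) / kCardSize;  // = 3
    size_t b_first_card = 1560 / kCardSize;        // = 3: same card as A's tail
    // Clearing A's cards would also clear card 3 and lose B's liveness
    // bit, so the patch tolerates stale bits: the count data may be a
    // superset of what walking the mark bitmap would produce.
    std::printf("A ends on card %zu, B starts on card %zu\n",
                a_last_card, b_first_card);
    return 0;
  }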
4600 4653        bool should_mark_copy = do_mark_object ||
4601 4654                                _during_initial_mark ||
4602 4655                                (_mark_in_progress && !_g1->is_obj_ill(obj));
4603 4656  
4604 4657        oop copy_oop = copy_to_survivor_space(obj, should_mark_root,
4605 4658                                                   should_mark_copy);
4606 4659        oopDesc::encode_store_heap_oop(p, copy_oop);
4607 4660      }
4608 4661      // When scanning the RS, we only care about objs in CS.
4609 4662      if (barrier == G1BarrierRS) {
4610      -      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     4663 +      assert(_worker_i == _par_scan_state->queue_num(), "sanity");
     4664 +      _par_scan_state->update_rs(_from, p, _worker_i);
4611 4665      }
4612 4666    } else {
4613 4667      // The object is not in collection set. If we're a root scanning
4614 4668      // closure during an initial mark pause (i.e. do_mark_object will
4615 4669      // be true) then attempt to mark the object.
4616 4670      if (do_mark_object) {
4617 4671        mark_object(p);
4618 4672      }
4619 4673    }
4620 4674  
4621 4675    if (barrier == G1BarrierEvac && obj != NULL) {
4622      -    _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     4676 +    assert(_worker_i == _par_scan_state->queue_num(), "sanity");
     4677 +    _par_scan_state->update_rs(_from, p, _worker_i);
4623 4678    }
4624 4679  
4625 4680    if (do_gen_barrier && obj != NULL) {
4626 4681      par_do_barrier(p);
4627 4682    }
4628 4683  }
4629 4684  
4630 4685  template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
4631 4686  template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(narrowOop* p);
4632 4687  
[... 1203 lines elided ...]
5836 5891        // And the region is empty.
5837 5892        assert(!used_mr.is_empty(), "Should not have empty regions in a CS.");
5838 5893  
5839 5894        // If marking is in progress then clear any objects marked in
5840 5895        // the current region. Note mark_in_progress() returns false,
5841 5896        // even during an initial mark pause, until the set_marking_started()
5842 5897        // call which takes place later in the pause.
5843 5898        if (mark_in_progress()) {
5844 5899          assert(!g1_policy()->during_initial_mark_pause(), "sanity");
5845 5900          _cm->nextMarkBitMap()->clearRange(used_mr);
     5901 +        // Need to remove this region's values from the count data.
     5902 +        _cm->clear_count_data_for_heap_region(cur);
5846 5903        }
5847      -
5848 5904        free_region(cur, &pre_used, &local_free_list, false /* par */);
5849 5905      } else {
5850 5906        cur->uninstall_surv_rate_group();
5851 5907        if (cur->is_young()) {
5852 5908          cur->set_young_index_in_cset(-1);
5853 5909        }
5854 5910        cur->set_not_young();
5855 5911        cur->set_evacuation_failed(false);
5856 5912        // The region is now considered to be old.
5857 5913        _old_set.add(cur);
[... 462 lines elided ...]
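The free_collection_set() hunk above clears a freed region's liveness counts with clear_count_data_for_heap_region(). In contrast to the per-object case discussed earlier, whole-region clearing is safe because region boundaries are card-aligned. A sketch of what such clearing might look like across per-worker count structures (layout assumed):

  #include <cstddef>
  #include <vector>

  struct WorkerCounts {                // illustrative, as in the sketch
    std::vector<size_t> marked_bytes;  // per region
    std::vector<bool>   card_bm;       // per card
  };

  // Zero one region's liveness data in every worker's counts. Region
  // boundaries are card-aligned, so no neighboring region's cards are
  // touched, which is what makes this clearing safe.
  void clear_count_data_for_region(std::vector<WorkerCounts>& workers,
                                   size_t region_index,
                                   size_t first_card, size_t num_cards) {
    for (WorkerCounts& w : workers) {
      w.marked_bytes[region_index] = 0;
      for (size_t c = first_card; c < first_card + num_cards; ++c) {
        w.card_bm[c] = false;
      }
    }
  }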