461 size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
462 return MAX2((n_par_threads + 2) / 4, (size_t)1);
463 }
464
465 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
466 int max_regions) :
467 _markBitMap1(rs, MinObjAlignment - 1),
468 _markBitMap2(rs, MinObjAlignment - 1),
469
470 _parallel_marking_threads(0),
471 _max_parallel_marking_threads(0),
472 _sleep_factor(0.0),
473 _marking_task_overhead(1.0),
474 _cleanup_sleep_factor(0.0),
475 _cleanup_task_overhead(1.0),
476 _cleanup_list("Cleanup List"),
477 _region_bm(max_regions, false /* in_resource_area*/),
478 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
479 CardTableModRefBS::card_shift,
480 false /* in_resource_area*/),
481 _prevMarkBitMap(&_markBitMap1),
482 _nextMarkBitMap(&_markBitMap2),
483 _at_least_one_mark_complete(false),
484
485 _markStack(this),
486 _regionStack(),
487 // _finger set in set_non_marking_state
488
489 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
490 // _active_tasks set in set_non_marking_state
491 // _tasks set inside the constructor
492 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
493 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
494
495 _has_overflown(false),
496 _concurrent(false),
497 _has_aborted(false),
498 _restart_for_overflow(false),
499 _concurrent_marking_in_progress(false),
500 _should_gray_objects(false),
501
502 // _verbose_level set below
503
504 _init_times(),
505 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
506 _cleanup_times(),
507 _total_counting_time(0.0),
508 _total_rs_scrub_time(0.0),
509
510 _parallel_workers(NULL) {
511 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
512 if (verbose_level < no_verbose) {
513 verbose_level = no_verbose;
514 }
515 if (verbose_level > high_verbose) {
516 verbose_level = high_verbose;
517 }
518 _verbose_level = verbose_level;
519
520 if (verbose_low()) {
521 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
522 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
523 }
524
525 _markStack.allocate(MarkStackSize);
526 _regionStack.allocate(G1MarkRegionStackSize);
527
528 // Create & start a ConcurrentMark thread.
529 _cmThread = new ConcurrentMarkThread(this);
530 assert(cmThread() != NULL, "CM Thread should have been created");
531 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
532
533 _g1h = G1CollectedHeap::heap();
534 assert(CGC_lock != NULL, "Where's the CGC_lock?");
535 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
536 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
537
538 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
539 satb_qs.set_buffer_size(G1SATBBufferSize);
540
541 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
542 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
543
544 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
545 _active_tasks = _max_task_num;
546 for (int i = 0; i < (int) _max_task_num; ++i) {
547 CMTaskQueue* task_queue = new CMTaskQueue();
548 task_queue->initialize();
549 _task_queues->register_queue(i, task_queue);
550
551 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
552 _accum_task_vtime[i] = 0.0;
553 }
554
555 if (ConcGCThreads > ParallelGCThreads) {
556 vm_exit_during_initialization("Can't have more ConcGCThreads "
557 "than ParallelGCThreads.");
558 }
559 if (ParallelGCThreads == 0) {
560 // if we are not running with any parallel GC threads we will not
561 // spawn any marking threads either
562 _parallel_marking_threads = 0;
563 _max_parallel_marking_threads = 0;
564 _sleep_factor = 0.0;
565 _marking_task_overhead = 1.0;
566 } else {
567 if (ConcGCThreads > 0) {
568 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
569 // if both are set
570
571 _parallel_marking_threads = ConcGCThreads;
572 _max_parallel_marking_threads = _parallel_marking_threads;
573 _sleep_factor = 0.0;
574 _marking_task_overhead = 1.0;
658 // do nothing.
659 }
660
661 void ConcurrentMark::reset() {
662 // Starting values for these two. This should be called in a STW
663 // phase. CM will be notified of any future g1_committed expansions
664 // will be at the end of evacuation pauses, when tasks are
665 // inactive.
666 MemRegion committed = _g1h->g1_committed();
667 _heap_start = committed.start();
668 _heap_end = committed.end();
669
670 // Separated the asserts so that we know which one fires.
671 assert(_heap_start != NULL, "heap bounds should look ok");
672 assert(_heap_end != NULL, "heap bounds should look ok");
673 assert(_heap_start < _heap_end, "heap bounds should look ok");
674
675 // reset all the marking data structures and any necessary flags
676 clear_marking_state();
677
678 if (verbose_low()) {
679 gclog_or_tty->print_cr("[global] resetting");
680 }
681
682 // We do reset all of them, since different phases will use
683 // different number of active threads. So, it's easiest to have all
684 // of them ready.
685 for (int i = 0; i < (int) _max_task_num; ++i) {
686 _tasks[i]->reset(_nextMarkBitMap);
687 }
688
689 // we need this to make sure that the flag is on during the evac
690 // pause with initial mark piggy-backed
691 set_concurrent_marking_in_progress();
692 }
693
694 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
695 assert(active_tasks <= _max_task_num, "we should not have more");
696
697 _active_tasks = active_tasks;
709 if (concurrent) {
710 set_concurrent_marking_in_progress();
711 } else {
712 // We currently assume that the concurrent flag has been set to
713 // false before we start remark. At this point we should also be
714 // in a STW phase.
715 assert(!concurrent_marking_in_progress(), "invariant");
716 assert(_finger == _heap_end, "only way to get here");
717 update_g1_committed(true);
718 }
719 }
720
721 void ConcurrentMark::set_non_marking_state() {
722 // We set the global marking state to some default values when we're
723 // not doing marking.
724 clear_marking_state();
725 _active_tasks = 0;
726 clear_concurrent_marking_in_progress();
727 }
728
729 ConcurrentMark::~ConcurrentMark() {
730 for (int i = 0; i < (int) _max_task_num; ++i) {
731 delete _task_queues->queue(i);
732 delete _tasks[i];
733 }
734 delete _task_queues;
735 FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
736 }
737
738 // This closure is used to mark refs into the g1 generation
739 // from external roots in the CMS bit map.
740 // Called at the first checkpoint.
741 //
742
743 void ConcurrentMark::clearNextBitmap() {
744 G1CollectedHeap* g1h = G1CollectedHeap::heap();
745 G1CollectorPolicy* g1p = g1h->g1_policy();
746
747 // Make sure that the concurrent mark thread looks to still be in
748 // the current cycle.
749 guarantee(cmThread()->during_cycle(), "invariant");
750
751 // We are finishing up the current cycle by clearing the next
752 // marking bitmap and getting it ready for the next cycle. During
753 // this time no other cycle can start. So, let's make sure that this
754 // is the case.
755 guarantee(!g1h->mark_in_progress(), "invariant");
756
757 // clear the mark bitmap (no grey objects to start with).
934
935 void ForceOverflowSettings::update() {
936 if (_num_remaining > 0) {
937 _num_remaining -= 1;
938 _force = true;
939 } else {
940 _force = false;
941 }
942 }
943
944 bool ForceOverflowSettings::should_force() {
945 if (_force) {
946 _force = false;
947 return true;
948 } else {
949 return false;
950 }
951 }
952 #endif // !PRODUCT
953
954 void ConcurrentMark::grayRoot(oop p) {
955 HeapWord* addr = (HeapWord*) p;
956 // We can't really check against _heap_start and _heap_end, since it
957 // is possible during an evacuation pause with piggy-backed
958 // initial-mark that the committed space is expanded during the
959 // pause without CM observing this change. So the assertions below
960 // is a bit conservative; but better than nothing.
961 assert(_g1h->g1_committed().contains(addr),
962 "address should be within the heap bounds");
963
964 if (!_nextMarkBitMap->isMarked(addr)) {
965 _nextMarkBitMap->parMark(addr);
966 }
967 }
968
969 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
970 // The objects on the region have already been marked "in bulk" by
971 // the caller. We only need to decide whether to push the region on
972 // the region stack or not.
973
974 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
975 // We're done with marking and waiting for remark. We do not need to
976 // push anything else on the region stack.
977 return;
978 }
979
980 HeapWord* finger = _finger;
981
982 if (verbose_low()) {
983 gclog_or_tty->print_cr("[global] attempting to push "
984 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
985 PTR_FORMAT, mr.start(), mr.end(), finger);
994 assert(mr.start() <= mr.end(),
995 "region boundaries should fall within the committed space");
996 assert(_heap_start <= mr.start(),
997 "region boundaries should fall within the committed space");
998 assert(mr.end() <= _heap_end,
999 "region boundaries should fall within the committed space");
1000 if (verbose_low()) {
1001 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1002 "below the finger, pushing it",
1003 mr.start(), mr.end());
1004 }
1005
1006 if (!region_stack_push_lock_free(mr)) {
1007 if (verbose_low()) {
1008 gclog_or_tty->print_cr("[global] region stack has overflown.");
1009 }
1010 }
1011 }
1012 }
1013
1014 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1015 // The object is not marked by the caller. We need to at least mark
1016 // it and maybe push in on the stack.
1017
1018 HeapWord* addr = (HeapWord*)p;
1019 if (!_nextMarkBitMap->isMarked(addr)) {
1020 // We definitely need to mark it, irrespective whether we bail out
1021 // because we're done with marking.
1022 if (_nextMarkBitMap->parMark(addr)) {
1023 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1024 // If we're done with concurrent marking and we're waiting for
1025 // remark, then we're not pushing anything on the stack.
1026 return;
1027 }
1028
1029 // No OrderAccess:store_load() is needed. It is implicit in the
1030 // CAS done in parMark(addr) above
1031 HeapWord* finger = _finger;
1032
1033 if (addr < finger) {
1034 if (!mark_stack_push(oop(addr))) {
1035 if (verbose_low()) {
1036 gclog_or_tty->print_cr("[global] global stack overflow "
1037 "during parMark");
1038 }
1039 }
1040 }
1041 }
1042 }
1204 G1CollectorPolicy* g1p = g1h->g1_policy();
1205 g1p->record_concurrent_mark_remark_start();
1206
1207 double start = os::elapsedTime();
1208
1209 checkpointRootsFinalWork();
1210
1211 double mark_work_end = os::elapsedTime();
1212
1213 weakRefsWork(clear_all_soft_refs);
1214
1215 if (has_overflown()) {
1216 // Oops. We overflowed. Restart concurrent marking.
1217 _restart_for_overflow = true;
1218 // Clear the flag. We do not need it any more.
1219 clear_has_overflown();
1220 if (G1TraceMarkStackOverflow) {
1221 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1222 }
1223 } else {
1224 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1225 // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1228 satb_mq_set.set_active_all_threads(false, /* new active value */
1229 true /* expected_active */);
1230
1231 if (VerifyDuringGC) {
1232
1233 HandleMark hm; // handle scope
1234 gclog_or_tty->print(" VerifyDuringGC:(after)");
1235 Universe::heap()->prepare_for_verify();
1236 Universe::verify(/* allow dirty */ true,
1237 /* silent */ false,
1238 /* option */ VerifyOption_G1UseNextMarking);
1239 }
1240 assert(!restart_for_overflow(), "sanity");
1241 }
1242
1243 // Reset the marking state if marking completed
1244 if (!restart_for_overflow()) {
1245 set_non_marking_state();
1246 }
1247
1248 #if VERIFY_OBJS_PROCESSED
1249 _scan_obj_cl.objs_processed = 0;
1250 ThreadLocalObjQueue::objs_enqueued = 0;
1251 #endif
1252
1253 // Statistics
1254 double now = os::elapsedTime();
1255 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1256 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1257 _remark_times.add((now - start) * 1000.0);
1258
1259 g1p->record_concurrent_mark_remark_end();
1260 }
1261
1262 #define CARD_BM_TEST_MODE 0
1263
1264 class CalcLiveObjectsClosure: public HeapRegionClosure {
1265
1266 CMBitMapRO* _bm;
1267 ConcurrentMark* _cm;
1268 bool _changed;
1269 bool _yield;
1270 size_t _words_done;
1271 size_t _tot_live;
1272 size_t _tot_used;
1273 size_t _regions_done;
1274 double _start_vtime_sec;
1275
1276 BitMap* _region_bm;
1277 BitMap* _card_bm;
1278 intptr_t _bottom_card_num;
1279 bool _final;
1280
1281 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1282 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1283 #if CARD_BM_TEST_MODE
1284 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1285 #else
1286 _card_bm->par_at_put(i - _bottom_card_num, 1);
1287 #endif
1288 }
1289 }
1290
1291 public:
1292 CalcLiveObjectsClosure(bool final,
1293 CMBitMapRO *bm, ConcurrentMark *cm,
1294 BitMap* region_bm, BitMap* card_bm) :
1295 _bm(bm), _cm(cm), _changed(false), _yield(true),
1296 _words_done(0), _tot_live(0), _tot_used(0),
1297 _region_bm(region_bm), _card_bm(card_bm),_final(final),
1298 _regions_done(0), _start_vtime_sec(0.0)
1299 {
1300 _bottom_card_num =
1301 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1302 CardTableModRefBS::card_shift);
1303 }
1304
1305 // It takes a region that's not empty (i.e., it has at least one
1306 // live object in it and sets its corresponding bit on the region
1307 // bitmap to 1. If the region is "starts humongous" it will also set
1308 // to 1 the bits on the region bitmap that correspond to its
1309 // associated "continues humongous" regions.
1310 void set_bit_for_region(HeapRegion* hr) {
1311 assert(!hr->continuesHumongous(), "should have filtered those out");
1312
1313 size_t index = hr->hrs_index();
1314 if (!hr->startsHumongous()) {
1315 // Normal (non-humongous) case: just set the bit.
1316 _region_bm->par_at_put((BitMap::idx_t) index, true);
1317 } else {
1318 // Starts humongous case: calculate how many regions are part of
1319 // this humongous region and then set the bit range. It might
1320 // have been a bit more efficient to look at the object that
1321 // spans these humongous regions to calculate their number from
1322 // the object's size. However, it's a good idea to calculate
1323 // this based on the metadata itself, and not the region
1324 // contents, so that this code is not aware of what goes into
1325 // the humongous regions (in case this changes in the future).
1326 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1327 size_t end_index = index + 1;
1328 while (end_index < g1h->n_regions()) {
1329 HeapRegion* chr = g1h->region_at(end_index);
1330 if (!chr->continuesHumongous()) break;
1331 end_index += 1;
1332 }
1333 _region_bm->par_at_put_range((BitMap::idx_t) index,
1334 (BitMap::idx_t) end_index, true);
1335 }
1336 }
1337
1338 bool doHeapRegion(HeapRegion* hr) {
1339 if (!_final && _regions_done == 0) {
1340 _start_vtime_sec = os::elapsedVTime();
1341 }
1342
1343 if (hr->continuesHumongous()) {
1344 // We will ignore these here and process them when their
1345 // associated "starts humongous" region is processed (see
1346 // set_bit_for_heap_region()). Note that we cannot rely on their
1347 // associated "starts humongous" region to have their bit set to
1348 // 1 since, due to the region chunking in the parallel region
1349 // iteration, a "continues humongous" region might be visited
1350 // before its associated "starts humongous".
1351 return false;
1352 }
1353
1354 HeapWord* nextTop = hr->next_top_at_mark_start();
1355 HeapWord* start = hr->top_at_conc_mark_count();
1356 assert(hr->bottom() <= start && start <= hr->end() &&
1357 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1358 start <= nextTop,
1359 "Preconditions.");
1360 // Otherwise, record the number of word's we'll examine.
1361 size_t words_done = (nextTop - start);
1362 // Find the first marked object at or after "start".
1363 start = _bm->getNextMarkedWordAddress(start, nextTop);
1364 size_t marked_bytes = 0;
1365
1366 // Below, the term "card num" means the result of shifting an address
1367 // by the card shift -- address 0 corresponds to card number 0. One
1368 // must subtract the card num of the bottom of the heap to obtain a
1369 // card table index.
1370 // The first card num of the sequence of live cards currently being
1371 // constructed. -1 ==> no sequence.
1372 intptr_t start_card_num = -1;
1373 // The last card num of the sequence of live cards currently being
1374 // constructed. -1 ==> no sequence.
1375 intptr_t last_card_num = -1;
1376
1377 while (start < nextTop) {
1378 if (_yield && _cm->do_yield_check()) {
1379 // We yielded. It might be for a full collection, in which case
1380 // all bets are off; terminate the traversal.
1381 if (_cm->has_aborted()) {
1382 _changed = false;
1383 return true;
1384 } else {
1385 // Otherwise, it might be a collection pause, and the region
1386 // we're looking at might be in the collection set. We'll
1387 // abandon this region.
1388 return false;
1389 }
1390 }
1391 oop obj = oop(start);
1392 int obj_sz = obj->size();
1393 // The card num of the start of the current object.
1394 intptr_t obj_card_num =
1395 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1396
1397 HeapWord* obj_last = start + obj_sz - 1;
1398 intptr_t obj_last_card_num =
1399 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1400
1401 if (obj_card_num != last_card_num) {
1402 if (start_card_num == -1) {
1403 assert(last_card_num == -1, "Both or neither.");
1404 start_card_num = obj_card_num;
1405 } else {
1406 assert(last_card_num != -1, "Both or neither.");
1407 assert(obj_card_num >= last_card_num, "Inv");
1408 if ((obj_card_num - last_card_num) > 1) {
1409 // Mark the last run, and start a new one.
1410 mark_card_num_range(start_card_num, last_card_num);
1411 start_card_num = obj_card_num;
1412 }
1413 }
1414 #if CARD_BM_TEST_MODE
1415 /*
1416 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1417 obj_card_num - _bottom_card_num,
1418 obj_last_card_num - _bottom_card_num);
1419 */
1420 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1421 _card_bm->par_at_put(j - _bottom_card_num, 1);
1422 }
1423 #endif
1424 }
1425 // In any case, we set the last card num.
1426 last_card_num = obj_last_card_num;
1427
1428 marked_bytes += (size_t)obj_sz * HeapWordSize;
1429 // Find the next marked object after this one.
1430 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1431 _changed = true;
1432 }
1433 // Handle the last range, if any.
1434 if (start_card_num != -1) {
1435 mark_card_num_range(start_card_num, last_card_num);
1436 }
1437 if (_final) {
1438 // Mark the allocated-since-marking portion...
1439 HeapWord* tp = hr->top();
1440 if (nextTop < tp) {
1441 start_card_num =
1442 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1443 last_card_num =
1444 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1445 mark_card_num_range(start_card_num, last_card_num);
1446 // This definitely means the region has live objects.
1447 set_bit_for_region(hr);
1448 }
1449 }
1450
1451 hr->add_to_marked_bytes(marked_bytes);
1452 // Update the live region bitmap.
1453 if (marked_bytes > 0) {
1454 set_bit_for_region(hr);
1455 }
1456 hr->set_top_at_conc_mark_count(nextTop);
1457 _tot_live += hr->next_live_bytes();
1458 _tot_used += hr->used();
1459 _words_done = words_done;
1460
1461 if (!_final) {
1462 ++_regions_done;
1463 if (_regions_done % 10 == 0) {
1464 double end_vtime_sec = os::elapsedVTime();
1465 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1466 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1467 jlong sleep_time_ms =
1468 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1469 os::sleep(Thread::current(), sleep_time_ms, false);
1470 _start_vtime_sec = end_vtime_sec;
1471 }
1472 }
1473 }
1474
1475 return false;
1476 }
1477
1478 bool changed() { return _changed; }
1479 void reset() { _changed = false; _words_done = 0; }
1480 void no_yield() { _yield = false; }
1481 size_t words_done() { return _words_done; }
1482 size_t tot_live() { return _tot_live; }
1483 size_t tot_used() { return _tot_used; }
1484 };
1485
1486
1487 void ConcurrentMark::calcDesiredRegions() {
1488 _region_bm.clear();
1489 _card_bm.clear();
1490 CalcLiveObjectsClosure calccl(false /*final*/,
1491 nextMarkBitMap(), this,
1492 &_region_bm, &_card_bm);
1493 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1494 g1h->heap_region_iterate(&calccl);
1495
1496 do {
1497 calccl.reset();
1498 g1h->heap_region_iterate(&calccl);
1499 } while (calccl.changed());
1500 }
1501
1502 class G1ParFinalCountTask: public AbstractGangTask {
1503 protected:
1504 G1CollectedHeap* _g1h;
1505 CMBitMap* _bm;
1506 size_t _n_workers;
1507 size_t *_live_bytes;
1508 size_t *_used_bytes;
1509 BitMap* _region_bm;
1510 BitMap* _card_bm;
1511 public:
1512 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1513 BitMap* region_bm, BitMap* card_bm)
1514 : AbstractGangTask("G1 final counting"), _g1h(g1h),
1515 _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
1516 _n_workers(0)
1517 {
1518 // Use the value already set as the number of active threads
1519 // in the call to run_task(). Needed for the allocation of
1520 // _live_bytes and _used_bytes.
1521 if (G1CollectedHeap::use_parallel_gc_threads()) {
1522 assert( _g1h->workers()->active_workers() > 0,
1523 "Should have been previously set");
1524 _n_workers = _g1h->workers()->active_workers();
1525 } else {
1526 _n_workers = 1;
1527 }
1528
1529 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1530 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1531 }
1532
1533 ~G1ParFinalCountTask() {
1534 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1535 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1536 }
1537
1538 void work(int i) {
1539 CalcLiveObjectsClosure calccl(true /*final*/,
1540 _bm, _g1h->concurrent_mark(),
1541 _region_bm, _card_bm);
1542 calccl.no_yield();
1543 if (G1CollectedHeap::use_parallel_gc_threads()) {
1544 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1545 (int) _n_workers,
1546 HeapRegion::FinalCountClaimValue);
1547 } else {
1548 _g1h->heap_region_iterate(&calccl);
1549 }
1550 assert(calccl.complete(), "Shouldn't have yielded!");
1551
1552 assert((size_t) i < _n_workers, "invariant");
1553 _live_bytes[i] = calccl.tot_live();
1554 _used_bytes[i] = calccl.tot_used();
1555 }
1556 size_t live_bytes() {
1557 size_t live_bytes = 0;
1558 for (size_t i = 0; i < _n_workers; ++i)
1559 live_bytes += _live_bytes[i];
1560 return live_bytes;
1561 }
1562 size_t used_bytes() {
1563 size_t used_bytes = 0;
1564 for (size_t i = 0; i < _n_workers; ++i)
1565 used_bytes += _used_bytes[i];
1566 return used_bytes;
1567 }
1568 };
1569
1570 class G1ParNoteEndTask;
1571
1572 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1573 G1CollectedHeap* _g1;
1574 int _worker_num;
1575 size_t _max_live_bytes;
1576 size_t _regions_claimed;
1577 size_t _freed_bytes;
1578 FreeRegionList* _local_cleanup_list;
1579 OldRegionSet* _old_proxy_set;
1580 HumongousRegionSet* _humongous_proxy_set;
1581 HRRSCleanupTask* _hrrs_cleanup_task;
1749
1750 HRSPhaseSetter x(HRSPhaseCleanup);
1751 g1h->verify_region_sets_optional();
1752
1753 if (VerifyDuringGC) {
1754 HandleMark hm; // handle scope
1755 gclog_or_tty->print(" VerifyDuringGC:(before)");
1756 Universe::heap()->prepare_for_verify();
1757 Universe::verify(/* allow dirty */ true,
1758 /* silent */ false,
1759 /* option */ VerifyOption_G1UsePrevMarking);
1760 }
1761
1762 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1763 g1p->record_concurrent_mark_cleanup_start();
1764
1765 double start = os::elapsedTime();
1766
1767 HeapRegionRemSet::reset_for_cleanup_tasks();
1768
1769 size_t n_workers;
1770
1771 // Do counting once more with the world stopped for good measure.
1772 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1773 &_region_bm, &_card_bm);
1774 if (G1CollectedHeap::use_parallel_gc_threads()) {
1775 assert(g1h->check_heap_region_claim_values(
1776 HeapRegion::InitialClaimValue),
1777 "sanity check");
1778
1779 g1h->set_par_threads();
1780 n_workers = g1h->n_par_threads();
1781 assert(g1h->n_par_threads() == (int) n_workers,
1782 "Should not have been reset");
1783 g1h->workers()->run_task(&g1_par_count_task);
1784 // Done with the parallel phase so reset to 0.
1785 g1h->set_par_threads(0);
1786
1787 assert(g1h->check_heap_region_claim_values(
1788 HeapRegion::FinalCountClaimValue),
1789 "sanity check");
1790 } else {
1791 n_workers = 1;
1792 g1_par_count_task.work(0);
1793 }
1794
1795 size_t known_garbage_bytes =
1796 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1797 g1p->set_known_garbage_bytes(known_garbage_bytes);
1798
1799 size_t start_used_bytes = g1h->used();
1800 _at_least_one_mark_complete = true;
1801 g1h->set_marking_complete();
1802
1803 ergo_verbose4(ErgoConcCycles,
1804 "finish cleanup",
1805 ergo_format_byte("occupancy")
1806 ergo_format_byte("capacity")
1807 ergo_format_byte_perc("known garbage"),
1808 start_used_bytes, g1h->capacity(),
1809 known_garbage_bytes,
1810 ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1811
1812 double count_end = os::elapsedTime();
1813 double this_final_counting_time = (count_end - start);
1814 if (G1PrintParCleanupStats) {
1967 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1968 os::sleep(Thread::current(), (jlong) 1, false);
1969 }
1970 }
1971 }
1972 }
1973 assert(tmp_free_list.is_empty(), "post-condition");
1974 }
1975
// Support closures for reference processing in G1
1977
1978 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1979 HeapWord* addr = (HeapWord*)obj;
1980 return addr != NULL &&
1981 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1982 }
1983
1984 class G1CMKeepAliveClosure: public OopClosure {
1985 G1CollectedHeap* _g1;
1986 ConcurrentMark* _cm;
1987 CMBitMap* _bitMap;
1988 public:
1989 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1990 CMBitMap* bitMap) :
1991 _g1(g1), _cm(cm),
1992 _bitMap(bitMap) {}
1993
1994 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1995 virtual void do_oop( oop* p) { do_oop_work(p); }
1996
1997 template <class T> void do_oop_work(T* p) {
1998 oop obj = oopDesc::load_decode_heap_oop(p);
1999 HeapWord* addr = (HeapWord*)obj;
2000
2001 if (_cm->verbose_high()) {
2002 gclog_or_tty->print_cr("\t[0] we're looking at location "
2003 "*"PTR_FORMAT" = "PTR_FORMAT,
2004 p, (void*) obj);
2005 }
2006
2007 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2008 _bitMap->mark(addr);
2009 _cm->mark_stack_push(obj);
2010 }
2011 }
2012 };
2013
2014 class G1CMDrainMarkingStackClosure: public VoidClosure {
2015 CMMarkStack* _markStack;
2016 CMBitMap* _bitMap;
2017 G1CMKeepAliveClosure* _oopClosure;
2018 public:
2019 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2020 G1CMKeepAliveClosure* oopClosure) :
2021 _bitMap(bitMap),
2022 _markStack(markStack),
2023 _oopClosure(oopClosure)
2024 {}
2025
2026 void do_void() {
2027 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2028 }
2029 };
2030
2031 // 'Keep Alive' closure used by parallel reference processing.
2032 // An instance of this closure is used in the parallel reference processing
2033 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2034 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2035 // placed on to discovered ref lists once so we can mark and push with no
2036 // need to check whether the object has already been marked. Using the
2037 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2038 // operating on the global mark stack. This means that an individual
2039 // worker would be doing lock-free pushes while it processes its own
2040 // discovered ref list followed by drain call. If the discovered ref lists
2041 // are unbalanced then this could cause interference with the other
2042 // workers. Using a CMTask (and its embedded local data structures)
2043 // avoids that potential interference.
2044 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2045 ConcurrentMark* _cm;
2046 CMTask* _task;
2047 int _ref_counter_limit;
2226 G1CMIsAliveClosure g1_is_alive(g1h);
2227
2228 // Inner scope to exclude the cleaning of the string and symbol
2229 // tables from the displayed time.
2230 {
2231 bool verbose = PrintGC && PrintGCDetails;
2232 if (verbose) {
2233 gclog_or_tty->put(' ');
2234 }
2235 TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2236
2237 ReferenceProcessor* rp = g1h->ref_processor_cm();
2238
2239 // See the comment in G1CollectedHeap::ref_processing_init()
2240 // about how reference processing currently works in G1.
2241
2242 // Process weak references.
2243 rp->setup_policy(clear_all_soft_refs);
2244 assert(_markStack.isEmpty(), "mark stack should be empty");
2245
2246 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2247 G1CMDrainMarkingStackClosure
2248 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2249
2250 // We use the work gang from the G1CollectedHeap and we utilize all
2251 // the worker threads.
2252 int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
2253 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2254
2255 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2256 g1h->workers(), active_workers);
2257
2258 if (rp->processing_is_mt()) {
2259 // Set the degree of MT here. If the discovery is done MT, there
2260 // may have been a different number of threads doing the discovery
2261 // and a different number of discovered lists may have Ref objects.
2262 // That is OK as long as the Reference lists are balanced (see
2263 // balance_all_queues() and balance_queues()).
2264 rp->set_active_mt_degree(active_workers);
2265
2266 rp->process_discovered_references(&g1_is_alive,
2267 &g1_keep_alive,
2268 &g1_drain_mark_stack,
2606 out->cr();
2607
2608 gclog_or_tty->print_cr(" done");
2609 gclog_or_tty->flush();
2610 }
2611
2612 #endif // PRODUCT
2613
2614 // This note is for drainAllSATBBuffers and the code in between.
2615 // In the future we could reuse a task to do this work during an
2616 // evacuation pause (since now tasks are not active and can be claimed
2617 // during an evacuation pause). This was a late change to the code and
2618 // is currently not being taken advantage of.
2619
2620 class CMGlobalObjectClosure : public ObjectClosure {
2621 private:
2622 ConcurrentMark* _cm;
2623
2624 public:
2625 void do_object(oop obj) {
2626 _cm->deal_with_reference(obj);
2627 }
2628
2629 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2630 };
2631
// Grays the given object: marks it in the next marking bitmap and, if
// its address lies below the global finger (i.e. region scanning has
// already passed it), pushes it on the global mark stack so that it is
// still scanned for references.
void ConcurrentMark::deal_with_reference(oop obj) {
  if (verbose_high()) {
    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
                           (void*) obj);
  }

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  // Objects outside the G1 reserved space are not covered by the
  // marking bitmaps; ignore them here.
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "null check is implicit");
    if (!_nextMarkBitMap->isMarked(objAddr)) {
      // Only get the containing region if the object is not marked on the
      // bitmap (otherwise, it's a waste of time since we won't do
      // anything with it).
      HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
      // Objects allocated since the start of the current marking need no
      // explicit mark; skip them.
      if (!hr->obj_allocated_since_next_marking(obj)) {
        if (verbose_high()) {
          gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
                                 "marked", (void*) obj);
        }

        // we need to mark it first
        if (_nextMarkBitMap->parMark(objAddr)) {
          // No OrderAccess:store_load() is needed. It is implicit in the
          // CAS done in parMark(objAddr) above
          // parMark() returns true only for the thread that actually set
          // the bit, so at most one thread attempts the push below.
          HeapWord* finger = _finger;
          if (objAddr < finger) {
            if (verbose_high()) {
              gclog_or_tty->print_cr("[global] below the global finger "
                                     "("PTR_FORMAT"), pushing it", finger);
            }
            if (!mark_stack_push(obj)) {
              if (verbose_low()) {
                gclog_or_tty->print_cr("[global] global stack overflow during "
                                       "deal_with_reference");
              }
              // NOTE(review): overflow is only logged here; presumably
              // mark_stack_push() itself records the overflow so that
              // marking is later restarted -- confirm.
            }
          }
        }
      }
    }
  }
}
2675
2676 void ConcurrentMark::drainAllSATBBuffers() {
2681 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2682 if (verbose_medium()) {
2683 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2684 }
2685 }
2686
2687 // no need to check whether we should do this, as this is only
2688 // called during an evacuation pause
2689 satb_mq_set.iterate_closure_all_threads();
2690
2691 satb_mq_set.set_closure(NULL);
2692 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2693 }
2694
2695 void ConcurrentMark::markPrev(oop p) {
2696 // Note we are overriding the read-only view of the prev map here, via
2697 // the cast.
2698 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2699 }
2700
2701 void ConcurrentMark::clear(oop p) {
2702 assert(p != NULL && p->is_oop(), "expected an oop");
2703 HeapWord* addr = (HeapWord*)p;
2704 assert(addr >= _nextMarkBitMap->startWord() ||
2705 addr < _nextMarkBitMap->endWord(), "in a region");
2706
2707 _nextMarkBitMap->clear(addr);
2708 }
2709
2710 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2711 // Note we are overriding the read-only view of the prev map here, via
2712 // the cast.
2713 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2714 _nextMarkBitMap->clearRange(mr);
2715 }
2716
2717 HeapRegion*
2718 ConcurrentMark::claim_region(int task_num) {
2719 // "checkpoint" the finger
2720 HeapWord* finger = _finger;
2721
2881 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2882 _markStack.setEmpty();
2883 _markStack.clear_overflow();
2884 _regionStack.setEmpty();
2885 _regionStack.clear_overflow();
2886 if (clear_overflow) {
2887 clear_has_overflown();
2888 } else {
2889 assert(has_overflown(), "pre-condition");
2890 }
2891 _finger = _heap_start;
2892
2893 for (int i = 0; i < (int)_max_task_num; ++i) {
2894 OopTaskQueue* queue = _task_queues->queue(i);
2895 queue->set_empty();
2896 // Clear any partial regions from the CMTasks
2897 _tasks[i]->clear_aborted_region();
2898 }
2899 }
2900
2901 void ConcurrentMark::print_stats() {
2902 if (verbose_stats()) {
2903 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2904 for (size_t i = 0; i < _active_tasks; ++i) {
2905 _tasks[i]->print_stats();
2906 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2907 }
2908 }
2909 }
2910
2911 // Closures used by ConcurrentMark::complete_marking_in_collection_set().
2912
2913 class CSetMarkOopClosure: public OopClosure {
2914 friend class CSetMarkBitMapClosure;
2915
2916 G1CollectedHeap* _g1h;
2917 CMBitMap* _bm;
2918 ConcurrentMark* _cm;
2919 oop* _ms;
2920 jint* _array_ind_stack;
2921 int _ms_size;
2922 int _ms_ind;
2923 int _array_increment;
2924 int _worker_i;
2925
2926 bool push(oop obj, int arr_ind = 0) {
2927 if (_ms_ind == _ms_size) {
2928 gclog_or_tty->print_cr("Mark stack is full.");
2929 return false;
2930 }
2931 _ms[_ms_ind] = obj;
2932 if (obj->is_objArray()) {
2933 _array_ind_stack[_ms_ind] = arr_ind;
2934 }
2935 _ms_ind++;
2936 return true;
2937 }
2957 if (next_arr_ind < len) {
2958 push(obj, next_arr_ind);
2959 }
2960 // Now process this portion of this one.
2961 int lim = MIN2(next_arr_ind, len);
2962 for (int j = arr_ind; j < lim; j++) {
2963 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2964 }
2965 } else {
2966 obj->oop_iterate(this);
2967 }
2968 if (abort()) return false;
2969 }
2970 return true;
2971 }
2972
2973 public:
2974 CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
2975 _g1h(G1CollectedHeap::heap()),
2976 _cm(cm),
2977 _bm(cm->nextMarkBitMap()),
2978 _ms_size(ms_size), _ms_ind(0),
2979 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2980 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2981 _array_increment(MAX2(ms_size/8, 16)),
2982 _worker_i(worker_i) { }
2983
  ~CSetMarkOopClosure() {
    // Release the C-heap allocated mark stack and array-index stack.
    FREE_C_HEAP_ARRAY(oop, _ms);
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }
2988
  // Narrow and full-width oop variants both delegate to the template
  // worker below.
  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop( oop* p) { do_oop_work(p); }
2991
  // Processes one reference slot: if the referenced (possibly already
  // forwarded) object is in the collection set and not yet marked, mark
  // it in the bitmap and push it on the local stack; objects outside
  // the collection set are handed to the global ConcurrentMark to be
  // grayed.
  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (oopDesc::is_null(heap_oop)) return;
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked. So follow the forwarding pointer. Note that
      // this does the right thing for self-forwarding pointers in the
      // evacuation failure case.
      obj = obj->forwardee();
    }
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (hr != NULL) {
      if (hr->in_collection_set()) {
        // NOTE(review): is_obj_ill() presumably means "not yet known to
        // be live w.r.t. the current marking" -- confirm against
        // G1CollectedHeap before relying on this reading.
        if (_g1h->is_obj_ill(obj)) {
          // parMark() returns true only for the thread that actually set
          // the bit, so the object is pushed at most once.
          if (_bm->parMark((HeapWord*)obj)) {
            if (!push(obj)) {
              gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
              set_abort();
            }
          }
        }
      } else {
        // Outside the collection set; we need to gray it
        _cm->deal_with_reference(obj);
      }
    }
  }
3020 };
3021
3022 class CSetMarkBitMapClosure: public BitMapClosure {
3023 G1CollectedHeap* _g1h;
3024 CMBitMap* _bitMap;
3025 ConcurrentMark* _cm;
3026 CSetMarkOopClosure _oop_cl;
3027 int _worker_i;
3028
3029 public:
3030 CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
3031 _g1h(G1CollectedHeap::heap()),
3032 _bitMap(cm->nextMarkBitMap()),
3033 _oop_cl(cm, ms_size, worker_i),
3034 _worker_i(worker_i) { }
3035
3036 bool do_bit(size_t offset) {
3272 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3273
3274 }
3275 print_ms_time_info(" ", "cleanups", _cleanup_times);
3276 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3277 _total_counting_time,
3278 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3279 (double)_cleanup_times.num()
3280 : 0.0));
3281 if (G1ScrubRemSets) {
3282 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3283 _total_rs_scrub_time,
3284 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3285 (double)_cleanup_times.num()
3286 : 0.0));
3287 }
3288 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3289 (_init_times.sum() + _remark_times.sum() +
3290 _cleanup_times.sum())/1000.0);
3291 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3292 "(%8.2f s marking, %8.2f s counting).",
3293 cmThread()->vtime_accum(),
3294 cmThread()->vtime_mark_accum(),
3295 cmThread()->vtime_count_accum());
3296 }
3297
3298 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3299 _parallel_workers->print_worker_threads_on(st);
3300 }
3301
3302 // Closures
3303 // XXX: there seems to be a lot of code duplication here;
3304 // should refactor and consolidate the shared code.
3305
// NOTE: the original comment here referred to "the CMS generation" and
// "the CMS bit map" -- stale terminology apparently carried over from
// the CMS collector; this file is part of G1. The code below marks refs
// in the (G1) marking bit map at the first checkpoint.
3308
3309 // We take a break if someone is trying to stop the world.
3310 bool ConcurrentMark::do_yield_check(int worker_i) {
3311 if (should_yield()) {
3312 if (worker_i == 0) {
3313 _g1h->g1_policy()->record_concurrent_pause();
3314 }
3315 cmThread()->yield();
4570 "elapsed = %1.2lfms <<<<<<<<<<",
4571 _task_id, _time_target_ms, elapsed_time_ms);
4572 if (_cm->has_aborted()) {
4573 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4574 _task_id);
4575 }
4576 }
4577 } else {
4578 if (_cm->verbose_low()) {
4579 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4580 "elapsed = %1.2lfms <<<<<<<<<<",
4581 _task_id, _time_target_ms, elapsed_time_ms);
4582 }
4583 }
4584
4585 _claimed = false;
4586 }
4587
// Constructs a marking task for the given task id.
// Note: _nextMarkBitMap is deliberately left NULL here; it is supplied
// later via reset() (see ConcurrentMark::reset()).
CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  // Per-task clock counters, maintained only in statistics builds.
  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking = 0 );

  // Seed the marking-step duration history with 0.5 -- presumably an
  // initial estimate (in ms) so early predictions have a value to work
  // from; confirm the intended unit/meaning before changing.
  _marking_step_diffs_ms.add(0.5);
}
4608
4609 // These are formatting macros that are used below to ensure
4610 // consistent formatting. The *_H_* versions are used to format the
4611 // header for a particular value and they should be kept consistent
4612 // with the corresponding macro. Also note that most of the macros add
4613 // the necessary white space (as a prefix) which makes them a bit
4614 // easier to compose.
4615
4616 // All the output lines are prefixed with this string to be able to
4617 // identify them easily in a large log file.
4618 #define G1PPRL_LINE_PREFIX "###"
4619
|
461 size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
462 return MAX2((n_par_threads + 2) / 4, (size_t)1);
463 }
464
465 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
466 int max_regions) :
467 _markBitMap1(rs, MinObjAlignment - 1),
468 _markBitMap2(rs, MinObjAlignment - 1),
469
470 _parallel_marking_threads(0),
471 _max_parallel_marking_threads(0),
472 _sleep_factor(0.0),
473 _marking_task_overhead(1.0),
474 _cleanup_sleep_factor(0.0),
475 _cleanup_task_overhead(1.0),
476 _cleanup_list("Cleanup List"),
477 _region_bm(max_regions, false /* in_resource_area*/),
478 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
479 CardTableModRefBS::card_shift,
480 false /* in_resource_area*/),
481
482 _prevMarkBitMap(&_markBitMap1),
483 _nextMarkBitMap(&_markBitMap2),
484 _at_least_one_mark_complete(false),
485
486 _markStack(this),
487 _regionStack(),
488 // _finger set in set_non_marking_state
489
490 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
491 // _active_tasks set in set_non_marking_state
492 // _tasks set inside the constructor
493 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
494 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
495
496 _has_overflown(false),
497 _concurrent(false),
498 _has_aborted(false),
499 _restart_for_overflow(false),
500 _concurrent_marking_in_progress(false),
501 _should_gray_objects(false),
502
503 // _verbose_level set below
504
505 _init_times(),
506 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
507 _cleanup_times(),
508 _total_counting_time(0.0),
509 _total_rs_scrub_time(0.0),
510
511 _parallel_workers(NULL),
512
513 _count_card_bitmaps(NULL),
514 _count_marked_bytes(NULL)
515 {
516 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
517 if (verbose_level < no_verbose) {
518 verbose_level = no_verbose;
519 }
520 if (verbose_level > high_verbose) {
521 verbose_level = high_verbose;
522 }
523 _verbose_level = verbose_level;
524
525 if (verbose_low()) {
526 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
527 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
528 }
529
530 _markStack.allocate(MarkStackSize);
531 _regionStack.allocate(G1MarkRegionStackSize);
532
533 // Create & start a ConcurrentMark thread.
534 _cmThread = new ConcurrentMarkThread(this);
535 assert(cmThread() != NULL, "CM Thread should have been created");
536 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
537
538 _g1h = G1CollectedHeap::heap();
539 assert(CGC_lock != NULL, "Where's the CGC_lock?");
540 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
541 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
542
543 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
544 satb_qs.set_buffer_size(G1SATBBufferSize);
545
546 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
547 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
548
549 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
550 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
551
552 BitMap::idx_t card_bm_size = _card_bm.size();
553
554 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
555 _active_tasks = _max_task_num;
556 for (int i = 0; i < (int) _max_task_num; ++i) {
557 CMTaskQueue* task_queue = new CMTaskQueue();
558 task_queue->initialize();
559 _task_queues->register_queue(i, task_queue);
560
561 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
562 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
563
564 _tasks[i] = new CMTask(i, this,
565 _count_marked_bytes[i],
566 &_count_card_bitmaps[i],
567 task_queue, _task_queues);
568
569 _accum_task_vtime[i] = 0.0;
570 }
571
572 // Calculate the card number for the bottom of the heap. Used
573 // in biasing indexes into the accounting card bitmaps.
574 _heap_bottom_card_num =
575 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
576 CardTableModRefBS::card_shift);
577
578
579 if (ConcGCThreads > ParallelGCThreads) {
580 vm_exit_during_initialization("Can't have more ConcGCThreads "
581 "than ParallelGCThreads.");
582 }
583 if (ParallelGCThreads == 0) {
584 // if we are not running with any parallel GC threads we will not
585 // spawn any marking threads either
586 _parallel_marking_threads = 0;
587 _max_parallel_marking_threads = 0;
588 _sleep_factor = 0.0;
589 _marking_task_overhead = 1.0;
590 } else {
591 if (ConcGCThreads > 0) {
592 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
593 // if both are set
594
595 _parallel_marking_threads = ConcGCThreads;
596 _max_parallel_marking_threads = _parallel_marking_threads;
597 _sleep_factor = 0.0;
598 _marking_task_overhead = 1.0;
682 // do nothing.
683 }
684
685 void ConcurrentMark::reset() {
686 // Starting values for these two. This should be called in a STW
687 // phase. CM will be notified of any future g1_committed expansions
688 // will be at the end of evacuation pauses, when tasks are
689 // inactive.
690 MemRegion committed = _g1h->g1_committed();
691 _heap_start = committed.start();
692 _heap_end = committed.end();
693
694 // Separated the asserts so that we know which one fires.
695 assert(_heap_start != NULL, "heap bounds should look ok");
696 assert(_heap_end != NULL, "heap bounds should look ok");
697 assert(_heap_start < _heap_end, "heap bounds should look ok");
698
699 // reset all the marking data structures and any necessary flags
700 clear_marking_state();
701
702 clear_all_count_data();
703
704 if (verbose_low()) {
705 gclog_or_tty->print_cr("[global] resetting");
706 }
707
708 // We do reset all of them, since different phases will use
709 // different number of active threads. So, it's easiest to have all
710 // of them ready.
711 for (int i = 0; i < (int) _max_task_num; ++i) {
712 _tasks[i]->reset(_nextMarkBitMap);
713 }
714
715 // we need this to make sure that the flag is on during the evac
716 // pause with initial mark piggy-backed
717 set_concurrent_marking_in_progress();
718 }
719
720 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
721 assert(active_tasks <= _max_task_num, "we should not have more");
722
723 _active_tasks = active_tasks;
735 if (concurrent) {
736 set_concurrent_marking_in_progress();
737 } else {
738 // We currently assume that the concurrent flag has been set to
739 // false before we start remark. At this point we should also be
740 // in a STW phase.
741 assert(!concurrent_marking_in_progress(), "invariant");
742 assert(_finger == _heap_end, "only way to get here");
743 update_g1_committed(true);
744 }
745 }
746
void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking: empty stacks/queues and finger reset (via
  // clear_marking_state()), no active tasks, and the
  // concurrent-marking-in-progress flag cleared.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}
754
// This closure is used to mark refs into the G1 heap from external
// roots in the marking bit map. (The original comment said "the CMS
// bit map" -- stale CMS terminology in this G1 file.)
// Called at the first checkpoint.
//
759
760 void ConcurrentMark::clearNextBitmap() {
761 G1CollectedHeap* g1h = G1CollectedHeap::heap();
762 G1CollectorPolicy* g1p = g1h->g1_policy();
763
764 // Make sure that the concurrent mark thread looks to still be in
765 // the current cycle.
766 guarantee(cmThread()->during_cycle(), "invariant");
767
768 // We are finishing up the current cycle by clearing the next
769 // marking bitmap and getting it ready for the next cycle. During
770 // this time no other cycle can start. So, let's make sure that this
771 // is the case.
772 guarantee(!g1h->mark_in_progress(), "invariant");
773
774 // clear the mark bitmap (no grey objects to start with).
951
952 void ForceOverflowSettings::update() {
953 if (_num_remaining > 0) {
954 _num_remaining -= 1;
955 _force = true;
956 } else {
957 _force = false;
958 }
959 }
960
961 bool ForceOverflowSettings::should_force() {
962 if (_force) {
963 _force = false;
964 return true;
965 } else {
966 return false;
967 }
968 }
969 #endif // !PRODUCT
970
971 void ConcurrentMark::grayRoot(oop p, int worker_i) {
972 HeapWord* addr = (HeapWord*) p;
973 // We can't really check against _heap_start and _heap_end, since it
974 // is possible during an evacuation pause with piggy-backed
975 // initial-mark that the committed space is expanded during the
976 // pause without CM observing this change. So the assertions below
977 // is a bit conservative; but better than nothing.
978 assert(_g1h->g1_committed().contains(addr),
979 "address should be within the heap bounds");
980
981 if (!_nextMarkBitMap->isMarked(addr)) {
982 par_mark_and_count(p, worker_i);
983 }
984 }
985
986 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
987 // The objects on the region have already been marked "in bulk" by
988 // the caller. We only need to decide whether to push the region on
989 // the region stack or not.
990
991 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
992 // We're done with marking and waiting for remark. We do not need to
993 // push anything else on the region stack.
994 return;
995 }
996
997 HeapWord* finger = _finger;
998
999 if (verbose_low()) {
1000 gclog_or_tty->print_cr("[global] attempting to push "
1001 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
1002 PTR_FORMAT, mr.start(), mr.end(), finger);
1011 assert(mr.start() <= mr.end(),
1012 "region boundaries should fall within the committed space");
1013 assert(_heap_start <= mr.start(),
1014 "region boundaries should fall within the committed space");
1015 assert(mr.end() <= _heap_end,
1016 "region boundaries should fall within the committed space");
1017 if (verbose_low()) {
1018 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1019 "below the finger, pushing it",
1020 mr.start(), mr.end());
1021 }
1022
1023 if (!region_stack_push_lock_free(mr)) {
1024 if (verbose_low()) {
1025 gclog_or_tty->print_cr("[global] region stack has overflown.");
1026 }
1027 }
1028 }
1029 }
1030
1031 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p, int worker_i) {
1032 // The object is not marked by the caller. We need to at least mark
1033 // it and maybe push in on the stack.
1034
1035 HeapWord* addr = (HeapWord*)p;
1036 if (!_nextMarkBitMap->isMarked(addr)) {
1037 // We definitely need to mark it, irrespective whether we bail out
1038 // because we're done with marking.
1039
1040 if (par_mark_and_count(p, worker_i)) {
1041 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1042 // If we're done with concurrent marking and we're waiting for
1043 // remark, then we're not pushing anything on the stack.
1044 return;
1045 }
1046
1047 // No OrderAccess:store_load() is needed. It is implicit in the
1048 // CAS done in parMark(addr) above
1049 HeapWord* finger = _finger;
1050
1051 if (addr < finger) {
1052 if (!mark_stack_push(oop(addr))) {
1053 if (verbose_low()) {
1054 gclog_or_tty->print_cr("[global] global stack overflow "
1055 "during parMark");
1056 }
1057 }
1058 }
1059 }
1060 }
1222 G1CollectorPolicy* g1p = g1h->g1_policy();
1223 g1p->record_concurrent_mark_remark_start();
1224
1225 double start = os::elapsedTime();
1226
1227 checkpointRootsFinalWork();
1228
1229 double mark_work_end = os::elapsedTime();
1230
1231 weakRefsWork(clear_all_soft_refs);
1232
1233 if (has_overflown()) {
1234 // Oops. We overflowed. Restart concurrent marking.
1235 _restart_for_overflow = true;
1236 // Clear the flag. We do not need it any more.
1237 clear_has_overflown();
1238 if (G1TraceMarkStackOverflow) {
1239 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1240 }
1241 } else {
1242 // Aggregate the per-task counting data that we have accumulated
1243 // while marking.
1244 aggregate_and_clear_count_data();
1245
1246 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1247 // We're done with marking.
1248 // This is the end of the marking cycle, we're expected all
1249 // threads to have SATB queues with active set to true.
1250 satb_mq_set.set_active_all_threads(false, /* new active value */
1251 true /* expected_active */);
1252
1253 if (VerifyDuringGC) {
1254
1255 HandleMark hm; // handle scope
1256 gclog_or_tty->print(" VerifyDuringGC:(after)");
1257 Universe::heap()->prepare_for_verify();
1258 Universe::verify(/* allow dirty */ true,
1259 /* silent */ false,
1260 /* option */ VerifyOption_G1UseNextMarking);
1261 }
1262 assert(!restart_for_overflow(), "sanity");
1263 }
1264
1265 // Reset the marking state if marking completed
1266 if (!restart_for_overflow()) {
1267 set_non_marking_state();
1268 }
1269
1270 #if VERIFY_OBJS_PROCESSED
1271 _scan_obj_cl.objs_processed = 0;
1272 ThreadLocalObjQueue::objs_enqueued = 0;
1273 #endif
1274
1275 // Statistics
1276 double now = os::elapsedTime();
1277 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1278 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1279 _remark_times.add((now - start) * 1000.0);
1280
1281 g1p->record_concurrent_mark_remark_end();
1282 }
1283
1284 #define CARD_BM_TEST_MODE 0
1285
1286 // Used to calculate the # live objects per region
1287 // for verification purposes
1288 class CalcLiveObjectsClosure: public HeapRegionClosure {
1289
1290 CMBitMapRO* _bm;
1291 ConcurrentMark* _cm;
1292 BitMap* _region_bm;
1293 BitMap* _card_bm;
1294
1295 size_t _tot_words_done;
1296 size_t _tot_live;
1297 size_t _tot_used;
1298
1299 size_t _region_marked_bytes;
1300
1301 intptr_t _bottom_card_num;
1302
1303 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1304 BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1305 BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1306
1307 for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1308 #if CARD_BM_TEST_MODE
1309 guarantee(_card_bm->at(i), "Should already be set.");
1310 #else
1311 _card_bm->par_at_put(i, 1);
1312 #endif
1313 }
1314 }
1315
1316 public:
1317 CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1318 BitMap* region_bm, BitMap* card_bm) :
1319 _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1320 _region_marked_bytes(0), _tot_words_done(0),
1321 _tot_live(0), _tot_used(0)
1322 {
1323 _bottom_card_num = cm->heap_bottom_card_num();
1324 }
1325
1326 // It takes a region that's not empty (i.e., it has at least one
1327 // live object in it and sets its corresponding bit on the region
1328 // bitmap to 1. If the region is "starts humongous" it will also set
1329 // to 1 the bits on the region bitmap that correspond to its
1330 // associated "continues humongous" regions.
1331 void set_bit_for_region(HeapRegion* hr) {
1332 assert(!hr->continuesHumongous(), "should have filtered those out");
1333
1334 size_t index = hr->hrs_index();
1335 if (!hr->startsHumongous()) {
1336 // Normal (non-humongous) case: just set the bit.
1337 _region_bm->par_at_put((BitMap::idx_t) index, true);
1338 } else {
1339 // Starts humongous case: calculate how many regions are part of
1340 // this humongous region and then set the bit range.
1341 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1342 HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
1343 size_t end_index = last_hr->hrs_index() + 1;
1344 _region_bm->par_at_put_range((BitMap::idx_t) index,
1345 (BitMap::idx_t) end_index, true);
1346 }
1347 }
1348
1349 bool doHeapRegion(HeapRegion* hr) {
1350
1351 if (hr->continuesHumongous()) {
1352 // We will ignore these here and process them when their
1353 // associated "starts humongous" region is processed (see
1354 // set_bit_for_heap_region()). Note that we cannot rely on their
1355 // associated "starts humongous" region to have their bit set to
1356 // 1 since, due to the region chunking in the parallel region
1357 // iteration, a "continues humongous" region might be visited
1358 // before its associated "starts humongous".
1359 return false;
1360 }
1361
1362 HeapWord* nextTop = hr->next_top_at_mark_start();
1363 HeapWord* start = hr->bottom();
1364
1365 assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1366 "Preconditions.");
1367
1368 // Record the number of word's we'll examine.
1369 size_t words_done = (nextTop - start);
1370
1371 // Find the first marked object at or after "start".
1372 start = _bm->getNextMarkedWordAddress(start, nextTop);
1373
1374 size_t marked_bytes = 0;
1375 _region_marked_bytes = 0;
1376
1377 // Below, the term "card num" means the result of shifting an address
1378 // by the card shift -- address 0 corresponds to card number 0. One
1379 // must subtract the card num of the bottom of the heap to obtain a
1380 // card table index.
1381
1382 // The first card num of the sequence of live cards currently being
1383 // constructed. -1 ==> no sequence.
1384 intptr_t start_card_num = -1;
1385
1386 // The last card num of the sequence of live cards currently being
1387 // constructed. -1 ==> no sequence.
1388 intptr_t last_card_num = -1;
1389
1390 while (start < nextTop) {
1391 oop obj = oop(start);
1392 int obj_sz = obj->size();
1393
1394 // The card num of the start of the current object.
1395 intptr_t obj_card_num =
1396 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1397 HeapWord* obj_last = start + obj_sz - 1;
1398 intptr_t obj_last_card_num =
1399 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1400
1401 if (obj_card_num != last_card_num) {
1402 if (start_card_num == -1) {
1403 assert(last_card_num == -1, "Both or neither.");
1404 start_card_num = obj_card_num;
1405 } else {
1406 assert(last_card_num != -1, "Both or neither.");
1407 assert(obj_card_num >= last_card_num, "Inv");
1408 if ((obj_card_num - last_card_num) > 1) {
1409 // Mark the last run, and start a new one.
1410 mark_card_num_range(start_card_num, last_card_num);
1411 start_card_num = obj_card_num;
1412 }
1413 }
1414 #if CARD_BM_TEST_MODE
1415 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1416 _card_bm->par_at_put(j - _bottom_card_num, 1);
1417 }
1418 #endif // CARD_BM_TEST_MODE
1419 }
1420 // In any case, we set the last card num.
1421 last_card_num = obj_last_card_num;
1422
1423 marked_bytes += (size_t)obj_sz * HeapWordSize;
1424
1425 // Find the next marked object after this one.
1426 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1427 }
1428
1429 // Handle the last range, if any.
1430 if (start_card_num != -1) {
1431 mark_card_num_range(start_card_num, last_card_num);
1432 }
1433
1434 // Mark the allocated-since-marking portion...
1435 HeapWord* top = hr->top();
1436 if (nextTop < top) {
1437 start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1438 last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1439
1440 mark_card_num_range(start_card_num, last_card_num);
1441
1442 // This definitely means the region has live objects.
1443 set_bit_for_region(hr);
1444 }
1445
1446 // Update the live region bitmap.
1447 if (marked_bytes > 0) {
1448 set_bit_for_region(hr);
1449 }
1450
1451 // Set the marked bytes for the current region so that
1452 // it can be queried by a calling verificiation routine
1453 _region_marked_bytes = marked_bytes;
1454
1455 _tot_live += hr->next_live_bytes();
1456 _tot_used += hr->used();
1457 _tot_words_done = words_done;
1458
1459 return false;
1460 }
1461
1462 size_t region_marked_bytes() const { return _region_marked_bytes; }
1463 size_t tot_words_done() const { return _tot_words_done; }
1464 size_t tot_live() const { return _tot_live; }
1465 size_t tot_used() const { return _tot_used; }
1466 };
1467
1468 // Heap region closure used for verifying the counting data
1469 // that was accumulated concurrently and aggregated during
1470 // the remark pause. This closure is applied to the heap
1471 // regions during the STW cleanup pause.
1472
1473 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1474 ConcurrentMark* _cm;
1475 CalcLiveObjectsClosure _calc_cl;
1476 BitMap* _region_bm; // Region BM to be verified
1477 BitMap* _card_bm; // Card BM to be verified
1478 bool _verbose; // verbose output?
1479
1480 BitMap* _exp_region_bm; // Expected Region BM values
1481 BitMap* _exp_card_bm; // Expected card BM values
1482
1483 intptr_t _bottom_card_num; // Used for calculatint bitmap indices
1484
1485 int _failures;
1486
1487 public:
  // The region/card bitmaps passed in are the "actual" data to verify;
  // the exp_* bitmaps receive the expected values recomputed by
  // _calc_cl from the marking bitmap.
  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _cm(cm),
    // _cm is declared before _calc_cl, so it is already initialized
    // when used here.
    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0)
  {
    _bottom_card_num = cm->heap_bottom_card_num();
  }
1502
1503 int failures() const { return _failures; }
1504
  // Verifies the counting data (region liveness bit, marked bytes and
  // card bits) recorded during marking for a single region against the
  // values just recalculated by walking the mark bitmap (_calc_cl).
  // Failures are accumulated into _failures. Always returns false so
  // the heap iteration visits every region.
  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    // Number of verification failures for this region only.
    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Note that the calculated count data could be a subset of the
    // count data that was accumulated during marking. See the comment
    // in G1ParCopyHelper::copy_to_survivor_space for an explanation
    // why.

    // Verify that _top_at_conc_count == ntams
    if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %d: top at conc count incorrect: expected "
                               PTR_FORMAT", actual: "PTR_FORMAT,
                               hr->hrs_index(), hr->next_top_at_mark_start(),
                               hr->top_at_conc_mark_count());
      }
      failures += 1;
    }

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %d: marked bytes mismatch: expected: "
                               SIZE_FORMAT", actual: "SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the expected bit is set and the actual is not set.
    BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %d: region bitmap mismatch: expected: %d, actual: %d",
                               hr->hrs_index(), expected, actual);
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. The set of offsets that have set bits in the expected
    // bitmap should be a subset of the offsets with set bits from the actual
    // calculated card bitmap.
    // Again it's more important that if the expected bit is set then the
    // actual bit be set.
    intptr_t start_card_num =
      intptr_t(uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift);
    intptr_t top_card_num =
      intptr_t(uintptr_t(hr->top()) >> CardTableModRefBS::card_shift);

    // Card numbers are translated to indices relative to the bottom of
    // the heap (see _bottom_card_num in the constructor).
    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
    BitMap::idx_t end_idx = top_card_num - _bottom_card_num;

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %d: card bitmap mismatch at idx %d: expected: %d, actual: %d",
                                 hr->hrs_index(), i, expected, actual);
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      // Dump the region's layout to make the mismatch easier to diagnose.
      gclog_or_tty->print("Region %d: bottom: "PTR_FORMAT", ntams: "
                          PTR_FORMAT", top: "PTR_FORMAT", end: "PTR_FORMAT,
                          hr->hrs_index(), hr->bottom(), hr->next_top_at_mark_start(),
                          hr->top(), hr->end());
      gclog_or_tty->print_cr(", marked_bytes: calc/actual "SIZE_FORMAT"/"SIZE_FORMAT,
                             _calc_cl.region_marked_bytes(),
                             hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
1614 };
1615
1616
1617 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1618 protected:
1619 G1CollectedHeap* _g1h;
1620 ConcurrentMark* _cm;
1621 BitMap* _actual_region_bm;
1622 BitMap* _actual_card_bm;
1623
1624 size_t _n_workers;
1625
1626 BitMap* _expected_region_bm;
1627 BitMap* _expected_card_bm;
1628
1629 int _failures;
1630 bool _verbose;
1631
1632 public:
1633 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1634 BitMap* region_bm, BitMap* card_bm,
1635 BitMap* expected_region_bm, BitMap* expected_card_bm)
1636 : AbstractGangTask("G1 verify final counting"),
1637 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1638 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1639 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1640 _failures(0), _verbose(false),
1641 _n_workers(0)
1642 {
1643 assert(VerifyDuringGC, "don't call this otherwise");
1644
1645 // Use the value already set as the number of active threads
1646 // in the call to run_task().
1647 if (G1CollectedHeap::use_parallel_gc_threads()) {
1648 assert( _g1h->workers()->active_workers() > 0,
1649 "Should have been previously set");
1650 _n_workers = _g1h->workers()->active_workers();
1651 } else {
1652 _n_workers = 1;
1653 }
1654
1655 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1656 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1657
1658 _verbose = _cm->verbose_medium();
1659 }
1660
1661 void work(int worker_i) {
1662 assert((size_t) worker_i < _n_workers, "invariant");
1663
1664 VerifyLiveObjectDataHRClosure verify_cl(_cm,
1665 _actual_region_bm, _actual_card_bm,
1666 _expected_region_bm,
1667 _expected_card_bm,
1668 _verbose);
1669
1670 if (G1CollectedHeap::use_parallel_gc_threads()) {
1671 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1672 worker_i,
1673 (int) _n_workers,
1674 HeapRegion::VerifyCountClaimValue);
1675 } else {
1676 _g1h->heap_region_iterate(&verify_cl);
1677 }
1678
1679 Atomic::add(verify_cl.failures(), &_failures);
1680 }
1681
1682 int failures() const { return _failures; }
1683 };
1684
1685 // Final update of count data (during cleanup).
1686 // Adds [top_at_count, NTAMS) to the marked bytes for each
1687 // region. Sets the bits in the card bitmap corresponding
1688 // to the interval [top_at_count, top], and sets the
1689 // liveness bit for each region containing live data
1690 // in the region bitmap.
1691
class FinalCountDataUpdateClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  BitMap* _region_bm;          // global region liveness bitmap
  BitMap* _card_bm;            // global card liveness bitmap
  intptr_t _bottom_card_num;   // card number of the bottom of the heap

  // Running totals across all regions visited by this closure.
  size_t _total_live_bytes;
  size_t _total_used_bytes;
  size_t _total_words_done;

  // Sets the bits for the inclusive card-number range
  // [start_card_num, last_card_num] in the card bitmap.
  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
    BitMap::idx_t last_idx = last_card_num - _bottom_card_num;

    // Inclusive bit range [start_idx, last_idx]. par_at_put_range
    // is exclusive so we have to also set the bit for last_idx.
    // Passing last_idx+1 to par_at_put_range would work in
    // most cases but could trip an OOB assertion.

    if ((last_idx - start_idx) > 0) {
      _card_bm->par_at_put_range(start_idx, last_idx, true);
    }
    _card_bm->par_set_bit(last_idx);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_set_bit((BitMap::idx_t) index);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
      size_t end_index = last_hr->hrs_index() + 1;
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

 public:
  FinalCountDataUpdateClosure(ConcurrentMark* cm,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
    _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0)
  {
    _bottom_card_num = cm->heap_bottom_card_num();
  }

  // Brings the counting data for hr up to date at cleanup time:
  // accounts objects allocated between the remark and cleanup pauses
  // (the [top_at_conc_mark_count, ntams) interval), sets the card bits
  // for [start, top], and sets the region's liveness bit if it has any
  // live data. Always returns false to continue the iteration.
  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->top_at_conc_mark_count();
    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top = hr->top();

    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    size_t words_done = ntams - hr->bottom();

    intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
    intptr_t last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);


    if (start < ntams) {
      // Region was changed between remark and cleanup pauses
      // We need to add (ntams - start) to the marked bytes
      // for this region, and set bits for the range
      // [ card_num(start), card_num(ntams) ) in the
      // card bitmap.
      size_t live_bytes = (ntams - start) * HeapWordSize;
      hr->add_to_marked_bytes(live_bytes);

      // Record the new top at conc count
      hr->set_top_at_conc_mark_count(ntams);

      // The setting of the bits card bitmap takes place below
    }

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Now set the bits for [start, top]
    mark_card_num_range(start_card_num, last_card_num);

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    _total_words_done += words_done;
    _total_used_bytes += hr->used();
    _total_live_bytes += hr->next_marked_bytes();

    return false;
  }

  size_t total_words_done() const { return _total_words_done; }
  size_t total_live_bytes() const { return _total_live_bytes; }
  size_t total_used_bytes() const { return _total_used_bytes; }
};
1816
// Gang task that performs the final update of the counting data
// during cleanup (see FinalCountDataUpdateClosure) and gathers
// per-worker live/used byte totals.
class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  size_t _n_workers;

  // Per-worker totals; allocated in the constructor (sized by
  // _n_workers), filled in by work(), freed in the destructor, and
  // summed by live_bytes()/used_bytes().
  size_t *_live_bytes;
  size_t *_used_bytes;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0)
  {
    // Use the value already set as the number of active threads
    // in the call to run_task(). Needed for the allocation of
    // _live_bytes and _used_bytes.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  // Updates the counting data for this worker's chunk of regions and
  // records the worker's live/used totals in its slot.
  void work(int worker_i) {
    assert((size_t) worker_i < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_cm,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_i,
                                            (int) _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }

    _live_bytes[worker_i] = final_update_cl.total_live_bytes();
    _used_bytes[worker_i] = final_update_cl.total_used_bytes();
  }

  // Sum of the live bytes recorded by all workers. Only meaningful
  // after the task has run.
  size_t live_bytes()  {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }

  // Sum of the used bytes recorded by all workers. Only meaningful
  // after the task has run.
  size_t used_bytes()  {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};
1890
1891 class G1ParNoteEndTask;
1892
1893 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1894 G1CollectedHeap* _g1;
1895 int _worker_num;
1896 size_t _max_live_bytes;
1897 size_t _regions_claimed;
1898 size_t _freed_bytes;
1899 FreeRegionList* _local_cleanup_list;
1900 OldRegionSet* _old_proxy_set;
1901 HumongousRegionSet* _humongous_proxy_set;
1902 HRRSCleanupTask* _hrrs_cleanup_task;
2070
2071 HRSPhaseSetter x(HRSPhaseCleanup);
2072 g1h->verify_region_sets_optional();
2073
2074 if (VerifyDuringGC) {
2075 HandleMark hm; // handle scope
2076 gclog_or_tty->print(" VerifyDuringGC:(before)");
2077 Universe::heap()->prepare_for_verify();
2078 Universe::verify(/* allow dirty */ true,
2079 /* silent */ false,
2080 /* option */ VerifyOption_G1UsePrevMarking);
2081 }
2082
2083 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2084 g1p->record_concurrent_mark_cleanup_start();
2085
2086 double start = os::elapsedTime();
2087
2088 HeapRegionRemSet::reset_for_cleanup_tasks();
2089
2090 // Clear the global region bitmap - it will be filled as part
2091 // of the final counting task.
2092 _region_bm.clear();
2093
2094 size_t n_workers;
2095
2096 // Do counting once more with the world stopped for good measure.
2097 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2098
2099 if (G1CollectedHeap::use_parallel_gc_threads()) {
2100 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2101 "sanity check");
2102
2103 g1h->set_par_threads();
2104 n_workers = g1h->n_par_threads();
2105 assert(g1h->n_par_threads() == (int) n_workers,
2106 "Should not have been reset");
2107 g1h->workers()->run_task(&g1_par_count_task);
2108 // Done with the parallel phase so reset to 0.
2109 g1h->set_par_threads(0);
2110
2111 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2112 "sanity check");
2113 } else {
2114 n_workers = 1;
2115 g1_par_count_task.work(0);
2116 }
2117
2118 if (VerifyDuringGC) {
2119 // Verify that the counting data accumulated during marking matches
2120 // that calculated by walking the marking bitmap.
2121
2122 // Bitmaps to hold expected values
2123 BitMap expected_region_bm(_region_bm.size(), false);
2124 BitMap expected_card_bm(_card_bm.size(), false);
2125
2126 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2127 &_region_bm,
2128 &_card_bm,
2129 &expected_region_bm,
2130 &expected_card_bm);
2131
2132 if (G1CollectedHeap::use_parallel_gc_threads()) {
2133 g1h->set_par_threads((int)n_workers);
2134 g1h->workers()->run_task(&g1_par_verify_task);
2135 // Done with the parallel phase so reset to 0.
2136 g1h->set_par_threads(0);
2137
2138 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2139 "sanity check");
2140 } else {
2141 g1_par_verify_task.work(0);
2142 }
2143
2144 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2145 }
2146
2147 size_t known_garbage_bytes =
2148 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
2149 g1p->set_known_garbage_bytes(known_garbage_bytes);
2150
2151 size_t start_used_bytes = g1h->used();
2152 _at_least_one_mark_complete = true;
2153 g1h->set_marking_complete();
2154
2155 ergo_verbose4(ErgoConcCycles,
2156 "finish cleanup",
2157 ergo_format_byte("occupancy")
2158 ergo_format_byte("capacity")
2159 ergo_format_byte_perc("known garbage"),
2160 start_used_bytes, g1h->capacity(),
2161 known_garbage_bytes,
2162 ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
2163
2164 double count_end = os::elapsedTime();
2165 double this_final_counting_time = (count_end - start);
2166 if (G1PrintParCleanupStats) {
2319 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2320 os::sleep(Thread::current(), (jlong) 1, false);
2321 }
2322 }
2323 }
2324 }
2325 assert(tmp_free_list.is_empty(), "post-condition");
2326 }
2327
2328 // Support closures for reference procssing in G1
2329
2330 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2331 HeapWord* addr = (HeapWord*)obj;
2332 return addr != NULL &&
2333 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2334 }
2335
2336 class G1CMKeepAliveClosure: public OopClosure {
2337 G1CollectedHeap* _g1;
2338 ConcurrentMark* _cm;
2339 public:
2340 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2341 _g1(g1), _cm(cm)
2342 {
2343 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2344 }
2345
2346 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2347 virtual void do_oop( oop* p) { do_oop_work(p); }
2348
2349 template <class T> void do_oop_work(T* p) {
2350 oop obj = oopDesc::load_decode_heap_oop(p);
2351 HeapWord* addr = (HeapWord*)obj;
2352
2353 if (_cm->verbose_high()) {
2354 gclog_or_tty->print_cr("\t[0] we're looking at location "
2355 "*"PTR_FORMAT" = "PTR_FORMAT,
2356 p, (void*) obj);
2357 }
2358
2359 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2360 _cm->mark_and_count(obj);
2361 _cm->mark_stack_push(obj);
2362 }
2363 }
2364 };
2365
2366 class G1CMDrainMarkingStackClosure: public VoidClosure {
2367 ConcurrentMark* _cm;
2368 CMMarkStack* _markStack;
2369 G1CMKeepAliveClosure* _oopClosure;
2370 public:
2371 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2372 G1CMKeepAliveClosure* oopClosure) :
2373 _cm(cm),
2374 _markStack(markStack),
2375 _oopClosure(oopClosure)
2376 {}
2377
2378 void do_void() {
2379 _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2380 }
2381 };
2382
2383 // 'Keep Alive' closure used by parallel reference processing.
2384 // An instance of this closure is used in the parallel reference processing
2385 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2386 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2387 // placed on to discovered ref lists once so we can mark and push with no
2388 // need to check whether the object has already been marked. Using the
2389 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2390 // operating on the global mark stack. This means that an individual
2391 // worker would be doing lock-free pushes while it processes its own
2392 // discovered ref list followed by drain call. If the discovered ref lists
2393 // are unbalanced then this could cause interference with the other
2394 // workers. Using a CMTask (and its embedded local data structures)
2395 // avoids that potential interference.
2396 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2397 ConcurrentMark* _cm;
2398 CMTask* _task;
2399 int _ref_counter_limit;
2578 G1CMIsAliveClosure g1_is_alive(g1h);
2579
2580 // Inner scope to exclude the cleaning of the string and symbol
2581 // tables from the displayed time.
2582 {
2583 bool verbose = PrintGC && PrintGCDetails;
2584 if (verbose) {
2585 gclog_or_tty->put(' ');
2586 }
2587 TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2588
2589 ReferenceProcessor* rp = g1h->ref_processor_cm();
2590
2591 // See the comment in G1CollectedHeap::ref_processing_init()
2592 // about how reference processing currently works in G1.
2593
2594 // Process weak references.
2595 rp->setup_policy(clear_all_soft_refs);
2596 assert(_markStack.isEmpty(), "mark stack should be empty");
2597
2598 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2599 G1CMDrainMarkingStackClosure
2600 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2601
2602 // We use the work gang from the G1CollectedHeap and we utilize all
2603 // the worker threads.
2604 int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
2605 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2606
2607 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2608 g1h->workers(), active_workers);
2609
2610 if (rp->processing_is_mt()) {
2611 // Set the degree of MT here. If the discovery is done MT, there
2612 // may have been a different number of threads doing the discovery
2613 // and a different number of discovered lists may have Ref objects.
2614 // That is OK as long as the Reference lists are balanced (see
2615 // balance_all_queues() and balance_queues()).
2616 rp->set_active_mt_degree(active_workers);
2617
2618 rp->process_discovered_references(&g1_is_alive,
2619 &g1_keep_alive,
2620 &g1_drain_mark_stack,
2958 out->cr();
2959
2960 gclog_or_tty->print_cr(" done");
2961 gclog_or_tty->flush();
2962 }
2963
2964 #endif // PRODUCT
2965
2966 // This note is for drainAllSATBBuffers and the code in between.
2967 // In the future we could reuse a task to do this work during an
2968 // evacuation pause (since now tasks are not active and can be claimed
2969 // during an evacuation pause). This was a late change to the code and
2970 // is currently not being taken advantage of.
2971
2972 class CMGlobalObjectClosure : public ObjectClosure {
2973 private:
2974 ConcurrentMark* _cm;
2975
2976 public:
2977 void do_object(oop obj) {
2978 _cm->deal_with_reference(obj, 0);
2979 }
2980
2981 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2982 };
2983
// Handles a reference discovered outside the normal concurrent scan
// (e.g. via CMGlobalObjectClosure / SATB buffer draining): marks the
// object on the next bitmap if needed and, when it lies below the
// global finger, pushes it on the global mark stack so that its
// fields will still be scanned.
void ConcurrentMark::deal_with_reference(oop obj, int worker_i) {
  if (verbose_high()) {
    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
                           (void*) obj);
  }

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "null check is implicit");
    if (!_nextMarkBitMap->isMarked(objAddr)) {
      // Only get the containing region if the object is not marked on the
      // bitmap (otherwise, it's a waste of time since we won't do
      // anything with it).
      HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
      if (!hr->obj_allocated_since_next_marking(obj)) {
        if (verbose_high()) {
          gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
                                 "marked", (void*) obj);
        }

        // we need to mark it first
        if (par_mark_and_count(obj, hr, worker_i)) {
          // No OrderAccess:store_load() is needed. It is implicit in the
          // CAS done in the call to CMBitMap::parMark() in the above
          // routine.
          HeapWord* finger = _finger;
          if (objAddr < finger) {
            // The object is below the global finger, i.e. in the area
            // the concurrent scan has already passed, so it must be
            // pushed explicitly.
            if (verbose_high()) {
              gclog_or_tty->print_cr("[global] below the global finger "
                                     "("PTR_FORMAT"), pushing it", finger);
            }
            if (!mark_stack_push(obj)) {
              // Overflow is recorded by mark_stack_push itself; marking
              // will be restarted.
              if (verbose_low()) {
                gclog_or_tty->print_cr("[global] global stack overflow during "
                                       "deal_with_reference");
              }
            }
          }
        }
      }
    }
  }
}
3028
3029 void ConcurrentMark::drainAllSATBBuffers() {
3034 while (satb_mq_set.apply_closure_to_completed_buffer()) {
3035 if (verbose_medium()) {
3036 gclog_or_tty->print_cr("[global] processed an SATB buffer");
3037 }
3038 }
3039
3040 // no need to check whether we should do this, as this is only
3041 // called during an evacuation pause
3042 satb_mq_set.iterate_closure_all_threads();
3043
3044 satb_mq_set.set_closure(NULL);
3045 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
3046 }
3047
// Marks the given object on the prev marking bitmap.
void ConcurrentMark::markPrev(oop p) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
}
3053
3054 void ConcurrentMark::clear_mark(oop p) {
3055 assert(p != NULL && p->is_oop(), "expected an oop");
3056 HeapWord* addr = (HeapWord*)p;
3057 assert(addr >= _nextMarkBitMap->startWord() ||
3058 addr < _nextMarkBitMap->endWord(), "in a region");
3059
3060 _nextMarkBitMap->clear(addr);
3061 }
3062
// Clears the given memory region on both the prev and next marking
// bitmaps.
void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  _nextMarkBitMap->clearRange(mr);
}
3069
3070 HeapRegion*
3071 ConcurrentMark::claim_region(int task_num) {
3072 // "checkpoint" the finger
3073 HeapWord* finger = _finger;
3074
3234 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
3235 _markStack.setEmpty();
3236 _markStack.clear_overflow();
3237 _regionStack.setEmpty();
3238 _regionStack.clear_overflow();
3239 if (clear_overflow) {
3240 clear_has_overflown();
3241 } else {
3242 assert(has_overflown(), "pre-condition");
3243 }
3244 _finger = _heap_start;
3245
3246 for (int i = 0; i < (int)_max_task_num; ++i) {
3247 OopTaskQueue* queue = _task_queues->queue(i);
3248 queue->set_empty();
3249 // Clear any partial regions from the CMTasks
3250 _tasks[i]->clear_aborted_region();
3251 }
3252 }
3253
3254 // Aggregate the counting data that was constructed concurrently
3255 // with marking.
3256 class AggregateCountDataHRClosure: public HeapRegionClosure {
3257 ConcurrentMark* _cm;
3258 BitMap* _cm_card_bm;
3259 intptr_t _bottom_card_num;
3260 size_t _max_task_num;
3261
3262 public:
3263 AggregateCountDataHRClosure(ConcurrentMark *cm,
3264 BitMap* cm_card_bm,
3265 intptr_t bottom_card_num,
3266 size_t max_task_num) :
3267 _cm(cm),
3268 _cm_card_bm(cm_card_bm),
3269 _bottom_card_num(bottom_card_num),
3270 _max_task_num(max_task_num)
3271 { }
3272
3273 bool is_card_aligned(HeapWord* p) {
3274 return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
3275 }
3276
3277 bool doHeapRegion(HeapRegion* hr) {
3278 if (hr->continuesHumongous()) {
3279 // We will ignore these here and process them when their
3280 // associated "starts humongous" region is processed.
3281 // Note that we cannot rely on their associated
3282 // "starts humongous" region to have their bit set to 1
3283 // since, due to the region chunking in the parallel region
3284 // iteration, a "continues humongous" region might be visited
3285 // before its associated "starts humongous".
3286 return false;
3287 }
3288
3289 HeapWord* start = hr->bottom();
3290 HeapWord* limit = hr->next_top_at_mark_start();
3291 HeapWord* end = hr->end();
3292
3293 assert(start <= limit && limit <= hr->top() &&
3294 hr->top() <= hr->end(), "Preconditions");
3295
3296 assert(hr->next_marked_bytes() == 0, "Precondition");
3297
3298 if (start == limit) {
3299 // NTAMS of this region has not been set so nothing to do.
3300 return false;
3301 }
3302
3303 intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3304 intptr_t limit_card_num = intptr_t(uintptr_t(limit) >> CardTableModRefBS::card_shift);
3305 intptr_t end_card_num = intptr_t(uintptr_t(end) >> CardTableModRefBS::card_shift);
3306
3307 assert(is_card_aligned(start), "sanity");
3308 assert(is_card_aligned(end), "sanity");
3309
3310 // If ntams is not card aligned then we bump the index for
3311 // limit so that we get the card spanning ntams.
3312 if (!is_card_aligned(limit)) {
3313 limit_card_num += 1;
3314 }
3315
3316 assert(limit_card_num <= end_card_num, "or else use atomics");
3317
3318 BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
3319 BitMap::idx_t limit_idx = limit_card_num - _bottom_card_num;
3320
3321 // Aggregate the "stripe" in the count data associated with hr.
3322 size_t hrs_index = hr->hrs_index();
3323 size_t marked_bytes = 0;
3324
3325 for (int i = 0; (size_t)i < _max_task_num; i += 1) {
3326 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3327 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3328
3329 // Fetch the marked_bytes in this region for task i and
3330 // add it to the running total for this region.
3331 marked_bytes += marked_bytes_array[hrs_index];
3332
3333 // Now clear the value in the task's marked bytes array
3334 // for this region.
3335 marked_bytes_array[hrs_index] = 0;
3336
3337 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
3338 // into the global card bitmap.
3339 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3340
3341 while (scan_idx < limit_idx) {
3342 assert(task_card_bm->at(scan_idx) == true, "should be");
3343 _cm_card_bm->set_bit(scan_idx);
3344 task_card_bm->clear_bit(scan_idx);
3345 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3346 scan_idx = task_card_bm->get_next_one_offset(start_idx + 1, limit_idx);
3347 }
3348 }
3349
3350 // Update the marked bytes for this region.
3351 hr->add_to_marked_bytes(marked_bytes);
3352
3353 // Now set the top at count to NTAMS.
3354 hr->set_top_at_conc_mark_count(limit);
3355
3356 // Next heap region
3357 return false;
3358 }
3359 };
3360
3361 class G1AggregateCountDataTask: public AbstractGangTask {
3362 protected:
3363 G1CollectedHeap* _g1h;
3364 ConcurrentMark* _cm;
3365 BitMap* _cm_card_bm;
3366 intptr_t _heap_bottom_card_num;
3367 size_t _max_task_num;
3368 int _active_workers;
3369
3370 public:
3371 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3372 ConcurrentMark* cm,
3373 BitMap* cm_card_bm,
3374 intptr_t bottom_card_num,
3375 size_t max_task_num,
3376 int n_workers) :
3377 AbstractGangTask("Count Aggregation"),
3378 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3379 _heap_bottom_card_num(bottom_card_num),
3380 _max_task_num(max_task_num),
3381 _active_workers(n_workers)
3382 { }
3383
3384 void work(int worker_i) {
3385 AggregateCountDataHRClosure cl(_cm, _cm_card_bm,
3386 _heap_bottom_card_num, _max_task_num);
3387
3388 if (G1CollectedHeap::use_parallel_gc_threads()) {
3389 _g1h->heap_region_par_iterate_chunked(&cl, worker_i,
3390 _active_workers,
3391 HeapRegion::AggregateCountClaimValue);
3392 } else {
3393 _g1h->heap_region_iterate(&cl);
3394 }
3395 }
3396 };
3397
3398
3399 void ConcurrentMark::aggregate_and_clear_count_data() {
3400 // Clear the global card bitmap
3401 _card_bm.clear();
3402
3403 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3404 _g1h->workers()->active_workers() :
3405 1);
3406
3407 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3408 _heap_bottom_card_num, _max_task_num,
3409 n_workers);
3410
3411 if (G1CollectedHeap::use_parallel_gc_threads()) {
3412 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3413 "sanity check");
3414 _g1h->set_par_threads(n_workers);
3415 _g1h->workers()->run_task(&g1_par_agg_task);
3416 _g1h->set_par_threads(0);
3417
3418 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3419 "sanity check");
3420 _g1h->reset_heap_region_claim_values();
3421 } else {
3422 g1_par_agg_task.work(0);
3423 }
3424 }
3425
3426 // Clear the per-worker arrays used to store the per-region counting data
3427 void ConcurrentMark::clear_all_count_data() {
3428 assert(SafepointSynchronize::is_at_safepoint() ||
3429 !Universe::is_fully_initialized(), "must be");
3430
3431 size_t max_regions = _g1h->max_regions();
3432
3433 assert(_max_task_num != 0, "unitialized");
3434 assert(_count_card_bitmaps != NULL, "uninitialized");
3435 assert(_count_marked_bytes != NULL, "uninitialized");
3436
3437 for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3438 BitMap* task_card_bm = count_card_bitmap_for(i);
3439 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3440
3441 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3442 assert(marked_bytes_array != NULL, "uninitialized");
3443
3444 for (int j = 0; (size_t) j < max_regions; j++) {
3445 marked_bytes_array[j] = 0;
3446 }
3447 task_card_bm->clear();
3448 }
3449 }
3450
// Clears the counting data recorded for a single heap region from
// _all_ of the per-task counting structures: each task's marked-bytes
// entry for the region is zeroed, and the cards spanning the region's
// used portion are cleared in each task's private card bitmap.
void ConcurrentMark::clear_count_data_for_heap_region(HeapRegion* hr) {
  // Clears the count data for the given region from _all_ of
  // the per-task counting data structures.

  MemRegion used_region = hr->used_region();
  HeapWord* start = used_region.start();
  // last() is the address of the last word in the used region
  // (inclusive), not the one-past-the-end address.
  HeapWord* last = used_region.last();
  size_t hr_index = hr->hrs_index();

  // Convert the first and last used addresses to absolute card numbers.
  intptr_t start_card_num =
    intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
  intptr_t last_card_num =
    intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);

  // Rebase the absolute card numbers to indices into the card bitmaps,
  // which start at the card covering the bottom of the heap.
  BitMap::idx_t start_idx = start_card_num - heap_bottom_card_num();
  BitMap::idx_t last_idx = last_card_num - heap_bottom_card_num();

  // NOTE(review): used_region_bytes and the marked_bytes accumulated
  // below are not consumed anywhere in the visible code — presumably
  // they feed a (debug) assertion elsewhere; confirm before removing.
  size_t used_region_bytes = used_region.byte_size();
  size_t marked_bytes = 0;

  for (int i=0; (size_t)i < _max_task_num; i += 1) {
    BitMap* task_card_bm = count_card_bitmap_for(i);
    size_t* marked_bytes_array = count_marked_bytes_array_for(i);

    marked_bytes += marked_bytes_array[hr_index];
    // clear the amount of marked bytes in the task array for this
    // region
    marked_bytes_array[hr_index] = 0;

    // Clear the inclusive range [start_idx, last_idx] from the
    // card bitmap. The clear_range routine is exclusive so we
    // need to also explicitly clear the bit at last_idx.
    // Passing last_idx+1 to the clear_range would work in
    // most cases but could trip an OOB assertion.

    if ((last_idx - start_idx) > 0) {
      task_card_bm->clear_range(start_idx, last_idx);
    }
    task_card_bm->clear_bit(last_idx);
  }
}
3492
3493 void ConcurrentMark::print_stats() {
3494 if (verbose_stats()) {
3495 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3496 for (size_t i = 0; i < _active_tasks; ++i) {
3497 _tasks[i]->print_stats();
3498 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3499 }
3500 }
3501 }
3502
// Closures used by ConcurrentMark::complete_marking_in_collection_set().

// Marks (and pushes for later scanning) objects reachable from the
// collection set. Uses a bounded, manually-managed mark stack (_ms)
// with a parallel stack of object-array indices so large objArrays can
// be processed in chunks of _array_increment elements.
class CSetMarkOopClosure: public OopClosure {
  friend class CSetMarkBitMapClosure;

  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  oop* _ms;              // mark stack storage (capacity _ms_size)
  jint* _array_ind_stack; // per-slot resume index for objArray entries
  int _ms_size;
  int _ms_ind;           // current stack top (next free slot)
  int _array_increment;  // objArray chunk size: MAX2(ms_size/8, 16)
  int _worker_i;

  // Push obj (with an optional objArray resume index) onto the local
  // mark stack. Returns false when the stack is full; the caller is
  // expected to abort the iteration in that case.
  bool push(oop obj, int arr_ind = 0) {
    if (_ms_ind == _ms_size) {
      gclog_or_tty->print_cr("Mark stack is full.");
      return false;
    }
    _ms[_ms_ind] = obj;
    // The resume index is only meaningful for object arrays.
    if (obj->is_objArray()) {
      _array_ind_stack[_ms_ind] = arr_ind;
    }
    _ms_ind++;
    return true;
  }
  // NOTE(review): the lines below are the tail of a stack-draining
  // method whose opening is not visible in this chunk — it appears to
  // pop entries and, for objArrays, re-push the remainder before
  // scanning the current chunk. Confirm against the full file.
      if (next_arr_ind < len) {
        push(obj, next_arr_ind);
      }
      // Now process this portion of this one.
      int lim = MIN2(next_arr_ind, len);
      for (int j = arr_ind; j < lim; j++) {
        do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
      }
    } else {
      // Non-array object: scan all of its fields at once.
      obj->oop_iterate(this);
    }
    if (abort()) return false;
  }
  return true;
}

public:
  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
    _g1h(G1CollectedHeap::heap()),
    _cm(cm),
    _ms_size(ms_size), _ms_ind(0),
    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
    _array_increment(MAX2(ms_size/8, 16)),
    _worker_i(worker_i) { }

  // Releases the two C-heap arrays allocated in the constructor.
  ~CSetMarkOopClosure() {
    FREE_C_HEAP_ARRAY(oop, _ms);
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop( oop* p) { do_oop_work(p); }

  // Examine the reference at *p; mark-and-push collection-set objects
  // that are not yet live, and hand references outside the collection
  // set to the concurrent marker to be grayed.
  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (oopDesc::is_null(heap_oop)) return;
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked. So follow the forwarding pointer. Note that
      // this does the right thing for self-forwarding pointers in the
      // evacuation failure case.
      obj = obj->forwardee();
    }
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (hr != NULL) {
      if (hr->in_collection_set()) {
        if (_g1h->is_obj_ill(obj)) {
          // par_mark_and_count returns true only for the worker that
          // actually marked the object, so it is pushed exactly once.
          if (_cm->par_mark_and_count(obj, hr, _worker_i)) {
            if (!push(obj)) {
              gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
              // set_abort()/abort() are inherited — presumably from the
              // closure base class; confirm in the class hierarchy.
              set_abort();
            }
          }
        }
      } else {
        // Outside the collection set; we need to gray it
        _cm->deal_with_reference(obj, _worker_i);
      }
    }
  }
};
3611
3612 class CSetMarkBitMapClosure: public BitMapClosure {
3613 G1CollectedHeap* _g1h;
3614 CMBitMap* _bitMap;
3615 ConcurrentMark* _cm;
3616 CSetMarkOopClosure _oop_cl;
3617 int _worker_i;
3618
3619 public:
3620 CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
3621 _g1h(G1CollectedHeap::heap()),
3622 _bitMap(cm->nextMarkBitMap()),
3623 _oop_cl(cm, ms_size, worker_i),
3624 _worker_i(worker_i) { }
3625
3626 bool do_bit(size_t offset) {
3862 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3863
3864 }
3865 print_ms_time_info(" ", "cleanups", _cleanup_times);
3866 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3867 _total_counting_time,
3868 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3869 (double)_cleanup_times.num()
3870 : 0.0));
3871 if (G1ScrubRemSets) {
3872 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3873 _total_rs_scrub_time,
3874 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3875 (double)_cleanup_times.num()
3876 : 0.0));
3877 }
3878 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3879 (_init_times.sum() + _remark_times.sum() +
3880 _cleanup_times.sum())/1000.0);
3881 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3882 "(%8.2f s marking).",
3883 cmThread()->vtime_accum(),
3884 cmThread()->vtime_mark_accum());
3885 }
3886
3887 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3888 _parallel_workers->print_worker_threads_on(st);
3889 }
3890
3891 // Closures
3892 // XXX: there seems to be a lot of code duplication here;
3893 // should refactor and consolidate the shared code.
3894
// NOTE(review): the following comment appears to have been carried over
// from the CMS collector sources — in this (G1) file the closures mark
// refs in the concurrent marking bit map, at the first checkpoint.
3897
3898 // We take a break if someone is trying to stop the world.
3899 bool ConcurrentMark::do_yield_check(int worker_i) {
3900 if (should_yield()) {
3901 if (worker_i == 0) {
3902 _g1h->g1_policy()->record_concurrent_pause();
3903 }
3904 cmThread()->yield();
5159 "elapsed = %1.2lfms <<<<<<<<<<",
5160 _task_id, _time_target_ms, elapsed_time_ms);
5161 if (_cm->has_aborted()) {
5162 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
5163 _task_id);
5164 }
5165 }
5166 } else {
5167 if (_cm->verbose_low()) {
5168 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
5169 "elapsed = %1.2lfms <<<<<<<<<<",
5170 _task_id, _time_target_ms, elapsed_time_ms);
5171 }
5172 }
5173
5174 _claimed = false;
5175 }
5176
// Construct a marking task. Each task owns a private task queue and a
// reference to the shared queue set (both asserted non-NULL), plus the
// per-task counting structures (marked-bytes array and card bitmap)
// handed in by ConcurrentMark. _nextMarkBitMap and _cm_oop_closure are
// set later, outside the constructor.
CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    // 17 seeds the work-stealing random sequence; any non-zero value works.
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _aborted_region(MemRegion()),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  // Statistics counters are only compiled in when marking stats are
  // enabled (statsOnly macro).
  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking = 0 );

  // Seed the marking-step timing history — presumably an initial
  // 0.5ms estimate for the step-duration prediction; confirm against
  // where _marking_step_diffs_ms is consumed.
  _marking_step_diffs_ms.add(0.5);
}
5201
5202 // These are formatting macros that are used below to ensure
5203 // consistent formatting. The *_H_* versions are used to format the
5204 // header for a particular value and they should be kept consistent
5205 // with the corresponding macro. Also note that most of the macros add
5206 // the necessary white space (as a prefix) which makes them a bit
5207 // easier to compose.
5208
5209 // All the output lines are prefixed with this string to be able to
5210 // identify them easily in a large log file.
5211 #define G1PPRL_LINE_PREFIX "###"
5212
|