rev 2896 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by: brutisso
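
The change removes G1's separate liveness-counting pass by giving each marking worker its own marked-bytes-per-region array and its own card bitmap, updated as objects are marked and folded into the global totals once at remark. As a rough orientation before the diff, here is a minimal C++ sketch of that scheme; every name in it is illustrative only and none of it is the HotSpot API:

    #include <cstddef>
    #include <vector>

    // Per-worker liveness accounting (illustrative names only).
    struct WorkerCounts {
      std::vector<size_t> marked_bytes;  // one slot per heap region
      std::vector<bool>   live_card_bm;  // one bit per card in the heap
    };

    // Called by a marking worker each time it (and only it) marks an
    // object. No synchronization is needed: the structures are
    // worker-local.
    inline void count_object(WorkerCounts& wc, size_t region_index,
                             size_t first_card, size_t last_card,
                             size_t obj_bytes) {
      wc.marked_bytes[region_index] += obj_bytes;
      for (size_t c = first_card; c <= last_card; ++c) {
        wc.live_card_bm[c] = true;  // cards spanned by the marked object
      }
    }

The per-worker structures keep the hot marking path free of synchronization; only the mark bit itself is set with a CAS (see the par_mark_and_count() call sites further down).
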
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
33 33 #include "gc_implementation/g1/g1RemSet.hpp"
34 34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
36 36 #include "gc_implementation/shared/vmGCOperations.hpp"
37 37 #include "memory/genOopClosures.inline.hpp"
38 38 #include "memory/referencePolicy.hpp"
39 39 #include "memory/resourceArea.hpp"
40 40 #include "oops/oop.inline.hpp"
41 41 #include "runtime/handles.inline.hpp"
42 42 #include "runtime/java.hpp"
43 43
44 44 //
45 45 // CMS Bit Map Wrapper
46 46
47 47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
48 48 _bm((uintptr_t*)NULL,0),
49 49 _shifter(shifter) {
50 50 _bmStartWord = (HeapWord*)(rs.base());
51 51 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
52 52 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
53 53 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
54 54
55 55 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  56  56   // For now we'll just commit all of the bit map up front.
57 57 // Later on we'll try to be more parsimonious with swap.
58 58 guarantee(_virtual_space.initialize(brs, brs.size()),
  59  59             "couldn't reserve backing store for CMS bit map");
60 60 assert(_virtual_space.committed_size() == brs.size(),
61 61 "didn't reserve backing store for all of CMS bit map?");
62 62 _bm.set_map((uintptr_t*)_virtual_space.low());
63 63 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
64 64 _bmWordSize, "inconsistency in bit map sizing");
65 65 _bm.set_size(_bmWordSize >> _shifter);
66 66 }
67 67
68 68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
69 69 HeapWord* limit) const {
70 70 // First we must round addr *up* to a possible object boundary.
71 71 addr = (HeapWord*)align_size_up((intptr_t)addr,
72 72 HeapWordSize << _shifter);
73 73 size_t addrOffset = heapWordToOffset(addr);
74 74 if (limit == NULL) {
75 75 limit = _bmStartWord + _bmWordSize;
76 76 }
77 77 size_t limitOffset = heapWordToOffset(limit);
78 78 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
79 79 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
80 80 assert(nextAddr >= addr, "get_next_one postcondition");
81 81 assert(nextAddr == limit || isMarked(nextAddr),
82 82 "get_next_one postcondition");
83 83 return nextAddr;
84 84 }
85 85
86 86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
87 87 HeapWord* limit) const {
88 88 size_t addrOffset = heapWordToOffset(addr);
89 89 if (limit == NULL) {
90 90 limit = _bmStartWord + _bmWordSize;
91 91 }
92 92 size_t limitOffset = heapWordToOffset(limit);
93 93 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
94 94 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  95  95   assert(nextAddr >= addr, "get_next_zero postcondition");
  96  96   assert(nextAddr == limit || !isMarked(nextAddr),
  97  97          "get_next_zero postcondition");
98 98 return nextAddr;
99 99 }
100 100
101 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
102 102 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
103 103 return (int) (diff >> _shifter);
104 104 }
105 105
106 106 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
107 107 HeapWord* left = MAX2(_bmStartWord, mr.start());
108 108 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
109 109 if (right > left) {
110 110 // Right-open interval [leftOffset, rightOffset).
111 111 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
112 112 } else {
113 113 return true;
114 114 }
115 115 }
116 116
117 117 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
118 118 size_t from_start_index,
119 119 HeapWord* to_start_word,
120 120 size_t word_num) {
121 121 _bm.mostly_disjoint_range_union(from_bitmap,
122 122 from_start_index,
123 123 heapWordToOffset(to_start_word),
124 124 word_num);
125 125 }
126 126
127 127 #ifndef PRODUCT
128 128 bool CMBitMapRO::covers(ReservedSpace rs) const {
129 129 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
130 130 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
131 131 "size inconsistency");
132 132 return _bmStartWord == (HeapWord*)(rs.base()) &&
133 133 _bmWordSize == rs.size()>>LogHeapWordSize;
134 134 }
135 135 #endif
136 136
137 137 void CMBitMap::clearAll() {
138 138 _bm.clear();
139 139 return;
140 140 }
141 141
142 142 void CMBitMap::markRange(MemRegion mr) {
 143 143   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
144 144 assert(!mr.is_empty(), "unexpected empty region");
145 145 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
146 146 ((HeapWord *) mr.end())),
147 147 "markRange memory region end is not card aligned");
148 148 // convert address range into offset range
149 149 _bm.at_put_range(heapWordToOffset(mr.start()),
150 150 heapWordToOffset(mr.end()), true);
151 151 }
152 152
153 153 void CMBitMap::clearRange(MemRegion mr) {
 154 154   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
155 155 assert(!mr.is_empty(), "unexpected empty region");
156 156 // convert address range into offset range
157 157 _bm.at_put_range(heapWordToOffset(mr.start()),
158 158 heapWordToOffset(mr.end()), false);
159 159 }
160 160
161 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
162 162 HeapWord* end_addr) {
163 163 HeapWord* start = getNextMarkedWordAddress(addr);
164 164 start = MIN2(start, end_addr);
165 165 HeapWord* end = getNextUnmarkedWordAddress(start);
166 166 end = MIN2(end, end_addr);
167 167 assert(start <= end, "Consistency check");
168 168 MemRegion mr(start, end);
169 169 if (!mr.is_empty()) {
170 170 clearRange(mr);
171 171 }
172 172 return mr;
173 173 }
174 174
175 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
176 176 _base(NULL), _cm(cm)
177 177 #ifdef ASSERT
178 178 , _drain_in_progress(false)
179 179 , _drain_in_progress_yields(false)
180 180 #endif
181 181 {}
182 182
183 183 void CMMarkStack::allocate(size_t size) {
184 184 _base = NEW_C_HEAP_ARRAY(oop, size);
185 185 if (_base == NULL) {
186 186 vm_exit_during_initialization("Failed to allocate "
187 187 "CM region mark stack");
188 188 }
189 189 _index = 0;
190 190 _capacity = (jint) size;
191 191 _oops_do_bound = -1;
192 192 NOT_PRODUCT(_max_depth = 0);
193 193 }
194 194
195 195 CMMarkStack::~CMMarkStack() {
196 196 if (_base != NULL) {
197 197 FREE_C_HEAP_ARRAY(oop, _base);
198 198 }
199 199 }
200 200
201 201 void CMMarkStack::par_push(oop ptr) {
202 202 while (true) {
203 203 if (isFull()) {
204 204 _overflow = true;
205 205 return;
206 206 }
207 207 // Otherwise...
208 208 jint index = _index;
209 209 jint next_index = index+1;
210 210 jint res = Atomic::cmpxchg(next_index, &_index, index);
211 211 if (res == index) {
212 212 _base[index] = ptr;
213 213 // Note that we don't maintain this atomically. We could, but it
214 214 // doesn't seem necessary.
215 215 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
216 216 return;
217 217 }
218 218 // Otherwise, we need to try again.
219 219 }
220 220 }
221 221
222 222 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
223 223 while (true) {
224 224 if (isFull()) {
225 225 _overflow = true;
226 226 return;
227 227 }
228 228 // Otherwise...
229 229 jint index = _index;
230 230 jint next_index = index + n;
231 231 if (next_index > _capacity) {
232 232 _overflow = true;
233 233 return;
234 234 }
235 235 jint res = Atomic::cmpxchg(next_index, &_index, index);
236 236 if (res == index) {
237 237 for (int i = 0; i < n; i++) {
238 238 int ind = index + i;
239 239 assert(ind < _capacity, "By overflow test above.");
240 240 _base[ind] = ptr_arr[i];
241 241 }
242 242 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
243 243 return;
244 244 }
245 245 // Otherwise, we need to try again.
246 246 }
247 247 }
248 248
249 249
250 250 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
251 251 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
252 252 jint start = _index;
253 253 jint next_index = start + n;
254 254 if (next_index > _capacity) {
255 255 _overflow = true;
256 256 return;
257 257 }
258 258 // Otherwise.
259 259 _index = next_index;
260 260 for (int i = 0; i < n; i++) {
261 261 int ind = start + i;
262 262 assert(ind < _capacity, "By overflow test above.");
263 263 _base[ind] = ptr_arr[i];
264 264 }
265 265 }
266 266
267 267
268 268 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
269 269 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
270 270 jint index = _index;
271 271 if (index == 0) {
272 272 *n = 0;
273 273 return false;
274 274 } else {
275 275 int k = MIN2(max, index);
276 276 jint new_ind = index - k;
277 277 for (int j = 0; j < k; j++) {
278 278 ptr_arr[j] = _base[new_ind + j];
279 279 }
280 280 _index = new_ind;
281 281 *n = k;
282 282 return true;
283 283 }
284 284 }
285 285
286 286
287 287 CMRegionStack::CMRegionStack() : _base(NULL) {}
288 288
289 289 void CMRegionStack::allocate(size_t size) {
290 290 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
291 291 if (_base == NULL) {
292 292 vm_exit_during_initialization("Failed to allocate CM region mark stack");
293 293 }
294 294 _index = 0;
295 295 _capacity = (jint) size;
296 296 }
297 297
298 298 CMRegionStack::~CMRegionStack() {
299 299 if (_base != NULL) {
 300 300     FREE_C_HEAP_ARRAY(MemRegion, _base);
301 301 }
302 302 }
303 303
304 304 void CMRegionStack::push_lock_free(MemRegion mr) {
305 305 assert(mr.word_size() > 0, "Precondition");
306 306 while (true) {
307 307 jint index = _index;
308 308
309 309 if (index >= _capacity) {
310 310 _overflow = true;
311 311 return;
312 312 }
313 313 // Otherwise...
314 314 jint next_index = index+1;
315 315 jint res = Atomic::cmpxchg(next_index, &_index, index);
316 316 if (res == index) {
317 317 _base[index] = mr;
318 318 return;
319 319 }
320 320 // Otherwise, we need to try again.
321 321 }
322 322 }
323 323
324 324 // Lock-free pop of the region stack. Called during the concurrent
325 325 // marking / remark phases. Should only be called in tandem with
326 326 // other lock-free pops.
327 327 MemRegion CMRegionStack::pop_lock_free() {
328 328 while (true) {
329 329 jint index = _index;
330 330
331 331 if (index == 0) {
332 332 return MemRegion();
333 333 }
334 334 // Otherwise...
335 335 jint next_index = index-1;
336 336 jint res = Atomic::cmpxchg(next_index, &_index, index);
337 337 if (res == index) {
338 338 MemRegion mr = _base[next_index];
339 339 if (mr.start() != NULL) {
340 340 assert(mr.end() != NULL, "invariant");
341 341 assert(mr.word_size() > 0, "invariant");
342 342 return mr;
343 343 } else {
344 344 // that entry was invalidated... let's skip it
345 345 assert(mr.end() == NULL, "invariant");
346 346 }
347 347 }
348 348 // Otherwise, we need to try again.
349 349 }
350 350 }
351 351
352 352 #if 0
353 353 // The routines that manipulate the region stack with a lock are
354 354 // not currently used. They should be retained, however, as a
355 355 // diagnostic aid.
356 356
357 357 void CMRegionStack::push_with_lock(MemRegion mr) {
358 358 assert(mr.word_size() > 0, "Precondition");
359 359 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
360 360
361 361 if (isFull()) {
362 362 _overflow = true;
363 363 return;
364 364 }
365 365
366 366 _base[_index] = mr;
367 367 _index += 1;
368 368 }
369 369
370 370 MemRegion CMRegionStack::pop_with_lock() {
371 371 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
372 372
373 373 while (true) {
374 374 if (_index == 0) {
375 375 return MemRegion();
376 376 }
377 377 _index -= 1;
378 378
379 379 MemRegion mr = _base[_index];
380 380 if (mr.start() != NULL) {
381 381 assert(mr.end() != NULL, "invariant");
382 382 assert(mr.word_size() > 0, "invariant");
383 383 return mr;
384 384 } else {
385 385 // that entry was invalidated... let's skip it
386 386 assert(mr.end() == NULL, "invariant");
387 387 }
388 388 }
389 389 }
390 390 #endif
391 391
392 392 bool CMRegionStack::invalidate_entries_into_cset() {
393 393 bool result = false;
394 394 G1CollectedHeap* g1h = G1CollectedHeap::heap();
395 395 for (int i = 0; i < _oops_do_bound; ++i) {
396 396 MemRegion mr = _base[i];
397 397 if (mr.start() != NULL) {
398 398 assert(mr.end() != NULL, "invariant");
399 399 assert(mr.word_size() > 0, "invariant");
400 400 HeapRegion* hr = g1h->heap_region_containing(mr.start());
401 401 assert(hr != NULL, "invariant");
402 402 if (hr->in_collection_set()) {
403 403 // The region points into the collection set
404 404 _base[i] = MemRegion();
405 405 result = true;
406 406 }
407 407 } else {
408 408 // that entry was invalidated... let's skip it
409 409 assert(mr.end() == NULL, "invariant");
410 410 }
411 411 }
412 412 return result;
413 413 }
414 414
415 415 template<class OopClosureClass>
416 416 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
417 417 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
418 418 || SafepointSynchronize::is_at_safepoint(),
419 419 "Drain recursion must be yield-safe.");
420 420 bool res = true;
421 421 debug_only(_drain_in_progress = true);
422 422 debug_only(_drain_in_progress_yields = yield_after);
423 423 while (!isEmpty()) {
424 424 oop newOop = pop();
425 425 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
426 426 assert(newOop->is_oop(), "Expected an oop");
427 427 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
428 428 "only grey objects on this stack");
429 429 // iterate over the oops in this oop, marking and pushing
430 430 // the ones in CMS generation.
431 431 newOop->oop_iterate(cl);
432 432 if (yield_after && _cm->do_yield_check()) {
433 433 res = false;
434 434 break;
435 435 }
436 436 }
437 437 debug_only(_drain_in_progress = false);
438 438 return res;
439 439 }
440 440
441 441 void CMMarkStack::oops_do(OopClosure* f) {
442 442 if (_index == 0) return;
443 443 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
444 444 "Bound must be set.");
445 445 for (int i = 0; i < _oops_do_bound; i++) {
446 446 f->do_oop(&_base[i]);
447 447 }
448 448 _oops_do_bound = -1;
449 449 }
450 450
451 451 bool ConcurrentMark::not_yet_marked(oop obj) const {
452 452 return (_g1h->is_obj_ill(obj)
453 453 || (_g1h->is_in_permanent(obj)
454 454 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
455 455 }
456 456
457 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
458 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
459 459 #endif // _MSC_VER
460 460
461 461 size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
462 462 return MAX2((n_par_threads + 2) / 4, (size_t)1);
463 463 }
464 464
465 465 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
466 466 int max_regions) :
467 467 _markBitMap1(rs, MinObjAlignment - 1),
468 468 _markBitMap2(rs, MinObjAlignment - 1),
469 469
470 470 _parallel_marking_threads(0),
471 471 _max_parallel_marking_threads(0),
472 472 _sleep_factor(0.0),
473 473 _marking_task_overhead(1.0),
474 474 _cleanup_sleep_factor(0.0),
475 475 _cleanup_task_overhead(1.0),
476 476 _cleanup_list("Cleanup List"),
477 477 _region_bm(max_regions, false /* in_resource_area*/),
478 478 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
479 479 CardTableModRefBS::card_shift,
480 480 false /* in_resource_area*/),
481 +
481 482 _prevMarkBitMap(&_markBitMap1),
482 483 _nextMarkBitMap(&_markBitMap2),
483 484 _at_least_one_mark_complete(false),
484 485
485 486 _markStack(this),
486 487 _regionStack(),
487 488 // _finger set in set_non_marking_state
488 489
489 490 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
490 491 // _active_tasks set in set_non_marking_state
491 492 // _tasks set inside the constructor
492 493 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
493 494 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
494 495
495 496 _has_overflown(false),
496 497 _concurrent(false),
497 498 _has_aborted(false),
498 499 _restart_for_overflow(false),
499 500 _concurrent_marking_in_progress(false),
500 501 _should_gray_objects(false),
501 502
502 503 // _verbose_level set below
503 504
504 505 _init_times(),
505 506 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
506 507 _cleanup_times(),
507 508 _total_counting_time(0.0),
508 509 _total_rs_scrub_time(0.0),
509 510
510 - _parallel_workers(NULL) {
511 + _parallel_workers(NULL),
512 +
513 + _count_card_bitmaps(NULL),
514 + _count_marked_bytes(NULL)
515 +{
511 516 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
512 517 if (verbose_level < no_verbose) {
513 518 verbose_level = no_verbose;
514 519 }
515 520 if (verbose_level > high_verbose) {
516 521 verbose_level = high_verbose;
517 522 }
518 523 _verbose_level = verbose_level;
519 524
520 525 if (verbose_low()) {
521 526 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
522 527 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
523 528 }
524 529
525 530 _markStack.allocate(MarkStackSize);
526 531 _regionStack.allocate(G1MarkRegionStackSize);
527 532
528 533 // Create & start a ConcurrentMark thread.
529 534 _cmThread = new ConcurrentMarkThread(this);
530 535 assert(cmThread() != NULL, "CM Thread should have been created");
531 536 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
532 537
533 538 _g1h = G1CollectedHeap::heap();
534 539 assert(CGC_lock != NULL, "Where's the CGC_lock?");
535 540 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
536 541 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
537 542
538 543 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
539 544 satb_qs.set_buffer_size(G1SATBBufferSize);
540 545
541 546 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
542 547 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
543 548
549 + _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
550 + _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
551 +
552 + BitMap::idx_t card_bm_size = _card_bm.size();
553 +
544 554 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
545 555 _active_tasks = _max_task_num;
546 556 for (int i = 0; i < (int) _max_task_num; ++i) {
547 557 CMTaskQueue* task_queue = new CMTaskQueue();
548 558 task_queue->initialize();
549 559 _task_queues->register_queue(i, task_queue);
550 560
551 - _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
561 + _count_card_bitmaps[i] = BitMap(card_bm_size, false);
562 + _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
563 +
564 + _tasks[i] = new CMTask(i, this,
565 + _count_marked_bytes[i],
566 + &_count_card_bitmaps[i],
567 + task_queue, _task_queues);
568 +
552 569 _accum_task_vtime[i] = 0.0;
553 570 }
554 571
572 + // Calculate the card number for the bottom of the heap. Used
573 + // in biasing indexes into the accounting card bitmaps.
574 + _heap_bottom_card_num =
575 + intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
576 + CardTableModRefBS::card_shift);
577 +
578 +
555 579 if (ConcGCThreads > ParallelGCThreads) {
556 580 vm_exit_during_initialization("Can't have more ConcGCThreads "
557 581 "than ParallelGCThreads.");
558 582 }
559 583 if (ParallelGCThreads == 0) {
560 584 // if we are not running with any parallel GC threads we will not
561 585 // spawn any marking threads either
562 586 _parallel_marking_threads = 0;
563 587 _max_parallel_marking_threads = 0;
564 588 _sleep_factor = 0.0;
565 589 _marking_task_overhead = 1.0;
566 590 } else {
567 591 if (ConcGCThreads > 0) {
568 592 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
569 593 // if both are set
570 594
571 595 _parallel_marking_threads = ConcGCThreads;
572 596 _max_parallel_marking_threads = _parallel_marking_threads;
573 597 _sleep_factor = 0.0;
574 598 _marking_task_overhead = 1.0;
575 599 } else if (G1MarkingOverheadPercent > 0) {
576 600 // we will calculate the number of parallel marking threads
577 601 // based on a target overhead with respect to the soft real-time
578 602 // goal
579 603
580 604 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
581 605 double overall_cm_overhead =
582 606 (double) MaxGCPauseMillis * marking_overhead /
583 607 (double) GCPauseIntervalMillis;
584 608 double cpu_ratio = 1.0 / (double) os::processor_count();
585 609 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
586 610 double marking_task_overhead =
587 611 overall_cm_overhead / marking_thread_num *
588 612 (double) os::processor_count();
589 613 double sleep_factor =
590 614 (1.0 - marking_task_overhead) / marking_task_overhead;
591 615
592 616 _parallel_marking_threads = (size_t) marking_thread_num;
593 617 _max_parallel_marking_threads = _parallel_marking_threads;
594 618 _sleep_factor = sleep_factor;
595 619 _marking_task_overhead = marking_task_overhead;
596 620 } else {
597 621 _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
598 622 _max_parallel_marking_threads = _parallel_marking_threads;
599 623 _sleep_factor = 0.0;
600 624 _marking_task_overhead = 1.0;
601 625 }
602 626
603 627 if (parallel_marking_threads() > 1) {
604 628 _cleanup_task_overhead = 1.0;
605 629 } else {
606 630 _cleanup_task_overhead = marking_task_overhead();
607 631 }
608 632 _cleanup_sleep_factor =
609 633 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
610 634
611 635 #if 0
612 636 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
613 637 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
614 638 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
615 639 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
616 640 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
617 641 #endif
618 642
619 643 guarantee(parallel_marking_threads() > 0, "peace of mind");
620 644 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
621 645 (int) _max_parallel_marking_threads, false, true);
622 646 if (_parallel_workers == NULL) {
623 647 vm_exit_during_initialization("Failed necessary allocation.");
624 648 } else {
625 649 _parallel_workers->initialize_workers();
626 650 }
627 651 }
628 652
629 653 // so that the call below can read a sensible value
630 654 _heap_start = (HeapWord*) rs.base();
631 655 set_non_marking_state();
632 656 }
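
The constructor now also records _heap_bottom_card_num, which biases global card numbers into zero-based indexes for the counting card bitmaps sized from _card_bm.size(). A hedged arithmetic sketch of that biasing, assuming G1's usual 512-byte cards (a card shift of 9) and a made-up heap base:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int       card_shift  = 9;              // 512-byte cards (assumed)
      const uintptr_t heap_bottom = 0x700000000UL;  // hypothetical heap base
      const intptr_t  bottom_card = intptr_t(heap_bottom >> card_shift);

      // An address maps to a zero-based card bitmap index by subtracting
      // the card number of the bottom of the heap.
      uintptr_t addr = heap_bottom + 3 * 512 + 40;  // somewhere in card 3
      size_t card_index = size_t(intptr_t(addr >> card_shift) - bottom_card);
      assert(card_index == 3);
      return 0;
    }
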
633 657
634 658 void ConcurrentMark::update_g1_committed(bool force) {
635 659 // If concurrent marking is not in progress, then we do not need to
636 660 // update _heap_end. This has a subtle and important
637 661 // side-effect. Imagine that two evacuation pauses happen between
638 662 // marking completion and remark. The first one can grow the
639 663 // heap (hence now the finger is below the heap end). Then, the
640 664 // second one could unnecessarily push regions on the region
641 665 // stack. This causes the invariant that the region stack is empty
642 666 // at the beginning of remark to be false. By ensuring that we do
643 667 // not observe heap expansions after marking is complete, then we do
 644 668   // stack. This causes the invariant that the region stack is empty
 645 669   // at the beginning of remark to be false. By ensuring that we do
646 670
647 671 MemRegion committed = _g1h->g1_committed();
648 672 assert(committed.start() == _heap_start, "start shouldn't change");
649 673 HeapWord* new_end = committed.end();
650 674 if (new_end > _heap_end) {
651 675 // The heap has been expanded.
652 676
653 677 _heap_end = new_end;
654 678 }
655 679 // Notice that the heap can also shrink. However, this only happens
656 680 // during a Full GC (at least currently) and the entire marking
657 681 // phase will bail out and the task will not be restarted. So, let's
658 682 // do nothing.
659 683 }
660 684
661 685 void ConcurrentMark::reset() {
662 686 // Starting values for these two. This should be called in a STW
663 687 // phase. CM will be notified of any future g1_committed expansions
664 688 // will be at the end of evacuation pauses, when tasks are
665 689 // inactive.
666 690 MemRegion committed = _g1h->g1_committed();
667 691 _heap_start = committed.start();
668 692 _heap_end = committed.end();
669 693
670 694 // Separated the asserts so that we know which one fires.
671 695 assert(_heap_start != NULL, "heap bounds should look ok");
672 696 assert(_heap_end != NULL, "heap bounds should look ok");
673 697 assert(_heap_start < _heap_end, "heap bounds should look ok");
674 698
675 699 // reset all the marking data structures and any necessary flags
676 700 clear_marking_state();
677 701
702 + clear_all_count_data();
703 +
678 704 if (verbose_low()) {
679 705 gclog_or_tty->print_cr("[global] resetting");
680 706 }
681 707
682 708 // We do reset all of them, since different phases will use
683 709 // different number of active threads. So, it's easiest to have all
684 710 // of them ready.
685 711 for (int i = 0; i < (int) _max_task_num; ++i) {
686 712 _tasks[i]->reset(_nextMarkBitMap);
687 713 }
688 714
689 715 // we need this to make sure that the flag is on during the evac
690 716 // pause with initial mark piggy-backed
691 717 set_concurrent_marking_in_progress();
692 718 }
693 719
694 720 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
695 721 assert(active_tasks <= _max_task_num, "we should not have more");
696 722
697 723 _active_tasks = active_tasks;
698 724 // Need to update the three data structures below according to the
699 725 // number of active threads for this phase.
700 726 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
701 727 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
702 728 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
703 729
704 730 _concurrent = concurrent;
705 731 // We propagate this to all tasks, not just the active ones.
706 732 for (int i = 0; i < (int) _max_task_num; ++i)
707 733 _tasks[i]->set_concurrent(concurrent);
708 734
709 735 if (concurrent) {
710 736 set_concurrent_marking_in_progress();
711 737 } else {
712 738 // We currently assume that the concurrent flag has been set to
713 739 // false before we start remark. At this point we should also be
714 740 // in a STW phase.
715 741 assert(!concurrent_marking_in_progress(), "invariant");
716 742 assert(_finger == _heap_end, "only way to get here");
717 743 update_g1_committed(true);
718 744 }
719 745 }
720 746
721 747 void ConcurrentMark::set_non_marking_state() {
722 748 // We set the global marking state to some default values when we're
723 749 // not doing marking.
724 750 clear_marking_state();
725 751 _active_tasks = 0;
726 752 clear_concurrent_marking_in_progress();
727 753 }
728 754
729 -ConcurrentMark::~ConcurrentMark() {
730 - for (int i = 0; i < (int) _max_task_num; ++i) {
731 - delete _task_queues->queue(i);
732 - delete _tasks[i];
733 - }
734 - delete _task_queues;
735 - FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
736 -}
737 -
738 755 // This closure is used to mark refs into the g1 generation
739 756 // from external roots in the CMS bit map.
740 757 // Called at the first checkpoint.
741 758 //
742 759
743 760 void ConcurrentMark::clearNextBitmap() {
744 761 G1CollectedHeap* g1h = G1CollectedHeap::heap();
745 762 G1CollectorPolicy* g1p = g1h->g1_policy();
746 763
747 764 // Make sure that the concurrent mark thread looks to still be in
748 765 // the current cycle.
749 766 guarantee(cmThread()->during_cycle(), "invariant");
750 767
751 768 // We are finishing up the current cycle by clearing the next
752 769 // marking bitmap and getting it ready for the next cycle. During
753 770 // this time no other cycle can start. So, let's make sure that this
754 771 // is the case.
755 772 guarantee(!g1h->mark_in_progress(), "invariant");
756 773
757 774 // clear the mark bitmap (no grey objects to start with).
758 775 // We need to do this in chunks and offer to yield in between
759 776 // each chunk.
760 777 HeapWord* start = _nextMarkBitMap->startWord();
761 778 HeapWord* end = _nextMarkBitMap->endWord();
762 779 HeapWord* cur = start;
763 780 size_t chunkSize = M;
764 781 while (cur < end) {
765 782 HeapWord* next = cur + chunkSize;
766 783 if (next > end) {
767 784 next = end;
768 785 }
769 786 MemRegion mr(cur,next);
770 787 _nextMarkBitMap->clearRange(mr);
771 788 cur = next;
772 789 do_yield_check();
773 790
774 791 // Repeat the asserts from above. We'll do them as asserts here to
775 792 // minimize their overhead on the product. However, we'll have
776 793 // them as guarantees at the beginning / end of the bitmap
777 794 // clearing to get some checking in the product.
778 795 assert(cmThread()->during_cycle(), "invariant");
779 796 assert(!g1h->mark_in_progress(), "invariant");
780 797 }
781 798
782 799 // Repeat the asserts from above.
783 800 guarantee(cmThread()->during_cycle(), "invariant");
784 801 guarantee(!g1h->mark_in_progress(), "invariant");
785 802 }
786 803
787 804 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
788 805 public:
789 806 bool doHeapRegion(HeapRegion* r) {
790 807 if (!r->continuesHumongous()) {
791 808 r->note_start_of_marking(true);
792 809 }
793 810 return false;
794 811 }
795 812 };
796 813
797 814 void ConcurrentMark::checkpointRootsInitialPre() {
798 815 G1CollectedHeap* g1h = G1CollectedHeap::heap();
799 816 G1CollectorPolicy* g1p = g1h->g1_policy();
800 817
801 818 _has_aborted = false;
802 819
803 820 #ifndef PRODUCT
804 821 if (G1PrintReachableAtInitialMark) {
805 822 print_reachable("at-cycle-start",
806 823 VerifyOption_G1UsePrevMarking, true /* all */);
807 824 }
808 825 #endif
809 826
810 827 // Initialise marking structures. This has to be done in a STW phase.
811 828 reset();
812 829 }
813 830
814 831
815 832 void ConcurrentMark::checkpointRootsInitialPost() {
816 833 G1CollectedHeap* g1h = G1CollectedHeap::heap();
817 834
818 835 // If we force an overflow during remark, the remark operation will
819 836 // actually abort and we'll restart concurrent marking. If we always
 820 837   // force an overflow during remark we'll never actually complete the
 821 838   // marking phase. So, we initialize this here, at the start of the
 822 839   // cycle, so that the remaining overflow count will decrease at
823 840 // every remark and we'll eventually not need to cause one.
824 841 force_overflow_stw()->init();
825 842
826 843 // For each region note start of marking.
827 844 NoteStartOfMarkHRClosure startcl;
828 845 g1h->heap_region_iterate(&startcl);
829 846
830 847 // Start Concurrent Marking weak-reference discovery.
831 848 ReferenceProcessor* rp = g1h->ref_processor_cm();
832 849 // enable ("weak") refs discovery
833 850 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
834 851 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
835 852
836 853 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 837 854   // This is the start of the marking cycle. We expect all
838 855 // threads to have SATB queues with active set to false.
839 856 satb_mq_set.set_active_all_threads(true, /* new active value */
840 857 false /* expected_active */);
841 858
842 859 // update_g1_committed() will be called at the end of an evac pause
843 860 // when marking is on. So, it's also called at the end of the
844 861 // initial-mark pause to update the heap end, if the heap expands
845 862 // during it. No need to call it here.
846 863 }
847 864
848 865 /*
849 866 * Notice that in the next two methods, we actually leave the STS
850 867 * during the barrier sync and join it immediately afterwards. If we
851 868 * do not do this, the following deadlock can occur: one thread could
852 869 * be in the barrier sync code, waiting for the other thread to also
853 870 * sync up, whereas another one could be trying to yield, while also
854 871 * waiting for the other threads to sync up too.
855 872 *
856 873 * Note, however, that this code is also used during remark and in
857 874 * this case we should not attempt to leave / enter the STS, otherwise
 858 875  * we'll either hit an assert (debug / fastdebug) or deadlock
859 876 * (product). So we should only leave / enter the STS if we are
860 877 * operating concurrently.
861 878 *
862 879 * Because the thread that does the sync barrier has left the STS, it
863 880 * is possible to be suspended for a Full GC or an evacuation pause
864 881 * could occur. This is actually safe, since the entering the sync
865 882 * barrier is one of the last things do_marking_step() does, and it
866 883 * doesn't manipulate any data structures afterwards.
867 884 */
868 885
869 886 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
870 887 if (verbose_low()) {
871 888 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
872 889 }
873 890
874 891 if (concurrent()) {
875 892 ConcurrentGCThread::stsLeave();
876 893 }
877 894 _first_overflow_barrier_sync.enter();
878 895 if (concurrent()) {
879 896 ConcurrentGCThread::stsJoin();
880 897 }
881 898 // at this point everyone should have synced up and not be doing any
882 899 // more work
883 900
884 901 if (verbose_low()) {
885 902 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
886 903 }
887 904
888 905 // let task 0 do this
889 906 if (task_num == 0) {
890 907 // task 0 is responsible for clearing the global data structures
891 908 // We should be here because of an overflow. During STW we should
892 909 // not clear the overflow flag since we rely on it being true when
 893 910     // we exit this method to abort the pause and restart concurrent
894 911 // marking.
895 912 clear_marking_state(concurrent() /* clear_overflow */);
896 913 force_overflow()->update();
897 914
898 915 if (PrintGC) {
899 916 gclog_or_tty->date_stamp(PrintGCDateStamps);
900 917 gclog_or_tty->stamp(PrintGCTimeStamps);
901 918 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
902 919 }
903 920 }
904 921
 905 922   // after this, each task should reset its own data structures and
 906 923   // then go into the second barrier
907 924 }
908 925
909 926 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
910 927 if (verbose_low()) {
911 928 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
912 929 }
913 930
914 931 if (concurrent()) {
915 932 ConcurrentGCThread::stsLeave();
916 933 }
917 934 _second_overflow_barrier_sync.enter();
918 935 if (concurrent()) {
919 936 ConcurrentGCThread::stsJoin();
920 937 }
921 938 // at this point everything should be re-initialised and ready to go
922 939
923 940 if (verbose_low()) {
924 941 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
925 942 }
926 943 }
927 944
928 945 #ifndef PRODUCT
929 946 void ForceOverflowSettings::init() {
930 947 _num_remaining = G1ConcMarkForceOverflow;
931 948 _force = false;
932 949 update();
933 950 }
934 951
935 952 void ForceOverflowSettings::update() {
936 953 if (_num_remaining > 0) {
937 954 _num_remaining -= 1;
938 955 _force = true;
939 956 } else {
940 957 _force = false;
941 958 }
942 959 }
943 960
944 961 bool ForceOverflowSettings::should_force() {
945 962 if (_force) {
946 963 _force = false;
947 964 return true;
948 965 } else {
949 966 return false;
950 967 }
951 968 }
952 969 #endif // !PRODUCT
953 970
954 -void ConcurrentMark::grayRoot(oop p) {
971 +void ConcurrentMark::grayRoot(oop p, int worker_i) {
955 972 HeapWord* addr = (HeapWord*) p;
956 973 // We can't really check against _heap_start and _heap_end, since it
957 974 // is possible during an evacuation pause with piggy-backed
958 975 // initial-mark that the committed space is expanded during the
 959 976   // pause without CM observing this change. So the assertion below
 960 977   // is a bit conservative; but better than nothing.
961 978 assert(_g1h->g1_committed().contains(addr),
962 979 "address should be within the heap bounds");
963 980
964 981 if (!_nextMarkBitMap->isMarked(addr)) {
965 - _nextMarkBitMap->parMark(addr);
982 + par_mark_and_count(p, worker_i);
966 983 }
967 984 }
968 985
969 986 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
970 987 // The objects on the region have already been marked "in bulk" by
971 988 // the caller. We only need to decide whether to push the region on
972 989 // the region stack or not.
973 990
974 991 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
975 992 // We're done with marking and waiting for remark. We do not need to
976 993 // push anything else on the region stack.
977 994 return;
978 995 }
979 996
980 997 HeapWord* finger = _finger;
981 998
982 999 if (verbose_low()) {
983 1000 gclog_or_tty->print_cr("[global] attempting to push "
984 1001 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
985 1002 PTR_FORMAT, mr.start(), mr.end(), finger);
986 1003 }
987 1004
988 1005 if (mr.start() < finger) {
989 1006 // The finger is always heap region aligned and it is not possible
990 1007 // for mr to span heap regions.
991 1008 assert(mr.end() <= finger, "invariant");
992 1009
993 1010 // Separated the asserts so that we know which one fires.
994 1011 assert(mr.start() <= mr.end(),
995 1012 "region boundaries should fall within the committed space");
996 1013 assert(_heap_start <= mr.start(),
997 1014 "region boundaries should fall within the committed space");
998 1015 assert(mr.end() <= _heap_end,
999 1016 "region boundaries should fall within the committed space");
1000 1017 if (verbose_low()) {
1001 1018 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1002 1019 "below the finger, pushing it",
1003 1020 mr.start(), mr.end());
1004 1021 }
1005 1022
1006 1023 if (!region_stack_push_lock_free(mr)) {
1007 1024 if (verbose_low()) {
1008 1025 gclog_or_tty->print_cr("[global] region stack has overflown.");
1009 1026 }
1010 1027 }
1011 1028 }
1012 1029 }
1013 1030
1014 -void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1031 +void ConcurrentMark::markAndGrayObjectIfNecessary(oop p, int worker_i) {
1015 1032 // The object is not marked by the caller. We need to at least mark
1016 1034   // it and maybe push it on the stack.
1017 1034
1018 1035 HeapWord* addr = (HeapWord*)p;
1019 1036 if (!_nextMarkBitMap->isMarked(addr)) {
1020 1037 // We definitely need to mark it, irrespective whether we bail out
1021 1038 // because we're done with marking.
1022 - if (_nextMarkBitMap->parMark(addr)) {
1039 +
1040 + if (par_mark_and_count(p, worker_i)) {
1023 1041 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1024 1042 // If we're done with concurrent marking and we're waiting for
1025 1043 // remark, then we're not pushing anything on the stack.
1026 1044 return;
1027 1045 }
1028 1046
1029 1047       // No OrderAccess::store_load() is needed. It is implicit in the
1030 1048       // CAS done in par_mark_and_count() above
1031 1049 HeapWord* finger = _finger;
1032 1050
1033 1051 if (addr < finger) {
1034 1052 if (!mark_stack_push(oop(addr))) {
1035 1053 if (verbose_low()) {
1036 1054 gclog_or_tty->print_cr("[global] global stack overflow "
1037 1055 "during parMark");
1038 1056 }
1039 1057 }
1040 1058 }
1041 1059 }
1042 1060 }
1043 1061 }
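
grayRoot() and markAndGrayObjectIfNecessary() now mark through par_mark_and_count(), whose definition lies outside this hunk. The property the surrounding code relies on is that exactly one worker wins the CAS on the mark bit, and only that worker updates its local liveness counts, so each object is counted exactly once. A speculative sketch of such a helper (illustrative, not the HotSpot implementation):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Atomically set a bit in a shared mark bitmap; true iff we set it.
    inline bool par_set_bit(std::atomic<uint64_t>* words, size_t bit) {
      std::atomic<uint64_t>& w = words[bit >> 6];
      const uint64_t mask = uint64_t(1) << (bit & 63);
      uint64_t old = w.load(std::memory_order_relaxed);
      while ((old & mask) == 0) {
        if (w.compare_exchange_weak(old, old | mask)) {
          return true;   // this worker won the race and marked the object
        }
      }
      return false;      // some other worker marked it first
    }

    // The piggy-backed accounting: count only on a successful mark, so
    // the worker-local totals need no synchronization of their own.
    inline bool mark_and_count(std::atomic<uint64_t>* mark_bm, size_t bit,
                               std::vector<size_t>& marked_bytes,
                               size_t region, size_t obj_bytes) {
      if (!par_set_bit(mark_bm, bit)) return false;
      marked_bytes[region] += obj_bytes;
      return true;
    }
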
1044 1062
1045 1063 class CMConcurrentMarkingTask: public AbstractGangTask {
1046 1064 private:
1047 1065 ConcurrentMark* _cm;
1048 1066 ConcurrentMarkThread* _cmt;
1049 1067
1050 1068 public:
1051 1069 void work(int worker_i) {
1052 1070 assert(Thread::current()->is_ConcurrentGC_thread(),
1053 1071 "this should only be done by a conc GC thread");
1054 1072 ResourceMark rm;
1055 1073
1056 1074 double start_vtime = os::elapsedVTime();
1057 1075
1058 1076 ConcurrentGCThread::stsJoin();
1059 1077
1060 1078 assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1061 1079 CMTask* the_task = _cm->task(worker_i);
1062 1080 the_task->record_start_time();
1063 1081 if (!_cm->has_aborted()) {
1064 1082 do {
1065 1083 double start_vtime_sec = os::elapsedVTime();
1066 1084 double start_time_sec = os::elapsedTime();
1067 1085 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1068 1086
1069 1087 the_task->do_marking_step(mark_step_duration_ms,
1070 1088 true /* do_stealing */,
1071 1089 true /* do_termination */);
1072 1090
1073 1091 double end_time_sec = os::elapsedTime();
1074 1092 double end_vtime_sec = os::elapsedVTime();
1075 1093 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1076 1094 double elapsed_time_sec = end_time_sec - start_time_sec;
1077 1095 _cm->clear_has_overflown();
1078 1096
1079 1097 bool ret = _cm->do_yield_check(worker_i);
1080 1098
1081 1099 jlong sleep_time_ms;
1082 1100 if (!_cm->has_aborted() && the_task->has_aborted()) {
1083 1101 sleep_time_ms =
1084 1102 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1085 1103 ConcurrentGCThread::stsLeave();
1086 1104 os::sleep(Thread::current(), sleep_time_ms, false);
1087 1105 ConcurrentGCThread::stsJoin();
1088 1106 }
1089 1107 double end_time2_sec = os::elapsedTime();
1090 1108 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1091 1109
1092 1110 #if 0
1093 1111 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1094 1112 "overhead %1.4lf",
1095 1113 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1096 1114 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1097 1115 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1098 1116 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1099 1117 #endif
1100 1118 } while (!_cm->has_aborted() && the_task->has_aborted());
1101 1119 }
1102 1120 the_task->record_end_time();
1103 1121 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1104 1122
1105 1123 ConcurrentGCThread::stsLeave();
1106 1124
1107 1125 double end_vtime = os::elapsedVTime();
1108 1126 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1109 1127 }
1110 1128
1111 1129 CMConcurrentMarkingTask(ConcurrentMark* cm,
1112 1130 ConcurrentMarkThread* cmt) :
1113 1131 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1114 1132
1115 1133 ~CMConcurrentMarkingTask() { }
1116 1134 };
1117 1135
1118 1136 // Calculates the number of active workers for a concurrent
1119 1137 // phase.
1120 1138 size_t ConcurrentMark::calc_parallel_marking_threads() {
1121 1139 if (G1CollectedHeap::use_parallel_gc_threads()) {
1122 1140 size_t n_conc_workers = 0;
1123 1141 if (!UseDynamicNumberOfGCThreads ||
1124 1142 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1125 1143 !ForceDynamicNumberOfGCThreads)) {
1126 1144 n_conc_workers = max_parallel_marking_threads();
1127 1145 } else {
1128 1146 n_conc_workers =
1129 1147 AdaptiveSizePolicy::calc_default_active_workers(
1130 1148 max_parallel_marking_threads(),
1131 1149 1, /* Minimum workers */
1132 1150 parallel_marking_threads(),
1133 1151 Threads::number_of_non_daemon_threads());
1134 1152 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1135 1153 // that scaling has already gone into "_max_parallel_marking_threads".
1136 1154 }
1137 1155 assert(n_conc_workers > 0, "Always need at least 1");
1138 1156 return n_conc_workers;
1139 1157 }
1140 1158 // If we are not running with any parallel GC threads we will not
1141 1159 // have spawned any marking threads either. Hence the number of
1142 1160 // concurrent workers should be 0.
1143 1161 return 0;
1144 1162 }
1145 1163
1146 1164 void ConcurrentMark::markFromRoots() {
1147 1165 // we might be tempted to assert that:
1148 1166 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1149 1167 // "inconsistent argument?");
1150 1168 // However that wouldn't be right, because it's possible that
1151 1169 // a safepoint is indeed in progress as a younger generation
1152 1170 // stop-the-world GC happens even as we mark in this generation.
1153 1171
1154 1172 _restart_for_overflow = false;
1155 1173 force_overflow_conc()->init();
1156 1174
1157 1175 // _g1h has _n_par_threads
1158 1176 _parallel_marking_threads = calc_parallel_marking_threads();
1159 1177 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1160 1178 "Maximum number of marking threads exceeded");
1161 1179
1162 1180 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1163 1181
1164 1182 // Parallel task terminator is set in "set_phase()"
1165 1183 set_phase(active_workers, true /* concurrent */);
1166 1184
1167 1185 CMConcurrentMarkingTask markingTask(this, cmThread());
1168 1186 if (parallel_marking_threads() > 0) {
1169 1187 _parallel_workers->set_active_workers((int)active_workers);
1170 1188     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1171 1189     // and the decisions on that MT processing are made elsewhere.
1172 1190 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1173 1191 _parallel_workers->run_task(&markingTask);
1174 1192 } else {
1175 1193 markingTask.work(0);
1176 1194 }
1177 1195 print_stats();
1178 1196 }
1179 1197
1180 1198 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1181 1199 // world is stopped at this checkpoint
1182 1200 assert(SafepointSynchronize::is_at_safepoint(),
1183 1201 "world should be stopped");
1184 1202
1185 1203 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1186 1204
1187 1205 // If a full collection has happened, we shouldn't do this.
1188 1206 if (has_aborted()) {
1189 1207 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1190 1208 return;
1191 1209 }
1192 1210
1193 1211 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1194 1212
1195 1213 if (VerifyDuringGC) {
1196 1214 HandleMark hm; // handle scope
1197 1215 gclog_or_tty->print(" VerifyDuringGC:(before)");
1198 1216 Universe::heap()->prepare_for_verify();
1199 1217 Universe::verify(/* allow dirty */ true,
1200 1218 /* silent */ false,
1201 1219 /* option */ VerifyOption_G1UsePrevMarking);
1202 1220 }
1203 1221
1204 1222 G1CollectorPolicy* g1p = g1h->g1_policy();
1205 1223 g1p->record_concurrent_mark_remark_start();
1206 1224
1207 1225 double start = os::elapsedTime();
1208 1226
1209 1227 checkpointRootsFinalWork();
1210 1228
1211 1229 double mark_work_end = os::elapsedTime();
1212 1230
1213 1231 weakRefsWork(clear_all_soft_refs);
1214 1232
1215 1233 if (has_overflown()) {
1216 1234 // Oops. We overflowed. Restart concurrent marking.
1217 1235 _restart_for_overflow = true;
1218 1236 // Clear the flag. We do not need it any more.
1219 1237 clear_has_overflown();
1220 1238 if (G1TraceMarkStackOverflow) {
1221 1239 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1222 1240 }
1223 1241 } else {
1242 + // Aggregate the per-task counting data that we have accumulated
1243 + // while marking.
1244 + aggregate_and_clear_count_data();
1245 +
1224 1246 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1225 1247 // We're done with marking.
1226 1248     // This is the end of the marking cycle. We expect all
1227 1249 // threads to have SATB queues with active set to true.
1228 1250 satb_mq_set.set_active_all_threads(false, /* new active value */
1229 1251 true /* expected_active */);
1230 1252
1231 1253 if (VerifyDuringGC) {
1232 1254
1233 1255 HandleMark hm; // handle scope
1234 1256 gclog_or_tty->print(" VerifyDuringGC:(after)");
1235 1257 Universe::heap()->prepare_for_verify();
1236 1258 Universe::verify(/* allow dirty */ true,
1237 1259 /* silent */ false,
1238 1260 /* option */ VerifyOption_G1UseNextMarking);
1239 1261 }
1240 1262 assert(!restart_for_overflow(), "sanity");
1241 1263 }
1242 1264
1243 1265 // Reset the marking state if marking completed
1244 1266 if (!restart_for_overflow()) {
1245 1267 set_non_marking_state();
1246 1268 }
1247 1269
1248 1270 #if VERIFY_OBJS_PROCESSED
1249 1271 _scan_obj_cl.objs_processed = 0;
1250 1272 ThreadLocalObjQueue::objs_enqueued = 0;
1251 1273 #endif
1252 1274
1253 1275 // Statistics
1254 1276 double now = os::elapsedTime();
1255 1277 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1256 1278 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1257 1279 _remark_times.add((now - start) * 1000.0);
1258 1280
1259 1281 g1p->record_concurrent_mark_remark_end();
1260 1282 }
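
The aggregate_and_clear_count_data() call added above is the replacement for the old counting phase: once remark succeeds, the per-task counts are folded into the global region and card structures and cleared for the next cycle. Its body is not part of this hunk; the sketch below shows the general shape of such an aggregation step, reusing the illustrative WorkerCounts type from the sketch at the top of this page:

    #include <cstddef>
    #include <vector>

    struct WorkerCounts {                // as in the earlier sketch
      std::vector<size_t> marked_bytes;  // one slot per heap region
      std::vector<bool>   live_card_bm;  // one bit per card
    };

    // Fold every worker's local counts into the global structures, then
    // reset the locals. Runs at remark, after marking has completed.
    void aggregate_and_clear(std::vector<WorkerCounts>& workers,
                             std::vector<size_t>& region_live_bytes,
                             std::vector<bool>& global_card_bm) {
      for (WorkerCounts& wc : workers) {
        for (size_t r = 0; r < region_live_bytes.size(); ++r) {
          region_live_bytes[r] += wc.marked_bytes[r];
          wc.marked_bytes[r] = 0;
        }
        for (size_t c = 0; c < global_card_bm.size(); ++c) {
          if (wc.live_card_bm[c]) global_card_bm[c] = true;
        }
        wc.live_card_bm.assign(wc.live_card_bm.size(), false);
      }
    }
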
1261 1283
1262 1284 #define CARD_BM_TEST_MODE 0
1263 1285
1286      +// Used to calculate the number of live objects per region
1287      +// for verification purposes.
1264 1288 class CalcLiveObjectsClosure: public HeapRegionClosure {
1265 1289
1266 1290 CMBitMapRO* _bm;
1267 1291 ConcurrentMark* _cm;
1268 - bool _changed;
1269 - bool _yield;
1270 - size_t _words_done;
1292 + BitMap* _region_bm;
1293 + BitMap* _card_bm;
1294 +
1295 + size_t _tot_words_done;
1271 1296 size_t _tot_live;
1272 1297 size_t _tot_used;
1273 - size_t _regions_done;
1274 - double _start_vtime_sec;
1275 1298
1276 - BitMap* _region_bm;
1277 - BitMap* _card_bm;
1299 + size_t _region_marked_bytes;
1300 +
1278 1301 intptr_t _bottom_card_num;
1279 - bool _final;
1280 1302
1281 1303 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1282 - for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1304 + BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1305 + BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1306 +
1307 + for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1283 1308 #if CARD_BM_TEST_MODE
1284 - guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1309 + guarantee(_card_bm->at(i), "Should already be set.");
1285 1310 #else
1286 - _card_bm->par_at_put(i - _bottom_card_num, 1);
1311 + _card_bm->par_at_put(i, 1);
1287 1312 #endif
1288 1313 }
1289 1314 }
1290 1315
1291 1316 public:
1292 - CalcLiveObjectsClosure(bool final,
1293 - CMBitMapRO *bm, ConcurrentMark *cm,
1317 + CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1294 1318 BitMap* region_bm, BitMap* card_bm) :
1295 - _bm(bm), _cm(cm), _changed(false), _yield(true),
1296 - _words_done(0), _tot_live(0), _tot_used(0),
1297 - _region_bm(region_bm), _card_bm(card_bm),_final(final),
1298 - _regions_done(0), _start_vtime_sec(0.0)
1319 + _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1320 + _region_marked_bytes(0), _tot_words_done(0),
1321 + _tot_live(0), _tot_used(0)
1299 1322 {
1300 - _bottom_card_num =
1301 - intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1302 - CardTableModRefBS::card_shift);
1323 + _bottom_card_num = cm->heap_bottom_card_num();
1303 1324 }
1304 1325
1305 1326   // It takes a region that's not empty (i.e., it has at least one
1306 1327   // live object in it) and sets its corresponding bit on the region
1307 1328 // bitmap to 1. If the region is "starts humongous" it will also set
1308 1329 // to 1 the bits on the region bitmap that correspond to its
1309 1330 // associated "continues humongous" regions.
1310 1331 void set_bit_for_region(HeapRegion* hr) {
1311 1332 assert(!hr->continuesHumongous(), "should have filtered those out");
1312 1333
1313 1334 size_t index = hr->hrs_index();
1314 1335 if (!hr->startsHumongous()) {
1315 1336 // Normal (non-humongous) case: just set the bit.
1316 1337 _region_bm->par_at_put((BitMap::idx_t) index, true);
1317 1338 } else {
1318 1339 // Starts humongous case: calculate how many regions are part of
1319 - // this humongous region and then set the bit range. It might
1320 - // have been a bit more efficient to look at the object that
1321 - // spans these humongous regions to calculate their number from
1322 - // the object's size. However, it's a good idea to calculate
1323 - // this based on the metadata itself, and not the region
1324 - // contents, so that this code is not aware of what goes into
1325 - // the humongous regions (in case this changes in the future).
1340 + // this humongous region and then set the bit range.
1326 1341 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1327 - size_t end_index = index + 1;
1328 - while (end_index < g1h->n_regions()) {
1329 - HeapRegion* chr = g1h->region_at(end_index);
1330 - if (!chr->continuesHumongous()) break;
1331 - end_index += 1;
1332 - }
1342 + HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
1343 + size_t end_index = last_hr->hrs_index() + 1;
1333 1344 _region_bm->par_at_put_range((BitMap::idx_t) index,
1334 1345 (BitMap::idx_t) end_index, true);
1335 1346 }
1336 1347 }
1337 1348
1338 1349 bool doHeapRegion(HeapRegion* hr) {
1339 - if (!_final && _regions_done == 0) {
1340 - _start_vtime_sec = os::elapsedVTime();
1341 - }
1342 1350
1343 1351 if (hr->continuesHumongous()) {
1344 1352 // We will ignore these here and process them when their
1345 1353 // associated "starts humongous" region is processed (see
1346 1354 // set_bit_for_heap_region()). Note that we cannot rely on their
1347 1355 // associated "starts humongous" region to have their bit set to
1348 1356 // 1 since, due to the region chunking in the parallel region
1349 1357 // iteration, a "continues humongous" region might be visited
1350 1358 // before its associated "starts humongous".
1351 1359 return false;
1352 1360 }
1353 1361
1354 1362 HeapWord* nextTop = hr->next_top_at_mark_start();
1355 - HeapWord* start = hr->top_at_conc_mark_count();
1356 - assert(hr->bottom() <= start && start <= hr->end() &&
1357 - hr->bottom() <= nextTop && nextTop <= hr->end() &&
1358 - start <= nextTop,
1359 - "Preconditions.");
1360 - // Otherwise, record the number of word's we'll examine.
1363 + HeapWord* start = hr->bottom();
1364 +
1365 + assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1366 + "Preconditions.");
1367 +
1368      +    // Record the number of words we'll examine.
1361 1369 size_t words_done = (nextTop - start);
1370 +
1362 1371 // Find the first marked object at or after "start".
1363 1372 start = _bm->getNextMarkedWordAddress(start, nextTop);
1373 +
1364 1374 size_t marked_bytes = 0;
1375 + _region_marked_bytes = 0;
1365 1376
1366 1377 // Below, the term "card num" means the result of shifting an address
1367 1378 // by the card shift -- address 0 corresponds to card number 0. One
1368 1379 // must subtract the card num of the bottom of the heap to obtain a
1369 1380 // card table index.
1381 +
1370 1382 // The first card num of the sequence of live cards currently being
1371 1383 // constructed. -1 ==> no sequence.
1372 1384 intptr_t start_card_num = -1;
1385 +
1373 1386 // The last card num of the sequence of live cards currently being
1374 1387 // constructed. -1 ==> no sequence.
1375 1388 intptr_t last_card_num = -1;
1376 1389
1377 1390 while (start < nextTop) {
1378 - if (_yield && _cm->do_yield_check()) {
1379 - // We yielded. It might be for a full collection, in which case
1380 - // all bets are off; terminate the traversal.
1381 - if (_cm->has_aborted()) {
1382 - _changed = false;
1383 - return true;
1384 - } else {
1385 - // Otherwise, it might be a collection pause, and the region
1386 - // we're looking at might be in the collection set. We'll
1387 - // abandon this region.
1388 - return false;
1389 - }
1390 - }
1391 1391 oop obj = oop(start);
1392 1392 int obj_sz = obj->size();
1393 +
1393 1394 // The card num of the start of the current object.
1394 1395 intptr_t obj_card_num =
1395 1396 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1396 -
1397 1397 HeapWord* obj_last = start + obj_sz - 1;
1398 1398 intptr_t obj_last_card_num =
1399 1399 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1400 1400
1401 1401 if (obj_card_num != last_card_num) {
1402 1402 if (start_card_num == -1) {
1403 1403 assert(last_card_num == -1, "Both or neither.");
1404 1404 start_card_num = obj_card_num;
1405 1405 } else {
1406 1406 assert(last_card_num != -1, "Both or neither.");
1407 1407 assert(obj_card_num >= last_card_num, "Inv");
1408 1408 if ((obj_card_num - last_card_num) > 1) {
1409 1409 // Mark the last run, and start a new one.
1410 1410 mark_card_num_range(start_card_num, last_card_num);
1411 1411 start_card_num = obj_card_num;
1412 1412 }
1413 1413 }
1414 1414 #if CARD_BM_TEST_MODE
1415 - /*
1416 - gclog_or_tty->print_cr("Setting bits from %d/%d.",
1417 - obj_card_num - _bottom_card_num,
1418 - obj_last_card_num - _bottom_card_num);
1419 - */
1420 1415 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1421 1416 _card_bm->par_at_put(j - _bottom_card_num, 1);
1422 1417 }
1423 -#endif
1418 +#endif // CARD_BM_TEST_MODE
1424 1419 }
1425 1420 // In any case, we set the last card num.
1426 1421 last_card_num = obj_last_card_num;
1427 1422
1428 1423 marked_bytes += (size_t)obj_sz * HeapWordSize;
1424 +
1429 1425 // Find the next marked object after this one.
1430 1426 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1431 - _changed = true;
1432 1427 }
1428 +
1433 1429 // Handle the last range, if any.
1434 1430 if (start_card_num != -1) {
1435 1431 mark_card_num_range(start_card_num, last_card_num);
1436 1432 }
1437 - if (_final) {
1438 - // Mark the allocated-since-marking portion...
1439 - HeapWord* tp = hr->top();
1440 - if (nextTop < tp) {
1441 - start_card_num =
1442 - intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1443 - last_card_num =
1444 - intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1445 - mark_card_num_range(start_card_num, last_card_num);
1446 - // This definitely means the region has live objects.
1447 - set_bit_for_region(hr);
1448 - }
1433 +
1434 + // Mark the allocated-since-marking portion...
1435 + HeapWord* top = hr->top();
1436 + if (nextTop < top) {
1437 + start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1438 + last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1439 +
1440 + mark_card_num_range(start_card_num, last_card_num);
1441 +
1442 + // This definitely means the region has live objects.
1443 + set_bit_for_region(hr);
1449 1444 }
1450 1445
1451 - hr->add_to_marked_bytes(marked_bytes);
1452 1446 // Update the live region bitmap.
1453 1447 if (marked_bytes > 0) {
1454 1448 set_bit_for_region(hr);
1455 1449 }
1456 - hr->set_top_at_conc_mark_count(nextTop);
1450 +
1451 + // Set the marked bytes for the current region so that
1452 + // it can be queried by a calling verification routine.
1453 + _region_marked_bytes = marked_bytes;
1454 +
1457 1455 _tot_live += hr->next_live_bytes();
1458 1456 _tot_used += hr->used();
1459 - _words_done = words_done;
1457 + _tot_words_done = words_done;
1460 1458
1461 - if (!_final) {
1462 - ++_regions_done;
1463 - if (_regions_done % 10 == 0) {
1464 - double end_vtime_sec = os::elapsedVTime();
1465 - double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1466 - if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1467 - jlong sleep_time_ms =
1468 - (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1469 - os::sleep(Thread::current(), sleep_time_ms, false);
1470 - _start_vtime_sec = end_vtime_sec;
1459 + return false;
1460 + }
1461 +
1462 + size_t region_marked_bytes() const { return _region_marked_bytes; }
1463 + size_t tot_words_done() const { return _tot_words_done; }
1464 + size_t tot_live() const { return _tot_live; }
1465 + size_t tot_used() const { return _tot_used; }
1466 +};
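
The "card num" convention spelled out in the closure above is easy to get backwards, so here is a minimal standalone sketch of it: shift the address right by the card shift to get a card num, and subtract the heap bottom's card num to get a card bitmap index. The names (kCardShift, card_num_for, card_index_for) are invented for illustration, and the 9-bit shift only assumes G1's usual 512-byte cards; the real constant lives in CardTableModRefBS.

    #include <cassert>
    #include <cstdint>

    // Hypothetical stand-in for CardTableModRefBS::card_shift (512-byte cards).
    static const int kCardShift = 9;

    // Card num of an address: address 0 corresponds to card num 0.
    inline intptr_t card_num_for(const void* addr) {
      return intptr_t(uintptr_t(addr) >> kCardShift);
    }

    // Card bitmap index: the card num minus the card num of the heap bottom.
    inline size_t card_index_for(const void* addr, const void* heap_bottom) {
      return size_t(card_num_for(addr) - card_num_for(heap_bottom));
    }

    int main() {
      char heap[2048];
      assert(card_index_for(heap, heap) == 0);
      assert(card_index_for(heap + 512, heap) == 1);  // exactly one card later
      return 0;
    }
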
1467 +
1468 +// Heap region closure used for verifying the counting data
1469 +// that was accumulated concurrently and aggregated during
1470 +// the remark pause. This closure is applied to the heap
1471 +// regions during the STW cleanup pause.
1472 +
1473 +class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1474 + ConcurrentMark* _cm;
1475 + CalcLiveObjectsClosure _calc_cl;
1476 + BitMap* _region_bm; // Region BM to be verified
1477 + BitMap* _card_bm; // Card BM to be verified
1478 + bool _verbose; // verbose output?
1479 +
1480 + BitMap* _exp_region_bm; // Expected Region BM values
1481 + BitMap* _exp_card_bm; // Expected card BM values
1482 +
1483 + intptr_t _bottom_card_num; // Used for calculating bitmap indices
1484 +
1485 + int _failures;
1486 +
1487 +public:
1488 + VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1489 + BitMap* region_bm,
1490 + BitMap* card_bm,
1491 + BitMap* exp_region_bm,
1492 + BitMap* exp_card_bm,
1493 + bool verbose) :
1494 + _cm(cm),
1495 + _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1496 + _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1497 + _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1498 + _failures(0)
1499 + {
1500 + _bottom_card_num = cm->heap_bottom_card_num();
1501 + }
1502 +
1503 + int failures() const { return _failures; }
1504 +
1505 + bool doHeapRegion(HeapRegion* hr) {
1506 + if (hr->continuesHumongous()) {
1507 + // We will ignore these here and process them when their
1508 + // associated "starts humongous" region is processed (see
1509 + // set_bit_for_heap_region()). Note that we cannot rely on their
1510 + // associated "starts humongous" region to have their bit set to
1511 + // 1 since, due to the region chunking in the parallel region
1512 + // iteration, a "continues humongous" region might be visited
1513 + // before its associated "starts humongous".
1514 + return false;
1515 + }
1516 +
1517 + int failures = 0;
1518 +
1519 + // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1520 + // this region and set the corresponding bits in the expected region
1521 + // and card bitmaps.
1522 + bool res = _calc_cl.doHeapRegion(hr);
1523 + assert(res == false, "should be continuing");
1524 +
1525 + // Note that the calculated count data could be a subset of the
1526 + // count data that was accumulated during marking. See the comment
1527 + // in G1ParCopyHelper::copy_to_survivor_space() for an explanation
1528 + // of why.
1529 +
1530 + // Verify that _top_at_conc_count == ntams
1531 + if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
1532 + if (_verbose) {
1533 + gclog_or_tty->print_cr("Region %d: top at conc count incorrect: expected "
1534 + PTR_FORMAT", actual: "PTR_FORMAT,
1535 + hr->hrs_index(), hr->next_top_at_mark_start(),
1536 + hr->top_at_conc_mark_count());
1537 + }
1538 + failures += 1;
1539 + }
1540 +
1541 + // Verify the marked bytes for this region.
1542 + size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1543 + size_t act_marked_bytes = hr->next_marked_bytes();
1544 +
1545 + // We're not OK if expected marked bytes > actual marked bytes. It means
1546 + // we have missed accounting some objects during the actual marking.
1547 + if (exp_marked_bytes > act_marked_bytes) {
1548 + if (_verbose) {
1549 + gclog_or_tty->print_cr("Region %d: marked bytes mismatch: expected: "
1550 + SIZE_FORMAT", actual: "SIZE_FORMAT,
1551 + hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1552 + }
1553 + failures += 1;
1554 + }
1555 +
1556 + // Verify the bit, for this region, in the actual and expected
1557 + // (which was just calculated) region bit maps.
1558 + // We're not OK if the expected bit is set and the actual is not set.
1559 + BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
1560 +
1561 + bool expected = _exp_region_bm->at(index);
1562 + bool actual = _region_bm->at(index);
1563 + if (expected && !actual) {
1564 + if (_verbose) {
1565 + gclog_or_tty->print_cr("Region %d: region bitmap mismatch: expected: %d, actual: %d",
1566 + hr->hrs_index(), expected, actual);
1567 + }
1568 + failures += 1;
1569 + }
1570 +
1571 + // Verify that the card bit maps for the cards spanned by the current
1572 + // region match. The set of offsets that have set bits in the expected
1573 + // bitmap should be a subset of the offsets with set bits from the actual
1574 + // calculated card bitmap.
1575 + // Again it's more important that if the expected bit is set then the
1576 + // actual bit be set.
1577 + intptr_t start_card_num =
1578 + intptr_t(uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift);
1579 + intptr_t top_card_num =
1580 + intptr_t(uintptr_t(hr->top()) >> CardTableModRefBS::card_shift);
1581 +
1582 + BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1583 + BitMap::idx_t end_idx = top_card_num - _bottom_card_num;
1584 +
1585 + for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1586 + expected = _exp_card_bm->at(i);
1587 + actual = _card_bm->at(i);
1588 +
1589 + if (expected && !actual) {
1590 + if (_verbose) {
1591 + gclog_or_tty->print_cr("Region %d: card bitmap mismatch at idx %d: expected: %d, actual: %d",
1592 + hr->hrs_index(), i, expected, actual);
1471 1593 }
1594 + failures += 1;
1472 1595 }
1473 1596 }
1474 1597
1598 + if (failures > 0 && _verbose) {
1599 + gclog_or_tty->print("Region %d: bottom: "PTR_FORMAT", ntams: "
1600 + PTR_FORMAT", top: "PTR_FORMAT", end: "PTR_FORMAT,
1601 + hr->hrs_index(), hr->bottom(), hr->next_top_at_mark_start(),
1602 + hr->top(), hr->end());
1603 + gclog_or_tty->print_cr(", marked_bytes: calc/actual "SIZE_FORMAT"/"SIZE_FORMAT,
1604 + _calc_cl.region_marked_bytes(),
1605 + hr->next_marked_bytes());
1606 + }
1607 +
1608 + _failures += failures;
1609 +
1610 + // We could stop iteration over the heap when we
1611 + // find the first violating region by returning true.
1475 1612 return false;
1476 1613 }
1614 +};
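
Note the asymmetry in the verification closure above: only an expected-set/actual-clear bit is a failure, because the accumulated count data may legitimately be a superset of what the bitmap walk recomputes. A small sketch of that subset check, with std::vector<bool> standing in for HotSpot's BitMap:

    #include <cstdio>
    #include <vector>

    // Expected ⊆ actual: an expected bit missing from the actual map is a
    // failure; surplus actual bits are tolerated.
    static int count_subset_failures(const std::vector<bool>& expected,
                                     const std::vector<bool>& actual) {
      int failures = 0;
      for (size_t i = 0; i < expected.size(); ++i) {
        if (expected[i] && !actual[i]) {
          std::printf("mismatch at idx %zu: expected 1, actual 0\n", i);
          ++failures;
        }
      }
      return failures;
    }

    int main() {
      std::vector<bool> expected = {true, false, true, false};
      std::vector<bool> actual   = {true, true,  true, false};  // extra bit ok
      return count_subset_failures(expected, actual);           // 0 failures
    }
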
1615 +
1616 +
1617 +class G1ParVerifyFinalCountTask: public AbstractGangTask {
1618 +protected:
1619 + G1CollectedHeap* _g1h;
1620 + ConcurrentMark* _cm;
1621 + BitMap* _actual_region_bm;
1622 + BitMap* _actual_card_bm;
1623 +
1624 + size_t _n_workers;
1625 +
1626 + BitMap* _expected_region_bm;
1627 + BitMap* _expected_card_bm;
1628 +
1629 + int _failures;
1630 + bool _verbose;
1631 +
1632 +public:
1633 + G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1634 + BitMap* region_bm, BitMap* card_bm,
1635 + BitMap* expected_region_bm, BitMap* expected_card_bm)
1636 + : AbstractGangTask("G1 verify final counting"),
1637 + _g1h(g1h), _cm(_g1h->concurrent_mark()),
1638 + _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1639 + _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1640 + _failures(0), _verbose(false),
1641 + _n_workers(0)
1642 + {
1643 + assert(VerifyDuringGC, "don't call this otherwise");
1644 +
1645 + // Use the value already set as the number of active threads
1646 + // in the call to run_task().
1647 + if (G1CollectedHeap::use_parallel_gc_threads()) {
1648 + assert( _g1h->workers()->active_workers() > 0,
1649 + "Should have been previously set");
1650 + _n_workers = _g1h->workers()->active_workers();
1651 + } else {
1652 + _n_workers = 1;
1653 + }
1654 +
1655 + assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1656 + assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1657 +
1658 + _verbose = _cm->verbose_medium();
1659 + }
1660 +
1661 + void work(int worker_i) {
1662 + assert((size_t) worker_i < _n_workers, "invariant");
1663 +
1664 + VerifyLiveObjectDataHRClosure verify_cl(_cm,
1665 + _actual_region_bm, _actual_card_bm,
1666 + _expected_region_bm,
1667 + _expected_card_bm,
1668 + _verbose);
1477 1669
1478 - bool changed() { return _changed; }
1479 - void reset() { _changed = false; _words_done = 0; }
1480 - void no_yield() { _yield = false; }
1481 - size_t words_done() { return _words_done; }
1482 - size_t tot_live() { return _tot_live; }
1483 - size_t tot_used() { return _tot_used; }
1670 + if (G1CollectedHeap::use_parallel_gc_threads()) {
1671 + _g1h->heap_region_par_iterate_chunked(&verify_cl,
1672 + worker_i,
1673 + (int) _n_workers,
1674 + HeapRegion::VerifyCountClaimValue);
1675 + } else {
1676 + _g1h->heap_region_iterate(&verify_cl);
1677 + }
1678 +
1679 + Atomic::add(verify_cl.failures(), &_failures);
1680 + }
1681 +
1682 + int failures() const { return _failures; }
1484 1683 };
1485 1684
1685 +// Final update of count data (during cleanup).
1686 +// Adds [top_at_count, NTAMS) to the marked bytes for each
1687 +// region. Sets the bits in the card bitmap corresponding
1688 +// to the interval [top_at_count, top], and sets the
1689 +// liveness bit for each region containing live data
1690 +// in the region bitmap.
1486 1691
1487 -void ConcurrentMark::calcDesiredRegions() {
1488 - _region_bm.clear();
1489 - _card_bm.clear();
1490 - CalcLiveObjectsClosure calccl(false /*final*/,
1491 - nextMarkBitMap(), this,
1492 - &_region_bm, &_card_bm);
1493 - G1CollectedHeap *g1h = G1CollectedHeap::heap();
1494 - g1h->heap_region_iterate(&calccl);
1692 +class FinalCountDataUpdateClosure: public HeapRegionClosure {
1693 + ConcurrentMark* _cm;
1694 + BitMap* _region_bm;
1695 + BitMap* _card_bm;
1696 + intptr_t _bottom_card_num;
1495 1697
1496 - do {
1497 - calccl.reset();
1498 - g1h->heap_region_iterate(&calccl);
1499 - } while (calccl.changed());
1500 -}
1698 + size_t _total_live_bytes;
1699 + size_t _total_used_bytes;
1700 + size_t _total_words_done;
1701 +
1702 + void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1703 + BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1704 + BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1705 +
1706 + // Inclusive bit range [start_idx, last_idx]. par_at_put_range
1707 + // is exclusive so we have to also set the bit for last_idx.
1708 + // Passing last_idx+1 to par_at_put_range would work in
1709 + // most cases but could trip an OOB assertion.
1710 +
1711 + if ((last_idx - start_idx) > 0) {
1712 + _card_bm->par_at_put_range(start_idx, last_idx, true);
1713 + }
1714 + _card_bm->par_set_bit(last_idx);
1715 + }
1716 +
1717 + // Takes a region that's not empty (i.e., it has at least one
1718 + // live object in it) and sets its corresponding bit on the region
1719 + // bitmap to 1. If the region is "starts humongous" it will also set
1720 + // to 1 the bits on the region bitmap that correspond to its
1721 + // associated "continues humongous" regions.
1722 + void set_bit_for_region(HeapRegion* hr) {
1723 + assert(!hr->continuesHumongous(), "should have filtered those out");
1724 +
1725 + size_t index = hr->hrs_index();
1726 + if (!hr->startsHumongous()) {
1727 + // Normal (non-humongous) case: just set the bit.
1728 + _region_bm->par_set_bit((BitMap::idx_t) index);
1729 + } else {
1730 + // Starts humongous case: calculate how many regions are part of
1731 + // this humongous region and then set the bit range.
1732 + G1CollectedHeap* g1h = G1CollectedHeap::heap();
1733 + HeapRegion* last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
1734 + size_t end_index = last_hr->hrs_index() + 1;
1735 + _region_bm->par_at_put_range((BitMap::idx_t) index,
1736 + (BitMap::idx_t) end_index, true);
1737 + }
1738 + }
1739 +
1740 + public:
1741 + FinalCountDataUpdateClosure(ConcurrentMark* cm,
1742 + BitMap* region_bm,
1743 + BitMap* card_bm) :
1744 + _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1745 + _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0)
1746 + {
1747 + _bottom_card_num = cm->heap_bottom_card_num();
1748 + }
1749 +
1750 + bool doHeapRegion(HeapRegion* hr) {
1751 +
1752 + if (hr->continuesHumongous()) {
1753 + // We will ignore these here and process them when their
1754 + // associated "starts humongous" region is processed (see
1755 + // set_bit_for_heap_region()). Note that we cannot rely on their
1756 + // associated "starts humongous" region to have their bit set to
1757 + // 1 since, due to the region chunking in the parallel region
1758 + // iteration, a "continues humongous" region might be visited
1759 + // before its associated "starts humongous".
1760 + return false;
1761 + }
1762 +
1763 + HeapWord* start = hr->top_at_conc_mark_count();
1764 + HeapWord* ntams = hr->next_top_at_mark_start();
1765 + HeapWord* top = hr->top();
1766 +
1767 + assert(hr->bottom() <= start && start <= hr->end() &&
1768 + hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1769 +
1770 + size_t words_done = ntams - hr->bottom();
1771 +
1772 + intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1773 + intptr_t last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1774 +
1775 +
1776 + if (start < ntams) {
1777 + // The region was changed between the remark and cleanup pauses.
1778 + // We need to add (ntams - start) to the marked bytes
1779 + // for this region, and set bits for the range
1780 + // [ card_num(start), card_num(ntams) ) in the
1781 + // card bitmap.
1782 + size_t live_bytes = (ntams - start) * HeapWordSize;
1783 + hr->add_to_marked_bytes(live_bytes);
1784 +
1785 + // Record the new top at conc count
1786 + hr->set_top_at_conc_mark_count(ntams);
1787 +
1788 + // The setting of the bits in the card bitmap takes place below.
1789 + }
1790 +
1791 + // Mark the allocated-since-marking portion...
1792 + if (ntams < top) {
1793 + // This definitely means the region has live objects.
1794 + set_bit_for_region(hr);
1795 + }
1796 +
1797 + // Now set the bits for [start, top]
1798 + mark_card_num_range(start_card_num, last_card_num);
1799 +
1800 + // Set the bit for the region if it contains live data
1801 + if (hr->next_marked_bytes() > 0) {
1802 + set_bit_for_region(hr);
1803 + }
1804 +
1805 + _total_words_done += words_done;
1806 + _total_used_bytes += hr->used();
1807 + _total_live_bytes += hr->next_marked_bytes();
1808 +
1809 + return false;
1810 + }
1811 +
1812 + size_t total_words_done() const { return _total_words_done; }
1813 + size_t total_live_bytes() const { return _total_live_bytes; }
1814 + size_t total_used_bytes() const { return _total_used_bytes; }
1815 +};
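
The inclusive/exclusive dance in mark_card_num_range() above, restated over a plain bit vector: set [beg, last) with the exclusive primitive, then set the last bit separately rather than passing last+1. set_range_exclusive mimics BitMap::par_at_put_range's exclusive end index; the names are illustrative only.

    #include <cassert>
    #include <vector>

    // Exclusive of 'end', like BitMap::par_at_put_range(beg, end, true).
    static void set_range_exclusive(std::vector<bool>& bm, size_t beg, size_t end) {
      assert(end <= bm.size());
      for (size_t i = beg; i < end; ++i) bm[i] = true;
    }

    // Inclusive of 'last': avoids a last+1 that might run past the map.
    static void set_range_inclusive(std::vector<bool>& bm, size_t beg, size_t last) {
      if (last > beg) {
        set_range_exclusive(bm, beg, last);  // sets [beg, last)
      }
      bm[last] = true;                       // and then 'last' itself
    }

    int main() {
      std::vector<bool> bm(8, false);
      set_range_inclusive(bm, 2, 5);
      assert(bm[2] && bm[3] && bm[4] && bm[5] && !bm[6]);
      return 0;
    }
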
1501 1816
1502 1817 class G1ParFinalCountTask: public AbstractGangTask {
1503 1818 protected:
1504 1819 G1CollectedHeap* _g1h;
1505 - CMBitMap* _bm;
1820 + ConcurrentMark* _cm;
1821 + BitMap* _actual_region_bm;
1822 + BitMap* _actual_card_bm;
1823 +
1506 1824 size_t _n_workers;
1825 +
1507 1826 size_t *_live_bytes;
1508 1827 size_t *_used_bytes;
1509 - BitMap* _region_bm;
1510 - BitMap* _card_bm;
1828 +
1511 1829 public:
1512 - G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1513 - BitMap* region_bm, BitMap* card_bm)
1514 - : AbstractGangTask("G1 final counting"), _g1h(g1h),
1515 - _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
1516 - _n_workers(0)
1830 + G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1831 + : AbstractGangTask("G1 final counting"),
1832 + _g1h(g1h), _cm(_g1h->concurrent_mark()),
1833 + _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1834 + _n_workers(0)
1517 1835 {
1518 1836 // Use the value already set as the number of active threads
1519 1837 // in the call to run_task(). Needed for the allocation of
1520 1838 // _live_bytes and _used_bytes.
1521 1839 if (G1CollectedHeap::use_parallel_gc_threads()) {
1522 1840 assert( _g1h->workers()->active_workers() > 0,
1523 1841 "Should have been previously set");
1524 1842 _n_workers = _g1h->workers()->active_workers();
1525 1843 } else {
1526 1844 _n_workers = 1;
1527 1845 }
1528 1846
1529 1847 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1530 1848 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1531 1849 }
1532 1850
1533 1851 ~G1ParFinalCountTask() {
1534 1852 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1535 1853 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1536 1854 }
1537 1855
1538 - void work(int i) {
1539 - CalcLiveObjectsClosure calccl(true /*final*/,
1540 - _bm, _g1h->concurrent_mark(),
1541 - _region_bm, _card_bm);
1542 - calccl.no_yield();
1856 + void work(int worker_i) {
1857 + assert((size_t) worker_i < _n_workers, "invariant");
1858 +
1859 + FinalCountDataUpdateClosure final_update_cl(_cm,
1860 + _actual_region_bm,
1861 + _actual_card_bm);
1862 +
1543 1863 if (G1CollectedHeap::use_parallel_gc_threads()) {
1544 - _g1h->heap_region_par_iterate_chunked(&calccl, i,
1864 + _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1865 + worker_i,
1545 1866 (int) _n_workers,
1546 1867 HeapRegion::FinalCountClaimValue);
1547 1868 } else {
1548 - _g1h->heap_region_iterate(&calccl);
1869 + _g1h->heap_region_iterate(&final_update_cl);
1549 1870 }
1550 - assert(calccl.complete(), "Shouldn't have yielded!");
1551 1871
1552 - assert((size_t) i < _n_workers, "invariant");
1553 - _live_bytes[i] = calccl.tot_live();
1554 - _used_bytes[i] = calccl.tot_used();
1872 + _live_bytes[worker_i] = final_update_cl.total_live_bytes();
1873 + _used_bytes[worker_i] = final_update_cl.total_used_bytes();
1555 1874 }
1875 +
1556 1876 size_t live_bytes() {
1557 1877 size_t live_bytes = 0;
1558 1878 for (size_t i = 0; i < _n_workers; ++i)
1559 1879 live_bytes += _live_bytes[i];
1560 1880 return live_bytes;
1561 1881 }
1882 +
1562 1883 size_t used_bytes() {
1563 1884 size_t used_bytes = 0;
1564 1885 for (size_t i = 0; i < _n_workers; ++i)
1565 1886 used_bytes += _used_bytes[i];
1566 1887 return used_bytes;
1567 1888 }
1568 1889 };
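
G1ParFinalCountTask's _live_bytes/_used_bytes arrays follow a simple parallel pattern: one slot per worker, written without synchronization (each worker owns its slot), summed single-threaded afterwards. A sketch under hypothetical names:

    #include <cstddef>
    #include <numeric>
    #include <vector>

    struct CountTotals {
      std::vector<size_t> live_bytes;
      std::vector<size_t> used_bytes;

      explicit CountTotals(size_t n_workers)
        : live_bytes(n_workers, 0), used_bytes(n_workers, 0) {}

      // Each worker writes only its own slot, so plain stores are race-free.
      void record(size_t worker_i, size_t live, size_t used) {
        live_bytes[worker_i] = live;
        used_bytes[worker_i] = used;
      }

      size_t total_live() const {
        return std::accumulate(live_bytes.begin(), live_bytes.end(), size_t(0));
      }
      size_t total_used() const {
        return std::accumulate(used_bytes.begin(), used_bytes.end(), size_t(0));
      }
    };

    int main() {
      CountTotals totals(2);
      totals.record(0, 100, 150);   // worker 0's share of the regions
      totals.record(1, 200, 250);   // worker 1's share
      return totals.total_live() == 300 ? 0 : 1;
    }
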
1569 1890
1570 1891 class G1ParNoteEndTask;
1571 1892
1572 1893 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1573 1894 G1CollectedHeap* _g1;
1574 1895 int _worker_num;
1575 1896 size_t _max_live_bytes;
1576 1897 size_t _regions_claimed;
1577 1898 size_t _freed_bytes;
1578 1899 FreeRegionList* _local_cleanup_list;
1579 1900 OldRegionSet* _old_proxy_set;
1580 1901 HumongousRegionSet* _humongous_proxy_set;
1581 1902 HRRSCleanupTask* _hrrs_cleanup_task;
1582 1903 double _claimed_region_time;
1583 1904 double _max_region_time;
1584 1905
1585 1906 public:
1586 1907 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1587 1908 int worker_num,
1588 1909 FreeRegionList* local_cleanup_list,
1589 1910 OldRegionSet* old_proxy_set,
1590 1911 HumongousRegionSet* humongous_proxy_set,
1591 1912 HRRSCleanupTask* hrrs_cleanup_task) :
1592 1913 _g1(g1), _worker_num(worker_num),
1593 1914 _max_live_bytes(0), _regions_claimed(0),
1594 1915 _freed_bytes(0),
1595 1916 _claimed_region_time(0.0), _max_region_time(0.0),
1596 1917 _local_cleanup_list(local_cleanup_list),
1597 1918 _old_proxy_set(old_proxy_set),
1598 1919 _humongous_proxy_set(humongous_proxy_set),
1599 1920 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1600 1921
1601 1922 size_t freed_bytes() { return _freed_bytes; }
1602 1923
1603 1924 bool doHeapRegion(HeapRegion *hr) {
1604 1925 // We use a claim value of zero here because all regions
1605 1926 // were claimed with value 1 in the FinalCount task.
1606 1927 hr->reset_gc_time_stamp();
1607 1928 if (!hr->continuesHumongous()) {
1608 1929 double start = os::elapsedTime();
1609 1930 _regions_claimed++;
1610 1931 hr->note_end_of_marking();
1611 1932 _max_live_bytes += hr->max_live_bytes();
1612 1933 _g1->free_region_if_empty(hr,
1613 1934 &_freed_bytes,
1614 1935 _local_cleanup_list,
1615 1936 _old_proxy_set,
1616 1937 _humongous_proxy_set,
1617 1938 _hrrs_cleanup_task,
1618 1939 true /* par */);
1619 1940 double region_time = (os::elapsedTime() - start);
1620 1941 _claimed_region_time += region_time;
1621 1942 if (region_time > _max_region_time) {
1622 1943 _max_region_time = region_time;
1623 1944 }
1624 1945 }
1625 1946 return false;
1626 1947 }
1627 1948
1628 1949 size_t max_live_bytes() { return _max_live_bytes; }
1629 1950 size_t regions_claimed() { return _regions_claimed; }
1630 1951 double claimed_region_time_sec() { return _claimed_region_time; }
1631 1952 double max_region_time_sec() { return _max_region_time; }
1632 1953 };
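
The claim-value remarks in doHeapRegion() above refer to G1's parallel region iteration protocol: a worker owns a region for a given phase only if it is the one that advances the region's claim value from the previous phase's tag to the current one. A rough sketch of just that handshake (the tags and types are invented; the real heap_region_par_iterate_chunked() also deals regions out to workers in chunks):

    #include <atomic>

    // Illustrative tags only; HeapRegion defines InitialClaimValue,
    // FinalCountClaimValue, NoteEndClaimValue, etc.
    enum { kInitialClaim = 0, kFinalCountClaim = 1, kNoteEndClaim = 2 };

    struct Region {
      std::atomic<int> claim{kInitialClaim};
    };

    // Succeeds for exactly one worker per region per phase: the one that
    // moves the claim value forward from 'expected_tag' to 'new_tag'.
    static bool try_claim(Region& r, int expected_tag, int new_tag) {
      int cur = expected_tag;
      return r.claim.compare_exchange_strong(cur, new_tag);
    }

    int main() {
      Region r;
      bool first  = try_claim(r, kInitialClaim, kFinalCountClaim);  // succeeds
      bool second = try_claim(r, kInitialClaim, kFinalCountClaim);  // loses race
      return (first && !second) ? 0 : 1;
    }
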
1633 1954
1634 1955 class G1ParNoteEndTask: public AbstractGangTask {
1635 1956 friend class G1NoteEndOfConcMarkClosure;
1636 1957
1637 1958 protected:
1638 1959 G1CollectedHeap* _g1h;
1639 1960 size_t _max_live_bytes;
1640 1961 size_t _freed_bytes;
1641 1962 FreeRegionList* _cleanup_list;
1642 1963
1643 1964 public:
1644 1965 G1ParNoteEndTask(G1CollectedHeap* g1h,
1645 1966 FreeRegionList* cleanup_list) :
1646 1967 AbstractGangTask("G1 note end"), _g1h(g1h),
1647 1968 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1648 1969
1649 1970 void work(int i) {
1650 1971 double start = os::elapsedTime();
1651 1972 FreeRegionList local_cleanup_list("Local Cleanup List");
1652 1973 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1653 1974 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1654 1975 HRRSCleanupTask hrrs_cleanup_task;
1655 1976 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1656 1977 &old_proxy_set,
1657 1978 &humongous_proxy_set,
1658 1979 &hrrs_cleanup_task);
1659 1980 if (G1CollectedHeap::use_parallel_gc_threads()) {
1660 1981 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1661 1982 _g1h->workers()->active_workers(),
1662 1983 HeapRegion::NoteEndClaimValue);
1663 1984 } else {
1664 1985 _g1h->heap_region_iterate(&g1_note_end);
1665 1986 }
1666 1987 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1667 1988
1668 1989 // Now update the lists
1669 1990 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1670 1991 NULL /* free_list */,
1671 1992 &old_proxy_set,
1672 1993 &humongous_proxy_set,
1673 1994 true /* par */);
1674 1995 {
1675 1996 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1676 1997 _max_live_bytes += g1_note_end.max_live_bytes();
1677 1998 _freed_bytes += g1_note_end.freed_bytes();
1678 1999
1679 2000 // If we iterate over the global cleanup list at the end of
1680 2001 // cleanup to do this printing we will not guarantee to only
1681 2002 // generate output for the newly-reclaimed regions (the list
1682 2003 // might not be empty at the beginning of cleanup; we might
1683 2004 // still be working on its previous contents). So we do the
1684 2005 // printing here, before we append the new regions to the global
1685 2006 // cleanup list.
1686 2007
1687 2008 G1HRPrinter* hr_printer = _g1h->hr_printer();
1688 2009 if (hr_printer->is_active()) {
1689 2010 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1690 2011 while (iter.more_available()) {
1691 2012 HeapRegion* hr = iter.get_next();
1692 2013 hr_printer->cleanup(hr);
1693 2014 }
1694 2015 }
1695 2016
1696 2017 _cleanup_list->add_as_tail(&local_cleanup_list);
1697 2018 assert(local_cleanup_list.is_empty(), "post-condition");
1698 2019
1699 2020 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1700 2021 }
1701 2022 double end = os::elapsedTime();
1702 2023 if (G1PrintParCleanupStats) {
1703 2024 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1704 2025 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1705 2026 i, start, end, (end-start)*1000.0,
1706 2027 g1_note_end.regions_claimed(),
1707 2028 g1_note_end.claimed_region_time_sec()*1000.0,
1708 2029 g1_note_end.max_region_time_sec()*1000.0);
1709 2030 }
1710 2031 }
1711 2032 size_t max_live_bytes() { return _max_live_bytes; }
1712 2033 size_t freed_bytes() { return _freed_bytes; }
1713 2034 };
1714 2035
1715 2036 class G1ParScrubRemSetTask: public AbstractGangTask {
1716 2037 protected:
1717 2038 G1RemSet* _g1rs;
1718 2039 BitMap* _region_bm;
1719 2040 BitMap* _card_bm;
1720 2041 public:
1721 2042 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1722 2043 BitMap* region_bm, BitMap* card_bm) :
1723 2044 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1724 2045 _region_bm(region_bm), _card_bm(card_bm)
1725 2046 {}
1726 2047
1727 2048 void work(int i) {
1728 2049 if (G1CollectedHeap::use_parallel_gc_threads()) {
1729 2050 _g1rs->scrub_par(_region_bm, _card_bm, i,
1730 2051 HeapRegion::ScrubRemSetClaimValue);
1731 2052 } else {
1732 2053 _g1rs->scrub(_region_bm, _card_bm);
1733 2054 }
1734 2055 }
1735 2056
1736 2057 };
1737 2058
1738 2059 void ConcurrentMark::cleanup() {
1739 2060 // world is stopped at this checkpoint
1740 2061 assert(SafepointSynchronize::is_at_safepoint(),
1741 2062 "world should be stopped");
1742 2063 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1743 2064
1744 2065 // If a full collection has happened, we shouldn't do this.
1745 2066 if (has_aborted()) {
1746 2067 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1747 2068 return;
1748 2069 }
1749 2070
1750 2071 HRSPhaseSetter x(HRSPhaseCleanup);
1751 2072 g1h->verify_region_sets_optional();
1752 2073
1753 2074 if (VerifyDuringGC) {
1754 2075 HandleMark hm; // handle scope
1755 2076 gclog_or_tty->print(" VerifyDuringGC:(before)");
1756 2077 Universe::heap()->prepare_for_verify();
1757 2078 Universe::verify(/* allow dirty */ true,
1758 2079 /* silent */ false,
1759 2080 /* option */ VerifyOption_G1UsePrevMarking);
1760 2081 }
1761 2082
1762 2083 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1763 2084 g1p->record_concurrent_mark_cleanup_start();
1764 2085
1765 2086 double start = os::elapsedTime();
1766 2087
1767 2088 HeapRegionRemSet::reset_for_cleanup_tasks();
1768 2089
2090 + // Clear the global region bitmap - it will be filled as part
2091 + // of the final counting task.
2092 + _region_bm.clear();
2093 +
1769 2094 size_t n_workers;
1770 2095
1771 2096 // Do counting once more with the world stopped for good measure.
1772 - G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1773 - &_region_bm, &_card_bm);
2097 + G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2098 +
1774 2099 if (G1CollectedHeap::use_parallel_gc_threads()) {
1775 - assert(g1h->check_heap_region_claim_values(
1776 - HeapRegion::InitialClaimValue),
2100 + assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1777 2101 "sanity check");
1778 2102
1779 2103 g1h->set_par_threads();
1780 2104 n_workers = g1h->n_par_threads();
1781 2105 assert(g1h->n_par_threads() == (int) n_workers,
1782 2106 "Should not have been reset");
1783 2107 g1h->workers()->run_task(&g1_par_count_task);
1784 2108 // Done with the parallel phase so reset to 0.
1785 2109 g1h->set_par_threads(0);
1786 2110
1787 - assert(g1h->check_heap_region_claim_values(
1788 - HeapRegion::FinalCountClaimValue),
2111 + assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1789 2112 "sanity check");
1790 2113 } else {
1791 2114 n_workers = 1;
1792 2115 g1_par_count_task.work(0);
1793 2116 }
1794 2117
2118 + if (VerifyDuringGC) {
2119 + // Verify that the counting data accumulated during marking matches
2120 + // that calculated by walking the marking bitmap.
2121 +
2122 + // Bitmaps to hold expected values
2123 + BitMap expected_region_bm(_region_bm.size(), false);
2124 + BitMap expected_card_bm(_card_bm.size(), false);
2125 +
2126 + G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2127 + &_region_bm,
2128 + &_card_bm,
2129 + &expected_region_bm,
2130 + &expected_card_bm);
2131 +
2132 + if (G1CollectedHeap::use_parallel_gc_threads()) {
2133 + g1h->set_par_threads((int)n_workers);
2134 + g1h->workers()->run_task(&g1_par_verify_task);
2135 + // Done with the parallel phase so reset to 0.
2136 + g1h->set_par_threads(0);
2137 +
2138 + assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2139 + "sanity check");
2140 + } else {
2141 + g1_par_verify_task.work(0);
2142 + }
2143 +
2144 + guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2145 + }
2146 +
1795 2147 size_t known_garbage_bytes =
1796 2148 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1797 2149 g1p->set_known_garbage_bytes(known_garbage_bytes);
1798 2150
1799 2151 size_t start_used_bytes = g1h->used();
1800 2152 _at_least_one_mark_complete = true;
1801 2153 g1h->set_marking_complete();
1802 2154
1803 2155 ergo_verbose4(ErgoConcCycles,
1804 2156 "finish cleanup",
1805 2157 ergo_format_byte("occupancy")
1806 2158 ergo_format_byte("capacity")
1807 2159 ergo_format_byte_perc("known garbage"),
1808 2160 start_used_bytes, g1h->capacity(),
1809 2161 known_garbage_bytes,
1810 2162 ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1811 2163
1812 2164 double count_end = os::elapsedTime();
1813 2165 double this_final_counting_time = (count_end - start);
1814 2166 if (G1PrintParCleanupStats) {
1815 2167 gclog_or_tty->print_cr("Cleanup:");
1816 2168 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1817 2169 this_final_counting_time*1000.0);
1818 2170 }
1819 2171 _total_counting_time += this_final_counting_time;
1820 2172
1821 2173 if (G1PrintRegionLivenessInfo) {
1822 2174 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1823 2175 _g1h->heap_region_iterate(&cl);
1824 2176 }
1825 2177
1826 2178 // Install newly created mark bitMap as "prev".
1827 2179 swapMarkBitMaps();
1828 2180
1829 2181 g1h->reset_gc_time_stamp();
1830 2182
1831 2183 // Note end of marking in all heap regions.
1832 2184 double note_end_start = os::elapsedTime();
1833 2185 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1834 2186 if (G1CollectedHeap::use_parallel_gc_threads()) {
1835 2187 g1h->set_par_threads((int)n_workers);
1836 2188 g1h->workers()->run_task(&g1_par_note_end_task);
1837 2189 g1h->set_par_threads(0);
1838 2190
1839 2191 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1840 2192 "sanity check");
1841 2193 } else {
1842 2194 g1_par_note_end_task.work(0);
1843 2195 }
1844 2196
1845 2197 if (!cleanup_list_is_empty()) {
1846 2198 // The cleanup list is not empty, so we'll have to process it
1847 2199 // concurrently. Notify anyone else that might be wanting free
1848 2200 // regions that there will be more free regions coming soon.
1849 2201 g1h->set_free_regions_coming();
1850 2202 }
1851 2203 double note_end_end = os::elapsedTime();
1852 2204 if (G1PrintParCleanupStats) {
1853 2205 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1854 2206 (note_end_end - note_end_start)*1000.0);
1855 2207 }
1856 2208
1857 2209 // Scrub the remembered sets before the sorting call below, since
1858 2210 // scrubbing affects the metric by which we sort the heap regions.
1859 2211 if (G1ScrubRemSets) {
1860 2212 double rs_scrub_start = os::elapsedTime();
1861 2213 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1862 2214 if (G1CollectedHeap::use_parallel_gc_threads()) {
1863 2215 g1h->set_par_threads((int)n_workers);
1864 2216 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1865 2217 g1h->set_par_threads(0);
1866 2218
1867 2219 assert(g1h->check_heap_region_claim_values(
1868 2220 HeapRegion::ScrubRemSetClaimValue),
1869 2221 "sanity check");
1870 2222 } else {
1871 2223 g1_par_scrub_rs_task.work(0);
1872 2224 }
1873 2225
1874 2226 double rs_scrub_end = os::elapsedTime();
1875 2227 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1876 2228 _total_rs_scrub_time += this_rs_scrub_time;
1877 2229 }
1878 2230
1879 2231 // this will also free any regions totally full of garbage objects,
1880 2232 // and sort the regions.
1881 2233 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1882 2234
1883 2235 // Statistics.
1884 2236 double end = os::elapsedTime();
1885 2237 _cleanup_times.add((end - start) * 1000.0);
1886 2238
1887 2239 // G1CollectedHeap::heap()->print();
1888 2240 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1889 2241 // G1CollectedHeap::heap()->get_gc_time_stamp());
1890 2242
1891 2243 if (PrintGC || PrintGCDetails) {
1892 2244 g1h->print_size_transition(gclog_or_tty,
1893 2245 start_used_bytes,
1894 2246 g1h->used(),
1895 2247 g1h->capacity());
1896 2248 }
1897 2249
1898 2250 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1899 2251 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1900 2252
1901 2253 // Clean up will have freed any regions completely full of garbage.
1902 2254 // Update the soft reference policy with the new heap occupancy.
1903 2255 Universe::update_heap_info_at_gc();
1904 2256
1905 2257 // We need to make this be a "collection" so any collection pause that
1906 2258 // races with it goes around and waits for completeCleanup to finish.
1907 2259 g1h->increment_total_collections();
1908 2260
1909 2261 if (VerifyDuringGC) {
1910 2262 HandleMark hm; // handle scope
1911 2263 gclog_or_tty->print(" VerifyDuringGC:(after)");
1912 2264 Universe::heap()->prepare_for_verify();
1913 2265 Universe::verify(/* allow dirty */ true,
1914 2266 /* silent */ false,
1915 2267 /* option */ VerifyOption_G1UsePrevMarking);
1916 2268 }
1917 2269
1918 2270 g1h->verify_region_sets_optional();
1919 2271 }
1920 2272
1921 2273 void ConcurrentMark::completeCleanup() {
1922 2274 if (has_aborted()) return;
1923 2275
1924 2276 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1925 2277
1926 2278 _cleanup_list.verify_optional();
1927 2279 FreeRegionList tmp_free_list("Tmp Free List");
1928 2280
1929 2281 if (G1ConcRegionFreeingVerbose) {
1930 2282 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1931 2283 "cleanup list has "SIZE_FORMAT" entries",
1932 2284 _cleanup_list.length());
1933 2285 }
1934 2286
1935 2287 // No one else should be accessing the _cleanup_list at this point,
1936 2288 // so it's not necessary to take any locks
1937 2289 while (!_cleanup_list.is_empty()) {
1938 2290 HeapRegion* hr = _cleanup_list.remove_head();
1939 2291 assert(hr != NULL, "the list was not empty");
1940 2292 hr->par_clear();
1941 2293 tmp_free_list.add_as_tail(hr);
1942 2294
1943 2295 // Instead of adding one region at a time to the secondary_free_list,
1944 2296 // we accumulate them in the local list and move them a few at a
1945 2297 // time. This also cuts down on the number of notify_all() calls
1946 2298 // we do during this process. We'll also append the local list when
1947 2299 // _cleanup_list is empty (which means we just removed the last
1948 2300 // region from the _cleanup_list).
1949 2301 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1950 2302 _cleanup_list.is_empty()) {
1951 2303 if (G1ConcRegionFreeingVerbose) {
1952 2304 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1953 2305 "appending "SIZE_FORMAT" entries to the "
1954 2306 "secondary_free_list, clean list still has "
1955 2307 SIZE_FORMAT" entries",
1956 2308 tmp_free_list.length(),
1957 2309 _cleanup_list.length());
1958 2310 }
1959 2311
1960 2312 {
1961 2313 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1962 2314 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1963 2315 SecondaryFreeList_lock->notify_all();
1964 2316 }
1965 2317
1966 2318 if (G1StressConcRegionFreeing) {
1967 2319 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1968 2320 os::sleep(Thread::current(), (jlong) 1, false);
1969 2321 }
1970 2322 }
1971 2323 }
1972 2324 }
1973 2325 assert(tmp_free_list.is_empty(), "post-condition");
1974 2326 }
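
completeCleanup()'s tmp_free_list is a batching idiom: accumulate entries locally and take the shared lock (and pay the notify_all) once per batch rather than once per region. A generic sketch of the shape, with an invented flush callback standing in for the SecondaryFreeList_lock critical section:

    #include <cstdio>
    #include <deque>
    #include <vector>

    template <typename T, typename FlushFn>
    static void drain_in_batches(std::deque<T>& src, size_t batch_len,
                                 FlushFn flush) {
      std::vector<T> local;
      while (!src.empty()) {
        local.push_back(src.front());
        src.pop_front();
        // Flush a full batch, and also whatever is left once src drains,
        // mirroring the (length % N == 0) || is_empty() test above.
        if (local.size() == batch_len || src.empty()) {
          flush(local);   // would lock, append to the shared list, notify_all
          local.clear();
        }
      }
    }

    int main() {
      std::deque<int> regions = {1, 2, 3, 4, 5};
      drain_in_batches(regions, 2, [](std::vector<int>& batch) {
        std::printf("flushing %zu entries\n", batch.size());
      });
      return 0;
    }
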
1975 2327
1976 2328 // Support closures for reference processing in G1
1977 2329
1978 2330 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1979 2331 HeapWord* addr = (HeapWord*)obj;
1980 2332 return addr != NULL &&
1981 2333 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1982 2334 }
1983 2335
1984 2336 class G1CMKeepAliveClosure: public OopClosure {
1985 2337 G1CollectedHeap* _g1;
1986 2338 ConcurrentMark* _cm;
1987 - CMBitMap* _bitMap;
1988 2339 public:
1989 - G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1990 - CMBitMap* bitMap) :
1991 - _g1(g1), _cm(cm),
1992 - _bitMap(bitMap) {}
2340 + G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2341 + _g1(g1), _cm(cm)
2342 + {
2343 + assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2344 + }
1993 2345
1994 2346 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1995 2347 virtual void do_oop( oop* p) { do_oop_work(p); }
1996 2348
1997 2349 template <class T> void do_oop_work(T* p) {
1998 2350 oop obj = oopDesc::load_decode_heap_oop(p);
1999 2351 HeapWord* addr = (HeapWord*)obj;
2000 2352
2001 2353 if (_cm->verbose_high()) {
2002 2354 gclog_or_tty->print_cr("\t[0] we're looking at location "
2003 2355 "*"PTR_FORMAT" = "PTR_FORMAT,
2004 2356 p, (void*) obj);
2005 2357 }
2006 2358
2007 2359 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2008 - _bitMap->mark(addr);
2360 + _cm->mark_and_count(obj);
2009 2361 _cm->mark_stack_push(obj);
2010 2362 }
2011 2363 }
2012 2364 };
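
The switch from _bitMap->mark(addr) to _cm->mark_and_count(obj) above is the heart of this change: marking and liveness accounting happen in one step, with byte counts kept in task/worker-local storage instead of being recomputed in a separate counting phase. A toy sketch of that coupling; MarkCounters and count_object() are invented for illustration, not ConcurrentMark's interface:

    #include <cstddef>

    // Per-task/worker counters: thread-local by construction, so the hot
    // marking path needs no atomics. Aggregation into the global region/card
    // data happens later (during remark/cleanup in the real patch).
    struct MarkCounters {
      size_t marked_bytes = 0;

      // Stand-in for the accounting half of mark_and_count(): the caller
      // has just set the mark bit for an object of this size.
      void count_object(size_t obj_size_bytes) {
        marked_bytes += obj_size_bytes;
      }
    };

    int main() {
      MarkCounters local;
      local.count_object(24);   // e.g. a small object just marked
      local.count_object(64);
      return local.marked_bytes == 88 ? 0 : 1;
    }
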
2013 2365
2014 2366 class G1CMDrainMarkingStackClosure: public VoidClosure {
2367 + ConcurrentMark* _cm;
2015 2368 CMMarkStack* _markStack;
2016 - CMBitMap* _bitMap;
2017 2369 G1CMKeepAliveClosure* _oopClosure;
2018 2370 public:
2019 - G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2371 + G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2020 2372 G1CMKeepAliveClosure* oopClosure) :
2021 - _bitMap(bitMap),
2373 + _cm(cm),
2022 2374 _markStack(markStack),
2023 2375 _oopClosure(oopClosure)
2024 2376 {}
2025 2377
2026 2378 void do_void() {
2027 - _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2379 + _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2028 2380 }
2029 2381 };
2030 2382
2031 2383 // 'Keep Alive' closure used by parallel reference processing.
2032 2384 // An instance of this closure is used in the parallel reference processing
2033 2385 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2034 2386 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2035 2387 // placed onto the discovered ref lists only once, so we can mark and push with no
2036 2388 // need to check whether the object has already been marked. Using the
2037 2389 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2038 2390 // operating on the global mark stack. This means that an individual
2039 2391 // worker would be doing lock-free pushes while it processes its own
2040 2392 // discovered ref list followed by drain call. If the discovered ref lists
2041 2393 // are unbalanced then this could cause interference with the other
2042 2394 // workers. Using a CMTask (and its embedded local data structures)
2043 2395 // avoids that potential interference.
2044 2396 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2045 2397 ConcurrentMark* _cm;
2046 2398 CMTask* _task;
2047 2399 int _ref_counter_limit;
2048 2400 int _ref_counter;
2049 2401 public:
2050 2402 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2051 2403 _cm(cm), _task(task),
2052 2404 _ref_counter_limit(G1RefProcDrainInterval) {
2053 2405 assert(_ref_counter_limit > 0, "sanity");
2054 2406 _ref_counter = _ref_counter_limit;
2055 2407 }
2056 2408
2057 2409 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2058 2410 virtual void do_oop( oop* p) { do_oop_work(p); }
2059 2411
2060 2412 template <class T> void do_oop_work(T* p) {
2061 2413 if (!_cm->has_overflown()) {
2062 2414 oop obj = oopDesc::load_decode_heap_oop(p);
2063 2415 if (_cm->verbose_high()) {
2064 2416 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2065 2417 "*"PTR_FORMAT" = "PTR_FORMAT,
2066 2418 _task->task_id(), p, (void*) obj);
2067 2419 }
2068 2420
2069 2421 _task->deal_with_reference(obj);
2070 2422 _ref_counter--;
2071 2423
2072 2424 if (_ref_counter == 0) {
2073 2425 // We have dealt with _ref_counter_limit references, pushing them and objects
2074 2426 // reachable from them on to the local stack (and possibly the global stack).
2075 2427 // Call do_marking_step() to process these entries. We call the routine in a
2076 2428 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2077 2429 // with the entries that we've pushed as a result of the deal_with_reference
2078 2430 // calls above) or we overflow.
2079 2431 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2080 2432 // while there may still be some work to do. (See the comment at the
2081 2433 // beginning of CMTask::do_marking_step() for those conditions - one of which
2082 2434 // is reaching the specified time target.) It is only when
2083 2435 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2084 2436 // that the marking has completed.
2085 2437 do {
2086 2438 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2087 2439 _task->do_marking_step(mark_step_duration_ms,
2088 2440 false /* do_stealing */,
2089 2441 false /* do_termination */);
2090 2442 } while (_task->has_aborted() && !_cm->has_overflown());
2091 2443 _ref_counter = _ref_counter_limit;
2092 2444 }
2093 2445 } else {
2094 2446 if (_cm->verbose_high()) {
2095 2447 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2096 2448 }
2097 2449 }
2098 2450 }
2099 2451 };
2100 2452
2101 2453 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2102 2454 ConcurrentMark* _cm;
2103 2455 CMTask* _task;
2104 2456 public:
2105 2457 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2106 2458 _cm(cm), _task(task)
2107 2459 {}
2108 2460
2109 2461 void do_void() {
2110 2462 do {
2111 2463 if (_cm->verbose_high()) {
2112 2464 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2113 2465 _task->task_id());
2114 2466 }
2115 2467
2116 2468 // We call CMTask::do_marking_step() to completely drain the local and
2117 2469 // global marking stacks. The routine is called in a loop, which we'll
2118 2470 // exit if there's nothing more to do (i.e. we've completely drained the
2119 2471 // entries that were pushed as a result of applying the
2120 2472 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2121 2473 // lists above) or we overflow the global marking stack.
2122 2474 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2123 2475 // while there may still be some work to do. (See the comment at the
2124 2476 // beginning of CMTask::do_marking_step() for those conditions - one of which
2125 2477 // is reaching the specified time target.) It is only when
2126 2478 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2127 2479 // that the marking has completed.
2128 2480
2129 2481 _task->do_marking_step(1000000000.0 /* something very large */,
2130 2482 true /* do_stealing */,
2131 2483 true /* do_termination */);
2132 2484 } while (_task->has_aborted() && !_cm->has_overflown());
2133 2485 }
2134 2486 };
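
Both drain closures above wrap CMTask::do_marking_step() in the same retry loop, because a step may return with has_aborted() set even though work remains (e.g. its time slice expired); only a return without the aborted flag means marking is complete. A stub rendering of just the loop shape; StubTask/StubCM are fakes standing in for CMTask/ConcurrentMark:

    #include <cstdio>

    struct StubTask {
      int steps_left = 3;                       // pretend 3 slices are needed
      bool aborted = false;
      void do_marking_step(double, bool, bool) {
        aborted = (--steps_left > 0);           // abort until work runs out
      }
      bool has_aborted() const { return aborted; }
    };
    struct StubCM { bool has_overflown() const { return false; } };

    int main() {
      StubTask task; StubCM cm;
      do {
        task.do_marking_step(1e9 /* effectively unbounded */,
                             true /* do_stealing */,
                             true /* do_termination */);
      } while (task.has_aborted() && !cm.has_overflown());
      std::printf("marking complete\n");
      return 0;
    }
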
2135 2487
2136 2488 // Implementation of AbstractRefProcTaskExecutor for parallel
2137 2489 // reference processing at the end of G1 concurrent marking
2138 2490
2139 2491 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2140 2492 private:
2141 2493 G1CollectedHeap* _g1h;
2142 2494 ConcurrentMark* _cm;
2143 2495 WorkGang* _workers;
2144 2496 int _active_workers;
2145 2497
2146 2498 public:
2147 2499 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2148 2500 ConcurrentMark* cm,
2149 2501 WorkGang* workers,
2150 2502 int n_workers) :
2151 2503 _g1h(g1h), _cm(cm),
2152 2504 _workers(workers), _active_workers(n_workers) { }
2153 2505
2154 2506 // Executes the given task using concurrent marking worker threads.
2155 2507 virtual void execute(ProcessTask& task);
2156 2508 virtual void execute(EnqueueTask& task);
2157 2509 };
2158 2510
2159 2511 class G1CMRefProcTaskProxy: public AbstractGangTask {
2160 2512 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2161 2513 ProcessTask& _proc_task;
2162 2514 G1CollectedHeap* _g1h;
2163 2515 ConcurrentMark* _cm;
2164 2516
2165 2517 public:
2166 2518 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2167 2519 G1CollectedHeap* g1h,
2168 2520 ConcurrentMark* cm) :
2169 2521 AbstractGangTask("Process reference objects in parallel"),
2170 2522 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2171 2523
2172 2524 virtual void work(int i) {
2173 2525 CMTask* marking_task = _cm->task(i);
2174 2526 G1CMIsAliveClosure g1_is_alive(_g1h);
2175 2527 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2176 2528 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2177 2529
2178 2530 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2179 2531 }
2180 2532 };
2181 2533
2182 2534 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2183 2535 assert(_workers != NULL, "Need parallel worker threads.");
2184 2536
2185 2537 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2186 2538
2187 2539 // We need to reset the phase for each task execution so that
2188 2540 // the termination protocol of CMTask::do_marking_step works.
2189 2541 _cm->set_phase(_active_workers, false /* concurrent */);
2190 2542 _g1h->set_par_threads(_active_workers);
2191 2543 _workers->run_task(&proc_task_proxy);
2192 2544 _g1h->set_par_threads(0);
2193 2545 }
2194 2546
2195 2547 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2196 2548 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2197 2549 EnqueueTask& _enq_task;
2198 2550
2199 2551 public:
2200 2552 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2201 2553 AbstractGangTask("Enqueue reference objects in parallel"),
2202 2554 _enq_task(enq_task) { }
2203 2555
2204 2556 virtual void work(int i) {
2205 2557 _enq_task.work(i);
2206 2558 }
2207 2559 };
2208 2560
2209 2561 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2210 2562 assert(_workers != NULL, "Need parallel worker threads.");
2211 2563
2212 2564 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2213 2565
2214 2566 _g1h->set_par_threads(_active_workers);
2215 2567 _workers->run_task(&enq_task_proxy);
2216 2568 _g1h->set_par_threads(0);
2217 2569 }
2218 2570
2219 2571 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2220 2572 ResourceMark rm;
2221 2573 HandleMark hm;
2222 2574
2223 2575 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2224 2576
2225 2577 // Is alive closure.
2226 2578 G1CMIsAliveClosure g1_is_alive(g1h);
2227 2579
2228 2580 // Inner scope to exclude the cleaning of the string and symbol
2229 2581 // tables from the displayed time.
2230 2582 {
2231 2583 bool verbose = PrintGC && PrintGCDetails;
2232 2584 if (verbose) {
2233 2585 gclog_or_tty->put(' ');
2234 2586 }
2235 2587 TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2236 2588
2237 2589 ReferenceProcessor* rp = g1h->ref_processor_cm();
2238 2590
2239 2591 // See the comment in G1CollectedHeap::ref_processing_init()
2240 2592 // about how reference processing currently works in G1.
2241 2593
2242 2594 // Process weak references.
2243 2595 rp->setup_policy(clear_all_soft_refs);
2244 2596 assert(_markStack.isEmpty(), "mark stack should be empty");
2245 2597
2246 - G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2598 + G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2247 2599 G1CMDrainMarkingStackClosure
2248 - g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2600 + g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2249 2601
2250 2602 // We use the work gang from the G1CollectedHeap and we utilize all
2251 2603 // the worker threads.
2252 2604 int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
2253 2605 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2254 2606
2255 2607 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2256 2608 g1h->workers(), active_workers);
2257 2609
2258 2610 if (rp->processing_is_mt()) {
2259 2611 // Set the degree of MT here. If the discovery is done MT, there
2260 2612 // may have been a different number of threads doing the discovery
2261 2613 // and a different number of discovered lists may have Ref objects.
2262 2614 // That is OK as long as the Reference lists are balanced (see
2263 2615 // balance_all_queues() and balance_queues()).
2264 2616 rp->set_active_mt_degree(active_workers);
2265 2617
2266 2618 rp->process_discovered_references(&g1_is_alive,
2267 2619 &g1_keep_alive,
2268 2620 &g1_drain_mark_stack,
2269 2621 &par_task_executor);
2270 2622
2271 2623 // The work routines of the parallel keep_alive and drain_marking_stack
2272 2624 // will set the has_overflown flag if we overflow the global marking
2273 2625 // stack.
2274 2626 } else {
2275 2627 rp->process_discovered_references(&g1_is_alive,
2276 2628 &g1_keep_alive,
2277 2629 &g1_drain_mark_stack,
2278 2630 NULL);
2279 2631 }
2280 2632
2281 2633 assert(_markStack.overflow() || _markStack.isEmpty(),
2282 2634 "mark stack should be empty (unless it overflowed)");
2283 2635 if (_markStack.overflow()) {
2284 2636 // Should have been done already when we tried to push an
2285 2637 // entry on to the global mark stack. But let's do it again.
2286 2638 set_has_overflown();
2287 2639 }
2288 2640
2289 2641 if (rp->processing_is_mt()) {
2290 2642 assert(rp->num_q() == active_workers, "why not");
2291 2643 rp->enqueue_discovered_references(&par_task_executor);
2292 2644 } else {
2293 2645 rp->enqueue_discovered_references();
2294 2646 }
2295 2647
2296 2648 rp->verify_no_references_recorded();
2297 2649 assert(!rp->discovery_enabled(), "Post condition");
2298 2650 }
2299 2651
2300 2652 // Now clean up stale oops in StringTable
2301 2653 StringTable::unlink(&g1_is_alive);
2302 2654 // Clean up unreferenced symbols in symbol table.
2303 2655 SymbolTable::unlink();
2304 2656 }
2305 2657
2306 2658 void ConcurrentMark::swapMarkBitMaps() {
2307 2659 CMBitMapRO* temp = _prevMarkBitMap;
2308 2660 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2309 2661 _nextMarkBitMap = (CMBitMap*) temp;
2310 2662 }
2311 2663
2312 2664 class CMRemarkTask: public AbstractGangTask {
2313 2665 private:
2314 2666 ConcurrentMark *_cm;
2315 2667
2316 2668 public:
2317 2669 void work(int worker_i) {
2318 2670 // Since all available tasks are actually started, we should
2319 2671 // only proceed if we're supposed to be active.
2320 2672 if ((size_t)worker_i < _cm->active_tasks()) {
2321 2673 CMTask* task = _cm->task(worker_i);
2322 2674 task->record_start_time();
2323 2675 do {
2324 2676 task->do_marking_step(1000000000.0 /* something very large */,
2325 2677 true /* do_stealing */,
2326 2678 true /* do_termination */);
2327 2679 } while (task->has_aborted() && !_cm->has_overflown());
2328 2680 // If we overflow, then we do not want to restart. We instead
2329 2681 // want to abort remark and do concurrent marking again.
2330 2682 task->record_end_time();
2331 2683 }
2332 2684 }
2333 2685
2334 2686 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2335 2687 AbstractGangTask("Par Remark"), _cm(cm) {
2336 2688 _cm->terminator()->reset_for_reuse(active_workers);
2337 2689 }
2338 2690 };
2339 2691
2340 2692 void ConcurrentMark::checkpointRootsFinalWork() {
2341 2693 ResourceMark rm;
2342 2694 HandleMark hm;
2343 2695 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2344 2696
2345 2697 g1h->ensure_parsability(false);
2346 2698
2347 2699 if (G1CollectedHeap::use_parallel_gc_threads()) {
2348 2700 G1CollectedHeap::StrongRootsScope srs(g1h);
2349 2701 // this is remark, so we'll use up all active threads
2350 2702 int active_workers = g1h->workers()->active_workers();
2351 2703 if (active_workers == 0) {
2352 2704 assert(active_workers > 0, "Should have been set earlier");
2353 2705 active_workers = ParallelGCThreads;
2354 2706 g1h->workers()->set_active_workers(active_workers);
2355 2707 }
2356 2708 set_phase(active_workers, false /* concurrent */);
2357 2709 // Leave _parallel_marking_threads at its
2358 2710 // value originally calculated in the ConcurrentMark
2359 2711 // constructor and pass values of the active workers
2360 2712 // through the gang in the task.
2361 2713
2362 2714 CMRemarkTask remarkTask(this, active_workers);
2363 2715 g1h->set_par_threads(active_workers);
2364 2716 g1h->workers()->run_task(&remarkTask);
2365 2717 g1h->set_par_threads(0);
2366 2718 } else {
2367 2719 G1CollectedHeap::StrongRootsScope srs(g1h);
2368 2720 // this is remark, so we'll use up all available threads
2369 2721 int active_workers = 1;
2370 2722 set_phase(active_workers, false /* concurrent */);
2371 2723
2372 2724 CMRemarkTask remarkTask(this, active_workers);
2373 2725 // We will start all available threads, even if we decide that the
2374 2726 // active_workers will be fewer. The extra ones will just bail out
2375 2727 // immediately.
2376 2728 remarkTask.work(0);
2377 2729 }
2378 2730 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2379 2731 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2380 2732
2381 2733 print_stats();
2382 2734
2383 2735 #if VERIFY_OBJS_PROCESSED
2384 2736 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2385 2737 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2386 2738 _scan_obj_cl.objs_processed,
2387 2739 ThreadLocalObjQueue::objs_enqueued);
2388 2740 guarantee(_scan_obj_cl.objs_processed ==
2389 2741 ThreadLocalObjQueue::objs_enqueued,
2390 2742 "Different number of objs processed and enqueued.");
2391 2743 }
2392 2744 #endif
2393 2745 }
2394 2746
2395 2747 #ifndef PRODUCT
2396 2748
2397 2749 class PrintReachableOopClosure: public OopClosure {
2398 2750 private:
2399 2751 G1CollectedHeap* _g1h;
2400 2752 outputStream* _out;
2401 2753 VerifyOption _vo;
2402 2754 bool _all;
2403 2755
2404 2756 public:
2405 2757 PrintReachableOopClosure(outputStream* out,
2406 2758 VerifyOption vo,
2407 2759 bool all) :
2408 2760 _g1h(G1CollectedHeap::heap()),
2409 2761 _out(out), _vo(vo), _all(all) { }
2410 2762
2411 2763 void do_oop(narrowOop* p) { do_oop_work(p); }
2412 2764 void do_oop( oop* p) { do_oop_work(p); }
2413 2765
2414 2766 template <class T> void do_oop_work(T* p) {
2415 2767 oop obj = oopDesc::load_decode_heap_oop(p);
2416 2768 const char* str = NULL;
2417 2769 const char* str2 = "";
2418 2770
2419 2771 if (obj == NULL) {
2420 2772 str = "";
2421 2773 } else if (!_g1h->is_in_g1_reserved(obj)) {
2422 2774 str = " O";
2423 2775 } else {
2424 2776 HeapRegion* hr = _g1h->heap_region_containing(obj);
2425 2777 guarantee(hr != NULL, "invariant");
2426 2778 bool over_tams = false;
2427 2779 bool marked = false;
2428 2780
2429 2781 switch (_vo) {
2430 2782 case VerifyOption_G1UsePrevMarking:
2431 2783 over_tams = hr->obj_allocated_since_prev_marking(obj);
2432 2784 marked = _g1h->isMarkedPrev(obj);
2433 2785 break;
2434 2786 case VerifyOption_G1UseNextMarking:
2435 2787 over_tams = hr->obj_allocated_since_next_marking(obj);
2436 2788 marked = _g1h->isMarkedNext(obj);
2437 2789 break;
2438 2790 case VerifyOption_G1UseMarkWord:
2439 2791 marked = obj->is_gc_marked();
2440 2792 break;
2441 2793 default:
2442 2794 ShouldNotReachHere();
2443 2795 }
2444 2796
2445 2797 if (over_tams) {
2446 2798 str = " >";
2447 2799 if (marked) {
2448 2800 str2 = " AND MARKED";
2449 2801 }
2450 2802 } else if (marked) {
2451 2803 str = " M";
2452 2804 } else {
2453 2805 str = " NOT";
2454 2806 }
2455 2807 }
2456 2808
2457 2809 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2458 2810 p, (void*) obj, str, str2);
2459 2811 }
2460 2812 };
2461 2813
2462 2814 class PrintReachableObjectClosure : public ObjectClosure {
2463 2815 private:
2464 2816 G1CollectedHeap* _g1h;
2465 2817 outputStream* _out;
2466 2818 VerifyOption _vo;
2467 2819 bool _all;
2468 2820 HeapRegion* _hr;
2469 2821
2470 2822 public:
2471 2823 PrintReachableObjectClosure(outputStream* out,
2472 2824 VerifyOption vo,
2473 2825 bool all,
2474 2826 HeapRegion* hr) :
2475 2827 _g1h(G1CollectedHeap::heap()),
2476 2828 _out(out), _vo(vo), _all(all), _hr(hr) { }
2477 2829
2478 2830 void do_object(oop o) {
2479 2831 bool over_tams = false;
2480 2832 bool marked = false;
2481 2833
2482 2834 switch (_vo) {
2483 2835 case VerifyOption_G1UsePrevMarking:
2484 2836 over_tams = _hr->obj_allocated_since_prev_marking(o);
2485 2837 marked = _g1h->isMarkedPrev(o);
2486 2838 break;
2487 2839 case VerifyOption_G1UseNextMarking:
2488 2840 over_tams = _hr->obj_allocated_since_next_marking(o);
2489 2841 marked = _g1h->isMarkedNext(o);
2490 2842 break;
2491 2843 case VerifyOption_G1UseMarkWord:
2492 2844 marked = o->is_gc_marked();
2493 2845 break;
2494 2846 default:
2495 2847 ShouldNotReachHere();
2496 2848 }
2497 2849 bool print_it = _all || over_tams || marked;
2498 2850
2499 2851 if (print_it) {
2500 2852 _out->print_cr(" "PTR_FORMAT"%s",
2501 2853 o, (over_tams) ? " >" : (marked) ? " M" : "");
2502 2854 PrintReachableOopClosure oopCl(_out, _vo, _all);
2503 2855 o->oop_iterate(&oopCl);
2504 2856 }
2505 2857 }
2506 2858 };
2507 2859
2508 2860 class PrintReachableRegionClosure : public HeapRegionClosure {
2509 2861 private:
2510 2862 outputStream* _out;
2511 2863 VerifyOption _vo;
2512 2864 bool _all;
2513 2865
2514 2866 public:
2515 2867 bool doHeapRegion(HeapRegion* hr) {
2516 2868 HeapWord* b = hr->bottom();
2517 2869 HeapWord* e = hr->end();
2518 2870 HeapWord* t = hr->top();
2519 2871 HeapWord* p = NULL;
2520 2872
2521 2873 switch (_vo) {
2522 2874 case VerifyOption_G1UsePrevMarking:
2523 2875 p = hr->prev_top_at_mark_start();
2524 2876 break;
2525 2877 case VerifyOption_G1UseNextMarking:
2526 2878 p = hr->next_top_at_mark_start();
2527 2879 break;
2528 2880 case VerifyOption_G1UseMarkWord:
2529 2881 // When we are verifying marking using the mark word,
2530 2882 // TAMS has no relevance.
2531 2883 assert(p == NULL, "post-condition");
2532 2884 break;
2533 2885 default:
2534 2886 ShouldNotReachHere();
2535 2887 }
2536 2888 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2537 2889 "TAMS: "PTR_FORMAT, b, e, t, p);
2538 2890 _out->cr();
2539 2891
2540 2892 HeapWord* from = b;
2541 2893 HeapWord* to = t;
2542 2894
2543 2895 if (to > from) {
2544 2896 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2545 2897 _out->cr();
2546 2898 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2547 2899 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2548 2900 _out->cr();
2549 2901 }
2550 2902
2551 2903 return false;
2552 2904 }
2553 2905
2554 2906 PrintReachableRegionClosure(outputStream* out,
2555 2907 VerifyOption vo,
2556 2908 bool all) :
2557 2909 _out(out), _vo(vo), _all(all) { }
2558 2910 };
2559 2911
2560 2912 static const char* verify_option_to_tams(VerifyOption vo) {
2561 2913 switch (vo) {
2562 2914 case VerifyOption_G1UsePrevMarking:
2563 2915 return "PTAMS";
2564 2916 case VerifyOption_G1UseNextMarking:
2565 2917 return "NTAMS";
2566 2918 default:
2567 2919 return "NONE";
2568 2920 }
2569 2921 }
2570 2922
2571 2923 void ConcurrentMark::print_reachable(const char* str,
2572 2924 VerifyOption vo,
2573 2925 bool all) {
2574 2926 gclog_or_tty->cr();
2575 2927 gclog_or_tty->print_cr("== Doing heap dump... ");
2576 2928
2577 2929 if (G1PrintReachableBaseFile == NULL) {
2578 2930 gclog_or_tty->print_cr(" #### error: no base file defined");
2579 2931 return;
2580 2932 }
2581 2933
2582 2934 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2583 2935 (JVM_MAXPATHLEN - 1)) {
2584 2936 gclog_or_tty->print_cr(" #### error: file name too long");
2585 2937 return;
2586 2938 }
2587 2939
2588 2940 char file_name[JVM_MAXPATHLEN];
2589 2941 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2590 2942 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2591 2943
2592 2944 fileStream fout(file_name);
2593 2945 if (!fout.is_open()) {
2594 2946 gclog_or_tty->print_cr(" #### error: could not open file");
2595 2947 return;
2596 2948 }
2597 2949
2598 2950 outputStream* out = &fout;
2599 2951 out->print_cr("-- USING %s", verify_option_to_tams(vo));
2600 2952 out->cr();
2601 2953
2602 2954 out->print_cr("--- ITERATING OVER REGIONS");
2603 2955 out->cr();
2604 2956 PrintReachableRegionClosure rcl(out, vo, all);
2605 2957 _g1h->heap_region_iterate(&rcl);
2606 2958 out->cr();
2607 2959
2608 2960 gclog_or_tty->print_cr(" done");
2609 2961 gclog_or_tty->flush();
2610 2962 }
2611 2963
2612 2964 #endif // PRODUCT
2613 2965
2614 2966 // This note is for drainAllSATBBuffers and the code in between.
2615 2967 // In the future we could reuse a task to do this work during an
2616 2968 // evacuation pause (since now tasks are not active and can be claimed
2617 2969 // during an evacuation pause). This was a late change to the code and
2618 2970 // is currently not being taken advantage of.
2619 2971
2620 2972 class CMGlobalObjectClosure : public ObjectClosure {
2621 2973 private:
2622 2974 ConcurrentMark* _cm;
2623 2975
2624 2976 public:
2625 2977 void do_object(oop obj) {
2626 - _cm->deal_with_reference(obj);
2978 + _cm->deal_with_reference(obj, 0);
2627 2979 }
2628 2980
2629 2981 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2630 2982 };
2631 2983
2632 -void ConcurrentMark::deal_with_reference(oop obj) {
2984 +void ConcurrentMark::deal_with_reference(oop obj, int worker_i) {
2633 2985 if (verbose_high()) {
2634 2986 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2635 2987 (void*) obj);
2636 2988 }
2637 2989
2638 2990 HeapWord* objAddr = (HeapWord*) obj;
2639 2991 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2640 2992 if (_g1h->is_in_g1_reserved(objAddr)) {
2641 2993 assert(obj != NULL, "null check is implicit");
2642 2994 if (!_nextMarkBitMap->isMarked(objAddr)) {
2643 2995 // Only get the containing region if the object is not marked on the
2644 2996 // bitmap (otherwise, it's a waste of time since we won't do
2645 2997 // anything with it).
2646 2998 HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2647 2999 if (!hr->obj_allocated_since_next_marking(obj)) {
2648 3000 if (verbose_high()) {
2649 3001 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2650 3002 "marked", (void*) obj);
2651 3003 }
2652 3004
2653 3005 // we need to mark it first
2654 - if (_nextMarkBitMap->parMark(objAddr)) {
3006 + if (par_mark_and_count(obj, hr, worker_i)) {
2655 3007 // No OrderAccess:store_load() is needed. It is implicit in the
2656 - // CAS done in parMark(objAddr) above
3008 + // CAS done in the call to CMBitMap::parMark() in the above
3009 + // routine.
2657 3010 HeapWord* finger = _finger;
2658 3011 if (objAddr < finger) {
2659 3012 if (verbose_high()) {
2660 3013 gclog_or_tty->print_cr("[global] below the global finger "
2661 3014 "("PTR_FORMAT"), pushing it", finger);
2662 3015 }
2663 3016 if (!mark_stack_push(obj)) {
2664 3017 if (verbose_low()) {
2665 3018 gclog_or_tty->print_cr("[global] global stack overflow during "
2666 3019 "deal_with_reference");
2667 3020 }
2668 3021 }
2669 3022 }
2670 3023 }
2671 3024 }
2672 3025 }
2673 3026 }
2674 3027 }
2675 3028
2676 3029 void ConcurrentMark::drainAllSATBBuffers() {
2677 3030 CMGlobalObjectClosure oc(this);
2678 3031 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2679 3032 satb_mq_set.set_closure(&oc);
2680 3033
2681 3034 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2682 3035 if (verbose_medium()) {
2683 3036 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2684 3037 }
2685 3038 }
2686 3039
2687 3040 // no need to check whether we should do this, as this is only
2688 3041 // called during an evacuation pause
2689 3042 satb_mq_set.iterate_closure_all_threads();
2690 3043
2691 3044 satb_mq_set.set_closure(NULL);
2692 3045 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2693 3046 }
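// A minimal standalone sketch (not HotSpot's barrier code) of the
// snapshot-at-the-beginning discipline behind these buffers, assuming a
// single global queue; Obj, write_field() and drain_satb() are
// hypothetical stand-ins. Before a field is overwritten while marking is
// active, the old value is enqueued so that marking still traces the
// object graph as it existed when marking began.

#include <deque>

struct Obj { Obj* field; bool marked; };

static std::deque<Obj*> satb_queue;     // one queue per thread in the VM
static bool marking_active = false;

void write_field(Obj* holder, Obj* new_val) {
  // Pre-write barrier: capture the about-to-be-overwritten value.
  if (marking_active && holder->field != NULL) {
    satb_queue.push_back(holder->field);
  }
  holder->field = new_val;
}

void drain_satb() {
  // The sketch's analogue of drainAllSATBBuffers() above: every queued
  // snapshot value is handed to the marker.
  while (!satb_queue.empty()) {
    Obj* o = satb_queue.front();
    satb_queue.pop_front();
    o->marked = true;                   // cf. deal_with_reference()
  }
}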
2694 3047
2695 3048 void ConcurrentMark::markPrev(oop p) {
2696 3049 // Note we are overriding the read-only view of the prev map here, via
2697 3050 // the cast.
2698 3051 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2699 3052 }
2700 3053
2701 -void ConcurrentMark::clear(oop p) {
3054 +void ConcurrentMark::clear_mark(oop p) {
2702 3055 assert(p != NULL && p->is_oop(), "expected an oop");
2703 3056 HeapWord* addr = (HeapWord*)p;
2704 3057 assert(addr >= _nextMarkBitMap->startWord() &&
2705 3058 addr < _nextMarkBitMap->endWord(), "in a region");
2706 3059
2707 3060 _nextMarkBitMap->clear(addr);
2708 3061 }
2709 3062
2710 3063 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2711 3064 // Note we are overriding the read-only view of the prev map here, via
2712 3065 // the cast.
2713 3066 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2714 3067 _nextMarkBitMap->clearRange(mr);
2715 3068 }
2716 3069
2717 3070 HeapRegion*
2718 3071 ConcurrentMark::claim_region(int task_num) {
2719 3072 // "checkpoint" the finger
2720 3073 HeapWord* finger = _finger;
2721 3074
2722 3075 // _heap_end will not change underneath our feet; it only changes at
2723 3076 // yield points.
2724 3077 while (finger < _heap_end) {
2725 3078 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2726 3079
2727 3080 // Note on how this code handles humongous regions. In the
2728 3081 // normal case the finger will reach the start of a "starts
2729 3082 // humongous" (SH) region. Its end will either be the end of the
2730 3083 // last "continues humongous" (CH) region in the sequence, or the
2731 3084 // standard end of the SH region (if the SH is the only region in
2732 3085 // the sequence). That way claim_region() will skip over the CH
2733 3086 // regions. However, there is a subtle race between a CM thread
2734 3087 // executing this method and a mutator thread doing a humongous
2735 3088 // object allocation. The two are not mutually exclusive as the CM
2736 3089 // thread does not need to hold the Heap_lock when it gets
2737 3090 // here. So there is a chance that claim_region() will come across
2739 3092 // a free region that's in the process of becoming a SH or a CH
2739 3092 // region. In the former case, it will either
2740 3093 // a) Miss the update to the region's end, in which case it will
2741 3094 // visit every subsequent CH region, will find their bitmaps
2742 3095 // empty, and do nothing, or
2743 3096 // b) Will observe the update of the region's end (in which case
2744 3097 // it will skip the subsequent CH regions).
2745 3098 // If it comes across a region that suddenly becomes CH, the
2746 3099 // scenario will be similar to b). So, the race between
2747 3100 // claim_region() and a humongous object allocation might force us
2748 3101 // to do a bit of unnecessary work (due to some unnecessary bitmap
2749 3102 // iterations) but it should not introduce any correctness issues.
2750 3103 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2751 3104 HeapWord* bottom = curr_region->bottom();
2752 3105 HeapWord* end = curr_region->end();
2753 3106 HeapWord* limit = curr_region->next_top_at_mark_start();
2754 3107
2755 3108 if (verbose_low()) {
2756 3109 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2757 3110 "["PTR_FORMAT", "PTR_FORMAT"), "
2758 3111 "limit = "PTR_FORMAT,
2759 3112 task_num, curr_region, bottom, end, limit);
2760 3113 }
2761 3114
2762 3115 // Is the gap between reading the finger and doing the CAS too long?
2763 3116 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2764 3117 if (res == finger) {
2765 3118 // we succeeded
2766 3119
2767 3120 // notice that _finger == end cannot be guaranteed here since
2768 3121 // someone else might have moved the finger even further
2769 3122 assert(_finger >= end, "the finger should have moved forward");
2770 3123
2771 3124 if (verbose_low()) {
2772 3125 gclog_or_tty->print_cr("[%d] we were successful with region = "
2773 3126 PTR_FORMAT, task_num, curr_region);
2774 3127 }
2775 3128
2776 3129 if (limit > bottom) {
2777 3130 if (verbose_low()) {
2778 3131 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2779 3132 "returning it ", task_num, curr_region);
2780 3133 }
2781 3134 return curr_region;
2782 3135 } else {
2783 3136 assert(limit == bottom,
2784 3137 "the region limit should be at bottom");
2785 3138 if (verbose_low()) {
2786 3139 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2787 3140 "returning NULL", task_num, curr_region);
2788 3141 }
2789 3142 // we return NULL and the caller should try calling
2790 3143 // claim_region() again.
2791 3144 return NULL;
2792 3145 }
2793 3146 } else {
2794 3147 assert(_finger > finger, "the finger should have moved forward");
2795 3148 if (verbose_low()) {
2796 3149 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2797 3150 "global finger = "PTR_FORMAT", "
2798 3151 "our finger = "PTR_FORMAT,
2799 3152 task_num, _finger, finger);
2800 3153 }
2801 3154
2802 3155 // read it again
2803 3156 finger = _finger;
2804 3157 }
2805 3158 }
2806 3159
2807 3160 return NULL;
2808 3161 }
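// A minimal standalone sketch of the lock-free claiming pattern used by
// claim_region() above, with std::atomic standing in for the VM's
// Atomic::cmpxchg_ptr and a flat range of fixed-size regions standing in
// for the heap; all names and sizes here are illustrative.

#include <atomic>
#include <cstddef>

static const size_t kRegionWords = 512;           // assumed region size
static const size_t kHeapEnd = 8 * kRegionWords;  // assumed heap end
static std::atomic<size_t> g_finger(0);           // plays the role of _finger

// Returns the start of a claimed region, or kHeapEnd when none is left.
size_t claim_region_sketch() {
  size_t finger = g_finger.load();
  while (finger < kHeapEnd) {
    size_t end = finger + kRegionWords;           // curr_region->end()
    // Try to advance the shared finger past this region; on failure
    // compare_exchange_strong reloads 'finger' and we retry, exactly
    // like re-reading _finger in the loop above.
    if (g_finger.compare_exchange_strong(finger, end)) {
      return finger;                              // we own [finger, end)
    }
  }
  return kHeapEnd;
}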
2809 3162
2810 3163 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2811 3164 bool result = false;
2812 3165 for (int i = 0; i < (int)_max_task_num; ++i) {
2813 3166 CMTask* the_task = _tasks[i];
2814 3167 MemRegion mr = the_task->aborted_region();
2815 3168 if (mr.start() != NULL) {
2816 3169 assert(mr.end() != NULL, "invariant");
2817 3170 assert(mr.word_size() > 0, "invariant");
2818 3171 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2819 3172 assert(hr != NULL, "invariant");
2820 3173 if (hr->in_collection_set()) {
2821 3174 // The region points into the collection set
2822 3175 the_task->set_aborted_region(MemRegion());
2823 3176 result = true;
2824 3177 }
2825 3178 }
2826 3179 }
2827 3180 return result;
2828 3181 }
2829 3182
2830 3183 bool ConcurrentMark::has_aborted_regions() {
2831 3184 for (int i = 0; i < (int)_max_task_num; ++i) {
2832 3185 CMTask* the_task = _tasks[i];
2833 3186 MemRegion mr = the_task->aborted_region();
2834 3187 if (mr.start() != NULL) {
2835 3188 assert(mr.end() != NULL, "invariant");
2836 3189 assert(mr.word_size() > 0, "invariant");
2837 3190 return true;
2838 3191 }
2839 3192 }
2840 3193 return false;
2841 3194 }
2842 3195
2843 3196 void ConcurrentMark::oops_do(OopClosure* cl) {
2844 3197 if (_markStack.size() > 0 && verbose_low()) {
2845 3198 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2846 3199 "size = %d", _markStack.size());
2847 3200 }
2848 3201 // we first iterate over the contents of the mark stack...
2849 3202 _markStack.oops_do(cl);
2850 3203
2851 3204 for (int i = 0; i < (int)_max_task_num; ++i) {
2852 3205 OopTaskQueue* queue = _task_queues->queue((int)i);
2853 3206
2854 3207 if (queue->size() > 0 && verbose_low()) {
2855 3208 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2856 3209 "size = %d", i, queue->size());
2857 3210 }
2858 3211
2859 3212 // ...then over the contents of all the task queues.
2860 3213 queue->oops_do(cl);
2861 3214 }
2862 3215
2863 3216 // Invalidate any entries in the region stack that point
2864 3217 // into the collection set.
2865 3218 if (_regionStack.invalidate_entries_into_cset()) {
2866 3219 // otherwise, any gray objects copied during the evacuation pause
2867 3220 // might not be visited.
2868 3221 assert(_should_gray_objects, "invariant");
2869 3222 }
2870 3223
2871 3224 // Invalidate any aborted regions, recorded in the individual CM
2872 3225 // tasks, that point into the collection set.
2873 3226 if (invalidate_aborted_regions_in_cset()) {
2874 3227 // otherwise, any gray objects copied during the evacuation pause
2875 3228 // might not be visited.
2876 3229 assert(_should_gray_objects, "invariant");
2877 3230 }
2878 3231
2879 3232 }
2880 3233
2881 3234 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2882 3235 _markStack.setEmpty();
2883 3236 _markStack.clear_overflow();
2884 3237 _regionStack.setEmpty();
2885 3238 _regionStack.clear_overflow();
2886 3239 if (clear_overflow) {
2887 3240 clear_has_overflown();
2888 3241 } else {
2889 3242 assert(has_overflown(), "pre-condition");
2890 3243 }
2891 3244 _finger = _heap_start;
2892 3245
2893 3246 for (int i = 0; i < (int)_max_task_num; ++i) {
2894 3247 OopTaskQueue* queue = _task_queues->queue(i);
2895 3248 queue->set_empty();
2896 3249 // Clear any partial regions from the CMTasks
2897 3250 _tasks[i]->clear_aborted_region();
2898 3251 }
2899 3252 }
2900 3253
3254 +// Aggregate the counting data that was constructed concurrently
3255 +// with marking.
3256 +class AggregateCountDataHRClosure: public HeapRegionClosure {
3257 + ConcurrentMark* _cm;
3258 + BitMap* _cm_card_bm;
3259 + intptr_t _bottom_card_num;
3260 + size_t _max_task_num;
3261 +
3262 + public:
3263 + AggregateCountDataHRClosure(ConcurrentMark *cm,
3264 + BitMap* cm_card_bm,
3265 + intptr_t bottom_card_num,
3266 + size_t max_task_num) :
3267 + _cm(cm),
3268 + _cm_card_bm(cm_card_bm),
3269 + _bottom_card_num(bottom_card_num),
3270 + _max_task_num(max_task_num)
3271 + { }
3272 +
3273 + bool is_card_aligned(HeapWord* p) {
3274 + return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
3275 + }
3276 +
3277 + bool doHeapRegion(HeapRegion* hr) {
3278 + if (hr->continuesHumongous()) {
3279 + // We will ignore these here and process them when their
3280 + // associated "starts humongous" region is processed.
3281 + // Note that we cannot rely on their associated
3282 + // "starts humongous" region to have their bit set to 1
3283 + // since, due to the region chunking in the parallel region
3284 + // iteration, a "continues humongous" region might be visited
3285 + // before its associated "starts humongous".
3286 + return false;
3287 + }
3288 +
3289 + HeapWord* start = hr->bottom();
3290 + HeapWord* limit = hr->next_top_at_mark_start();
3291 + HeapWord* end = hr->end();
3292 +
3293 + assert(start <= limit && limit <= hr->top() &&
3294 + hr->top() <= hr->end(), "Preconditions");
3295 +
3296 + assert(hr->next_marked_bytes() == 0, "Precondition");
3297 +
3298 + if (start == limit) {
3299 + // NTAMS of this region has not been set so nothing to do.
3300 + return false;
3301 + }
3302 +
3303 + intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3304 + intptr_t limit_card_num = intptr_t(uintptr_t(limit) >> CardTableModRefBS::card_shift);
3305 + intptr_t end_card_num = intptr_t(uintptr_t(end) >> CardTableModRefBS::card_shift);
3306 +
3307 + assert(is_card_aligned(start), "sanity");
3308 + assert(is_card_aligned(end), "sanity");
3309 +
3310 + // If ntams is not card aligned then we bump the index for
3311 + // limit so that we get the card spanning ntams.
3312 + if (!is_card_aligned(limit)) {
3313 + limit_card_num += 1;
3314 + }
3315 +
3316 + assert(limit_card_num <= end_card_num, "or else use atomics");
3317 +
3318 + BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
3319 + BitMap::idx_t limit_idx = limit_card_num - _bottom_card_num;
3320 +
3321 + // Aggregate the "stripe" in the count data associated with hr.
3322 + size_t hrs_index = hr->hrs_index();
3323 + size_t marked_bytes = 0;
3324 +
3325 + for (int i = 0; (size_t)i < _max_task_num; i += 1) {
3326 + size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3327 + BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3328 +
3329 + // Fetch the marked_bytes in this region for task i and
3330 + // add it to the running total for this region.
3331 + marked_bytes += marked_bytes_array[hrs_index];
3332 +
3333 + // Now clear the value in the task's marked bytes array
3334 + // for this region.
3335 + marked_bytes_array[hrs_index] = 0;
3336 +
3337 + // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
3338 + // into the global card bitmap.
3339 + BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3340 +
3341 + while (scan_idx < limit_idx) {
3342 + assert(task_card_bm->at(scan_idx) == true, "should be");
3343 + _cm_card_bm->set_bit(scan_idx);
3344 + task_card_bm->clear_bit(scan_idx);
3345 + assert(_cm_card_bm->at(scan_idx) == true, "should be");
3346 + scan_idx = task_card_bm->get_next_one_offset(scan_idx + 1, limit_idx);
3347 + }
3348 + }
3349 +
3350 + // Update the marked bytes for this region.
3351 + hr->add_to_marked_bytes(marked_bytes);
3352 +
3353 + // Now set the top at count to NTAMS.
3354 + hr->set_top_at_conc_mark_count(limit);
3355 +
3356 + // Next heap region
3357 + return false;
3358 + }
3359 +};
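// A small standalone illustration of the card-index arithmetic used in
// doHeapRegion() above, assuming the default 2^9 = 512-byte card size;
// kCardShift and limit_card_index() are hypothetical names.

#include <cassert>
#include <cstddef>
#include <stdint.h>

static const int kCardShift = 9;
static const uintptr_t kCardSize = (uintptr_t)1 << kCardShift;

static bool card_aligned(uintptr_t p) { return (p & (kCardSize - 1)) == 0; }

// Index (relative to the bottom card) one past the last card to scan.
// e.g. bottom = 0x10000, ntams = 0x10300: card 0 is fully spanned, and
// because 0x10300 is not card aligned the limit is bumped from 1 to 2 so
// that the card containing ntams is included.
static size_t limit_card_index(uintptr_t bottom, uintptr_t ntams) {
  assert(card_aligned(bottom));
  size_t limit = (size_t)((ntams >> kCardShift) - (bottom >> kCardShift));
  if (!card_aligned(ntams)) {
    limit += 1;
  }
  return limit;
}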
3360 +
3361 +class G1AggregateCountDataTask: public AbstractGangTask {
3362 +protected:
3363 + G1CollectedHeap* _g1h;
3364 + ConcurrentMark* _cm;
3365 + BitMap* _cm_card_bm;
3366 + intptr_t _heap_bottom_card_num;
3367 + size_t _max_task_num;
3368 + int _active_workers;
3369 +
3370 +public:
3371 + G1AggregateCountDataTask(G1CollectedHeap* g1h,
3372 + ConcurrentMark* cm,
3373 + BitMap* cm_card_bm,
3374 + intptr_t bottom_card_num,
3375 + size_t max_task_num,
3376 + int n_workers) :
3377 + AbstractGangTask("Count Aggregation"),
3378 + _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3379 + _heap_bottom_card_num(bottom_card_num),
3380 + _max_task_num(max_task_num),
3381 + _active_workers(n_workers)
3382 + { }
3383 +
3384 + void work(int worker_i) {
3385 + AggregateCountDataHRClosure cl(_cm, _cm_card_bm,
3386 + _heap_bottom_card_num, _max_task_num);
3387 +
3388 + if (G1CollectedHeap::use_parallel_gc_threads()) {
3389 + _g1h->heap_region_par_iterate_chunked(&cl, worker_i,
3390 + _active_workers,
3391 + HeapRegion::AggregateCountClaimValue);
3392 + } else {
3393 + _g1h->heap_region_iterate(&cl);
3394 + }
3395 + }
3396 +};
3397 +
3398 +
3399 +void ConcurrentMark::aggregate_and_clear_count_data() {
3400 + // Clear the global card bitmap
3401 + _card_bm.clear();
3402 +
3403 + int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3404 + _g1h->workers()->active_workers() :
3405 + 1);
3406 +
3407 + G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3408 + _heap_bottom_card_num, _max_task_num,
3409 + n_workers);
3410 +
3411 + if (G1CollectedHeap::use_parallel_gc_threads()) {
3412 + assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3413 + "sanity check");
3414 + _g1h->set_par_threads(n_workers);
3415 + _g1h->workers()->run_task(&g1_par_agg_task);
3416 + _g1h->set_par_threads(0);
3417 +
3418 + assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3419 + "sanity check");
3420 + _g1h->reset_heap_region_claim_values();
3421 + } else {
3422 + g1_par_agg_task.work(0);
3423 + }
3424 +}
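// A hedged standalone sketch (not the VM code) of what the aggregation
// accomplishes: each marking task keeps one marked-bytes slot per region,
// and at aggregation time the slots are summed into the region's total
// and reset for the next marking cycle.

#include <cstddef>
#include <vector>

static size_t aggregate_region(std::vector<std::vector<size_t> >& per_task_bytes,
                               size_t region_index) {
  size_t total = 0;
  for (size_t t = 0; t < per_task_bytes.size(); ++t) {
    total += per_task_bytes[t][region_index];   // gather this task's count
    per_task_bytes[t][region_index] = 0;        // and reset it, as above
  }
  return total;  // the caller would do hr->add_to_marked_bytes(total)
}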
3425 +
3426 +// Clear the per-worker arrays used to store the per-region counting data
3427 +void ConcurrentMark::clear_all_count_data() {
3428 + assert(SafepointSynchronize::is_at_safepoint() ||
3429 + !Universe::is_fully_initialized(), "must be");
3430 +
3431 + size_t max_regions = _g1h->max_regions();
3432 +
3433 +  assert(_max_task_num != 0, "uninitialized");
3434 + assert(_count_card_bitmaps != NULL, "uninitialized");
3435 + assert(_count_marked_bytes != NULL, "uninitialized");
3436 +
3437 + for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3438 + BitMap* task_card_bm = count_card_bitmap_for(i);
3439 + size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3440 +
3441 + assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3442 + assert(marked_bytes_array != NULL, "uninitialized");
3443 +
3444 + for (int j = 0; (size_t) j < max_regions; j++) {
3445 + marked_bytes_array[j] = 0;
3446 + }
3447 + task_card_bm->clear();
3448 + }
3449 +}
3450 +
3451 +void ConcurrentMark::clear_count_data_for_heap_region(HeapRegion* hr) {
3452 + // Clears the count data for the given region from _all_ of
3453 + // the per-task counting data structures.
3454 +
3455 + MemRegion used_region = hr->used_region();
3456 + HeapWord* start = used_region.start();
3457 + HeapWord* last = used_region.last();
3458 + size_t hr_index = hr->hrs_index();
3459 +
3460 + intptr_t start_card_num =
3461 + intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3462 + intptr_t last_card_num =
3463 + intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);
3464 +
3465 + BitMap::idx_t start_idx = start_card_num - heap_bottom_card_num();
3466 + BitMap::idx_t last_idx = last_card_num - heap_bottom_card_num();
3467 +
3468 + size_t used_region_bytes = used_region.byte_size();
3469 + size_t marked_bytes = 0;
3470 +
3471 +  for (int i = 0; (size_t)i < _max_task_num; i += 1) {
3472 + BitMap* task_card_bm = count_card_bitmap_for(i);
3473 + size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3474 +
3475 + marked_bytes += marked_bytes_array[hr_index];
3476 + // clear the amount of marked bytes in the task array for this
3477 + // region
3478 + marked_bytes_array[hr_index] = 0;
3479 +
3480 + // Clear the inclusive range [start_idx, last_idx] from the
3481 + // card bitmap. The clear_range routine is exclusive so we
3482 + // need to also explicitly clear the bit at last_idx.
3483 + // Passing last_idx+1 to the clear_range would work in
3484 + // most cases but could trip an OOB assertion.
3485 +
3486 + if ((last_idx - start_idx) > 0) {
3487 + task_card_bm->clear_range(start_idx, last_idx);
3488 + }
3489 + task_card_bm->clear_bit(last_idx);
3490 + }
3491 +}
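// A standalone sketch of the inclusive-range clearing idiom described in
// the comment above, assuming a bitmap whose clear_range() treats the
// right bound as exclusive; ToyBitMap is a hypothetical stand-in.

#include <cstddef>
#include <vector>

struct ToyBitMap {
  std::vector<bool> bits;
  ToyBitMap(size_t n) : bits(n, false) { }
  void clear_range(size_t beg, size_t end) {       // clears [beg, end)
    for (size_t i = beg; i < end; ++i) bits[i] = false;
  }
  void clear_bit(size_t i) { bits[i] = false; }
};

// Clearing the inclusive range [start, last]: the exclusive clear_range()
// leaves bit 'last' set, so it is cleared explicitly. When start == last
// the range call is a no-op and only clear_bit() does any work -- the
// exact situation the code above guards against.
static void clear_inclusive(ToyBitMap& bm, size_t start, size_t last) {
  if (last > start) {
    bm.clear_range(start, last);
  }
  bm.clear_bit(last);
}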
3492 +
2901 3493 void ConcurrentMark::print_stats() {
2902 3494 if (verbose_stats()) {
2903 3495 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2904 3496 for (size_t i = 0; i < _active_tasks; ++i) {
2905 3497 _tasks[i]->print_stats();
2906 3498 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2907 3499 }
2908 3500 }
2909 3501 }
2910 3502
2911 3503 // Closures used by ConcurrentMark::complete_marking_in_collection_set().
2912 3504
2913 3505 class CSetMarkOopClosure: public OopClosure {
2914 3506 friend class CSetMarkBitMapClosure;
2915 3507
2916 3508 G1CollectedHeap* _g1h;
2917 - CMBitMap* _bm;
2918 3509 ConcurrentMark* _cm;
2919 3510 oop* _ms;
2920 3511 jint* _array_ind_stack;
2921 3512 int _ms_size;
2922 3513 int _ms_ind;
2923 3514 int _array_increment;
2924 3515 int _worker_i;
2925 3516
2926 3517 bool push(oop obj, int arr_ind = 0) {
2927 3518 if (_ms_ind == _ms_size) {
2928 3519 gclog_or_tty->print_cr("Mark stack is full.");
2929 3520 return false;
2930 3521 }
2931 3522 _ms[_ms_ind] = obj;
2932 3523 if (obj->is_objArray()) {
2933 3524 _array_ind_stack[_ms_ind] = arr_ind;
2934 3525 }
2935 3526 _ms_ind++;
2936 3527 return true;
2937 3528 }
2938 3529
2939 3530 oop pop() {
2940 3531 if (_ms_ind == 0) {
2941 3532 return NULL;
2942 3533 } else {
2943 3534 _ms_ind--;
2944 3535 return _ms[_ms_ind];
2945 3536 }
2946 3537 }
2947 3538
2948 3539 template <class T> bool drain() {
2949 3540 while (_ms_ind > 0) {
2950 3541 oop obj = pop();
2951 3542 assert(obj != NULL, "Since index was non-zero.");
2952 3543 if (obj->is_objArray()) {
2953 3544 jint arr_ind = _array_ind_stack[_ms_ind];
2954 3545 objArrayOop aobj = objArrayOop(obj);
2955 3546 jint len = aobj->length();
2956 3547 jint next_arr_ind = arr_ind + _array_increment;
2957 3548 if (next_arr_ind < len) {
2958 3549 push(obj, next_arr_ind);
2959 3550 }
2960 3551 // Now process this portion of this one.
2961 3552 int lim = MIN2(next_arr_ind, len);
2962 3553 for (int j = arr_ind; j < lim; j++) {
2963 3554 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2964 3555 }
2965 3556 } else {
2966 3557 obj->oop_iterate(this);
2967 3558 }
2968 3559 if (abort()) return false;
2969 3560 }
2970 3561 return true;
2971 3562 }
2972 3563
2973 3564 public:
2974 3565 CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
2975 3566 _g1h(G1CollectedHeap::heap()),
2976 3567 _cm(cm),
2977 - _bm(cm->nextMarkBitMap()),
2978 3568 _ms_size(ms_size), _ms_ind(0),
2979 3569 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2980 3570 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2981 3571 _array_increment(MAX2(ms_size/8, 16)),
2982 3572 _worker_i(worker_i) { }
2983 3573
2984 3574 ~CSetMarkOopClosure() {
2985 3575 FREE_C_HEAP_ARRAY(oop, _ms);
2986 3576 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2987 3577 }
2988 3578
2989 3579 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2990 3580 virtual void do_oop( oop* p) { do_oop_work(p); }
2991 3581
2992 3582 template <class T> void do_oop_work(T* p) {
2993 3583 T heap_oop = oopDesc::load_heap_oop(p);
2994 3584 if (oopDesc::is_null(heap_oop)) return;
2995 3585 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2996 3586 if (obj->is_forwarded()) {
2997 3587 // If the object has already been forwarded, we have to make sure
2998 3588 // that it's marked. So follow the forwarding pointer. Note that
2999 3589 // this does the right thing for self-forwarding pointers in the
3000 3590 // evacuation failure case.
3001 3591 obj = obj->forwardee();
3002 3592 }
3003 3593 HeapRegion* hr = _g1h->heap_region_containing(obj);
3004 3594 if (hr != NULL) {
3005 3595 if (hr->in_collection_set()) {
3006 3596 if (_g1h->is_obj_ill(obj)) {
3007 - if (_bm->parMark((HeapWord*)obj)) {
3597 + if (_cm->par_mark_and_count(obj, hr, _worker_i)) {
3008 3598 if (!push(obj)) {
3009 3599 gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
3010 3600 set_abort();
3011 3601 }
3012 3602 }
3013 3603 }
3014 3604 } else {
3015 3605 // Outside the collection set; we need to gray it
3016 - _cm->deal_with_reference(obj);
3606 + _cm->deal_with_reference(obj, _worker_i);
3017 3607 }
3018 3608 }
3019 3609 }
3020 3610 };
3021 3611
3022 3612 class CSetMarkBitMapClosure: public BitMapClosure {
3023 3613 G1CollectedHeap* _g1h;
3024 3614 CMBitMap* _bitMap;
3025 3615 ConcurrentMark* _cm;
3026 3616 CSetMarkOopClosure _oop_cl;
3027 3617 int _worker_i;
3028 3618
3029 3619 public:
3030 3620 CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
3031 3621 _g1h(G1CollectedHeap::heap()),
3032 3622 _bitMap(cm->nextMarkBitMap()),
3033 3623 _oop_cl(cm, ms_size, worker_i),
3034 3624 _worker_i(worker_i) { }
3035 3625
3036 3626 bool do_bit(size_t offset) {
3037 3627 // convert offset into a HeapWord*
3038 3628 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3039 3629 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
3040 3630 "address out of range");
3041 3631 assert(_bitMap->isMarked(addr), "tautology");
3042 3632 oop obj = oop(addr);
3043 3633 if (!obj->is_forwarded()) {
3044 3634 if (!_oop_cl.push(obj)) return false;
3045 3635 if (UseCompressedOops) {
3046 3636 if (!_oop_cl.drain<narrowOop>()) return false;
3047 3637 } else {
3048 3638 if (!_oop_cl.drain<oop>()) return false;
3049 3639 }
3050 3640 }
3051 3641 // Otherwise...
3052 3642 return true;
3053 3643 }
3054 3644 };
3055 3645
3056 3646 class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
3057 3647 CMBitMap* _bm;
3058 3648 CSetMarkBitMapClosure _bit_cl;
3059 3649 int _worker_i;
3060 3650
3061 3651 enum SomePrivateConstants {
3062 3652 MSSize = 1000
3063 3653 };
3064 3654
3065 3655 public:
3066 3656 CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
3067 3657 _bm(cm->nextMarkBitMap()),
3068 3658 _bit_cl(cm, MSSize, worker_i),
3069 3659 _worker_i(worker_i) { }
3070 3660
3071 3661 bool doHeapRegion(HeapRegion* hr) {
3072 3662 if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
3073 3663 // The current worker has successfully claimed the region.
3074 3664 if (!hr->evacuation_failed()) {
3075 3665 MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
3076 3666 if (!mr.is_empty()) {
3077 3667 bool done = false;
3078 3668 while (!done) {
3079 3669 done = _bm->iterate(&_bit_cl, mr);
3080 3670 }
3081 3671 }
3082 3672 }
3083 3673 }
3084 3674 return false;
3085 3675 }
3086 3676 };
3087 3677
3088 3678 class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
3089 3679 jint _claim_value;
3090 3680
3091 3681 public:
3092 3682 SetClaimValuesInCSetHRClosure(jint claim_value) :
3093 3683 _claim_value(claim_value) { }
3094 3684
3095 3685 bool doHeapRegion(HeapRegion* hr) {
3096 3686 hr->set_claim_value(_claim_value);
3097 3687 return false;
3098 3688 }
3099 3689 };
3100 3690
3101 3691 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
3102 3692 protected:
3103 3693 G1CollectedHeap* _g1h;
3104 3694 ConcurrentMark* _cm;
3105 3695
3106 3696 public:
3107 3697 G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
3108 3698 ConcurrentMark* cm) :
3109 3699 AbstractGangTask("Complete Mark in CSet"),
3110 3700 _g1h(g1h), _cm(cm) { }
3111 3701
3112 3702 void work(int worker_i) {
3113 3703 CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
3114 3704 HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
3115 3705 _g1h->collection_set_iterate_from(hr, &cmplt);
3116 3706 }
3117 3707 };
3118 3708
3119 3709 void ConcurrentMark::complete_marking_in_collection_set() {
3120 3710 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3121 3711
3122 3712 if (!g1h->mark_in_progress()) {
3123 3713 g1h->g1_policy()->record_mark_closure_time(0.0);
3124 3714 return;
3125 3715 }
3126 3716
3127 3717 double start = os::elapsedTime();
3128 3718 G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
3129 3719
3130 3720 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3131 3721
3132 3722 if (G1CollectedHeap::use_parallel_gc_threads()) {
3133 3723 int n_workers = g1h->workers()->active_workers();
3134 3724 g1h->set_par_threads(n_workers);
3135 3725 g1h->workers()->run_task(&complete_mark_task);
3136 3726 g1h->set_par_threads(0);
3137 3727 } else {
3138 3728 complete_mark_task.work(0);
3139 3729 }
3140 3730
3141 3731 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
3142 3732
3143 3733 // Now reset the claim values in the regions in the collection set.
3144 3734 SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
3145 3735 g1h->collection_set_iterate(&set_cv_cl);
3146 3736
3147 3737 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3148 3738
3149 3739 double end_time = os::elapsedTime();
3150 3740 double elapsed_time_ms = (end_time - start) * 1000.0;
3151 3741 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3152 3742 }
3153 3743
3154 3744 // The next two methods deal with the following optimisation. Some
3155 3745 // objects are gray by being marked and located above the finger. If
3156 3746 // they are copied, during an evacuation pause, below the finger then
3157 3747 // they need to be pushed on the stack. The observation is that, if
3158 3748 // there are no regions in the collection set located above the
3159 3749 // finger, then the above cannot happen, hence we do not need to
3160 3750 // explicitly gray any objects when copying them to below the
3161 3751 // finger. The global stack will be scanned to ensure that, if it
3162 3752 // points to objects being copied, it will update their
3163 3753 // location. There is a tricky situation with the gray objects in
3164 3754 // the region stack that are being copied, however. See the comment in
3165 3755 // newCSet().
3166 3756
3167 3757 void ConcurrentMark::newCSet() {
3168 3758 if (!concurrent_marking_in_progress()) {
3169 3759 // nothing to do if marking is not in progress
3170 3760 return;
3171 3761 }
3172 3762
3173 3763 // find what the lowest finger is among the global and local fingers
3174 3764 _min_finger = _finger;
3175 3765 for (int i = 0; i < (int)_max_task_num; ++i) {
3176 3766 CMTask* task = _tasks[i];
3177 3767 HeapWord* task_finger = task->finger();
3178 3768 if (task_finger != NULL && task_finger < _min_finger) {
3179 3769 _min_finger = task_finger;
3180 3770 }
3181 3771 }
3182 3772
3183 3773 _should_gray_objects = false;
3184 3774
3185 3775 // This fixes a very subtle and frustrating bug. It might be the case
3186 3776 // that, during an evacuation pause, heap regions that contain
3187 3777 // objects that are gray (by being in regions contained in the
3188 3778 // region stack) are included in the collection set. Since such gray
3189 3779 // objects will be moved, and because it's not easy to redirect
3190 3780 // region stack entries to point to a new location (because objects
3191 3781 // in one region might be scattered to multiple regions after they
3192 3782 // are copied), one option is to ensure that all marked objects
3193 3783 // copied during a pause are pushed on the stack. Notice, however,
3194 3784 // that this problem can only happen when the region stack is not
3195 3785 // empty during an evacuation pause. So, we make the fix a bit less
3196 3786 // conservative and ensure that regions are pushed on the stack,
3197 3787 // irrespective of whether all collection set regions are below the
3198 3788 // finger, if the region stack is not empty. This is expected to be
3199 3789 // a rare case, so I don't think it's necessary to be smarter about it.
3200 3790 if (!region_stack_empty() || has_aborted_regions()) {
3201 3791 _should_gray_objects = true;
3202 3792 }
3203 3793 }
3204 3794
3205 3795 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3206 3796 if (!concurrent_marking_in_progress()) return;
3207 3797
3208 3798 HeapWord* region_end = hr->end();
3209 3799 if (region_end > _min_finger) {
3210 3800 _should_gray_objects = true;
3211 3801 }
3212 3802 }
3213 3803
3214 3804 // Resets the region fields of active CMTasks whose values point
3215 3805 // into the collection set.
3216 3806 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3217 3807 assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3218 3808 assert(parallel_marking_threads() <= _max_task_num, "sanity");
3219 3809
3220 3810 for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3221 3811 CMTask* task = _tasks[i];
3222 3812 HeapWord* task_finger = task->finger();
3223 3813 if (task_finger != NULL) {
3224 3814 assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3225 3815 HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3226 3816 if (finger_region->in_collection_set()) {
3227 3817 // The task's current region is in the collection set.
3228 3818 // This region will be evacuated in the current GC and
3229 3819 // the region fields in the task will be stale.
3230 3820 task->giveup_current_region();
3231 3821 }
3232 3822 }
3233 3823 }
3234 3824 }
3235 3825
3236 3826 // abandon current marking iteration due to a Full GC
3237 3827 void ConcurrentMark::abort() {
3238 3828 // Clear all marks to force marking thread to do nothing
3239 3829 _nextMarkBitMap->clearAll();
3240 3830 // Empty mark stack
3241 3831 clear_marking_state();
3242 3832 for (int i = 0; i < (int)_max_task_num; ++i) {
3243 3833 _tasks[i]->clear_region_fields();
3244 3834 }
3245 3835 _has_aborted = true;
3246 3836
3247 3837 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3248 3838 satb_mq_set.abandon_partial_marking();
3249 3839 // This can be called either during or outside marking, we'll read
3250 3840 // the expected_active value from the SATB queue set.
3251 3841 satb_mq_set.set_active_all_threads(
3252 3842 false, /* new active value */
3253 3843 satb_mq_set.is_active() /* expected_active */);
3254 3844 }
3255 3845
3256 3846 static void print_ms_time_info(const char* prefix, const char* name,
3257 3847 NumberSeq& ns) {
3258 3848 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3259 3849 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3260 3850 if (ns.num() > 0) {
3261 3851 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3262 3852 prefix, ns.sd(), ns.maximum());
3263 3853 }
3264 3854 }
3265 3855
3266 3856 void ConcurrentMark::print_summary_info() {
3267 3857 gclog_or_tty->print_cr(" Concurrent marking:");
3268 3858 print_ms_time_info(" ", "init marks", _init_times);
3269 3859 print_ms_time_info(" ", "remarks", _remark_times);
3270 3860 {
3271 3861 print_ms_time_info(" ", "final marks", _remark_mark_times);
3272 3862 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3273 3863
3274 3864 }
3275 3865 print_ms_time_info(" ", "cleanups", _cleanup_times);
3276 3866 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3277 3867 _total_counting_time,
3278 3868 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3279 3869 (double)_cleanup_times.num()
3280 3870 : 0.0));
3281 3871 if (G1ScrubRemSets) {
3282 3872 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3283 3873 _total_rs_scrub_time,
3284 3874 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3285 3875 (double)_cleanup_times.num()
3286 3876 : 0.0));
3287 3877 }
3288 3878 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3289 3879 (_init_times.sum() + _remark_times.sum() +
3290 3880 _cleanup_times.sum())/1000.0);
3291 3881 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3292 - "(%8.2f s marking, %8.2f s counting).",
3882 + "(%8.2f s marking).",
3293 3883 cmThread()->vtime_accum(),
3294 - cmThread()->vtime_mark_accum(),
3295 - cmThread()->vtime_count_accum());
3884 + cmThread()->vtime_mark_accum());
3296 3885 }
3297 3886
3298 3887 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3299 3888 _parallel_workers->print_worker_threads_on(st);
3300 3889 }
3301 3890
3302 3891 // Closures
3303 3892 // XXX: there seems to be a lot of code duplication here;
3304 3893 // should refactor and consolidate the shared code.
3305 3894
3306 3895 // This closure is used to mark refs into the CMS generation in
3307 3896 // the CMS bit map. Called at the first checkpoint.
3308 3897
3309 3898 // We take a break if someone is trying to stop the world.
3310 3899 bool ConcurrentMark::do_yield_check(int worker_i) {
3311 3900 if (should_yield()) {
3312 3901 if (worker_i == 0) {
3313 3902 _g1h->g1_policy()->record_concurrent_pause();
3314 3903 }
3315 3904 cmThread()->yield();
3316 3905 if (worker_i == 0) {
3317 3906 _g1h->g1_policy()->record_concurrent_pause_end();
3318 3907 }
3319 3908 return true;
3320 3909 } else {
3321 3910 return false;
3322 3911 }
3323 3912 }
3324 3913
3325 3914 bool ConcurrentMark::should_yield() {
3326 3915 return cmThread()->should_yield();
3327 3916 }
3328 3917
3329 3918 bool ConcurrentMark::containing_card_is_marked(void* p) {
3330 3919 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3331 3920 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3332 3921 }
3333 3922
3334 3923 bool ConcurrentMark::containing_cards_are_marked(void* start,
3335 3924 void* last) {
3336 3925 return containing_card_is_marked(start) &&
3337 3926 containing_card_is_marked(last);
3338 3927 }
3339 3928
3340 3929 #ifndef PRODUCT
3341 3930 // for debugging purposes
3342 3931 void ConcurrentMark::print_finger() {
3343 3932 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3344 3933 _heap_start, _heap_end, _finger);
3345 3934 for (int i = 0; i < (int) _max_task_num; ++i) {
3346 3935 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3347 3936 }
3348 3937 gclog_or_tty->print_cr("");
3349 3938 }
3350 3939 #endif
3351 3940
3352 3941 void CMTask::scan_object(oop obj) {
3353 3942 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3354 3943
3355 3944 if (_cm->verbose_high()) {
3356 3945 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3357 3946 _task_id, (void*) obj);
3358 3947 }
3359 3948
3360 3949 size_t obj_size = obj->size();
3361 3950 _words_scanned += obj_size;
3362 3951
3363 3952 obj->oop_iterate(_cm_oop_closure);
3364 3953 statsOnly( ++_objs_scanned );
3365 3954 check_limits();
3366 3955 }
3367 3956
3368 3957 // Closure for iteration over bitmaps
3369 3958 class CMBitMapClosure : public BitMapClosure {
3370 3959 private:
3371 3960 // the bitmap that is being iterated over
3372 3961 CMBitMap* _nextMarkBitMap;
3373 3962 ConcurrentMark* _cm;
3374 3963 CMTask* _task;
3375 3964 // true if we're scanning a heap region claimed by the task (so that
3376 3965 // we move the finger along), false if we're not, i.e. currently when
3377 3966 // scanning a heap region popped from the region stack (so that we
3378 3967 // do not move the task finger along; it'd be a mistake if we did so).
3379 3968 bool _scanning_heap_region;
3380 3969
3381 3970 public:
3382 3971 CMBitMapClosure(CMTask *task,
3383 3972 ConcurrentMark* cm,
3384 3973 CMBitMap* nextMarkBitMap)
3385 3974 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3386 3975
3387 3976 void set_scanning_heap_region(bool scanning_heap_region) {
3388 3977 _scanning_heap_region = scanning_heap_region;
3389 3978 }
3390 3979
3391 3980 bool do_bit(size_t offset) {
3392 3981 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3393 3982 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3394 3983 assert( addr < _cm->finger(), "invariant");
3395 3984
3396 3985 if (_scanning_heap_region) {
3397 3986 statsOnly( _task->increase_objs_found_on_bitmap() );
3398 3987 assert(addr >= _task->finger(), "invariant");
3399 3988 // We move the task's local finger along.
3400 3989 _task->move_finger_to(addr);
3401 3990 } else {
3402 3991 // We move the task's region finger along.
3403 3992 _task->move_region_finger_to(addr);
3404 3993 }
3405 3994
3406 3995 _task->scan_object(oop(addr));
3407 3996 // we only partially drain the local queue and global stack
3408 3997 _task->drain_local_queue(true);
3409 3998 _task->drain_global_stack(true);
3410 3999
3411 4000 // if the has_aborted flag has been raised, we need to bail out of
3412 4001 // the iteration
3413 4002 return !_task->has_aborted();
3414 4003 }
3415 4004 };
3416 4005
3417 4006 // Closure for iterating over objects, currently only used for
3418 4007 // processing SATB buffers.
3419 4008 class CMObjectClosure : public ObjectClosure {
3420 4009 private:
3421 4010 CMTask* _task;
3422 4011
3423 4012 public:
3424 4013 void do_object(oop obj) {
3425 4014 _task->deal_with_reference(obj);
3426 4015 }
3427 4016
3428 4017 CMObjectClosure(CMTask* task) : _task(task) { }
3429 4018 };
3430 4019
3431 4020 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3432 4021 ConcurrentMark* cm,
3433 4022 CMTask* task)
3434 4023 : _g1h(g1h), _cm(cm), _task(task) {
3435 4024 assert(_ref_processor == NULL, "should be initialized to NULL");
3436 4025
3437 4026 if (G1UseConcMarkReferenceProcessing) {
3438 4027 _ref_processor = g1h->ref_processor_cm();
3439 4028 assert(_ref_processor != NULL, "should not be NULL");
3440 4029 }
3441 4030 }
3442 4031
3443 4032 void CMTask::setup_for_region(HeapRegion* hr) {
3444 4033 // Separated the asserts so that we know which one fires.
3445 4034 assert(hr != NULL,
3446 4035 "claim_region() should have filtered out continues humongous regions");
3447 4036 assert(!hr->continuesHumongous(),
3448 4037 "claim_region() should have filtered out continues humongous regions");
3449 4038
3450 4039 if (_cm->verbose_low()) {
3451 4040 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3452 4041 _task_id, hr);
3453 4042 }
3454 4043
3455 4044 _curr_region = hr;
3456 4045 _finger = hr->bottom();
3457 4046 update_region_limit();
3458 4047 }
3459 4048
3460 4049 void CMTask::update_region_limit() {
3461 4050 HeapRegion* hr = _curr_region;
3462 4051 HeapWord* bottom = hr->bottom();
3463 4052 HeapWord* limit = hr->next_top_at_mark_start();
3464 4053
3465 4054 if (limit == bottom) {
3466 4055 if (_cm->verbose_low()) {
3467 4056 gclog_or_tty->print_cr("[%d] found an empty region "
3468 4057 "["PTR_FORMAT", "PTR_FORMAT")",
3469 4058 _task_id, bottom, limit);
3470 4059 }
3471 4060 // The region was collected underneath our feet.
3472 4061 // We set the finger to bottom to ensure that the bitmap
3473 4062 // iteration that will follow this will not do anything.
3474 4063 // (this is not a condition that holds when we set the region up,
3475 4064 // as the region is not supposed to be empty in the first place)
3476 4065 _finger = bottom;
3477 4066 } else if (limit >= _region_limit) {
3478 4067 assert(limit >= _finger, "peace of mind");
3479 4068 } else {
3480 4069 assert(limit < _region_limit, "only way to get here");
3481 4070 // This can happen under some pretty unusual circumstances. An
3482 4071 // evacuation pause empties the region underneath our feet (NTAMS
3483 4072 // at bottom). We then do some allocation in the region (NTAMS
3484 4073 // stays at bottom), followed by the region being used as a GC
3485 4074 // alloc region (NTAMS will move to top() and the objects
3486 4075 // originally below it will be grayed). All objects now marked in
3487 4076 // the region are explicitly grayed, if below the global finger,
3488 4077 // and we do not need in fact to scan anything else. So, we simply
3489 4078 // set _finger to be limit to ensure that the bitmap iteration
3490 4079 // doesn't do anything.
3491 4080 _finger = limit;
3492 4081 }
3493 4082
3494 4083 _region_limit = limit;
3495 4084 }
3496 4085
3497 4086 void CMTask::giveup_current_region() {
3498 4087 assert(_curr_region != NULL, "invariant");
3499 4088 if (_cm->verbose_low()) {
3500 4089 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3501 4090 _task_id, _curr_region);
3502 4091 }
3503 4092 clear_region_fields();
3504 4093 }
3505 4094
3506 4095 void CMTask::clear_region_fields() {
3507 4096 // Values for these three fields that indicate that we're not
3508 4097 // holding on to a region.
3509 4098 _curr_region = NULL;
3510 4099 _finger = NULL;
3511 4100 _region_limit = NULL;
3512 4101
3513 4102 _region_finger = NULL;
3514 4103 }
3515 4104
3516 4105 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3517 4106 if (cm_oop_closure == NULL) {
3518 4107 assert(_cm_oop_closure != NULL, "invariant");
3519 4108 } else {
3520 4109 assert(_cm_oop_closure == NULL, "invariant");
3521 4110 }
3522 4111 _cm_oop_closure = cm_oop_closure;
3523 4112 }
3524 4113
3525 4114 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3526 4115 guarantee(nextMarkBitMap != NULL, "invariant");
3527 4116
3528 4117 if (_cm->verbose_low()) {
3529 4118 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3530 4119 }
3531 4120
3532 4121 _nextMarkBitMap = nextMarkBitMap;
3533 4122 clear_region_fields();
3534 4123 assert(_aborted_region.is_empty(), "should have been cleared");
3535 4124
3536 4125 _calls = 0;
3537 4126 _elapsed_time_ms = 0.0;
3538 4127 _termination_time_ms = 0.0;
3539 4128 _termination_start_time_ms = 0.0;
3540 4129
3541 4130 #if _MARKING_STATS_
3542 4131 _local_pushes = 0;
3543 4132 _local_pops = 0;
3544 4133 _local_max_size = 0;
3545 4134 _objs_scanned = 0;
3546 4135 _global_pushes = 0;
3547 4136 _global_pops = 0;
3548 4137 _global_max_size = 0;
3549 4138 _global_transfers_to = 0;
3550 4139 _global_transfers_from = 0;
3551 4140 _region_stack_pops = 0;
3552 4141 _regions_claimed = 0;
3553 4142 _objs_found_on_bitmap = 0;
3554 4143 _satb_buffers_processed = 0;
3555 4144 _steal_attempts = 0;
3556 4145 _steals = 0;
3557 4146 _aborted = 0;
3558 4147 _aborted_overflow = 0;
3559 4148 _aborted_cm_aborted = 0;
3560 4149 _aborted_yield = 0;
3561 4150 _aborted_timed_out = 0;
3562 4151 _aborted_satb = 0;
3563 4152 _aborted_termination = 0;
3564 4153 #endif // _MARKING_STATS_
3565 4154 }
3566 4155
3567 4156 bool CMTask::should_exit_termination() {
3568 4157 regular_clock_call();
3569 4158 // This is called when we are in the termination protocol. We should
3570 4159 // quit if, for some reason, this task wants to abort or the global
3571 4160 // stack is not empty (this means that we can get work from it).
3572 4161 return !_cm->mark_stack_empty() || has_aborted();
3573 4162 }
3574 4163
3575 4164 void CMTask::reached_limit() {
3576 4165 assert(_words_scanned >= _words_scanned_limit ||
3577 4166 _refs_reached >= _refs_reached_limit,
3578 4167 "shouldn't have been called otherwise");
3579 4168 regular_clock_call();
3580 4169 }
3581 4170
3582 4171 void CMTask::regular_clock_call() {
3583 4172 if (has_aborted()) return;
3584 4173
3585 4174 // First, we need to recalculate the words scanned and refs reached
3586 4175 // limits for the next clock call.
3587 4176 recalculate_limits();
3588 4177
3589 4178 // During the regular clock call we do the following
3590 4179
3591 4180 // (1) If an overflow has been flagged, then we abort.
3592 4181 if (_cm->has_overflown()) {
3593 4182 set_has_aborted();
3594 4183 return;
3595 4184 }
3596 4185
3597 4186 // If we are not concurrent (i.e. we're doing remark) we don't need
3598 4187 // to check anything else. The other steps are only needed during
3599 4188 // the concurrent marking phase.
3600 4189 if (!concurrent()) return;
3601 4190
3602 4191 // (2) If marking has been aborted for Full GC, then we also abort.
3603 4192 if (_cm->has_aborted()) {
3604 4193 set_has_aborted();
3605 4194 statsOnly( ++_aborted_cm_aborted );
3606 4195 return;
3607 4196 }
3608 4197
3609 4198 double curr_time_ms = os::elapsedVTime() * 1000.0;
3610 4199
3611 4200 // (3) If marking stats are enabled, then we update the step history.
3612 4201 #if _MARKING_STATS_
3613 4202 if (_words_scanned >= _words_scanned_limit) {
3614 4203 ++_clock_due_to_scanning;
3615 4204 }
3616 4205 if (_refs_reached >= _refs_reached_limit) {
3617 4206 ++_clock_due_to_marking;
3618 4207 }
3619 4208
3620 4209 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3621 4210 _interval_start_time_ms = curr_time_ms;
3622 4211 _all_clock_intervals_ms.add(last_interval_ms);
3623 4212
3624 4213 if (_cm->verbose_medium()) {
3625 4214 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3626 4215 "scanned = %d%s, refs reached = %d%s",
3627 4216 _task_id, last_interval_ms,
3628 4217 _words_scanned,
3629 4218 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3630 4219 _refs_reached,
3631 4220 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3632 4221 }
3633 4222 #endif // _MARKING_STATS_
3634 4223
3635 4224 // (4) We check whether we should yield. If we have to, then we abort.
3636 4225 if (_cm->should_yield()) {
3637 4226 // We should yield. To do this we abort the task. The caller is
3638 4227 // responsible for yielding.
3639 4228 set_has_aborted();
3640 4229 statsOnly( ++_aborted_yield );
3641 4230 return;
3642 4231 }
3643 4232
3644 4233 // (5) We check whether we've reached our time quota. If we have,
3645 4234 // then we abort.
3646 4235 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3647 4236 if (elapsed_time_ms > _time_target_ms) {
3648 4237 set_has_aborted();
3649 4238 _has_timed_out = true;
3650 4239 statsOnly( ++_aborted_timed_out );
3651 4240 return;
3652 4241 }
3653 4242
3654 4243 // (6) Finally, we check whether there are enough completed SATB
3655 4244 // buffers available for processing. If there are, we abort.
3656 4245 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3657 4246 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3658 4247 if (_cm->verbose_low()) {
3659 4248 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3660 4249 _task_id);
3661 4250 }
3662 4251 // We do need to process SATB buffers, so we'll abort and restart
3663 4252 // the marking task to do so.
3664 4253 set_has_aborted();
3665 4254 statsOnly( ++_aborted_satb );
3666 4255 return;
3667 4256 }
3668 4257 }
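// [Illustrative sketch, not part of this change] The work-based clock
// above avoids reading the (expensive) virtual time on every object:
// cheap counters are bumped on the hot path, and the clock is only
// consulted when a counter crosses its limit. A minimal standalone
// version; all names and the period value here are assumed:

#include <cstddef>

struct WorkBasedClock {
  size_t words_scanned;
  size_t words_scanned_limit;
  static const size_t words_scanned_period = 12 * 1024; // assumed period

  WorkBasedClock() : words_scanned(0),
                     words_scanned_limit(words_scanned_period) {}

  // Hot path: almost always just an add and a compare.
  void on_words_scanned(size_t n) {
    words_scanned += n;
    if (words_scanned >= words_scanned_limit) {
      clock_tick();
      words_scanned_limit = words_scanned + words_scanned_period;
    }
  }

  // Cold path: this is where the time/overflow/yield checks would go.
  void clock_tick() { /* check abort conditions here */ }
};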
3669 4258
3670 4259 void CMTask::recalculate_limits() {
3671 4260 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3672 4261 _words_scanned_limit = _real_words_scanned_limit;
3673 4262
3674 4263 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3675 4264 _refs_reached_limit = _real_refs_reached_limit;
3676 4265 }
3677 4266
3678 4267 void CMTask::decrease_limits() {
3679 4268 // This is called when we believe that we're about to do an infrequent
3680 4269 // operation which will increase the per-byte scanned cost (i.e. move
3681 4270 // entries to/from the global stack). It basically decreases the
3682 4271 // scanning limits so that the clock is called earlier.
3683 4272
3684 4273 if (_cm->verbose_medium()) {
3685 4274 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3686 4275 }
3687 4276
3688 4277 _words_scanned_limit = _real_words_scanned_limit -
3689 4278 3 * words_scanned_period / 4;
3690 4279 _refs_reached_limit = _real_refs_reached_limit -
3691 4280 3 * refs_reached_period / 4;
3692 4281 }
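// A worked example of the arithmetic above, as a standalone snippet
// (the period value is assumed for illustration, not taken from the
// real words_scanned_period constant):

#include <cstddef>
#include <cstdio>

int main() {
  const size_t words_scanned_period = 12 * 1024; // assumed: 12K words
  size_t words_scanned   = 100000;
  size_t real_limit      = words_scanned + words_scanned_period;
  size_t decreased_limit = real_limit - 3 * words_scanned_period / 4;
  // real_limit = 112288, decreased_limit = 103072: only a quarter of
  // the period (3072 words) remains before the clock fires again.
  printf("real = %zu, decreased = %zu\n", real_limit, decreased_limit);
  return 0;
}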
3693 4282
3694 4283 void CMTask::move_entries_to_global_stack() {
3695 4284 // local array where we'll store the entries that will be popped
3696 4285 // from the local queue
3697 4286 oop buffer[global_stack_transfer_size];
3698 4287
3699 4288 int n = 0;
3700 4289 oop obj;
3701 4290 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3702 4291 buffer[n] = obj;
3703 4292 ++n;
3704 4293 }
3705 4294
3706 4295 if (n > 0) {
3707 4296 // we popped at least one entry from the local queue
3708 4297
3709 4298 statsOnly( ++_global_transfers_to; _local_pops += n );
3710 4299
3711 4300 if (!_cm->mark_stack_push(buffer, n)) {
3712 4301 if (_cm->verbose_low()) {
3713 4302 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3714 4303 _task_id);
3715 4304 }
3716 4305 set_has_aborted();
3717 4306 } else {
3718 4307 // the transfer was successful
3719 4308
3720 4309 if (_cm->verbose_medium()) {
3721 4310 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3722 4311 _task_id, n);
3723 4312 }
3724 4313 statsOnly( int tmp_size = _cm->mark_stack_size();
3725 4314 if (tmp_size > _global_max_size) {
3726 4315 _global_max_size = tmp_size;
3727 4316 }
3728 4317 _global_pushes += n );
3729 4318 }
3730 4319 }
3731 4320
3732 4321 // this operation was quite expensive, so decrease the limits
3733 4322 decrease_limits();
3734 4323 }
3735 4324
3736 4325 void CMTask::get_entries_from_global_stack() {
3737 4326 // local array where we'll store the entries that will be popped
3738 4327 // from the global stack.
3739 4328 oop buffer[global_stack_transfer_size];
3740 4329 int n;
3741 4330 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3742 4331 assert(n <= global_stack_transfer_size,
3743 4332 "we should not pop more than the given limit");
3744 4333 if (n > 0) {
3745 4334 // yes, we did actually pop at least one entry
3746 4335
3747 4336 statsOnly( ++_global_transfers_from; _global_pops += n );
3748 4337 if (_cm->verbose_medium()) {
3749 4338 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3750 4339 _task_id, n);
3751 4340 }
3752 4341 for (int i = 0; i < n; ++i) {
3753 4342 bool success = _task_queue->push(buffer[i]);
3754 4343 // We only call this when the local queue is empty or under a
3755 4344 // given target limit. So, we do not expect this push to fail.
3756 4345 assert(success, "invariant");
3757 4346 }
3758 4347
3759 4348 statsOnly( int tmp_size = _task_queue->size();
3760 4349 if (tmp_size > _local_max_size) {
3761 4350 _local_max_size = tmp_size;
3762 4351 }
3763 4352 _local_pushes += n );
3764 4353 }
3765 4354
3766 4355 // this operation was quite expensive, so decrease the limits
3767 4356 decrease_limits();
3768 4357 }
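// The two transfer methods above share one pattern: amortize
// synchronization on the shared stack by moving entries in fixed-size
// chunks, then pay for the expensive interaction by lowering the clock
// limits. A minimal standalone sketch of the "to global" direction,
// with std::deque standing in for the lock-free task queue and a
// mutex-protected vector for the global stack (chunk size assumed):

#include <deque>
#include <mutex>
#include <vector>

static const int global_stack_transfer_chunk = 64; // assumed chunk size

static std::mutex global_stack_mutex;
static std::vector<void*> global_stack;

void move_chunk_to_global(std::deque<void*>& local_queue) {
  void* buffer[global_stack_transfer_chunk];
  int n = 0;
  while (n < global_stack_transfer_chunk && !local_queue.empty()) {
    buffer[n++] = local_queue.front();
    local_queue.pop_front();
  }
  if (n > 0) {
    // One lock acquisition per chunk, not per entry.
    std::lock_guard<std::mutex> lock(global_stack_mutex);
    global_stack.insert(global_stack.end(), buffer, buffer + n);
  }
}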
3769 4358
3770 4359 void CMTask::drain_local_queue(bool partially) {
3771 4360 if (has_aborted()) return;
3772 4361
3773 4362 // Decide what the target size is, depending on whether we're going to
3774 4363 // drain it partially (so that other tasks can steal if they run out
3775 4364 // of things to do) or totally (at the very end).
3776 4365 size_t target_size;
3777 4366 if (partially) {
3778 4367 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3779 4368 } else {
3780 4369 target_size = 0;
3781 4370 }
3782 4371
3783 4372 if (_task_queue->size() > target_size) {
3784 4373 if (_cm->verbose_high()) {
3785 4374 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3786 4375 _task_id, target_size);
3787 4376 }
3788 4377
3789 4378 oop obj;
3790 4379 bool ret = _task_queue->pop_local(obj);
3791 4380 while (ret) {
3792 4381 statsOnly( ++_local_pops );
3793 4382
3794 4383 if (_cm->verbose_high()) {
3795 4384 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3796 4385 (void*) obj);
3797 4386 }
3798 4387
3799 4388 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3800 4389 assert(!_g1h->is_on_master_free_list(
3801 4390 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3802 4391
3803 4392 scan_object(obj);
3804 4393
3805 4394 if (_task_queue->size() <= target_size || has_aborted()) {
3806 4395 ret = false;
3807 4396 } else {
3808 4397 ret = _task_queue->pop_local(obj);
3809 4398 }
3810 4399 }
3811 4400
3812 4401 if (_cm->verbose_high()) {
3813 4402 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3814 4403 _task_id, _task_queue->size());
3815 4404 }
3816 4405 }
3817 4406 }
3818 4407
3819 4408 void CMTask::drain_global_stack(bool partially) {
3820 4409 if (has_aborted()) return;
3821 4410
3822 4411 // We have a policy to drain the local queue before we attempt to
3823 4412 // drain the global stack.
3824 4413 assert(partially || _task_queue->size() == 0, "invariant");
3825 4414
3826 4415 // Decide what the target size is, depending on whether we're going to
3827 4416 // drain it partially (so that other tasks can steal if they run out
3828 4417 // of things to do) or totally (at the very end). Notice that,
3829 4418 // because we move entries from the global stack in chunks or
3830 4419 // because another task might be doing the same, we might in fact
3831 4420 // drop below the target. But this is not a problem.
3832 4421 size_t target_size;
3833 4422 if (partially) {
3834 4423 target_size = _cm->partial_mark_stack_size_target();
3835 4424 } else {
3836 4425 target_size = 0;
3837 4426 }
3838 4427
3839 4428 if (_cm->mark_stack_size() > target_size) {
3840 4429 if (_cm->verbose_low()) {
3841 4430 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3842 4431 _task_id, target_size);
3843 4432 }
3844 4433
3845 4434 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3846 4435 get_entries_from_global_stack();
3847 4436 drain_local_queue(partially);
3848 4437 }
3849 4438
3850 4439 if (_cm->verbose_low()) {
3851 4440 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3852 4441 _task_id, _cm->mark_stack_size());
3853 4442 }
3854 4443 }
3855 4444 }
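// Both drain methods follow the same shape: pick a target size
// (non-zero for a partial drain, so other tasks can still steal or
// pop; zero for the final, total drain), then pop-and-scan until the
// size is at or below the target. A generic standalone sketch:

#include <cstddef>
#include <stack>

template <typename T, typename ScanFn>
void drain_to_target(std::stack<T>& s, size_t target_size, ScanFn scan) {
  // Re-check the size on every iteration: scanning an entry may push
  // new entries, and (for a shared stack) other threads may too.
  while (s.size() > target_size) {
    T entry = s.top();
    s.pop();
    scan(entry);
  }
}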
3856 4445
3857 4446 // The SATB queue code makes several assumptions about whether to call
3858 4447 // the par or non-par versions of its methods. This is why some of the
3859 4448 // code is replicated. We should really get rid of the single-threaded
3860 4449 // version of the code to simplify things.
3861 4450 void CMTask::drain_satb_buffers() {
3862 4451 if (has_aborted()) return;
3863 4452
3864 4453 // We set this so that the regular clock knows that we're in the
3865 4454 // middle of draining buffers and doesn't set the abort flag when it
3866 4455 // notices that SATB buffers are available for draining. It'd be
3867 4456 // very counterproductive if it did that. :-)
3868 4457 _draining_satb_buffers = true;
3869 4458
3870 4459 CMObjectClosure oc(this);
3871 4460 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3872 4461 if (G1CollectedHeap::use_parallel_gc_threads()) {
3873 4462 satb_mq_set.set_par_closure(_task_id, &oc);
3874 4463 } else {
3875 4464 satb_mq_set.set_closure(&oc);
3876 4465 }
3877 4466
3878 4467 // This keeps claiming and applying the closure to completed buffers
3879 4468 // until we run out of buffers or we need to abort.
3880 4469 if (G1CollectedHeap::use_parallel_gc_threads()) {
3881 4470 while (!has_aborted() &&
3882 4471 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3883 4472 if (_cm->verbose_medium()) {
3884 4473 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3885 4474 }
3886 4475 statsOnly( ++_satb_buffers_processed );
3887 4476 regular_clock_call();
3888 4477 }
3889 4478 } else {
3890 4479 while (!has_aborted() &&
3891 4480 satb_mq_set.apply_closure_to_completed_buffer()) {
3892 4481 if (_cm->verbose_medium()) {
3893 4482 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3894 4483 }
3895 4484 statsOnly( ++_satb_buffers_processed );
3896 4485 regular_clock_call();
3897 4486 }
3898 4487 }
3899 4488
3900 4489 if (!concurrent() && !has_aborted()) {
3901 4490 // We should only do this during remark.
3902 4491 if (G1CollectedHeap::use_parallel_gc_threads()) {
3903 4492 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3904 4493 } else {
3905 4494 satb_mq_set.iterate_closure_all_threads();
3906 4495 }
3907 4496 }
3908 4497
3909 4498 _draining_satb_buffers = false;
3910 4499
3911 4500 assert(has_aborted() ||
3912 4501 concurrent() ||
3913 4502 satb_mq_set.completed_buffers_num() == 0, "invariant");
3914 4503
3915 4504 if (G1CollectedHeap::use_parallel_gc_threads()) {
3916 4505 satb_mq_set.set_par_closure(_task_id, NULL);
3917 4506 } else {
3918 4507 satb_mq_set.set_closure(NULL);
3919 4508 }
3920 4509
3921 4510 // Again, this was a potentially expensive operation, so decrease the
3922 4511 // limits to get the regular clock call early.
3923 4512 decrease_limits();
3924 4513 }
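// Conceptually, draining SATB buffers means: claim one completed
// buffer at a time, apply the marking closure to every reference the
// pre-write barrier recorded in it, and re-check the clock between
// buffers so the task can still abort promptly. A simplified
// standalone sketch; all types and names here are assumed:

#include <cstddef>
#include <vector>

typedef void* oop_t; // stand-in for oop

struct SatbQueueSketch {
  std::vector<std::vector<oop_t> > completed; // completed buffers
  bool claim(std::vector<oop_t>& out) {       // pop one buffer, if any
    if (completed.empty()) return false;
    out.swap(completed.back());
    completed.pop_back();
    return true;
  }
};

template <typename MarkFn, typename ClockFn>
void drain_satb_sketch(SatbQueueSketch& q, MarkFn mark, ClockFn clock,
                       const bool& aborted) {
  std::vector<oop_t> buf;
  while (!aborted && q.claim(buf)) {
    for (size_t i = 0; i < buf.size(); ++i) {
      mark(buf[i]); // each recorded reference must be treated as live
    }
    clock();        // may set 'aborted' via the usual abort conditions
  }
}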
3925 4514
3926 4515 void CMTask::drain_region_stack(BitMapClosure* bc) {
3927 4516 if (has_aborted()) return;
3928 4517
3929 4518 assert(_region_finger == NULL,
3930 4519 "it should be NULL when we're not scanning a region");
3931 4520
3932 4521 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3933 4522 if (_cm->verbose_low()) {
3934 4523 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3935 4524 _task_id, _cm->region_stack_size());
3936 4525 }
3937 4526
3938 4527 MemRegion mr;
3939 4528
3940 4529 if (!_aborted_region.is_empty()) {
3941 4530 mr = _aborted_region;
3942 4531 _aborted_region = MemRegion();
3943 4532
3944 4533 if (_cm->verbose_low()) {
3945 4534 gclog_or_tty->print_cr("[%d] scanning aborted region "
3946 4535 "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3947 4536 _task_id, mr.start(), mr.end());
3948 4537 }
3949 4538 } else {
3950 4539 mr = _cm->region_stack_pop_lock_free();
3951 4540 // it returns MemRegion() if the pop fails
3952 4541 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3953 4542 }
3954 4543
3955 4544 while (mr.start() != NULL) {
3956 4545 if (_cm->verbose_medium()) {
3957 4546 gclog_or_tty->print_cr("[%d] we are scanning region "
3958 4547 "["PTR_FORMAT", "PTR_FORMAT")",
3959 4548 _task_id, mr.start(), mr.end());
3960 4549 }
3961 4550
3962 4551 assert(mr.end() <= _cm->finger(),
3963 4552 "otherwise the region shouldn't be on the stack");
3964 4553 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3965 4554 if (_nextMarkBitMap->iterate(bc, mr)) {
3966 4555 assert(!has_aborted(),
3967 4556 "cannot abort the task without aborting the bitmap iteration");
3968 4557
3969 4558 // We finished iterating over the region without aborting.
3970 4559 regular_clock_call();
3971 4560 if (has_aborted()) {
3972 4561 mr = MemRegion();
3973 4562 } else {
3974 4563 mr = _cm->region_stack_pop_lock_free();
3975 4564 // it returns MemRegion() if the pop fails
3976 4565 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3977 4566 }
3978 4567 } else {
3979 4568 assert(has_aborted(), "currently the only way to do so");
3980 4569
3981 4570 // The only way to abort the bitmap iteration is to return
3982 4571 // false from the do_bit() method. However, inside the
3983 4572 // do_bit() method we move the _region_finger to point to the
3984 4573 // object currently being looked at. So, if we bail out, we
3985 4574 // have definitely set _region_finger to something non-null.
3986 4575 assert(_region_finger != NULL, "invariant");
3987 4576
3988 4577 // Make sure that any previously aborted region has been
3989 4578 // cleared.
3990 4579 assert(_aborted_region.is_empty(), "aborted region not cleared");
3991 4580
3992 4581 // The iteration was actually aborted. So now _region_finger
3993 4582 // points to the address of the object we last scanned. If we
3994 4583 // leave it there, when we restart this task, we will rescan
3995 4584 // the object. It is easy to avoid this. We move the finger by
3996 4585 // enough to point to the next possible object header (the
3997 4586 // bitmap knows by how much we need to move it as it knows its
3998 4587 // granularity).
3999 4588 MemRegion newRegion =
4000 4589 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
4001 4590
4002 4591 if (!newRegion.is_empty()) {
4003 4592 if (_cm->verbose_low()) {
4004 4593 gclog_or_tty->print_cr("[%d] recording unscanned region "
4005 4594 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
4006 4595 _task_id,
4007 4596 newRegion.start(), newRegion.end());
4008 4597 }
4009 4598 // Now record the part of the region we didn't scan to
4010 4599 // make sure this task scans it later.
4011 4600 _aborted_region = newRegion;
4012 4601 }
4013 4602 // break from while
4014 4603 mr = MemRegion();
4015 4604 }
4016 4605 _region_finger = NULL;
4017 4606 }
4018 4607
4019 4608 if (_cm->verbose_low()) {
4020 4609 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
4021 4610 _task_id, _cm->region_stack_size());
4022 4611 }
4023 4612 }
4024 4613 }
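// The "move the finger by one bitmap granule" trick above, in
// isolation: if bit i of the mark bitmap covers the words
// [i << shift, (i+1) << shift), then after aborting at the finger the
// restart point is the first word covered by the next bit, so the
// object already handed to the closure is not rescanned. Standalone
// sketch; the names are assumed:

#include <cstddef>

size_t next_possible_object_word(size_t finger_word_index, int shift) {
  // Round down to the granule containing the finger, then advance by
  // exactly one granule.
  return ((finger_word_index >> shift) + 1) << shift;
}
// e.g. with shift == 0 (one bit per word), a finger at word 100
// restarts at word 101.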
4025 4614
4026 4615 void CMTask::print_stats() {
4027 4616 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
4028 4617 _task_id, _calls);
4029 4618 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4030 4619 _elapsed_time_ms, _termination_time_ms);
4031 4620 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4032 4621 _step_times_ms.num(), _step_times_ms.avg(),
4033 4622 _step_times_ms.sd());
4034 4623 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4035 4624 _step_times_ms.maximum(), _step_times_ms.sum());
4036 4625
4037 4626 #if _MARKING_STATS_
4038 4627 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4039 4628 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4040 4629 _all_clock_intervals_ms.sd());
4041 4630 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4042 4631 _all_clock_intervals_ms.maximum(),
4043 4632 _all_clock_intervals_ms.sum());
4044 4633 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4045 4634 _clock_due_to_scanning, _clock_due_to_marking);
4046 4635 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4047 4636 _objs_scanned, _objs_found_on_bitmap);
4048 4637 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4049 4638 _local_pushes, _local_pops, _local_max_size);
4050 4639 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4051 4640 _global_pushes, _global_pops, _global_max_size);
4052 4641 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4053 4642 _global_transfers_to,_global_transfers_from);
4054 4643 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
4055 4644 _regions_claimed, _region_stack_pops);
4056 4645 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4057 4646 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4058 4647 _steal_attempts, _steals);
4059 4648 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4060 4649 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4061 4650 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4062 4651 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4063 4652 _aborted_timed_out, _aborted_satb, _aborted_termination);
4064 4653 #endif // _MARKING_STATS_
4065 4654 }
4066 4655
4067 4656 /*****************************************************************************
4068 4657
4069 4658 The do_marking_step(time_target_ms) method is the building block
4070 4659 of the parallel marking framework. It can be called in parallel
4071 4660 with other invocations of do_marking_step() on different tasks
4072 4661 (but only one per task, obviously) and concurrently with the
4073 4662 mutator threads, or during remark, hence it eliminates the need
4074 4663 for two versions of the code. When called during remark, it will
4075 4664 pick up from where the task left off during the concurrent marking
4076 4665 phase. Interestingly, tasks are also claimable during evacuation
4077 4666 pauses, since do_marking_step() ensures that it aborts before
4078 4667 it needs to yield.
4079 4668
4080 4669 The data structures that it uses to do marking work are the
4081 4670 following:
4082 4671
4083 4672 (1) Marking Bitmap. If there are gray objects that appear only
4084 4673 on the bitmap (this happens either when dealing with an overflow
4085 4674 or when the initial marking phase has simply marked the roots
4086 4675 and didn't push them on the stack), then tasks claim heap
4087 4676 regions whose bitmap they then scan to find gray objects. A
4088 4677 global finger indicates where the end of the last claimed region
4089 4678 is. A local finger indicates how far into the region a task has
4090 4679 scanned. The two fingers are used to determine how to gray an
4091 4680 object (i.e. whether simply marking it is OK, as it will be
4092 4681 visited by a task in the future, or whether it needs to be also
4093 4682 pushed on a stack).
4094 4683
4095 4684 (2) Local Queue. The local queue of the task, which the task can
4096 4685 access reasonably efficiently. Other tasks can steal from
4097 4686 it when they run out of work. Throughout the marking phase, a
4098 4687 task attempts to keep its local queue short but not totally
4099 4688 empty, so that entries are available for stealing by other
4100 4689 tasks. Only when there is no more work will a task totally
4101 4690 drain its local queue.
4102 4691
4103 4692 (3) Global Mark Stack. This handles local queue overflow. During
4104 4693 marking, only sets of entries are moved between it and the local
4105 4694 queues, as access to it requires a mutex and more fine-grained
4106 4695 interaction with it might cause contention. If it
4107 4696 overflows, then the marking phase should restart and iterate
4108 4697 over the bitmap to identify gray objects. Throughout the marking
4109 4698 phase, tasks attempt to keep the global mark stack at a small
4110 4699 length but not totally empty, so that entries are available for
4111 4700 popping by other tasks. Only when there is no more work will
4112 4701 tasks totally drain the global mark stack.
4113 4702
4114 4703 (4) Global Region Stack. Entries on it correspond to areas of
4115 4704 the bitmap that need to be scanned since they contain gray
4116 4705 objects. Pushes on the region stack only happen during
4117 4706 evacuation pauses and typically correspond to areas covered by
4118 4707 GC LABs. If it overflows, then the marking phase should restart
4119 4708 and iterate over the bitmap to identify gray objects. Tasks will
4120 4709 try to totally drain the region stack as soon as possible.
4121 4710
4122 4711 (5) SATB Buffer Queue. This is where completed SATB buffers are
4123 4712 made available. Buffers are regularly removed from this queue
4124 4713 and scanned for roots, so that the queue doesn't get too
4125 4714 long. During remark, all completed buffers are processed, as
4126 4715 well as the filled-in parts of any uncompleted buffers.
4127 4716
4128 4717 The do_marking_step() method tries to abort when the time target
4129 4718 has been reached. There are a few other cases when the
4130 4719 do_marking_step() method also aborts:
4131 4720
4132 4721 (1) When the marking phase has been aborted (after a Full GC).
4133 4722
4134 4723 (2) When a global overflow (either on the global stack or the
4135 4724 region stack) has been triggered. Before the task aborts, it
4136 4725 will actually sync up with the other tasks to ensure that all
4137 4726 the marking data structures (local queues, stacks, fingers etc.)
4138 4727 are re-initialised so that when do_marking_step() completes,
4139 4728 the marking phase can immediately restart.
4140 4729
4141 4730 (3) When enough completed SATB buffers are available. The
4142 4731 do_marking_step() method only tries to drain SATB buffers right
4143 4732 at the beginning. So, if enough buffers are available, the
4144 4733 marking step aborts and the SATB buffers are processed at
4145 4734 the beginning of the next invocation.
4146 4735
4147 4736 (4) To yield. When we have to yield then we abort and yield
4148 4737 right at the end of do_marking_step(). This saves us from a lot
4149 4738 of hassle as, by yielding, we might allow a Full GC. If this
4150 4739 happens then objects will be compacted underneath our feet, the
4151 4740 heap might shrink, etc. We avoid checking for this by just
4152 4741 aborting and doing the yield right at the end.
4153 4742
4154 4743 From the above it follows that the do_marking_step() method should
4155 4744 be called in a loop (or, otherwise, regularly) until it completes.
4156 4745
4157 4746 If a marking step completes without its has_aborted() flag being
4158 4747 true, it means it has completed the current marking phase (and
4159 4748 also all other marking tasks have done so and have all synced up).
4160 4749
4161 4750 A method called regular_clock_call() is invoked "regularly" (in
4162 4751 sub ms intervals) throughout marking. It is this clock method that
4163 4752 checks all the abort conditions which were mentioned above and
4164 4753 decides when the task should abort. A work-based scheme is used to
4165 4754 trigger this clock method: when the number of object words the
4166 4755 marking phase has scanned or the number of references the marking
4167 4756 phase has visited reaches a given limit. Additional invocations
4168 4757 of the clock method have been planted in a few other strategic
4169 4758 places too. The initial reason for the clock method was to avoid calling
4170 4759 vtime too regularly, as it is quite expensive. So, once it was in
4171 4760 place, it was natural to piggy-back all the other conditions on it
4172 4761 too and not constantly check them throughout the code.
4173 4762
4174 4763 *****************************************************************************/
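// As the comment block above says, do_marking_step() is meant to be
// driven from a retry loop. A simplified, hypothetical caller shape
// (this is not the actual ConcurrentMarkThread code; the 10ms time
// target is assumed for illustration):

void marking_driver_sketch(CMTask* task, ConcurrentMark* cm) {
  bool done = false;
  while (!done) {
    task->do_marking_step(10.0 /* time target in ms, assumed */,
                          true /* do_stealing */,
                          true /* do_termination */);
    done = !task->has_aborted();
    if (!done && cm->should_yield()) {
      // Yield to the pending safepoint / evacuation pause here,
      // then loop around and restart the step.
    }
  }
}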
4175 4764
4176 4765 void CMTask::do_marking_step(double time_target_ms,
4177 4766 bool do_stealing,
4178 4767 bool do_termination) {
4179 4768 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4180 4769 assert(concurrent() == _cm->concurrent(), "they should be the same");
4181 4770
4182 4771 assert(concurrent() || _cm->region_stack_empty(),
4183 4772 "the region stack should have been cleared before remark");
4184 4773 assert(concurrent() || !_cm->has_aborted_regions(),
4185 4774 "aborted regions should have been cleared before remark");
4186 4775 assert(_region_finger == NULL,
4187 4776 "this should be non-null only when a region is being scanned");
4188 4777
4189 4778 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4190 4779 assert(_task_queues != NULL, "invariant");
4191 4780 assert(_task_queue != NULL, "invariant");
4192 4781 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4193 4782
4194 4783 assert(!_claimed,
4195 4784 "only one thread should claim this task at any one time");
4196 4785
4197 4786 // OK, this doesn't safeguard against all possible scenarios, as it is
4198 4787 // possible for two threads to set the _claimed flag at the same
4199 4788 // time. But it is only for debugging purposes anyway and it will
4200 4789 // catch most problems.
4201 4790 _claimed = true;
4202 4791
4203 4792 _start_time_ms = os::elapsedVTime() * 1000.0;
4204 4793 statsOnly( _interval_start_time_ms = _start_time_ms );
4205 4794
4206 4795 double diff_prediction_ms =
4207 4796 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4208 4797 _time_target_ms = time_target_ms - diff_prediction_ms;
4209 4798
4210 4799 // set up the variables that are used in the work-based scheme to
4211 4800 // call the regular clock method
4212 4801 _words_scanned = 0;
4213 4802 _refs_reached = 0;
4214 4803 recalculate_limits();
4215 4804
4216 4805 // clear all flags
4217 4806 clear_has_aborted();
4218 4807 _has_timed_out = false;
4219 4808 _draining_satb_buffers = false;
4220 4809
4221 4810 ++_calls;
4222 4811
4223 4812 if (_cm->verbose_low()) {
4224 4813 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4225 4814 "target = %1.2lfms >>>>>>>>>>",
4226 4815 _task_id, _calls, _time_target_ms);
4227 4816 }
4228 4817
4229 4818 // Set up the bitmap and oop closures. Anything that uses them is
4230 4819 // eventually called from this method, so it is OK to allocate these
4231 4820 // on the stack.
4232 4821 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4233 4822 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4234 4823 set_cm_oop_closure(&cm_oop_closure);
4235 4824
4236 4825 if (_cm->has_overflown()) {
4237 4826 // This can happen if the region stack or the mark stack overflows
4238 4827 // during a GC pause and this task, after a yield point,
4239 4828 // restarts. We have to abort as we need to get into the overflow
4240 4829 // protocol which happens right at the end of this task.
4241 4830 set_has_aborted();
4242 4831 }
4243 4832
4244 4833 // First drain any available SATB buffers. After this, we will not
4245 4834 // look at SATB buffers before the next invocation of this method.
4246 4835 // If enough completed SATB buffers are queued up, the regular clock
4247 4836 // will abort this task so that it restarts.
4248 4837 drain_satb_buffers();
4249 4838 // ...then partially drain the local queue and the global stack
4250 4839 drain_local_queue(true);
4251 4840 drain_global_stack(true);
4252 4841
4253 4842 // Then totally drain the region stack. We will not look at
4254 4843 // it again before the next invocation of this method. Entries on
4255 4844 // the region stack are only added during evacuation pauses, for
4256 4845 // which we have to yield. When we do, we abort the task anyway so
4257 4846 // it will look at the region stack again when it restarts.
4258 4847 bitmap_closure.set_scanning_heap_region(false);
4259 4848 drain_region_stack(&bitmap_closure);
4260 4849 // ...then partially drain the local queue and the global stack
4261 4850 drain_local_queue(true);
4262 4851 drain_global_stack(true);
4263 4852
4264 4853 do {
4265 4854 if (!has_aborted() && _curr_region != NULL) {
4266 4855 // This means that we're already holding on to a region.
4267 4856 assert(_finger != NULL, "if region is not NULL, then the finger "
4268 4857 "should not be NULL either");
4269 4858
4270 4859 // We might have restarted this task after an evacuation pause
4271 4860 // which might have evacuated the region we're holding on to
4272 4861 // underneath our feet. Let's read its limit again to make sure
4273 4862 // that we do not iterate over a region of the heap that
4274 4863 // contains garbage (update_region_limit() will also move
4275 4864 // _finger to the start of the region if it is found empty).
4276 4865 update_region_limit();
4277 4866 // We will start from _finger not from the start of the region,
4278 4867 // as we might be restarting this task after aborting half-way
4279 4868 // through scanning this region. In this case, _finger points to
4280 4869 // the address where we last found a marked object. If this is a
4281 4870 // fresh region, _finger points to start().
4282 4871 MemRegion mr = MemRegion(_finger, _region_limit);
4283 4872
4284 4873 if (_cm->verbose_low()) {
4285 4874 gclog_or_tty->print_cr("[%d] we're scanning part "
4286 4875 "["PTR_FORMAT", "PTR_FORMAT") "
4287 4876 "of region "PTR_FORMAT,
4288 4877 _task_id, _finger, _region_limit, _curr_region);
4289 4878 }
4290 4879
4291 4880 // Let's iterate over the bitmap of the part of the
4292 4881 // region that is left.
4293 4882 bitmap_closure.set_scanning_heap_region(true);
4294 4883 if (mr.is_empty() ||
4295 4884 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4296 4885 // We successfully completed iterating over the region. Now,
4297 4886 // let's give up the region.
4298 4887 giveup_current_region();
4299 4888 regular_clock_call();
4300 4889 } else {
4301 4890 assert(has_aborted(), "currently the only way to do so");
4302 4891 // The only way to abort the bitmap iteration is to return
4303 4892 // false from the do_bit() method. However, inside the
4304 4893 // do_bit() method we move the _finger to point to the
4305 4894 // object currently being looked at. So, if we bail out, we
4306 4895 // have definitely set _finger to something non-null.
4307 4896 assert(_finger != NULL, "invariant");
4308 4897
4309 4898 // Region iteration was actually aborted. So now _finger
4310 4899 // points to the address of the object we last scanned. If we
4311 4900 // leave it there, when we restart this task, we will rescan
4312 4901 // the object. It is easy to avoid this. We move the finger by
4313 4902 // enough to point to the next possible object header (the
4314 4903 // bitmap knows by how much we need to move it as it knows its
4315 4904 // granularity).
4316 4905 assert(_finger < _region_limit, "invariant");
4317 4906 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4318 4907 // Check if bitmap iteration was aborted while scanning the last object
4319 4908 if (new_finger >= _region_limit) {
4320 4909 giveup_current_region();
4321 4910 } else {
4322 4911 move_finger_to(new_finger);
4323 4912 }
4324 4913 }
4325 4914 }
4326 4915 // At this point we have either completed iterating over the
4327 4916 // region we were holding on to, or we have aborted.
4328 4917
4329 4918 // We then partially drain the local queue and the global stack.
4330 4919 // (Do we really need this?)
4331 4920 drain_local_queue(true);
4332 4921 drain_global_stack(true);
4333 4922
4334 4923 // Read the note on the claim_region() method on why it might
4335 4924 // return NULL with potentially more regions available for
4336 4925 // claiming and why we have to check out_of_regions() to determine
4337 4926 // whether we're done or not.
4338 4927 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4339 4928 // We are going to try to claim a new region. We should have
4340 4929 // given up on the previous one.
4341 4930 // Separated the asserts so that we know which one fires.
4342 4931 assert(_curr_region == NULL, "invariant");
4343 4932 assert(_finger == NULL, "invariant");
4344 4933 assert(_region_limit == NULL, "invariant");
4345 4934 if (_cm->verbose_low()) {
4346 4935 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4347 4936 }
4348 4937 HeapRegion* claimed_region = _cm->claim_region(_task_id);
4349 4938 if (claimed_region != NULL) {
4350 4939 // Yes, we managed to claim one
4351 4940 statsOnly( ++_regions_claimed );
4352 4941
4353 4942 if (_cm->verbose_low()) {
4354 4943 gclog_or_tty->print_cr("[%d] we successfully claimed "
4355 4944 "region "PTR_FORMAT,
4356 4945 _task_id, claimed_region);
4357 4946 }
4358 4947
4359 4948 setup_for_region(claimed_region);
4360 4949 assert(_curr_region == claimed_region, "invariant");
4361 4950 }
4362 4951 // It is important to call the regular clock here. It might take
4363 4952 // a while to claim a region if, for example, we hit a large
4364 4953 // block of empty regions. So we need to call the regular clock
4365 4954 // method once round the loop to make sure it's called
4366 4955 // frequently enough.
4367 4956 regular_clock_call();
4368 4957 }
4369 4958
4370 4959 if (!has_aborted() && _curr_region == NULL) {
4371 4960 assert(_cm->out_of_regions(),
4372 4961 "at this point we should be out of regions");
4373 4962 }
4374 4963 } while ( _curr_region != NULL && !has_aborted());
4375 4964
4376 4965 if (!has_aborted()) {
4377 4966 // We cannot check whether the global stack is empty, since other
4378 4967 // tasks might be pushing objects to it concurrently. We also cannot
4379 4968 // check if the region stack is empty because if a thread is aborting
4380 4969 // it can push a partially done region back.
4381 4970 assert(_cm->out_of_regions(),
4382 4971 "at this point we should be out of regions");
4383 4972
4384 4973 if (_cm->verbose_low()) {
4385 4974 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4386 4975 }
4387 4976
4388 4977 // Try to reduce the number of available SATB buffers so that
4389 4978 // remark has less work to do.
4390 4979 drain_satb_buffers();
4391 4980 }
4392 4981
4393 4982 // Since we've done everything else, we can now totally drain the
4394 4983 // local queue and global stack.
4395 4984 drain_local_queue(false);
4396 4985 drain_global_stack(false);
4397 4986
4398 4987 // Attempt at work stealing from other tasks' queues.
4399 4988 if (do_stealing && !has_aborted()) {
4400 4989 // We have not aborted. This means that we have finished all that
4401 4990 // we could. Let's try to do some stealing...
4402 4991
4403 4992 // We cannot check whether the global stack is empty, since other
4404 4993 // tasks might be pushing objects to it concurrently. We also cannot
4405 4994 // check if the region stack is empty because if a thread is aborting
4406 4995 // it can push a partially done region back.
4407 4996 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4408 4997 "only way to reach here");
4409 4998
4410 4999 if (_cm->verbose_low()) {
4411 5000 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4412 5001 }
4413 5002
4414 5003 while (!has_aborted()) {
4415 5004 oop obj;
4416 5005 statsOnly( ++_steal_attempts );
4417 5006
4418 5007 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4419 5008 if (_cm->verbose_medium()) {
4420 5009 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4421 5010 _task_id, (void*) obj);
4422 5011 }
4423 5012
4424 5013 statsOnly( ++_steals );
4425 5014
4426 5015 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4427 5016 "any stolen object should be marked");
4428 5017 scan_object(obj);
4429 5018
4430 5019 // And since we're towards the end, let's totally drain the
4431 5020 // local queue and global stack.
4432 5021 drain_local_queue(false);
4433 5022 drain_global_stack(false);
4434 5023 } else {
4435 5024 break;
4436 5025 }
4437 5026 }
4438 5027 }
4439 5028
4440 5029 // If we are about to wrap up and go into termination, check if we
4441 5030 // should raise the overflow flag.
4442 5031 if (do_termination && !has_aborted()) {
4443 5032 if (_cm->force_overflow()->should_force()) {
4444 5033 _cm->set_has_overflown();
4445 5034 regular_clock_call();
4446 5035 }
4447 5036 }
4448 5037
4449 5038 // We still haven't aborted. Now, let's try to get into the
4450 5039 // termination protocol.
4451 5040 if (do_termination && !has_aborted()) {
4452 5041 // We cannot check whether the global stack is empty, since other
4453 5042 // tasks might be concurrently pushing objects on it. We also cannot
4454 5043 // check if the region stack is empty because if a thread is aborting
4455 5044 // it can push a partially done region back.
4456 5045 // Separated the asserts so that we know which one fires.
4457 5046 assert(_cm->out_of_regions(), "only way to reach here");
4458 5047 assert(_task_queue->size() == 0, "only way to reach here");
4459 5048
4460 5049 if (_cm->verbose_low()) {
4461 5050 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4462 5051 }
4463 5052
4464 5053 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4465 5054 // The CMTask class also extends the TerminatorTerminator class,
4466 5055 // hence its should_exit_termination() method will also decide
4467 5056 // whether to exit the termination protocol or not.
4468 5057 bool finished = _cm->terminator()->offer_termination(this);
4469 5058 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4470 5059 _termination_time_ms +=
4471 5060 termination_end_time_ms - _termination_start_time_ms;
4472 5061
4473 5062 if (finished) {
4474 5063 // We're all done.
4475 5064
4476 5065 if (_task_id == 0) {
4477 5066 // let's allow task 0 to do this
4478 5067 if (concurrent()) {
4479 5068 assert(_cm->concurrent_marking_in_progress(), "invariant");
4480 5069 // we need to set this to false before the next
4481 5070 // safepoint. This way we ensure that the marking phase
4482 5071 // doesn't observe any more heap expansions.
4483 5072 _cm->clear_concurrent_marking_in_progress();
4484 5073 }
4485 5074 }
4486 5075
4487 5076 // We can now guarantee that the global stack is empty, since
4488 5077 // all other tasks have finished. We separated the guarantees so
4489 5078 // that, if a condition is false, we can immediately find out
4490 5079 // which one.
4491 5080 guarantee(_cm->out_of_regions(), "only way to reach here");
4492 5081 guarantee(_aborted_region.is_empty(), "only way to reach here");
4493 5082 guarantee(_cm->region_stack_empty(), "only way to reach here");
4494 5083 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4495 5084 guarantee(_task_queue->size() == 0, "only way to reach here");
4496 5085 guarantee(!_cm->has_overflown(), "only way to reach here");
4497 5086 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4498 5087 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4499 5088
4500 5089 if (_cm->verbose_low()) {
4501 5090 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4502 5091 }
4503 5092 } else {
4504 5093 // Apparently there's more work to do. Let's abort this task. The
4505 5094 // caller will restart it and we can hopefully find more things to do.
4506 5095
4507 5096 if (_cm->verbose_low()) {
4508 5097 gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4509 5098 _task_id);
4510 5099 }
4511 5100
4512 5101 set_has_aborted();
4513 5102 statsOnly( ++_aborted_termination );
4514 5103 }
4515 5104 }
4516 5105
4517 5106 // Mainly for debugging purposes to make sure that a pointer to the
4518 5107 // closure which was stack-allocated in this frame doesn't
4519 5108 // escape it by accident.
4520 5109 set_cm_oop_closure(NULL);
4521 5110 double end_time_ms = os::elapsedVTime() * 1000.0;
4522 5111 double elapsed_time_ms = end_time_ms - _start_time_ms;
4523 5112 // Update the step history.
4524 5113 _step_times_ms.add(elapsed_time_ms);
4525 5114
4526 5115 if (has_aborted()) {
4527 5116 // The task was aborted for some reason.
4528 5117
4529 5118 statsOnly( ++_aborted );
4530 5119
4531 5120 if (_has_timed_out) {
4532 5121 double diff_ms = elapsed_time_ms - _time_target_ms;
4533 5122 // Keep statistics of how well we did with respect to hitting
4534 5123 // our target only if we actually timed out (if we aborted for
4535 5124 // other reasons, then the results might get skewed).
4536 5125 _marking_step_diffs_ms.add(diff_ms);
4537 5126 }
4538 5127
4539 5128 if (_cm->has_overflown()) {
4540 5129 // This is the interesting one. We aborted because a global
4541 5130 // overflow was raised. This means we have to restart the
4542 5131 // marking phase and start iterating over regions. However, in
4543 5132 // order to do this we have to make sure that all tasks stop
4544 5133 // what they are doing and re-initialise in a safe manner. We
4545 5134 // will achieve this with the use of two barrier sync points.
4546 5135
4547 5136 if (_cm->verbose_low()) {
4548 5137 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4549 5138 }
4550 5139
4551 5140 _cm->enter_first_sync_barrier(_task_id);
4552 5141 // When we exit this sync barrier we know that all tasks have
4553 5142 // stopped doing marking work. So, it's now safe to
4554 5143 // re-initialise our data structures. At the end of this method,
4555 5144 // task 0 will clear the global data structures.
4556 5145
4557 5146 statsOnly( ++_aborted_overflow );
4558 5147
4559 5148 // We clear the local state of this task...
4560 5149 clear_region_fields();
4561 5150
4562 5151 // ...and enter the second barrier.
4563 5152 _cm->enter_second_sync_barrier(_task_id);
4564 5153 // At this point everything has been re-initialised and we're
4565 5154 // ready to restart.
4566 5155 }
4567 5156
4568 5157 if (_cm->verbose_low()) {
4569 5158 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4570 5159 "elapsed = %1.2lfms <<<<<<<<<<",
4571 5160 _task_id, _time_target_ms, elapsed_time_ms);
4572 5161 if (_cm->has_aborted()) {
4573 5162 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4574 5163 _task_id);
4575 5164 }
4576 5165 }
4577 5166 } else {
4578 5167 if (_cm->verbose_low()) {
4579 5168 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4580 5169 "elapsed = %1.2lfms <<<<<<<<<<",
4581 5170 _task_id, _time_target_ms, elapsed_time_ms);
4582 5171 }
4583 5172 }
4584 5173
4585 5174 _claimed = false;
4586 5175 }
4587 5176
4588 5177 CMTask::CMTask(int task_id,
4589 5178 ConcurrentMark* cm,
5179 + size_t* marked_bytes,
5180 + BitMap* card_bm,
4590 5181 CMTaskQueue* task_queue,
4591 5182 CMTaskQueueSet* task_queues)
4592 5183 : _g1h(G1CollectedHeap::heap()),
4593 5184 _task_id(task_id), _cm(cm),
4594 5185 _claimed(false),
4595 5186 _nextMarkBitMap(NULL), _hash_seed(17),
4596 5187 _task_queue(task_queue),
4597 5188 _task_queues(task_queues),
4598 5189 _cm_oop_closure(NULL),
4599 - _aborted_region(MemRegion()) {
5190 + _aborted_region(MemRegion()),
5191 + _marked_bytes_array(marked_bytes),
5192 + _card_bm(card_bm) {
4600 5193 guarantee(task_queue != NULL, "invariant");
4601 5194 guarantee(task_queues != NULL, "invariant");
4602 5195
4603 5196 statsOnly( _clock_due_to_scanning = 0;
4604 5197 _clock_due_to_marking = 0 );
4605 5198
4606 5199 _marking_step_diffs_ms.add(0.5);
4607 5200 }
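// The two new constructor arguments carry the point of this change:
// instead of a separate counting phase, each task accumulates liveness
// as it marks. A simplified, standalone sketch of the idea (the card
// size and all names here are assumed; this is not the patch code, and
// the vectors are assumed to be pre-sized to cover the heap):

#include <cstddef>
#include <cstdint>
#include <vector>

static const int log_card_size_bytes = 9; // assumed 512-byte cards

struct TaskLivenessSketch {
  std::vector<size_t> marked_bytes; // one slot per heap region
  std::vector<bool>   card_bm;      // one slot per card in the heap
};

void account_marked_object(TaskLivenessSketch& t, uintptr_t heap_base,
                           size_t region_index,
                           uintptr_t obj_addr, size_t obj_bytes) {
  // Track the amount of marked (live) bytes per region...
  t.marked_bytes[region_index] += obj_bytes;
  // ...and set a bit for every card the object spans.
  size_t first = (obj_addr - heap_base) >> log_card_size_bytes;
  size_t last  = (obj_addr + obj_bytes - 1 - heap_base) >> log_card_size_bytes;
  for (size_t c = first; c <= last; ++c) {
    t.card_bm[c] = true;
  }
}
// At the end of marking, the per-task arrays would be summed into the
// global totals, which is what makes a separate counting phase
// unnecessary.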
4608 5201
4609 5202 // These are formatting macros that are used below to ensure
4610 5203 // consistent formatting. The *_H_* versions are used to format the
4611 5204 // header for a particular value and they should be kept consistent
4612 5205 // with the corresponding macro. Also note that most of the macros add
4613 5206 // the necessary white space (as a prefix) which makes them a bit
4614 5207 // easier to compose.
4615 5208
4616 5209 // All the output lines are prefixed with this string to be able to
4617 5210 // identify them easily in a large log file.
4618 5211 #define G1PPRL_LINE_PREFIX "###"
4619 5212
4620 5213 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4621 5214 #ifdef _LP64
4622 5215 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4623 5216 #else // _LP64
4624 5217 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4625 5218 #endif // _LP64
4626 5219
4627 5220 // For per-region info
4628 5221 #define G1PPRL_TYPE_FORMAT " %-4s"
4629 5222 #define G1PPRL_TYPE_H_FORMAT " %4s"
4630 5223 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4631 5224 #define G1PPRL_BYTE_H_FORMAT " %9s"
4632 5225 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4633 5226 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4634 5227
4635 5228 // For summary info
4636 5229 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4637 5230 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4638 5231 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4639 5232 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4640 5233
4641 5234 G1PrintRegionLivenessInfoClosure::
4642 5235 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4643 5236 : _out(out),
4644 5237 _total_used_bytes(0), _total_capacity_bytes(0),
4645 5238 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4646 5239 _hum_used_bytes(0), _hum_capacity_bytes(0),
4647 5240 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4648 5241 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4649 5242 MemRegion g1_committed = g1h->g1_committed();
4650 5243 MemRegion g1_reserved = g1h->g1_reserved();
4651 5244 double now = os::elapsedTime();
4652 5245
4653 5246 // Print the header of the output.
4654 5247 _out->cr();
4655 5248 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4656 5249 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4657 5250 G1PPRL_SUM_ADDR_FORMAT("committed")
4658 5251 G1PPRL_SUM_ADDR_FORMAT("reserved")
4659 5252 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4660 5253 g1_committed.start(), g1_committed.end(),
4661 5254 g1_reserved.start(), g1_reserved.end(),
4662 5255 HeapRegion::GrainBytes);
4663 5256 _out->print_cr(G1PPRL_LINE_PREFIX);
4664 5257 _out->print_cr(G1PPRL_LINE_PREFIX
4665 5258 G1PPRL_TYPE_H_FORMAT
4666 5259 G1PPRL_ADDR_BASE_H_FORMAT
4667 5260 G1PPRL_BYTE_H_FORMAT
4668 5261 G1PPRL_BYTE_H_FORMAT
4669 5262 G1PPRL_BYTE_H_FORMAT
4670 5263 G1PPRL_DOUBLE_H_FORMAT,
4671 5264 "type", "address-range",
4672 5265 "used", "prev-live", "next-live", "gc-eff");
4673 5266 _out->print_cr(G1PPRL_LINE_PREFIX
4674 5267 G1PPRL_TYPE_H_FORMAT
4675 5268 G1PPRL_ADDR_BASE_H_FORMAT
4676 5269 G1PPRL_BYTE_H_FORMAT
4677 5270 G1PPRL_BYTE_H_FORMAT
4678 5271 G1PPRL_BYTE_H_FORMAT
4679 5272 G1PPRL_DOUBLE_H_FORMAT,
4680 5273 "", "",
4681 5274 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4682 5275 }
4683 5276
4684 5277 // It takes as a parameter a reference to one of the _hum_* fields,
4685 5278 // deduces the corresponding value for a region in a humongous region
4686 5279 // series (either the region size, or what's left if the _hum_* field
4687 5280 // is < the region size), and updates the _hum_* field accordingly.
4688 5281 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4689 5282 size_t bytes = 0;
4690 5283 // The > 0 check is to deal with the prev and next live bytes which
4691 5284 // could be 0.
4692 5285 if (*hum_bytes > 0) {
4693 5286 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4694 5287 *hum_bytes -= bytes;
4695 5288 }
4696 5289 return bytes;
4697 5290 }
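// A worked example (assuming 1MB regions, i.e. GrainBytes == 1MB):
// for a 2.5MB humongous object, the "starts humongous" region seeds
// the _hum_* fields with 2.5MB, and each call then takes
// min(GrainBytes, remainder):
//
//   region 1 (HUMS): min(1MB, 2.5MB) = 1.0MB, remainder 1.5MB
//   region 2 (HUMC): min(1MB, 1.5MB) = 1.0MB, remainder 0.5MB
//   region 3 (HUMC): min(1MB, 0.5MB) = 0.5MB, remainder 0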
4698 5291
4699 5292 // It deduces the values for a region in a humongous region series
4700 5293 // from the _hum_* fields and updates those accordingly. It assumes
4701 5294 // that the _hum_* fields have already been set up from the "starts
4702 5295 // humongous" region and we visit the regions in address order.
4703 5296 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4704 5297 size_t* capacity_bytes,
4705 5298 size_t* prev_live_bytes,
4706 5299 size_t* next_live_bytes) {
4707 5300 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4708 5301 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4709 5302 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4710 5303 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4711 5304 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4712 5305 }
4713 5306
4714 5307 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4715 5308 const char* type = "";
4716 5309 HeapWord* bottom = r->bottom();
4717 5310 HeapWord* end = r->end();
4718 5311 size_t capacity_bytes = r->capacity();
4719 5312 size_t used_bytes = r->used();
4720 5313 size_t prev_live_bytes = r->live_bytes();
4721 5314 size_t next_live_bytes = r->next_live_bytes();
4722 5315 double gc_eff = r->gc_efficiency();
4723 5316 if (r->used() == 0) {
4724 5317 type = "FREE";
4725 5318 } else if (r->is_survivor()) {
4726 5319 type = "SURV";
4727 5320 } else if (r->is_young()) {
4728 5321 type = "EDEN";
4729 5322 } else if (r->startsHumongous()) {
4730 5323 type = "HUMS";
4731 5324
4732 5325 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4733 5326 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4734 5327 "they should have been zeroed after the last time we used them");
4735 5328 // Set up the _hum_* fields.
4736 5329 _hum_capacity_bytes = capacity_bytes;
4737 5330 _hum_used_bytes = used_bytes;
4738 5331 _hum_prev_live_bytes = prev_live_bytes;
4739 5332 _hum_next_live_bytes = next_live_bytes;
4740 5333 get_hum_bytes(&used_bytes, &capacity_bytes,
4741 5334 &prev_live_bytes, &next_live_bytes);
4742 5335 end = bottom + HeapRegion::GrainWords;
4743 5336 } else if (r->continuesHumongous()) {
4744 5337 type = "HUMC";
4745 5338 get_hum_bytes(&used_bytes, &capacity_bytes,
4746 5339 &prev_live_bytes, &next_live_bytes);
4747 5340 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4748 5341 } else {
4749 5342 type = "OLD";
4750 5343 }
4751 5344
4752 5345 _total_used_bytes += used_bytes;
4753 5346 _total_capacity_bytes += capacity_bytes;
4754 5347 _total_prev_live_bytes += prev_live_bytes;
4755 5348 _total_next_live_bytes += next_live_bytes;
4756 5349
4757 5350 // Print a line for this particular region.
4758 5351 _out->print_cr(G1PPRL_LINE_PREFIX
4759 5352 G1PPRL_TYPE_FORMAT
4760 5353 G1PPRL_ADDR_BASE_FORMAT
4761 5354 G1PPRL_BYTE_FORMAT
4762 5355 G1PPRL_BYTE_FORMAT
4763 5356 G1PPRL_BYTE_FORMAT
4764 5357 G1PPRL_DOUBLE_FORMAT,
4765 5358 type, bottom, end,
4766 5359 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4767 5360
4768 5361 return false;
4769 5362 }
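// A usage sketch: the closure is designed to be driven over every
// region by the heap's region iterator, producing the header, one
// "###" line per region, and then (from the destructor) the summary
// footer. Hypothetical call site; the phase name is assumed:
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);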
4770 5363
4771 5364 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4772 5365 // Print the footer of the output.
4773 5366 _out->print_cr(G1PPRL_LINE_PREFIX);
4774 5367 _out->print_cr(G1PPRL_LINE_PREFIX
4775 5368 " SUMMARY"
4776 5369 G1PPRL_SUM_MB_FORMAT("capacity")
4777 5370 G1PPRL_SUM_MB_PERC_FORMAT("used")
4778 5371 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4779 5372 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4780 5373 bytes_to_mb(_total_capacity_bytes),
4781 5374 bytes_to_mb(_total_used_bytes),
4782 5375 perc(_total_used_bytes, _total_capacity_bytes),
4783 5376 bytes_to_mb(_total_prev_live_bytes),
4784 5377 perc(_total_prev_live_bytes, _total_capacity_bytes),
4785 5378 bytes_to_mb(_total_next_live_bytes),
4786 5379 perc(_total_next_live_bytes, _total_capacity_bytes));
4787 5380 _out->cr();
4788 5381 }
... 179 lines elided ...