/*
 * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/g1/g1BufferNodeList.hpp"
#include "gc/g1/g1CardTableEntryClosure.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1FreeIdSet.hpp"
#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/workgroup.hpp"
#include "memory/iterator.hpp"
#include "runtime/flags/flagSetting.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadSMR.hpp"
#include "utilities/globalCounter.inline.hpp"
#include "utilities/macros.hpp"
#include "utilities/quickSort.hpp"
#include <new>

G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
  // Dirty card queues are always active, so we create them with their
  // active field set to true.
  PtrQueue(qset, true /* active */)
{ }

G1DirtyCardQueue::~G1DirtyCardQueue() {
  flush();
}

BufferNode* const NULL_buffer = NULL;

void G1DirtyCardQueue::handle_completed_buffer() {
  assert(_buf != NULL, "precondition");
  BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
  G1DirtyCardQueueSet* dcqs = dirty_card_qset();
  if (dcqs->process_or_enqueue_completed_buffer(node)) {
    reset(); // Buffer fully processed, reset index.
  } else {
    allocate_buffer(); // Buffer enqueued, get a new one.
  }
}
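// handle_completed_buffer() is reached via PtrQueue::handle_zero_index(),
// i.e. when a mutator thread has completely filled its thread-local buffer.
// The full buffer is then either refined in place by the mutator (and the
// buffer reused) or handed off to the refinement threads (and replaced with
// a fresh buffer).
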
// Assumed to be zero by concurrent threads.
static uint par_ids_start() { return 0; }

G1DirtyCardQueueSet::G1DirtyCardQueueSet(BufferNode::Allocator* allocator) :
  PtrQueueSet(allocator),
  _primary_refinement_thread(NULL),
  _completed_buffers_head(NULL_buffer),
  _completed_buffers_tail(NULL_buffer),
  _num_cards(0),
  DEBUG_ONLY(_concurrency(0) COMMA)
  _paused(),
  _process_cards_threshold(ProcessCardsThresholdNever),
  _max_cards(MaxCardsUnlimited),
  _max_cards_padding(0),
  _free_ids(par_ids_start(), num_par_ids()),
  _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
{
  ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
  _all_active = true;
}

G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
  abandon_completed_buffers();
  FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
}

// Determines how many mutator threads can process the buffers in parallel.
uint G1DirtyCardQueueSet::num_par_ids() {
  return (uint)os::initial_active_processor_count();
}

size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
  size_t sum = 0;
  for (uint i = 0; i < num_par_ids(); ++i) {
    sum += _mutator_refined_cards_counters[i];
  }
  return sum;
}

void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
  G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
}

// _concurrency is an int that is used in a debug-only context to verify
// we're not overlapping queue operations that support concurrency with
// those which don't. The value is initially zero, meaning there are no
// relevant operations in progress. A "no concurrency" context is entered
// by atomically changing the value from 0 to -1, with an assert on failure.
// It is similarly exited by reverting the value back to 0. A "concurrent"
// context is entered by atomically incrementing the value and verifying the
// result is greater than zero (so we weren't in a "no concurrency" context).
// It is similarly exited by atomically decrementing the value and verifying
// the result is at least zero (so no mismatches).
//
// ConcurrentVerifier and NonconcurrentVerifier are helper classes to
// establish and remove such contexts.

class G1DirtyCardQueueSet::ConcurrentVerifier : public StackObj {
#ifdef ASSERT
  const G1DirtyCardQueueSet* _dcqs;

public:
  ~ConcurrentVerifier() {
    assert(Atomic::sub(&_dcqs->_concurrency, 1) >= 0, "invariant");
  }
#endif // ASSERT

public:
  ConcurrentVerifier(const G1DirtyCardQueueSet* dcqs) DEBUG_ONLY(: _dcqs(dcqs)) {
    assert(Atomic::add(&_dcqs->_concurrency, 1) > 0, "invariant");
  }
};

class G1DirtyCardQueueSet::NonconcurrentVerifier : public StackObj {
#ifdef ASSERT
  const G1DirtyCardQueueSet* _dcqs;

public:
  ~NonconcurrentVerifier() {
    assert(Atomic::cmpxchg(&_dcqs->_concurrency, -1, 0) == -1, "invariant");
  }
#endif // ASSERT

public:
  NonconcurrentVerifier(const G1DirtyCardQueueSet* dcqs) DEBUG_ONLY(: _dcqs(dcqs)) {
    assert(Atomic::cmpxchg(&_dcqs->_concurrency, 0, -1) == 0, "invariant");
  }
};
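// Usage sketch (exposition only): the verifiers act as scoped guards.
// Lock-free queue operations (append_buffers, get_completed_buffer) do
//
//   ConcurrentVerifier cv(this);  // asserts no exclusive operation underway
//   ... lock-free queue manipulation ...
//   // destructor decrements _concurrency on scope exit
//
// while operations requiring exclusive access (abandon_completed_buffers,
// take_all_completed_buffers, verify_num_cards) use NonconcurrentVerifier,
// which flips _concurrency from 0 to -1 on entry, so any overlapping use
// trips an assert. All of this compiles away in product builds.
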
// _completed_buffers_{head,tail} and _num_cards provide a lock-free FIFO
// of buffers, linked through their next() fields.
//
// The key idea to make this work is that pop (get_completed_buffer) never
// returns an element of the queue if it is the only accessible element,
// i.e. its "next" value is NULL. It is expected that there will be a
// later push/append that will make that element available to a future pop,
// or there will eventually be a complete transfer (take_all_completed_buffers).
//
// An append operation atomically exchanges the new tail with the queue tail.
// It then sets the "next" value of the old tail to the head of the list being
// appended. (It is an invariant that the old tail's "next" value is NULL.)
// But if the old tail is NULL then the queue was empty. In this case the
// head of the list being appended is instead stored in the queue head (which
// must be NULL).
//
// A push operation is just a degenerate append, where the buffer being pushed
// is both the head and the tail of the list being appended.
//
// This means there is a period between the exchange and the old tail update
// where the queue sequence is split into two parts, the list from the queue
// head to the old tail, and the list being appended. If there are concurrent
// push/append operations, each may introduce another such segment. But they
// all eventually get resolved by their respective updates of their old tail's
// "next" value.
//
// pop gets the queue head as the candidate result (returning NULL if the
// queue head was NULL), and then gets that result node's "next" value. If
// that "next" value is NULL and the queue head hasn't changed, then there
// is only one element in the (accessible) list. We can't return that
// element, because it may be the old tail of a concurrent push/append. So
// return NULL in this case. Otherwise, attempt to cmpxchg that "next"
// value into the queue head, retrying the whole operation if that fails.
// This is the "usual" lock-free pop from the head of an slist, with the
// additional restriction on taking the last element.
//
// In order to address the ABA problem for pop, a pop operation protects its
// access to the head of the list with a GlobalCounter critical section. This
// works with the buffer allocator's use of GlobalCounter synchronization to
// prevent ABA from arising in the normal buffer usage cycle. The paused
// buffer handling prevents another ABA source (see record_paused_buffer and
// enqueue_previous_paused_buffers).
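// Worked example (exposition only): suppose the queue holds A -> B, with
// head == A and tail == B. A concurrent append of C first exchanges the
// tail (tail becomes C) and only afterwards stores B->next = C. Between
// those two steps the sequence is split into the segments [A, B] and [C].
// If A has already been popped, a subsequent pop sees head == B with
// B->next == NULL and declines to take B, since B may be (and here is) the
// old tail of an in-progress append. Once B->next = C is stored, the
// segments merge and B becomes takeable again.
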
size_t G1DirtyCardQueueSet::append_buffers(BufferNode* first,
                                           BufferNode* last,
                                           size_t card_count) {
  assert(last->next() == NULL_buffer, "precondition");
  ConcurrentVerifier cv(this);
  // Increment _num_cards before adding to queue, so queue removal doesn't
  // need to deal with _num_cards possibly going negative.
  size_t new_num_cards = Atomic::add(&_num_cards, card_count);
  BufferNode* old_tail = Atomic::xchg(&_completed_buffers_tail, last);
  if (old_tail == NULL_buffer) { // Empty list.
    assert(Atomic::load(&_completed_buffers_head) == NULL_buffer, "invariant");
    Atomic::store(&_completed_buffers_head, first);
  } else {
    assert(old_tail->next() == NULL_buffer, "invariant");
    old_tail->set_next(first);
  }
  return new_num_cards;
}

void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
  assert(cbn != NULL_buffer, "precondition");
  size_t new_num_cards = append_buffers(cbn, cbn, buffer_size() - cbn->index());
  if ((_primary_refinement_thread != NULL) &&
      (new_num_cards > process_cards_threshold())) {
    _primary_refinement_thread->activate();
  }
}
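// Note on card counts: PtrQueue buffers fill from the end of the buffer
// toward the beginning, so a node's unprocessed entries occupy
// [index(), buffer_size()). buffer_size() - cbn->index() above is therefore
// the number of cards the buffer contributes to _num_cards.
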
BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
  enqueue_previous_paused_buffers();

  ConcurrentVerifier cv(this);

  // Check for insufficient cards to satisfy request. We only do this once,
  // up front, rather than on each iteration below, since the test is racy
  // regardless of when we do it.
  if (Atomic::load_acquire(&_num_cards) <= stop_at) {
    return NULL_buffer;
  }

  Thread* current_thread = Thread::current();

  while (true) {
    // Use a critical section per iteration, rather than over the whole
    // operation. We're not guaranteed to make progress, because of possible
    // contention on the queue head. Lingering in one CS the whole time could
    // lead to excessive allocation of buffers, because the CS blocks return
    // of released buffers to the free list for reuse.
    GlobalCounter::CriticalSection cs(current_thread);

    BufferNode* result = Atomic::load_acquire(&_completed_buffers_head);
    // Check for empty queue. Only needs to be done on first iteration,
    // since we never take the last element, but it's messy to make use
    // of that and we expect one iteration to be the common case.
    if (result == NULL_buffer) return result;

    BufferNode* next = Atomic::load_acquire(BufferNode::next_ptr(*result));
    if (next != NULL_buffer) {
      next = Atomic::cmpxchg(&_completed_buffers_head, result, next);
      if (next == result) {
        // Former head successfully taken; it is not the last.
        assert(Atomic::load(&_completed_buffers_tail) != result, "invariant");
        assert(result->next() != NULL_buffer, "invariant");
        result->set_next(NULL_buffer);
        Atomic::sub(&_num_cards, buffer_size() - result->index());
        return result;
      }
      // cmpxchg failed; try again.
    } else if (result == Atomic::load_acquire(&_completed_buffers_head)) {
      // If the follower of head is NULL and head hasn't changed, then only
      // the one element is currently accessible. We don't take the last
      // accessible element, because there may be a concurrent add using it.
      // The check for unchanged head isn't needed for correctness, but the
      // retry on change may sometimes let us get a buffer after all.
      return NULL_buffer;
    }
    // Head changed; try again.
  }
  // Unreachable
}

#ifdef ASSERT
void G1DirtyCardQueueSet::verify_num_cards() const {
  NonconcurrentVerifier ncv(this);
  size_t actual = 0;
  BufferNode* cur = Atomic::load(&_completed_buffers_head);
  for ( ; cur != NULL_buffer; cur = cur->next()) {
    actual += buffer_size() - cur->index();
  }
  assert(actual == Atomic::load(&_num_cards),
         "Num entries in completed buffers should be " SIZE_FORMAT " but are " SIZE_FORMAT,
         Atomic::load(&_num_cards), actual);
}
#endif // ASSERT

// Refinement processing stops early if there is a pending safepoint, to
// avoid long safepoint delays. We need to record the partially processed
// buffer for later continued processing. However, we can't simply add it
// back to the completed buffer queue, as that would introduce a new source
// of ABA for the queue. Instead, we have a pair of buffer lists (with each
// list represented by head and tail), one for each of the previous and next
// safepoints (*). When pausing the processing of a buffer for a safepoint,
// we add the buffer (lock free) to the list for the next safepoint. Before
// attempting to obtain a buffer from the queue we first transfer any buffers
// in the previous safepoint list to the queue. This is safe (doesn't
// introduce ABA) because threads cannot be in the midst of a queue pop
// across a safepoint.
//
// These paused buffer lists are conceptually an extension of the queue, and
// operations which need to deal with all of the queued buffers (such as
// concatenate_logs) also need to deal with any paused buffers. In general,
// if the safepoint performs a GC then the paused buffers will be processed
// as part of it and both lists will be empty afterward.
//
// An alternative would be to directly reenqueue a paused buffer, but only
// after first calling GlobalCounter::write_synchronize. However, that
// might noticeably delay the pending safepoint.
//
// A single paused list and a safepoint cleanup action to perform the transfer
// doesn't work because cleanup actions are not invoked for every safepoint.
//
// (*) If the safepoint does not perform a GC, the next list becomes the
// previous list after the safepoint. Since buffers are only added to the
// next list if there were threads performing refinement work, there will
// likely be refinement work done after the safepoint, which will transfer
// those buffers to the queue. However, multiple non-GC safepoints in
// succession, without intervening refinement work to perform a transfer
// (possibly through lack of time), can result in old buffers being present
// and inaccessible in the next list. This doesn't affect correctness, but
// might affect performance. The alternatives discussed above don't have
// this problem, but have problems of their own.
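// Example of the list parity scheme (assuming the safepoint id advances by
// one per safepoint): between safepoints the id is stable, so the
// next/previous selection below is stable too. With id N, paused buffers
// are recorded in list (N & 1). After the next safepoint the id becomes
// N + 1, making list (N & 1) the "previous" list, which is drained by
// enqueue_previous_paused_buffers(); newly paused buffers go to the other
// list.
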
static size_t next_paused_buffer_list_index() {
  return SafepointSynchronize::safepoint_id() & 1;
}

static size_t previous_paused_buffer_list_index() {
  return next_paused_buffer_list_index() ^ 1;
}

void G1DirtyCardQueueSet::record_paused_buffer(BufferNode* node) {
  assert_not_at_safepoint();
  assert(node->next() == NULL_buffer, "precondition");
  size_t next_index = next_paused_buffer_list_index();
  // Cards for paused buffers are included in count, to contribute to
  // notification checking after the coming safepoint if it doesn't GC.
  Atomic::add(&_num_cards, buffer_size() - node->index());
  BufferNode* old_head = Atomic::xchg(&_paused[next_index]._head, node);
  if (old_head == NULL_buffer) {
    assert(_paused[next_index]._tail == NULL, "invariant");
    _paused[next_index]._tail = node;
  } else {
    node->set_next(old_head);
  }
}

void G1DirtyCardQueueSet::enqueue_paused_buffers_aux(size_t index) {
  if (Atomic::load(&_paused[index]._head) != NULL_buffer) {
    BufferNode* paused = Atomic::xchg(&_paused[index]._head, NULL_buffer);
    if (paused != NULL_buffer) {
      BufferNode* tail = _paused[index]._tail;
      assert(tail != NULL, "invariant");
      _paused[index]._tail = NULL_buffer;
      append_buffers(paused, tail, 0); // Cards already counted when recorded.
    }
  }
}

void G1DirtyCardQueueSet::enqueue_previous_paused_buffers() {
  size_t previous_index = previous_paused_buffer_list_index();
  enqueue_paused_buffers_aux(previous_index);
}

void G1DirtyCardQueueSet::enqueue_all_paused_buffers() {
  assert_at_safepoint();
  for (size_t i = 0; i < ARRAY_SIZE(_paused); ++i) {
    enqueue_paused_buffers_aux(i);
  }
}

void G1DirtyCardQueueSet::clear_completed_buffers() {
  Atomic::store(&_completed_buffers_head, NULL_buffer);
  Atomic::store(&_completed_buffers_tail, NULL_buffer);
  Atomic::store(&_num_cards, size_t(0));
}

void G1DirtyCardQueueSet::abandon_completed_buffers() {
  enqueue_all_paused_buffers();
  verify_num_cards();
  NonconcurrentVerifier ncv(this);
  BufferNode* buffers_to_delete = Atomic::load(&_completed_buffers_head);
  clear_completed_buffers();
  while (buffers_to_delete != NULL_buffer) {
    BufferNode* bn = buffers_to_delete;
    buffers_to_delete = bn->next();
    bn->set_next(NULL_buffer);
    deallocate_buffer(bn);
  }
}
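// Activation check mirroring the one in enqueue_completed_buffer(), for use
// by callers that add cards in bulk rather than one buffer at a time.
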
void G1DirtyCardQueueSet::notify_if_necessary() {
  if ((_primary_refinement_thread != NULL) &&
      (num_cards() > process_cards_threshold())) {
    _primary_refinement_thread->activate();
  }
}

// Merge lists of buffers. The source queue set is emptied as a
// result. The queue sets must share the same allocator.
void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
  assert(allocator() == src->allocator(), "precondition");
  const G1BufferNodeList from = src->take_all_completed_buffers();
  if (from._head == NULL_buffer) return;
  append_buffers(from._head, from._tail, from._entry_count);
}

G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
#ifdef ASSERT
  for (size_t i = 0; i < ARRAY_SIZE(_paused); ++i) {
    assert(Atomic::load(&_paused[i]._head) == NULL_buffer, "precondition");
    assert(Atomic::load(&_paused[i]._tail) == NULL_buffer, "precondition");
  }
#endif // ASSERT
  verify_num_cards();
  NonconcurrentVerifier ncv(this);
  G1BufferNodeList result(Atomic::load(&_completed_buffers_head),
                          Atomic::load(&_completed_buffers_tail),
                          Atomic::load(&_num_cards));
  clear_completed_buffers();
  return result;
}
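// Helper for refining the cards in a single buffer. refine() proceeds in
// three steps: clean_cards() filters out cards that don't need refinement,
// compacting the survivors toward the end of the buffer; the survivors are
// then sorted in decreasing address order; finally refine_cleaned_cards()
// processes them, stopping early (and redirtying the remainder) if a
// safepoint is pending.
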
class G1RefineBufferedCards : public StackObj {
  BufferNode* const _node;
  CardTable::CardValue** const _node_buffer;
  const size_t _node_buffer_size;
  const uint _worker_id;
  size_t* _total_refined_cards;
  G1RemSet* const _g1rs;

  static inline int compare_card(const CardTable::CardValue* p1,
                                 const CardTable::CardValue* p2) {
    return p2 - p1;
  }

  // Sorts the cards from start_index to _node_buffer_size in *decreasing*
  // address order. Tests showed that this order is preferable to not sorting
  // or increasing address order.
  void sort_cards(size_t start_index) {
    QuickSort::sort(&_node_buffer[start_index],
                    _node_buffer_size - start_index,
                    compare_card,
                    false);
  }

  // Returns the index to the first clean card in the buffer.
  size_t clean_cards() {
    const size_t start = _node->index();
    assert(start <= _node_buffer_size, "invariant");

    // Two-fingered compaction algorithm similar to the filtering mechanism in
    // SATBMarkQueue. The main difference is that clean_card_before_refine()
    // could change the buffer element in-place.
    // We don't check for SuspendibleThreadSet::should_yield(), because
    // cleaning and redirtying the cards is fast.
    CardTable::CardValue** src = &_node_buffer[start];
    CardTable::CardValue** dst = &_node_buffer[_node_buffer_size];
    assert(src <= dst, "invariant");
    for ( ; src < dst; ++src) {
      // Search low to high for a card to keep.
      if (_g1rs->clean_card_before_refine(src)) {
        // Found keeper. Search high to low for a card to discard.
        while (src < --dst) {
          if (!_g1rs->clean_card_before_refine(dst)) {
            *dst = *src; // Replace discard with keeper.
            break;
          }
        }
        // If discard search failed (src == dst), the outer loop will also end.
      }
    }

    // dst points to the first retained clean card, or the end of the buffer
    // if all the cards were discarded.
    const size_t first_clean = dst - _node_buffer;
    assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
    // Discarded cards are considered as refined.
    *_total_refined_cards += first_clean - start;
    return first_clean;
  }

  bool refine_cleaned_cards(size_t start_index) {
    bool result = true;
    size_t i = start_index;
    for ( ; i < _node_buffer_size; ++i) {
      if (SuspendibleThreadSet::should_yield()) {
        redirty_unrefined_cards(i);
        result = false;
        break;
      }
      _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
    }
    _node->set_index(i);
    *_total_refined_cards += i - start_index;
    return result;
  }

  void redirty_unrefined_cards(size_t start) {
    for ( ; start < _node_buffer_size; ++start) {
      *_node_buffer[start] = G1CardTable::dirty_card_val();
    }
  }

public:
  G1RefineBufferedCards(BufferNode* node,
                        size_t node_buffer_size,
                        uint worker_id,
                        size_t* total_refined_cards) :
    _node(node),
    _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
    _node_buffer_size(node_buffer_size),
    _worker_id(worker_id),
    _total_refined_cards(total_refined_cards),
    _g1rs(G1CollectedHeap::heap()->rem_set()) {}

  bool refine() {
    size_t first_clean_index = clean_cards();
    if (first_clean_index == _node_buffer_size) {
      _node->set_index(first_clean_index);
      return true;
    }
    // This fence serves two purposes. First, the cards must be cleaned
    // before processing the contents. Second, we can't proceed with
    // processing a region until after the read of the region's top in
    // clean_cards(), for synchronization with possibly concurrent
    // humongous object allocation (see the comment at the StoreStore fence
    // before setting the regions' tops in the humongous allocation path).
    // It's okay if the reads of the region's top and the region's type are
    // racy with respect to each other. We need both set, in any order, to
    // proceed.
    OrderAccess::fence();
    sort_cards(first_clean_index);
    return refine_cleaned_cards(first_clean_index);
  }
};

bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
                                        uint worker_id,
                                        size_t* total_refined_cards) {
  G1RefineBufferedCards buffered_cards(node,
                                       buffer_size(),
                                       worker_id,
                                       total_refined_cards);
  return buffered_cards.refine();
}

#ifndef ASSERT
#define assert_fully_consumed(node, buffer_size)
#else
#define assert_fully_consumed(node, buffer_size)                \
  do {                                                          \
    size_t _afc_index = (node)->index();                        \
    size_t _afc_size = (buffer_size);                           \
    assert(_afc_index == _afc_size,                             \
           "Buffer was not fully consumed as claimed: index: "  \
           SIZE_FORMAT ", size: " SIZE_FORMAT,                  \
           _afc_index, _afc_size);                              \
  } while (0)
#endif // ASSERT
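// Note on the limit check in process_or_enqueue_completed_buffer() below:
// max_cards() and max_cards_padding() are read without synchronization, and
// their sum may wrap around. If it does, limit < max_cards(), so the
// (limit >= max_cards()) test fails and the padding is effectively treated
// as unlimited: the buffer is enqueued rather than processed by the mutator.
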
bool G1DirtyCardQueueSet::process_or_enqueue_completed_buffer(BufferNode* node) {
  if (Thread::current()->is_Java_thread()) {
    // If the number of cards exceeds the limit, make this Java
    // thread do the processing itself. We don't lock to access
    // buffer count or padding; it is fine to be imprecise here. The
    // add of padding could overflow, which is treated as unlimited.
    size_t limit = max_cards() + max_cards_padding();
    if ((num_cards() > limit) && (limit >= max_cards())) {
      if (mut_process_buffer(node)) {
        return true;
      }
    }
  }
  enqueue_completed_buffer(node);
  return false;
}

bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
  uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
  uint counter_index = worker_id - par_ids_start();
  size_t* counter = &_mutator_refined_cards_counters[counter_index];
  bool result = refine_buffer(node, worker_id, counter);
  _free_ids.release_par_id(worker_id); // release the id

  if (result) {
    assert_fully_consumed(node, buffer_size());
  }
  return result;
}

bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
                                                               size_t stop_at,
                                                               size_t* total_refined_cards) {
  BufferNode* node = get_completed_buffer(stop_at);
  if (node == NULL_buffer) {
    return false;
  } else if (refine_buffer(node, worker_id, total_refined_cards)) {
    assert_fully_consumed(node, buffer_size());
    // Done with fully processed buffer.
    deallocate_buffer(node);
    return true;
  } else {
    // Buffer incompletely processed because there is a pending safepoint.
    // Record partially processed buffer, to be finished later.
    record_paused_buffer(node);
    return true;
  }
}

void G1DirtyCardQueueSet::abandon_logs() {
  assert_at_safepoint();
  abandon_completed_buffers();

  // Since abandon is done only at safepoints, we can safely manipulate
  // these queues.
  struct AbandonThreadLogClosure : public ThreadClosure {
    virtual void do_thread(Thread* t) {
      G1ThreadLocalData::dirty_card_queue(t).reset();
    }
  } closure;
  Threads::threads_do(&closure);

  G1BarrierSet::shared_dirty_card_queue().reset();
}

void G1DirtyCardQueueSet::concatenate_logs() {
  // Iterate over all the threads; if we find a partial log, add it to
  // the global list of logs. Temporarily turn off the limit on the number
  // of outstanding buffers.
  assert_at_safepoint();
  size_t old_limit = max_cards();
  set_max_cards(MaxCardsUnlimited);

  struct ConcatenateThreadLogClosure : public ThreadClosure {
    virtual void do_thread(Thread* t) {
      G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
      if (!dcq.is_empty()) {
        dcq.flush();
      }
    }
  } closure;
  Threads::threads_do(&closure);

  G1BarrierSet::shared_dirty_card_queue().flush();
  enqueue_all_paused_buffers();
  verify_num_cards();
  set_max_cards(old_limit);
}
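// Illustrative caller sketch (hypothetical; should_terminate() and stop_at
// are placeholders, the real driver lives in the concurrent refinement
// threads): a refinement worker drains the queue roughly like
//
//   size_t refined = 0;
//   while (!should_terminate() &&
//          dcqs.refine_completed_buffer_concurrently(worker_id, stop_at, &refined)) {
//     // A pending safepoint makes refine_buffer stop early; the partially
//     // processed buffer has already been recorded as paused, so the
//     // worker just yields and continues afterward.
//     if (SuspendibleThreadSet::should_yield()) {
//       SuspendibleThreadSet::yield();
//     }
//   }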