open Sdiff src/hotspot/share/gc/g1

src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp

rev 57716 : [mq]: remove_cbl_mon

   1 /*
   2  * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/g1/g1BufferNodeList.hpp"
  27 #include "gc/g1/g1CardTableEntryClosure.hpp"
  28 #include "gc/g1/g1CollectedHeap.inline.hpp"

  29 #include "gc/g1/g1DirtyCardQueue.hpp"
  30 #include "gc/g1/g1FreeIdSet.hpp"
  31 #include "gc/g1/g1RedirtyCardsQueue.hpp"
  32 #include "gc/g1/g1RemSet.hpp"
  33 #include "gc/g1/g1ThreadLocalData.hpp"
  34 #include "gc/g1/heapRegionRemSet.hpp"
  35 #include "gc/shared/suspendibleThreadSet.hpp"
  36 #include "gc/shared/workgroup.hpp"
  37 #include "memory/iterator.hpp"
  38 #include "runtime/flags/flagSetting.hpp"
  39 #include "runtime/mutexLocker.hpp"
  40 #include "runtime/orderAccess.hpp"
  41 #include "runtime/os.hpp"
  42 #include "runtime/safepoint.hpp"
  43 #include "runtime/thread.inline.hpp"
  44 #include "runtime/threadSMR.hpp"


  45 #include "utilities/quickSort.hpp"

  46 
  47 G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
  48   // Dirty card queues are always active, so we create them with their
  49   // active field set to true.
  50   PtrQueue(qset, true /* active */)
  51 { }
  52 
  53 G1DirtyCardQueue::~G1DirtyCardQueue() {
  54   flush();
  55 }
  56 


  57 void G1DirtyCardQueue::handle_completed_buffer() {
  58   assert(_buf != NULL, "precondition");
  59   BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
  60   G1DirtyCardQueueSet* dcqs = dirty_card_qset();
  61   if (dcqs->process_or_enqueue_completed_buffer(node)) {
  62     reset();                    // Buffer fully processed, reset index.
  63   } else {
  64     allocate_buffer();          // Buffer enqueued, get a new one.
  65   }
  66 }
  67 
  68 // Assumed to be zero by concurrent threads.
  69 static uint par_ids_start() { return 0; }
  70 
  71 G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon,
  72                                          BufferNode::Allocator* allocator) :
  73   PtrQueueSet(allocator),
  74   _cbl_mon(cbl_mon),
  75   _completed_buffers_head(NULL),
  76   _completed_buffers_tail(NULL),
  77   _num_cards(0),


  78   _process_cards_threshold(ProcessCardsThresholdNever),
  79   _process_completed_buffers(false),
  80   _max_cards(MaxCardsUnlimited),
  81   _max_cards_padding(0),
  82   _free_ids(par_ids_start(), num_par_ids()),
  83   _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
  84 {
  85   ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
  86   _all_active = true;
  87 }
  88 
  89 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
  90   abandon_completed_buffers();
  91   FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
  92 }
  93 
  94 // Determines how many mutator threads can process the buffers in parallel.
  95 uint G1DirtyCardQueueSet::num_par_ids() {
  96   return (uint)os::initial_active_processor_count();
  97 }
  98 
  99 size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
 100   size_t sum = 0;
 101   for (uint i = 0; i < num_par_ids(); ++i) {
 102     sum += _mutator_refined_cards_counters[i];
 103   }
 104   return sum;
 105 }
 106 
 107 void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
 108   G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
 109 }
 110 
 111 void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
 112   MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
 113   cbn->set_next(NULL);
 114   if (_completed_buffers_tail == NULL) {
 115     assert(_completed_buffers_head == NULL, "Well-formedness");
 116     _completed_buffers_head = cbn;
 117     _completed_buffers_tail = cbn;






























































































 118   } else {
 119     _completed_buffers_tail->set_next(cbn);
 120     _completed_buffers_tail = cbn;
 121   }
 122   _num_cards += buffer_size() - cbn->index();

 123 
 124   if (!process_completed_buffers() &&
 125       (num_cards() > process_cards_threshold())) {
 126     set_process_completed_buffers(true);
 127     ml.notify_all();


 128   }
 129   verify_num_cards();
 130 }
 131 
 132 BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
 133   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 134 
 135   if (num_cards() <= stop_at) {
 136     return NULL;
 137   }
 138 
 139   assert(num_cards() > 0, "invariant");
 140   assert(_completed_buffers_head != NULL, "invariant");
 141   assert(_completed_buffers_tail != NULL, "invariant");
 142 
 143   BufferNode* bn = _completed_buffers_head;
 144   _num_cards -= buffer_size() - bn->index();
 145   _completed_buffers_head = bn->next();
 146   if (_completed_buffers_head == NULL) {
 147     assert(num_cards() == 0, "invariant");
 148     _completed_buffers_tail = NULL;
 149     set_process_completed_buffers(false);






















 150   }
 151   verify_num_cards();
 152   bn->set_next(NULL);
 153   return bn;









 154 }
 155 
 156 #ifdef ASSERT
 157 void G1DirtyCardQueueSet::verify_num_cards() const {

 158   size_t actual = 0;
 159   BufferNode* cur = _completed_buffers_head;
 160   while (cur != NULL) {
 161     actual += buffer_size() - cur->index();
 162     cur = cur->next();
 163   }
 164   assert(actual == _num_cards,
 165          "Num entries in completed buffers should be " SIZE_FORMAT " but are " SIZE_FORMAT,
 166          _num_cards, actual);
 167 }
 168 #endif
 169 
 170 void G1DirtyCardQueueSet::abandon_completed_buffers() {
 171   BufferNode* buffers_to_delete = NULL;
 172   {
 173     MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 174     buffers_to_delete = _completed_buffers_head;
 175     _completed_buffers_head = NULL;
 176     _completed_buffers_tail = NULL;
 177     _num_cards = 0;
 178     set_process_completed_buffers(false);




























































 179   }
 180   while (buffers_to_delete != NULL) {



























 181     BufferNode* bn = buffers_to_delete;
 182     buffers_to_delete = bn->next();
 183     bn->set_next(NULL);
 184     deallocate_buffer(bn);
 185   }
 186 }
 187 
 188 void G1DirtyCardQueueSet::notify_if_necessary() {
 189   MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
 190   if (num_cards() > process_cards_threshold()) {
 191     set_process_completed_buffers(true);
 192     ml.notify_all();
 193   }
 194 }
 195 
 196 // Merge lists of buffers. Notify the processing threads.
 197 // The source queue is emptied as a result. The queues
 198 // must share the monitor.
 199 void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
 200   assert(allocator() == src->allocator(), "precondition");
 201   const G1BufferNodeList from = src->take_all_completed_buffers();
 202   if (from._head == NULL) return;
 203 
 204   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 205   if (_completed_buffers_tail == NULL) {
 206     assert(_completed_buffers_head == NULL, "Well-formedness");
 207     _completed_buffers_head = from._head;
 208     _completed_buffers_tail = from._tail;
 209   } else {
 210     assert(_completed_buffers_head != NULL, "Well formedness");
 211     _completed_buffers_tail->set_next(from._head);
 212     _completed_buffers_tail = from._tail;
 213   }
 214   _num_cards += from._entry_count;
 215 
 216   assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
 217          _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
 218          "Sanity");
 219   verify_num_cards();
 220 }
 221 
 222 G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
 223   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 224   G1BufferNodeList result(_completed_buffers_head, _completed_buffers_tail, _num_cards);
 225   _completed_buffers_head = NULL;
 226   _completed_buffers_tail = NULL;
 227   _num_cards = 0;







 228   return result;
 229 }
 230 
 231 class G1RefineBufferedCards : public StackObj {
 232   BufferNode* const _node;
 233   CardTable::CardValue** const _node_buffer;
 234   const size_t _node_buffer_size;
 235   const uint _worker_id;
 236   size_t* _total_refined_cards;
 237   G1RemSet* const _g1rs;
 238 
 239   static inline int compare_card(const CardTable::CardValue* p1,
 240                                  const CardTable::CardValue* p2) {
 241     return p2 - p1;
 242   }
 243 
 244   // Sorts the cards from start_index to _node_buffer_size in *decreasing*
 245   // address order. Tests showed that this order is preferable to not sorting
 246   // or increasing address order.
 247   void sort_cards(size_t start_index) {

 382   return false;
 383 }
 384 
 385 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
 386   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
 387   uint counter_index = worker_id - par_ids_start();
 388   size_t* counter = &_mutator_refined_cards_counters[counter_index];
 389   bool result = refine_buffer(node, worker_id, counter);
 390   _free_ids.release_par_id(worker_id); // release the id
 391 
 392   if (result) {
 393     assert_fully_consumed(node, buffer_size());
 394   }
 395   return result;
 396 }
 397 
 398 bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
 399                                                                size_t stop_at,
 400                                                                size_t* total_refined_cards) {
 401   BufferNode* node = get_completed_buffer(stop_at);
 402   if (node == NULL) {
 403     return false;
 404   } else if (refine_buffer(node, worker_id, total_refined_cards)) {
 405     assert_fully_consumed(node, buffer_size());
 406     // Done with fully processed buffer.
 407     deallocate_buffer(node);
 408     return true;
 409   } else {
 410     // Return partially processed buffer to the queue.
 411     enqueue_completed_buffer(node);

 412     return true;
 413   }
 414 }
 415 
 416 void G1DirtyCardQueueSet::abandon_logs() {
 417   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
 418   abandon_completed_buffers();
 419 
 420   // Since abandon is done only at safepoints, we can safely manipulate
 421   // these queues.
 422   struct AbandonThreadLogClosure : public ThreadClosure {
 423     virtual void do_thread(Thread* t) {
 424       G1ThreadLocalData::dirty_card_queue(t).reset();
 425     }
 426   } closure;
 427   Threads::threads_do(&closure);
 428 
 429   G1BarrierSet::shared_dirty_card_queue().reset();
 430 }
 431 
 432 void G1DirtyCardQueueSet::concatenate_logs() {
 433   // Iterate over all the threads, if we find a partial log add it to
 434   // the global list of logs.  Temporarily turn off the limit on the number
 435   // of outstanding buffers.
 436   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
 437   size_t old_limit = max_cards();
 438   set_max_cards(MaxCardsUnlimited);
 439 
 440   struct ConcatenateThreadLogClosure : public ThreadClosure {
 441     virtual void do_thread(Thread* t) {
 442       G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
 443       if (!dcq.is_empty()) {
 444         dcq.flush();
 445       }
 446     }
 447   } closure;
 448   Threads::threads_do(&closure);
 449 
 450   G1BarrierSet::shared_dirty_card_queue().flush();


 451   set_max_cards(old_limit);
 452 }

   1 /*
   2  * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/g1/g1BufferNodeList.hpp"
  27 #include "gc/g1/g1CardTableEntryClosure.hpp"
  28 #include "gc/g1/g1CollectedHeap.inline.hpp"
  29 #include "gc/g1/g1ConcurrentRefineThread.hpp"
  30 #include "gc/g1/g1DirtyCardQueue.hpp"
  31 #include "gc/g1/g1FreeIdSet.hpp"
  32 #include "gc/g1/g1RedirtyCardsQueue.hpp"
  33 #include "gc/g1/g1RemSet.hpp"
  34 #include "gc/g1/g1ThreadLocalData.hpp"
  35 #include "gc/g1/heapRegionRemSet.hpp"
  36 #include "gc/shared/suspendibleThreadSet.hpp"
  37 #include "gc/shared/workgroup.hpp"
  38 #include "memory/iterator.hpp"
  39 #include "runtime/flags/flagSetting.hpp"
  40 #include "runtime/mutexLocker.hpp"
  41 #include "runtime/orderAccess.hpp"
  42 #include "runtime/os.hpp"
  43 #include "runtime/safepoint.hpp"
  44 #include "runtime/thread.inline.hpp"
  45 #include "runtime/threadSMR.hpp"
  46 #include "utilities/globalCounter.inline.hpp"
  47 #include "utilities/macros.hpp"
  48 #include "utilities/quickSort.hpp"
  49 #include <new>
  50 
  51 G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
  52   // Dirty card queues are always active, so we create them with their
  53   // active field set to true.
  54   PtrQueue(qset, true /* active */)
  55 { }
  56 
  57 G1DirtyCardQueue::~G1DirtyCardQueue() {
  58   flush();
  59 }
  60 
  61 BufferNode* const NULL_buffer = NULL;
  62 
  63 void G1DirtyCardQueue::handle_completed_buffer() {
  64   assert(_buf != NULL, "precondition");
  65   BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
  66   G1DirtyCardQueueSet* dcqs = dirty_card_qset();
  67   if (dcqs->process_or_enqueue_completed_buffer(node)) {
  68     reset();                    // Buffer fully processed, reset index.
  69   } else {
  70     allocate_buffer();          // Buffer enqueued, get a new one.
  71   }
  72 }
  73 
  74 // Assumed to be zero by concurrent threads.
  75 static uint par_ids_start() { return 0; }
  76 
  77 G1DirtyCardQueueSet::G1DirtyCardQueueSet(BufferNode::Allocator* allocator) :

  78   PtrQueueSet(allocator),
  79   _primary_refinement_thread(NULL),
  80   _completed_buffers_head(NULL_buffer),
  81   _completed_buffers_tail(NULL_buffer),
  82   _num_cards(0),
  83   DEBUG_ONLY(_concurrency(0) COMMA)
  84   _paused(),
  85   _process_cards_threshold(ProcessCardsThresholdNever),

  86   _max_cards(MaxCardsUnlimited),
  87   _max_cards_padding(0),
  88   _free_ids(par_ids_start(), num_par_ids()),
  89   _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
  90 {
  91   ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
  92   _all_active = true;
  93 }
  94 
  95 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
  96   abandon_completed_buffers();
  97   FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
  98 }
  99 
 100 // Determines how many mutator threads can process the buffers in parallel.
 101 uint G1DirtyCardQueueSet::num_par_ids() {
 102   return (uint)os::initial_active_processor_count();
 103 }
 104 
 105 size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
 106   size_t sum = 0;
 107   for (uint i = 0; i < num_par_ids(); ++i) {
 108     sum += _mutator_refined_cards_counters[i];
 109   }
 110   return sum;
 111 }
 112 
 113 void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
 114   G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
 115 }
 116 
 117 // _concurrency is an int that is used in debug-only context to verify
 118 // we're not overlapping queue operations that support concurrency with
 119 // those which don't.  The value is initially zero, meaning there are no
 120 // relevant operations in progress.  A "no concurrency" context is entered
 121 // by atomically changing the value from 0 to -1, with an assert on failure.
 122 // It is similarly exited by reverting the value back to 0.  A "concurrent"
 123 // context is entered by atomically incrementing the value and verifying the
 124 // result is greater than zero (so we weren't in a "no concurrency" context).
 125 // It is similarly exited by atomically decrementing the value and verifying
 126 // the result is at least zero (so no mismatches).
 127 //
 128 // ConcurrentVerifier and NonconcurrentVerifier are helper classes to
 129 // establish and remove such contexts.
 130 
 131 class G1DirtyCardQueueSet::ConcurrentVerifier : public StackObj {
 132 #ifdef ASSERT
 133   const G1DirtyCardQueueSet* _dcqs;
 134 
 135 public:
 136   ~ConcurrentVerifier() {
 137     assert(Atomic::sub(&_dcqs->_concurrency, 1) >= 0, "invariant");
 138   }
 139 #endif // ASSERT
 140 
 141 public:
 142   ConcurrentVerifier(const G1DirtyCardQueueSet* dcqs) DEBUG_ONLY(: _dcqs(dcqs)) {
 143     assert(Atomic::add(&_dcqs->_concurrency, 1) > 0, "invariant");
 144   }
 145 };
 146 
 147 class G1DirtyCardQueueSet::NonconcurrentVerifier : public StackObj {
 148 #ifdef ASSERT
 149   const G1DirtyCardQueueSet* _dcqs;
 150 
 151 public:
 152   ~NonconcurrentVerifier() {
 153     assert(Atomic::cmpxchg(&_dcqs->_concurrency, -1, 0) == -1, "invariant");
 154   }
 155 #endif // ASSERT
 156 
 157 public:
 158   NonconcurrentVerifier(const G1DirtyCardQueueSet* dcqs) DEBUG_ONLY(: _dcqs(dcqs)) {
 159     assert(Atomic::cmpxchg(&_dcqs->_concurrency, 0, -1) == 0, "invariant");
 160   }
 161 };
 162 
 163 // _completed_buffers_{head,tail} and _num_cards provide a lock-free FIFO
 164 // of buffers, linked through their next() fields.
 165 //
 166 // The key idea to make this work is that pop (get_completed_buffer) never
 167 // returns an element of the queue if it is the only accessible element,
 168 // e.g.  its "next" value is NULL.  It is expected that there will be a
 169 // later push/append that will make that element available to a future pop,
 170 // or there will eventually be a complete transfer (take_all_completed_buffers).
 171 //
 172 // An append operation atomically exchanges the new tail with the queue tail.
 173 // It then sets the "next" value of the old tail to the head of the list being
 174 // appended.  (It is an invariant that the old tail's "next" value is NULL.)
 175 // But if the old tail is NULL then the queue was empty.  In this case the
 176 // head of the list being appended is instead stored in the queue head (which
 177 // must be NULL).
 178 //
 179 // A push operation is just a degenerate append, where the buffer being pushed
 180 // is both the head and the tail of the list being appended.
 181 //
 182 // This means there is a period between the exchange and the old tail update
 183 // where the queue sequence is split into two parts, the list from the queue
 184 // head to the old tail, and the list being appended.  If there are concurrent
 185 // push/append operations, each may introduce another such segment.  But they
 186 // all eventually get resolved by their respective updates of their old tail's
 187 // "next" value.
 188 //
 189 // pop gets the queue head as the candidate result (returning NULL if the
 190 // queue head was NULL), and then gets that result node's "next" value.  If
 191 // that "next" value is NULL and the queue head hasn't changed, then there
 192 // is only one element in the (accessible) list.  We can't return that
 193 // element, because it may be the old tail of a concurrent push/append.  So
 194 // return NULL in this case.  Otherwise, attempt to cmpxchg that "next"
 195 // value into the queue head, retrying the whole operation if that fails.
 196 // This is the "usual" lock-free pop from head of slist, with the additional
 197 // restriction on taking the last element.
 198 //
 199 // In order to address the ABA problem for pop, a pop operation protects its
 200 // access to the head of the list with a GlobalCounter critical section. This
 201 // works with the buffer allocator's use of GlobalCounter synchronization to
 202 // prevent ABA from arising in the normal buffer usage cycle.  The paused
 203 // buffer handling prevents another ABA source (see record_paused_buffer and
 204 // enqueue_previous_paused_buffers).
 205 
 206 size_t G1DirtyCardQueueSet::append_buffers(BufferNode* first,
 207                                            BufferNode* last,
 208                                            size_t card_count) {
 209   assert(last->next() == NULL_buffer, "precondition");
 210   ConcurrentVerifier cv(this);
 211   // Increment _num_cards before adding to queue, so queue removal doesn't
 212   // need to deal with _num_cards possibly going negative.
 213   size_t new_num_cards = Atomic::add(&_num_cards, card_count);
 214   BufferNode* old_tail = Atomic::xchg(&_completed_buffers_tail, last);
 215   if (old_tail == NULL_buffer) { // Empty list.
 216     assert(Atomic::load(&_completed_buffers_head) == NULL_buffer, "invariant");
 217     Atomic::store(&_completed_buffers_head, first);
 218   } else {
 219     assert(old_tail->next() == NULL_buffer, "invariant");
 220     old_tail->set_next(first);
 221   }
 222   return new_num_cards;
 223 }
 224 
 225 void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
 226   assert(cbn != NULL_buffer, "precondition");
 227   size_t new_num_cards = append_buffers(cbn, cbn, buffer_size() - cbn->index());
 228   if ((_primary_refinement_thread != NULL) &&
 229       (new_num_cards > process_cards_threshold())) {
 230     _primary_refinement_thread->activate();
 231   }

 232 }
 233 
 234 BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
 235   enqueue_previous_paused_buffers();
 236 
 237   ConcurrentVerifier cv(this);


 238 
 239   // Check for unsufficient cards to satisfy request.  We only do this once,
 240   // up front, rather than on each iteration below, since the test is racy
 241   // regardless of when we do it.
 242   if (Atomic::load_acquire(&_num_cards) <= stop_at) {
 243     return NULL_buffer;
 244   }
 245 
 246   Thread* current_thread = Thread::current();
 247 
 248   while (true) {
 249     // Use a critical section per iteration, rather than over the whole
 250     // operation.  We're not guaranteed to make progress, because of possible
 251     // contention on the queue head.  Lingering in one CS the whole time could
 252     // lead to excessive allocation of buffers, because the CS blocks return
 253     // of released buffers to the free list for reuse.
 254     GlobalCounter::CriticalSection cs(current_thread);
 255 
 256     BufferNode* result = Atomic::load_acquire(&_completed_buffers_head);
 257     // Check for empty queue.  Only needs to be done on first iteration,
 258     // since we never take the last element, but it's messy to make use
 259     // of that and we expect one iteration to be the common case.
 260     if (result == NULL_buffer) return result;
 261 
 262     BufferNode* next = Atomic::load_acquire(BufferNode::next_ptr(*result));
 263     if (next != NULL_buffer) {
 264       next = Atomic::cmpxchg(&_completed_buffers_head, result, next);
 265       if (next == result) {
 266         // Former head successfully taken; it is not the last.
 267         assert(Atomic::load(&_completed_buffers_tail) != result, "invariant");
 268         assert(result->next() != NULL_buffer, "invariant");
 269         result->set_next(NULL_buffer);
 270         Atomic::sub(&_num_cards, buffer_size() - result->index());
 271         return result;
 272       }
 273       // cmpxchg failed; try again.
 274     } else if (result == Atomic::load_acquire(&_completed_buffers_head)) {
 275       // If follower of head is NULL and head hasn't changed, then only
 276       // the one element is currently accessible.  We don't take the last
 277       // accessible element, because there may be a concurrent add using it.
 278       // The check for unchanged head isn't needed for correctness, but the
 279       // retry on change may sometimes let us get a buffer after all.
 280       return NULL_buffer;
 281     }
 282     // Head changed; try again.
 283   }
 284   // Unreachable
 285 }
 286 
 287 #ifdef ASSERT
 288 void G1DirtyCardQueueSet::verify_num_cards() const {
 289   NonconcurrentVerifier ncv(this);
 290   size_t actual = 0;
 291   BufferNode* cur = Atomic::load(&_completed_buffers_head);
 292   for ( ; cur != NULL_buffer; cur = cur->next()) {
 293     actual += buffer_size() - cur->index();

 294   }
 295   assert(actual == Atomic::load(&_num_cards),
 296          "Num entries in completed buffers should be " SIZE_FORMAT " but are " SIZE_FORMAT,
 297          Atomic::load(&_num_cards), actual);
 298 }
 299 #endif // ASSERT
 300 
 301 // Refinement processing stops early if there is a pending safepoint, to
 302 // avoid long delays to safepoint.  We need to record the partially
 303 // processed buffer for later continued processing.  However, we can't
 304 // simply add it back to the completed buffer queue, as that would introduce
 305 // a new source of ABA for the queue.  Instead, we have a pair of buffer
 306 // lists (with each list represented by head and tail), one for each of the
 307 // previous and next safepoints (*).  When pausing the processing of a
 308 // buffer for a safepoint, we add the buffer (lock free) to the list for the
 309 // next safepoint.  Before attempting to obtain a buffer from the queue we
 310 // first transfer any buffers in the previous safepoint list to the queue.
 311 // This is safe (doesn't introduce ABA) because threads cannot be in the
 312 // midst of a queue pop across a safepoint.
 313 //
 314 // These paused buffer lists are conceptually an extension of the queue, and
 315 // operations which need to deal with all of the queued buffers (such as
 316 // concatenate_logs) also need to deal with any paused buffers.  In general,
 317 // if the safepoint performs a GC then the paused buffers will be processed
 318 // as part of it and both lists will be empty afterward.
 319 //
 320 // An alternative would be to directly reenqueue a paused buffer, but only
 321 // after first calling GlobalCounter::write_synchronize.  However, that
 322 // might noticeably delay the pending safepoint.
 323 //
 324 // A single paused list and a safepoint cleanup action to perform the transfer
 325 // doesn't work because cleanup actions are not invoked for every safepoint.
 326 //
 327 // (*) If the safepoint does not perform a GC, the next list becomes the
 328 // previous list after the safepoint.  Since buffers are only added to the
 329 // next list if there were threads performing refinement work, there will
 330 // likely be refinement work done after the safepoint, which will transfer
 331 // those buffers to the queue.  However, multiple non-GC safepoints in
 332 // succession, without intervening refinement work to perform a transfer
 333 // (possibly through lack of time), can result in old buffers being present
 334 // and inaccessible in the next list.  This doesn't affect correctness, but
 335 // might affect performance.  The alternatives discussed above don't have
 336 // this problem, but have problems of their own.
 337 
 338 static size_t next_paused_buffer_list_index() {
 339   return SafepointSynchronize::safepoint_id() & 1;
 340 }
 341 
 342 static size_t previous_paused_buffer_list_index() {
 343   return next_paused_buffer_list_index() ^ 1;
 344 }
 345 
 346 void G1DirtyCardQueueSet::record_paused_buffer(BufferNode* node) {
 347   assert_not_at_safepoint();
 348   assert(node->next() == NULL_buffer, "precondition");
 349   size_t next_index = next_paused_buffer_list_index();
 350   // Cards for paused buffers are included in count, to contribute to
 351   // notification checking after the coming safepoint if it doesn't GC.
 352   Atomic::add(&_num_cards, buffer_size() - node->index());
 353   BufferNode* old_head = Atomic::xchg(&_paused[next_index]._head, node);
 354   if (old_head == NULL_buffer) {
 355     assert(_paused[next_index]._tail == NULL, "invariant");
 356     _paused[next_index]._tail = node;
 357   } else {
 358     node->set_next(old_head);
 359   }
 360 }
 361 
 362 void G1DirtyCardQueueSet::enqueue_paused_buffers_aux(size_t index) {
 363   if (Atomic::load(&_paused[index]._head) != NULL_buffer) {
 364     BufferNode* paused = Atomic::xchg(&_paused[index]._head, NULL_buffer);
 365     if (paused != NULL_buffer) {
 366       BufferNode* tail = _paused[index]._tail;
 367       assert(tail != NULL, "invariant");
 368       _paused[index]._tail = NULL_buffer;
 369       append_buffers(paused, tail, 0); // Cards already counted when recorded.
 370     }
 371   }
 372 }
 373 
 374 void G1DirtyCardQueueSet::enqueue_previous_paused_buffers() {
 375   size_t previous_index = previous_paused_buffer_list_index();
 376   enqueue_paused_buffers_aux(previous_index);
 377 }
 378 
 379 void G1DirtyCardQueueSet::enqueue_all_paused_buffers() {
 380   assert_at_safepoint();
 381   for (size_t i = 0; i < ARRAY_SIZE(_paused); ++i) {
 382     enqueue_paused_buffers_aux(i);
 383   }
 384 }
 385 
 386 void G1DirtyCardQueueSet::clear_completed_buffers() {
 387   Atomic::store(&_completed_buffers_head, NULL_buffer);
 388   Atomic::store(&_completed_buffers_tail, NULL_buffer);
 389   Atomic::store(&_num_cards, size_t(0));
 390 }
 391 
 392 void G1DirtyCardQueueSet::abandon_completed_buffers() {
 393   enqueue_all_paused_buffers();
 394   verify_num_cards();
 395   NonconcurrentVerifier ncv(this);
 396   BufferNode* buffers_to_delete = Atomic::load(&_completed_buffers_head);
 397   clear_completed_buffers();
 398   while (buffers_to_delete != NULL_buffer) {
 399     BufferNode* bn = buffers_to_delete;
 400     buffers_to_delete = bn->next();
 401     bn->set_next(NULL_buffer);
 402     deallocate_buffer(bn);
 403   }
 404 }
 405 
 406 void G1DirtyCardQueueSet::notify_if_necessary() {
 407   if ((_primary_refinement_thread != NULL) &&
 408       (num_cards() > process_cards_threshold())) {
 409     _primary_refinement_thread->activate();

 410   }
 411 }
 412 
 413 // Merge lists of buffers. The source queue set is emptied as a
 414 // result. The queue sets must share the same allocator.

 415 void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
 416   assert(allocator() == src->allocator(), "precondition");
 417   const G1BufferNodeList from = src->take_all_completed_buffers();
 418   if (from._head == NULL_buffer) return;
 419   append_buffers(from._head, from._tail, from._entry_count);
















 420 }
 421 
 422 G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
 423 #ifdef ASSERT
 424   for (size_t i = 0; i < ARRAY_SIZE(_paused); ++i) {
 425     assert(Atomic::load(&_paused[i]._head) == NULL_buffer, "precondition");
 426     assert(Atomic::load(&_paused[i]._tail) == NULL_buffer, "precondition");
 427   }
 428 #endif // ASSERT
 429   verify_num_cards();
 430   NonconcurrentVerifier ncv(this);
 431   G1BufferNodeList result(Atomic::load(&_completed_buffers_head),
 432                           Atomic::load(&_completed_buffers_tail),
 433                           Atomic::load(&_num_cards));
 434   clear_completed_buffers();
 435   return result;
 436 }
 437 
 438 class G1RefineBufferedCards : public StackObj {
 439   BufferNode* const _node;
 440   CardTable::CardValue** const _node_buffer;
 441   const size_t _node_buffer_size;
 442   const uint _worker_id;
 443   size_t* _total_refined_cards;
 444   G1RemSet* const _g1rs;
 445 
 446   static inline int compare_card(const CardTable::CardValue* p1,
 447                                  const CardTable::CardValue* p2) {
 448     return p2 - p1;
 449   }
 450 
 451   // Sorts the cards from start_index to _node_buffer_size in *decreasing*
 452   // address order. Tests showed that this order is preferable to not sorting
 453   // or increasing address order.
 454   void sort_cards(size_t start_index) {

 589   return false;
 590 }
 591 
 592 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
 593   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
 594   uint counter_index = worker_id - par_ids_start();
 595   size_t* counter = &_mutator_refined_cards_counters[counter_index];
 596   bool result = refine_buffer(node, worker_id, counter);
 597   _free_ids.release_par_id(worker_id); // release the id
 598 
 599   if (result) {
 600     assert_fully_consumed(node, buffer_size());
 601   }
 602   return result;
 603 }
 604 
 605 bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
 606                                                                size_t stop_at,
 607                                                                size_t* total_refined_cards) {
 608   BufferNode* node = get_completed_buffer(stop_at);
 609   if (node == NULL_buffer) {
 610     return false;
 611   } else if (refine_buffer(node, worker_id, total_refined_cards)) {
 612     assert_fully_consumed(node, buffer_size());
 613     // Done with fully processed buffer.
 614     deallocate_buffer(node);
 615     return true;
 616   } else {
 617     // Buffer incompletely processed because there is a pending safepoint.
 618     // Record partially processed buffer, to be finished later.
 619     record_paused_buffer(node);
 620     return true;
 621   }
 622 }
 623 
 624 void G1DirtyCardQueueSet::abandon_logs() {
 625   assert_at_safepoint();
 626   abandon_completed_buffers();
 627 
 628   // Since abandon is done only at safepoints, we can safely manipulate
 629   // these queues.
 630   struct AbandonThreadLogClosure : public ThreadClosure {
 631     virtual void do_thread(Thread* t) {
 632       G1ThreadLocalData::dirty_card_queue(t).reset();
 633     }
 634   } closure;
 635   Threads::threads_do(&closure);
 636 
 637   G1BarrierSet::shared_dirty_card_queue().reset();
 638 }
 639 
 640 void G1DirtyCardQueueSet::concatenate_logs() {
 641   // Iterate over all the threads, if we find a partial log add it to
 642   // the global list of logs.  Temporarily turn off the limit on the number
 643   // of outstanding buffers.
 644   assert_at_safepoint();
 645   size_t old_limit = max_cards();
 646   set_max_cards(MaxCardsUnlimited);
 647 
 648   struct ConcatenateThreadLogClosure : public ThreadClosure {
 649     virtual void do_thread(Thread* t) {
 650       G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
 651       if (!dcq.is_empty()) {
 652         dcq.flush();
 653       }
 654     }
 655   } closure;
 656   Threads::threads_do(&closure);
 657 
 658   G1BarrierSet::shared_dirty_card_queue().flush();
 659   enqueue_all_paused_buffers();
 660   verify_num_cards();
 661   set_max_cards(old_limit);
 662 }

< prev index next >