1 /*
2 * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "gc/g1/g1BufferNodeList.hpp"
27 #include "gc/g1/g1CardTableEntryClosure.hpp"
28 #include "gc/g1/g1CollectedHeap.inline.hpp"
29 #include "gc/g1/g1DirtyCardQueue.hpp"
30 #include "gc/g1/g1FreeIdSet.hpp"
31 #include "gc/g1/g1RedirtyCardsQueue.hpp"
32 #include "gc/g1/g1RemSet.hpp"
33 #include "gc/g1/g1ThreadLocalData.hpp"
34 #include "gc/g1/heapRegionRemSet.hpp"
35 #include "gc/shared/suspendibleThreadSet.hpp"
36 #include "gc/shared/workgroup.hpp"
37 #include "memory/iterator.hpp"
38 #include "runtime/flags/flagSetting.hpp"
39 #include "runtime/mutexLocker.hpp"
40 #include "runtime/orderAccess.hpp"
41 #include "runtime/os.hpp"
42 #include "runtime/safepoint.hpp"
43 #include "runtime/thread.inline.hpp"
44 #include "runtime/threadSMR.hpp"
45 #include "utilities/quickSort.hpp"
46
47 G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
48 // Dirty card queues are always active, so we create them with their
49 // active field set to true.
50 PtrQueue(qset, true /* active */)
51 { }
52
53 G1DirtyCardQueue::~G1DirtyCardQueue() {
54 flush();
55 }
56
57 void G1DirtyCardQueue::handle_completed_buffer() {
58 assert(_buf != NULL, "precondition");
59 BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
60 G1DirtyCardQueueSet* dcqs = dirty_card_qset();
61 if (dcqs->process_or_enqueue_completed_buffer(node)) {
62 reset(); // Buffer fully processed, reset index.
63 } else {
64 allocate_buffer(); // Buffer enqueued, get a new one.
65 }
66 }
67
68 // Assumed to be zero by concurrent threads.
69 static uint par_ids_start() { return 0; }
70
71 G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon,
72 BufferNode::Allocator* allocator) :
73 PtrQueueSet(allocator),
74 _cbl_mon(cbl_mon),
75 _completed_buffers_head(NULL),
76 _completed_buffers_tail(NULL),
77 _num_cards(0),
78 _process_cards_threshold(ProcessCardsThresholdNever),
79 _process_completed_buffers(false),
80 _max_cards(MaxCardsUnlimited),
81 _max_cards_padding(0),
82 _free_ids(par_ids_start(), num_par_ids()),
83 _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
84 {
85 ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
86 _all_active = true;
87 }
88
89 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
90 abandon_completed_buffers();
91 FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
92 }
93
94 // Determines how many mutator threads can process the buffers in parallel.
95 uint G1DirtyCardQueueSet::num_par_ids() {
96 return (uint)os::initial_active_processor_count();
97 }
98
99 size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
100 size_t sum = 0;
101 for (uint i = 0; i < num_par_ids(); ++i) {
102 sum += _mutator_refined_cards_counters[i];
103 }
104 return sum;
105 }
106
107 void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
108 G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
109 }
110
111 void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
112 MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
113 cbn->set_next(NULL);
114 if (_completed_buffers_tail == NULL) {
115 assert(_completed_buffers_head == NULL, "Well-formedness");
116 _completed_buffers_head = cbn;
117 _completed_buffers_tail = cbn;
118 } else {
119 _completed_buffers_tail->set_next(cbn);
120 _completed_buffers_tail = cbn;
121 }
122 _num_cards += buffer_size() - cbn->index();
123
124 if (!process_completed_buffers() &&
125 (num_cards() > process_cards_threshold())) {
126 set_process_completed_buffers(true);
127 ml.notify_all();
128 }
129 verify_num_cards();
130 }
131
132 BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
133 MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
134
135 if (num_cards() <= stop_at) {
136 return NULL;
137 }
138
139 assert(num_cards() > 0, "invariant");
140 assert(_completed_buffers_head != NULL, "invariant");
141 assert(_completed_buffers_tail != NULL, "invariant");
142
143 BufferNode* bn = _completed_buffers_head;
144 _num_cards -= buffer_size() - bn->index();
145 _completed_buffers_head = bn->next();
146 if (_completed_buffers_head == NULL) {
147 assert(num_cards() == 0, "invariant");
148 _completed_buffers_tail = NULL;
149 set_process_completed_buffers(false);
150 }
151 verify_num_cards();
152 bn->set_next(NULL);
153 return bn;
154 }
155
156 #ifdef ASSERT
157 void G1DirtyCardQueueSet::verify_num_cards() const {
158 size_t actual = 0;
159 BufferNode* cur = _completed_buffers_head;
160 while (cur != NULL) {
161 actual += buffer_size() - cur->index();
162 cur = cur->next();
163 }
164 assert(actual == _num_cards,
165 "Num entries in completed buffers should be " SIZE_FORMAT " but are " SIZE_FORMAT,
166 _num_cards, actual);
167 }
168 #endif
169
170 void G1DirtyCardQueueSet::abandon_completed_buffers() {
171 BufferNode* buffers_to_delete = NULL;
172 {
173 MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
174 buffers_to_delete = _completed_buffers_head;
175 _completed_buffers_head = NULL;
176 _completed_buffers_tail = NULL;
177 _num_cards = 0;
178 set_process_completed_buffers(false);
179 }
180 while (buffers_to_delete != NULL) {
181 BufferNode* bn = buffers_to_delete;
182 buffers_to_delete = bn->next();
183 bn->set_next(NULL);
184 deallocate_buffer(bn);
185 }
186 }
187
188 void G1DirtyCardQueueSet::notify_if_necessary() {
189 MonitorLocker ml(_cbl_mon, Mutex::_no_safepoint_check_flag);
190 if (num_cards() > process_cards_threshold()) {
191 set_process_completed_buffers(true);
192 ml.notify_all();
193 }
194 }
195
196 // Merge lists of buffers. Notify the processing threads.
197 // The source queue is emptied as a result. The queues
198 // must share the monitor.
199 void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
200 assert(allocator() == src->allocator(), "precondition");
201 const G1BufferNodeList from = src->take_all_completed_buffers();
202 if (from._head == NULL) return;
203
204 MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
205 if (_completed_buffers_tail == NULL) {
206 assert(_completed_buffers_head == NULL, "Well-formedness");
207 _completed_buffers_head = from._head;
208 _completed_buffers_tail = from._tail;
209 } else {
210 assert(_completed_buffers_head != NULL, "Well formedness");
211 _completed_buffers_tail->set_next(from._head);
212 _completed_buffers_tail = from._tail;
213 }
214 _num_cards += from._entry_count;
215
216 assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
217 _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
218 "Sanity");
219 verify_num_cards();
220 }
221
222 G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
223 MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
224 G1BufferNodeList result(_completed_buffers_head, _completed_buffers_tail, _num_cards);
225 _completed_buffers_head = NULL;
226 _completed_buffers_tail = NULL;
227 _num_cards = 0;
228 return result;
229 }
230
231 class G1RefineBufferedCards : public StackObj {
232 BufferNode* const _node;
233 CardTable::CardValue** const _node_buffer;
234 const size_t _node_buffer_size;
235 const uint _worker_id;
236 size_t* _total_refined_cards;
237 G1RemSet* const _g1rs;
238
239 static inline int compare_card(const CardTable::CardValue* p1,
240 const CardTable::CardValue* p2) {
241 return p2 - p1;
242 }
243
244 // Sorts the cards from start_index to _node_buffer_size in *decreasing*
245 // address order. Tests showed that this order is preferable to not sorting
246 // or increasing address order.
247 void sort_cards(size_t start_index) {
382 return false;
383 }
384
385 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
386 uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
387 uint counter_index = worker_id - par_ids_start();
388 size_t* counter = &_mutator_refined_cards_counters[counter_index];
389 bool result = refine_buffer(node, worker_id, counter);
390 _free_ids.release_par_id(worker_id); // release the id
391
392 if (result) {
393 assert_fully_consumed(node, buffer_size());
394 }
395 return result;
396 }
397
398 bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
399 size_t stop_at,
400 size_t* total_refined_cards) {
401 BufferNode* node = get_completed_buffer(stop_at);
402 if (node == NULL) {
403 return false;
404 } else if (refine_buffer(node, worker_id, total_refined_cards)) {
405 assert_fully_consumed(node, buffer_size());
406 // Done with fully processed buffer.
407 deallocate_buffer(node);
408 return true;
409 } else {
410 // Return partially processed buffer to the queue.
411 enqueue_completed_buffer(node);
412 return true;
413 }
414 }
415
416 void G1DirtyCardQueueSet::abandon_logs() {
417 assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
418 abandon_completed_buffers();
419
420 // Since abandon is done only at safepoints, we can safely manipulate
421 // these queues.
422 struct AbandonThreadLogClosure : public ThreadClosure {
423 virtual void do_thread(Thread* t) {
424 G1ThreadLocalData::dirty_card_queue(t).reset();
425 }
426 } closure;
427 Threads::threads_do(&closure);
428
429 G1BarrierSet::shared_dirty_card_queue().reset();
430 }
431
432 void G1DirtyCardQueueSet::concatenate_logs() {
433 // Iterate over all the threads, if we find a partial log add it to
434 // the global list of logs. Temporarily turn off the limit on the number
435 // of outstanding buffers.
436 assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
437 size_t old_limit = max_cards();
438 set_max_cards(MaxCardsUnlimited);
439
440 struct ConcatenateThreadLogClosure : public ThreadClosure {
441 virtual void do_thread(Thread* t) {
442 G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
443 if (!dcq.is_empty()) {
444 dcq.flush();
445 }
446 }
447 } closure;
448 Threads::threads_do(&closure);
449
450 G1BarrierSet::shared_dirty_card_queue().flush();
451 set_max_cards(old_limit);
452 }
|
1 /*
2 * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "gc/g1/g1BufferNodeList.hpp"
27 #include "gc/g1/g1CardTableEntryClosure.hpp"
28 #include "gc/g1/g1CollectedHeap.inline.hpp"
29 #include "gc/g1/g1ConcurrentRefineThread.hpp"
30 #include "gc/g1/g1DirtyCardQueue.hpp"
31 #include "gc/g1/g1FreeIdSet.hpp"
32 #include "gc/g1/g1RedirtyCardsQueue.hpp"
33 #include "gc/g1/g1RemSet.hpp"
34 #include "gc/g1/g1ThreadLocalData.hpp"
35 #include "gc/g1/heapRegionRemSet.hpp"
36 #include "gc/shared/suspendibleThreadSet.hpp"
37 #include "gc/shared/workgroup.hpp"
38 #include "memory/iterator.hpp"
39 #include "runtime/flags/flagSetting.hpp"
40 #include "runtime/mutexLocker.hpp"
41 #include "runtime/orderAccess.hpp"
42 #include "runtime/os.hpp"
43 #include "runtime/safepoint.hpp"
44 #include "runtime/thread.inline.hpp"
45 #include "runtime/threadSMR.hpp"
46 #include "utilities/globalCounter.inline.hpp"
47 #include "utilities/macros.hpp"
48 #include "utilities/quickSort.hpp"
49 #include <new>
50
51 G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
52 // Dirty card queues are always active, so we create them with their
53 // active field set to true.
54 PtrQueue(qset, true /* active */)
55 { }
56
57 G1DirtyCardQueue::~G1DirtyCardQueue() {
58 flush();
59 }
60
61 BufferNode* const NULL_buffer = NULL;
62
63 void G1DirtyCardQueue::handle_completed_buffer() {
64 assert(_buf != NULL, "precondition");
65 BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
66 G1DirtyCardQueueSet* dcqs = dirty_card_qset();
67 if (dcqs->process_or_enqueue_completed_buffer(node)) {
68 reset(); // Buffer fully processed, reset index.
69 } else {
70 allocate_buffer(); // Buffer enqueued, get a new one.
71 }
72 }
73
74 // Assumed to be zero by concurrent threads.
75 static uint par_ids_start() { return 0; }
76
77 G1DirtyCardQueueSet::G1DirtyCardQueueSet(BufferNode::Allocator* allocator) :
78 PtrQueueSet(allocator),
79 _primary_refinement_thread(NULL),
80 _completed_buffers_head(NULL_buffer),
81 _completed_buffers_tail(NULL_buffer),
82 _num_cards(0),
83 DEBUG_ONLY(_concurrency(0) COMMA)
84 _paused(),
85 _process_cards_threshold(ProcessCardsThresholdNever),
86 _max_cards(MaxCardsUnlimited),
87 _max_cards_padding(0),
88 _free_ids(par_ids_start(), num_par_ids()),
89 _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
90 {
91 ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
92 _all_active = true;
93 }
94
95 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
96 abandon_completed_buffers();
97 FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
98 }
99
100 // Determines how many mutator threads can process the buffers in parallel.
101 uint G1DirtyCardQueueSet::num_par_ids() {
102 return (uint)os::initial_active_processor_count();
103 }
104
105 size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
106 size_t sum = 0;
107 for (uint i = 0; i < num_par_ids(); ++i) {
108 sum += _mutator_refined_cards_counters[i];
109 }
110 return sum;
111 }
112
113 void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
114 G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
115 }
116
117 // _concurrency is an int that is used in debug-only context to verify
118 // we're not overlapping queue operations that support concurrency with
119 // those which don't. The value is initially zero, meaning there are no
120 // relevant operations in progress. A "no concurrency" context is entered
121 // by atomically changing the value from 0 to -1, with an assert on failure.
122 // It is similarly exited by reverting the value back to 0. A "concurrent"
123 // context is entered by atomically incrementing the value and verifying the
124 // result is greater than zero (so we weren't in a "no concurrency" context).
125 // It is similarly exited by atomically decrementing the value and verifying
126 // the result is at least zero (so no mismatches).
127 //
128 // ConcurrentVerifier and NonconcurrentVerifier are helper classes to
129 // establish and remove such contexts.
130
131 class G1DirtyCardQueueSet::ConcurrentVerifier : public StackObj {
132 #ifdef ASSERT
133 const G1DirtyCardQueueSet* _dcqs;
134
135 public:
136 ~ConcurrentVerifier() {
137 assert(Atomic::sub(&_dcqs->_concurrency, 1) >= 0, "invariant");
138 }
139 #endif // ASSERT
140
141 public:
142 ConcurrentVerifier(const G1DirtyCardQueueSet* dcqs) DEBUG_ONLY(: _dcqs(dcqs)) {
143 assert(Atomic::add(&_dcqs->_concurrency, 1) > 0, "invariant");
144 }
145 };
146
147 class G1DirtyCardQueueSet::NonconcurrentVerifier : public StackObj {
148 #ifdef ASSERT
149 const G1DirtyCardQueueSet* _dcqs;
150
151 public:
152 ~NonconcurrentVerifier() {
153 assert(Atomic::cmpxchg(&_dcqs->_concurrency, -1, 0) == -1, "invariant");
154 }
155 #endif // ASSERT
156
157 public:
158 NonconcurrentVerifier(const G1DirtyCardQueueSet* dcqs) DEBUG_ONLY(: _dcqs(dcqs)) {
159 assert(Atomic::cmpxchg(&_dcqs->_concurrency, 0, -1) == 0, "invariant");
160 }
161 };
162
163 // _completed_buffers_{head,tail} and _num_cards provide a lock-free FIFO
164 // of buffers, linked through their next() fields.
165 //
166 // The key idea to make this work is that pop (get_completed_buffer) never
167 // returns an element of the queue if it is the only accessible element,
168 // e.g. its "next" value is NULL. It is expected that there will be a
169 // later push/append that will make that element available to a future pop,
170 // or there will eventually be a complete transfer (take_all_completed_buffers).
171 //
172 // An append operation atomically exchanges the new tail with the queue tail.
173 // It then sets the "next" value of the old tail to the head of the list being
174 // appended. (It is an invariant that the old tail's "next" value is NULL.)
175 // But if the old tail is NULL then the queue was empty. In this case the
176 // head of the list being appended is instead stored in the queue head (which
177 // must be NULL).
178 //
179 // A push operation is just a degenerate append, where the buffer being pushed
180 // is both the head and the tail of the list being appended.
181 //
182 // This means there is a period between the exchange and the old tail update
183 // where the queue sequence is split into two parts, the list from the queue
184 // head to the old tail, and the list being appended. If there are concurrent
185 // push/append operations, each may introduce another such segment. But they
186 // all eventually get resolved by their respective updates of their old tail's
187 // "next" value.
188 //
189 // pop gets the queue head as the candidate result (returning NULL if the
190 // queue head was NULL), and then gets that result node's "next" value. If
191 // that "next" value is NULL and the queue head hasn't changed, then there
192 // is only one element in the (accessible) list. We can't return that
193 // element, because it may be the old tail of a concurrent push/append. So
194 // return NULL in this case. Otherwise, attempt to cmpxchg that "next"
195 // value into the queue head, retrying the whole operation if that fails.
196 // This is the "usual" lock-free pop from head of slist, with the additional
197 // restriction on taking the last element.
198 //
199 // In order to address the ABA problem for pop, a pop operation protects its
200 // access to the head of the list with a GlobalCounter critical section. This
201 // works with the buffer allocator's use of GlobalCounter synchronization to
202 // prevent ABA from arising in the normal buffer usage cycle. The paused
203 // buffer handling prevents another ABA source (see record_paused_buffer and
204 // enqueue_previous_paused_buffers).
205
206 size_t G1DirtyCardQueueSet::append_buffers(BufferNode* first,
207 BufferNode* last,
208 size_t card_count) {
209 assert(last->next() == NULL_buffer, "precondition");
210 ConcurrentVerifier cv(this);
211 // Increment _num_cards before adding to queue, so queue removal doesn't
212 // need to deal with _num_cards possibly going negative.
213 size_t new_num_cards = Atomic::add(&_num_cards, card_count);
214 BufferNode* old_tail = Atomic::xchg(&_completed_buffers_tail, last);
215 if (old_tail == NULL_buffer) { // Empty list.
216 assert(Atomic::load(&_completed_buffers_head) == NULL_buffer, "invariant");
217 Atomic::store(&_completed_buffers_head, first);
218 } else {
219 assert(old_tail->next() == NULL_buffer, "invariant");
220 old_tail->set_next(first);
221 }
222 return new_num_cards;
223 }
224
225 void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
226 assert(cbn != NULL_buffer, "precondition");
227 size_t new_num_cards = append_buffers(cbn, cbn, buffer_size() - cbn->index());
228 if ((_primary_refinement_thread != NULL) &&
229 (new_num_cards > process_cards_threshold())) {
230 _primary_refinement_thread->activate();
231 }
232 }
233
234 BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
235 enqueue_previous_paused_buffers();
236
237 ConcurrentVerifier cv(this);
238
239 // Check for unsufficient cards to satisfy request. We only do this once,
240 // up front, rather than on each iteration below, since the test is racy
241 // regardless of when we do it.
242 if (Atomic::load_acquire(&_num_cards) <= stop_at) {
243 return NULL_buffer;
244 }
245
246 Thread* current_thread = Thread::current();
247
248 while (true) {
249 // Use a critical section per iteration, rather than over the whole
250 // operation. We're not guaranteed to make progress, because of possible
251 // contention on the queue head. Lingering in one CS the whole time could
252 // lead to excessive allocation of buffers, because the CS blocks return
253 // of released buffers to the free list for reuse.
254 GlobalCounter::CriticalSection cs(current_thread);
255
256 BufferNode* result = Atomic::load_acquire(&_completed_buffers_head);
257 // Check for empty queue. Only needs to be done on first iteration,
258 // since we never take the last element, but it's messy to make use
259 // of that and we expect one iteration to be the common case.
260 if (result == NULL_buffer) return result;
261
262 BufferNode* next = Atomic::load_acquire(BufferNode::next_ptr(*result));
263 if (next != NULL_buffer) {
264 next = Atomic::cmpxchg(&_completed_buffers_head, result, next);
265 if (next == result) {
266 // Former head successfully taken; it is not the last.
267 assert(Atomic::load(&_completed_buffers_tail) != result, "invariant");
268 assert(result->next() != NULL_buffer, "invariant");
269 result->set_next(NULL_buffer);
270 Atomic::sub(&_num_cards, buffer_size() - result->index());
271 return result;
272 }
273 // cmpxchg failed; try again.
274 } else if (result == Atomic::load_acquire(&_completed_buffers_head)) {
275 // If follower of head is NULL and head hasn't changed, then only
276 // the one element is currently accessible. We don't take the last
277 // accessible element, because there may be a concurrent add using it.
278 // The check for unchanged head isn't needed for correctness, but the
279 // retry on change may sometimes let us get a buffer after all.
280 return NULL_buffer;
281 }
282 // Head changed; try again.
283 }
284 // Unreachable
285 }
286
287 #ifdef ASSERT
288 void G1DirtyCardQueueSet::verify_num_cards() const {
289 NonconcurrentVerifier ncv(this);
290 size_t actual = 0;
291 BufferNode* cur = Atomic::load(&_completed_buffers_head);
292 for ( ; cur != NULL_buffer; cur = cur->next()) {
293 actual += buffer_size() - cur->index();
294 }
295 assert(actual == Atomic::load(&_num_cards),
296 "Num entries in completed buffers should be " SIZE_FORMAT " but are " SIZE_FORMAT,
297 Atomic::load(&_num_cards), actual);
298 }
299 #endif // ASSERT
300
301 // Refinement processing stops early if there is a pending safepoint, to
302 // avoid long delays to safepoint. We need to record the partially
303 // processed buffer for later continued processing. However, we can't
304 // simply add it back to the completed buffer queue, as that would introduce
305 // a new source of ABA for the queue. Instead, we have a pair of buffer
306 // lists (with each list represented by head and tail), one for each of the
307 // previous and next safepoints (*). When pausing the processing of a
308 // buffer for a safepoint, we add the buffer (lock free) to the list for the
309 // next safepoint. Before attempting to obtain a buffer from the queue we
310 // first transfer any buffers in the previous safepoint list to the queue.
311 // This is safe (doesn't introduce ABA) because threads cannot be in the
312 // midst of a queue pop across a safepoint.
313 //
314 // These paused buffer lists are conceptually an extension of the queue, and
315 // operations which need to deal with all of the queued buffers (such as
316 // concatenate_logs) also need to deal with any paused buffers. In general,
317 // if the safepoint performs a GC then the paused buffers will be processed
318 // as part of it and both lists will be empty afterward.
319 //
320 // An alternative would be to directly reenqueue a paused buffer, but only
321 // after first calling GlobalCounter::write_synchronize. However, that
322 // might noticeably delay the pending safepoint.
323 //
324 // A single paused list and a safepoint cleanup action to perform the transfer
325 // doesn't work because cleanup actions are not invoked for every safepoint.
326 //
327 // (*) If the safepoint does not perform a GC, the next list becomes the
328 // previous list after the safepoint. Since buffers are only added to the
329 // next list if there were threads performing refinement work, there will
330 // likely be refinement work done after the safepoint, which will transfer
331 // those buffers to the queue. However, multiple non-GC safepoints in
332 // succession, without intervening refinement work to perform a transfer
333 // (possibly through lack of time), can result in old buffers being present
334 // and inaccessible in the next list. This doesn't affect correctness, but
335 // might affect performance. The alternatives discussed above don't have
336 // this problem, but have problems of their own.
337
338 static size_t next_paused_buffer_list_index() {
339 return SafepointSynchronize::safepoint_id() & 1;
340 }
341
342 static size_t previous_paused_buffer_list_index() {
343 return next_paused_buffer_list_index() ^ 1;
344 }
345
346 void G1DirtyCardQueueSet::record_paused_buffer(BufferNode* node) {
347 assert_not_at_safepoint();
348 assert(node->next() == NULL_buffer, "precondition");
349 size_t next_index = next_paused_buffer_list_index();
350 // Cards for paused buffers are included in count, to contribute to
351 // notification checking after the coming safepoint if it doesn't GC.
352 Atomic::add(&_num_cards, buffer_size() - node->index());
353 BufferNode* old_head = Atomic::xchg(&_paused[next_index]._head, node);
354 if (old_head == NULL_buffer) {
355 assert(_paused[next_index]._tail == NULL, "invariant");
356 _paused[next_index]._tail = node;
357 } else {
358 node->set_next(old_head);
359 }
360 }
361
362 void G1DirtyCardQueueSet::enqueue_paused_buffers_aux(size_t index) {
363 if (Atomic::load(&_paused[index]._head) != NULL_buffer) {
364 BufferNode* paused = Atomic::xchg(&_paused[index]._head, NULL_buffer);
365 if (paused != NULL_buffer) {
366 BufferNode* tail = _paused[index]._tail;
367 assert(tail != NULL, "invariant");
368 _paused[index]._tail = NULL_buffer;
369 append_buffers(paused, tail, 0); // Cards already counted when recorded.
370 }
371 }
372 }
373
374 void G1DirtyCardQueueSet::enqueue_previous_paused_buffers() {
375 size_t previous_index = previous_paused_buffer_list_index();
376 enqueue_paused_buffers_aux(previous_index);
377 }
378
379 void G1DirtyCardQueueSet::enqueue_all_paused_buffers() {
380 assert_at_safepoint();
381 for (size_t i = 0; i < ARRAY_SIZE(_paused); ++i) {
382 enqueue_paused_buffers_aux(i);
383 }
384 }
385
386 void G1DirtyCardQueueSet::clear_completed_buffers() {
387 Atomic::store(&_completed_buffers_head, NULL_buffer);
388 Atomic::store(&_completed_buffers_tail, NULL_buffer);
389 Atomic::store(&_num_cards, size_t(0));
390 }
391
392 void G1DirtyCardQueueSet::abandon_completed_buffers() {
393 enqueue_all_paused_buffers();
394 verify_num_cards();
395 NonconcurrentVerifier ncv(this);
396 BufferNode* buffers_to_delete = Atomic::load(&_completed_buffers_head);
397 clear_completed_buffers();
398 while (buffers_to_delete != NULL_buffer) {
399 BufferNode* bn = buffers_to_delete;
400 buffers_to_delete = bn->next();
401 bn->set_next(NULL_buffer);
402 deallocate_buffer(bn);
403 }
404 }
405
406 void G1DirtyCardQueueSet::notify_if_necessary() {
407 if ((_primary_refinement_thread != NULL) &&
408 (num_cards() > process_cards_threshold())) {
409 _primary_refinement_thread->activate();
410 }
411 }
412
413 // Merge lists of buffers. The source queue set is emptied as a
414 // result. The queue sets must share the same allocator.
415 void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
416 assert(allocator() == src->allocator(), "precondition");
417 const G1BufferNodeList from = src->take_all_completed_buffers();
418 if (from._head == NULL_buffer) return;
419 append_buffers(from._head, from._tail, from._entry_count);
420 }
421
422 G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
423 #ifdef ASSERT
424 for (size_t i = 0; i < ARRAY_SIZE(_paused); ++i) {
425 assert(Atomic::load(&_paused[i]._head) == NULL_buffer, "precondition");
426 assert(Atomic::load(&_paused[i]._tail) == NULL_buffer, "precondition");
427 }
428 #endif // ASSERT
429 verify_num_cards();
430 NonconcurrentVerifier ncv(this);
431 G1BufferNodeList result(Atomic::load(&_completed_buffers_head),
432 Atomic::load(&_completed_buffers_tail),
433 Atomic::load(&_num_cards));
434 clear_completed_buffers();
435 return result;
436 }
437
438 class G1RefineBufferedCards : public StackObj {
439 BufferNode* const _node;
440 CardTable::CardValue** const _node_buffer;
441 const size_t _node_buffer_size;
442 const uint _worker_id;
443 size_t* _total_refined_cards;
444 G1RemSet* const _g1rs;
445
446 static inline int compare_card(const CardTable::CardValue* p1,
447 const CardTable::CardValue* p2) {
448 return p2 - p1;
449 }
450
451 // Sorts the cards from start_index to _node_buffer_size in *decreasing*
452 // address order. Tests showed that this order is preferable to not sorting
453 // or increasing address order.
454 void sort_cards(size_t start_index) {
589 return false;
590 }
591
592 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
593 uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
594 uint counter_index = worker_id - par_ids_start();
595 size_t* counter = &_mutator_refined_cards_counters[counter_index];
596 bool result = refine_buffer(node, worker_id, counter);
597 _free_ids.release_par_id(worker_id); // release the id
598
599 if (result) {
600 assert_fully_consumed(node, buffer_size());
601 }
602 return result;
603 }
604
605 bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
606 size_t stop_at,
607 size_t* total_refined_cards) {
608 BufferNode* node = get_completed_buffer(stop_at);
609 if (node == NULL_buffer) {
610 return false;
611 } else if (refine_buffer(node, worker_id, total_refined_cards)) {
612 assert_fully_consumed(node, buffer_size());
613 // Done with fully processed buffer.
614 deallocate_buffer(node);
615 return true;
616 } else {
617 // Buffer incompletely processed because there is a pending safepoint.
618 // Record partially processed buffer, to be finished later.
619 record_paused_buffer(node);
620 return true;
621 }
622 }
623
624 void G1DirtyCardQueueSet::abandon_logs() {
625 assert_at_safepoint();
626 abandon_completed_buffers();
627
628 // Since abandon is done only at safepoints, we can safely manipulate
629 // these queues.
630 struct AbandonThreadLogClosure : public ThreadClosure {
631 virtual void do_thread(Thread* t) {
632 G1ThreadLocalData::dirty_card_queue(t).reset();
633 }
634 } closure;
635 Threads::threads_do(&closure);
636
637 G1BarrierSet::shared_dirty_card_queue().reset();
638 }
639
640 void G1DirtyCardQueueSet::concatenate_logs() {
641 // Iterate over all the threads, if we find a partial log add it to
642 // the global list of logs. Temporarily turn off the limit on the number
643 // of outstanding buffers.
644 assert_at_safepoint();
645 size_t old_limit = max_cards();
646 set_max_cards(MaxCardsUnlimited);
647
648 struct ConcatenateThreadLogClosure : public ThreadClosure {
649 virtual void do_thread(Thread* t) {
650 G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
651 if (!dcq.is_empty()) {
652 dcq.flush();
653 }
654 }
655 } closure;
656 Threads::threads_do(&closure);
657
658 G1BarrierSet::shared_dirty_card_queue().flush();
659 enqueue_all_paused_buffers();
660 verify_num_cards();
661 set_max_cards(old_limit);
662 }
|