--- old/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp 2020-01-16 01:05:19.052384223 -0500 +++ new/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp 2020-01-16 01:05:18.644362197 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,11 +29,12 @@ #include "gc/g1/g1FreeIdSet.hpp" #include "gc/shared/ptrQueue.hpp" #include "memory/allocation.hpp" +#include "memory/padded.hpp" +class G1ConcurrentRefineThread; class G1DirtyCardQueueSet; class G1RedirtyCardsQueueSet; class Thread; -class Monitor; // A ptrQueue whose elements are "oops", pointers to object heads. class G1DirtyCardQueue: public PtrQueue { @@ -66,18 +67,45 @@ }; class G1DirtyCardQueueSet: public PtrQueueSet { - Monitor* _cbl_mon; // Protects the list and count members. - BufferNode* _completed_buffers_head; - BufferNode* _completed_buffers_tail; - - // Number of actual cards in the list of completed buffers. + G1ConcurrentRefineThread* _primary_refinement_thread; + // Add padding for improved performance for shared access. There's only + // one instance of this class, so using a little extra space is fine. + // _completed_buffers_{head,tail} and _num_cards are isolated to their + // own cache lines. Other members are not, even if shared access, because + // they aren't as critical to performance. + DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(G1ConcurrentRefineThread*)); + BufferNode* volatile _completed_buffers_head; + DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode* volatile)); + BufferNode* volatile _completed_buffers_tail; + DEFINE_PAD_MINUS_SIZE(3, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode* volatile)); volatile size_t _num_cards; + DEFINE_PAD_MINUS_SIZE(4, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile size_t)); - size_t _process_cards_threshold; - volatile bool _process_completed_buffers; + DEBUG_ONLY(mutable volatile int _concurrency;) + class ConcurrentVerifier; + class NonconcurrentVerifier; + + size_t append_buffers(BufferNode* first, BufferNode* last, size_t card_count); + // Verify _num_cards == sum of cards in the completed queue. + void verify_num_cards() const NOT_DEBUG_RETURN; + + struct PauseList { + BufferNode* volatile _head; + BufferNode* _tail; + PauseList() : _head(NULL), _tail(NULL) {} + }; + PauseList _paused[2]; + + void record_paused_buffer(BufferNode* node); + void enqueue_paused_buffers_aux(size_t index); + void enqueue_previous_paused_buffers(); + void enqueue_all_paused_buffers(); + void clear_completed_buffers(); void abandon_completed_buffers(); + size_t _process_cards_threshold; + // Refine the cards in "node" from its index to buffer_size. // Stops processing if SuspendibleThreadSet::should_yield() is true. // Returns true if the entire buffer was processed, false if there @@ -103,9 +131,13 @@ size_t* _mutator_refined_cards_counters; public: - G1DirtyCardQueueSet(Monitor* cbl_mon, BufferNode::Allocator* allocator); + G1DirtyCardQueueSet(BufferNode::Allocator* allocator); ~G1DirtyCardQueueSet(); + void set_primary_refinement_thread(G1ConcurrentRefineThread* thread) { + _primary_refinement_thread = thread; + } + // The number of parallel ids that can be claimed to allow collector or // mutator threads to do card-processing work. static uint num_par_ids(); @@ -126,13 +158,6 @@ // The number of cards in completed buffers. Read without synchronization. size_t num_cards() const { return _num_cards; } - // Verify that _num_cards is equal to the sum of actual cards - // in the completed buffers. - void verify_num_cards() const NOT_DEBUG_RETURN; - - bool process_completed_buffers() { return _process_completed_buffers; } - void set_process_completed_buffers(bool x) { _process_completed_buffers = x; } - // Get/Set the number of cards that triggers log processing. // Log processing should be done when the number of cards exceeds the // threshold.