< prev index next >

src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp

Print this page
rev 57716 : [mq]: remove_cbl_mon

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.

@@ -27,15 +27,16 @@
 
 #include "gc/g1/g1BufferNodeList.hpp"
 #include "gc/g1/g1FreeIdSet.hpp"
 #include "gc/shared/ptrQueue.hpp"
 #include "memory/allocation.hpp"
+#include "memory/padded.hpp"
 
+class G1ConcurrentRefineThread;
 class G1DirtyCardQueueSet;
 class G1RedirtyCardsQueueSet;
 class Thread;
-class Monitor;
 
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class G1DirtyCardQueue: public PtrQueue {
 protected:
   virtual void handle_completed_buffer();

@@ -64,22 +65,49 @@
   using PtrQueue::byte_width_of_buf;
 
 };
 
 class G1DirtyCardQueueSet: public PtrQueueSet {
-  Monitor* _cbl_mon;  // Protects the list and count members.
-  BufferNode* _completed_buffers_head;
-  BufferNode* _completed_buffers_tail;
-
-  // Number of actual cards in the list of completed buffers.
+  G1ConcurrentRefineThread* _primary_refinement_thread;
+  // Add padding for improved performance for shared access.  There's only
+  // one instance of this class, so using a little extra space is fine.
+  // _completed_buffers_{head,tail} and _num_cards are isolated to their
+  // own cache lines.  Other members are not, even if subject to shared
+  // access, because they aren't as critical to performance.
+  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(G1ConcurrentRefineThread*));
+  BufferNode* volatile _completed_buffers_head;
+  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode* volatile));
+  BufferNode* volatile _completed_buffers_tail;
+  DEFINE_PAD_MINUS_SIZE(3, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode* volatile));
   volatile size_t _num_cards;
+  DEFINE_PAD_MINUS_SIZE(4, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile size_t));
 
-  size_t _process_cards_threshold;
-  volatile bool _process_completed_buffers;
+  DEBUG_ONLY(mutable volatile int _concurrency;)
+  class ConcurrentVerifier;
+  class NonconcurrentVerifier;
+
+  size_t append_buffers(BufferNode* first, BufferNode* last, size_t card_count);
+  // Verify _num_cards == sum of cards in the completed queue.
+  void verify_num_cards() const NOT_DEBUG_RETURN;
+
+  struct PauseList {
+    BufferNode* volatile _head;
+    BufferNode* _tail;
+    PauseList() : _head(NULL), _tail(NULL) {}
+  };
+  PauseList _paused[2];
+
+  void record_paused_buffer(BufferNode* node);
+  void enqueue_paused_buffers_aux(size_t index);
+  void enqueue_previous_paused_buffers();
+  void enqueue_all_paused_buffers();
 
+  void clear_completed_buffers();
   void abandon_completed_buffers();
 
+  size_t _process_cards_threshold;
+
   // Refine the cards in "node" from its index to buffer_size.
   // Stops processing if SuspendibleThreadSet::should_yield() is true.
   // Returns true if the entire buffer was processed, false if there
   // is a pending yield request.  The node's index is updated to exclude
   // the processed elements, e.g. up to the element before processing

@@ -101,13 +129,17 @@
   // Array of cumulative dirty cards refined by mutator threads.
   // Array has an entry per id in _free_ids.
   size_t* _mutator_refined_cards_counters;
 
 public:
-  G1DirtyCardQueueSet(Monitor* cbl_mon, BufferNode::Allocator* allocator);
+  G1DirtyCardQueueSet(BufferNode::Allocator* allocator);
   ~G1DirtyCardQueueSet();
 
+  void set_primary_refinement_thread(G1ConcurrentRefineThread* thread) {
+    _primary_refinement_thread = thread;
+  }
+
   // The number of parallel ids that can be claimed to allow collector or
   // mutator threads to do card-processing work.
   static uint num_par_ids();
 
   static void handle_zero_index_for_thread(Thread* t);

@@ -124,17 +156,10 @@
   BufferNode* get_completed_buffer(size_t stop_at = 0);
 
   // The number of cards in completed buffers. Read without synchronization.
   size_t num_cards() const { return _num_cards; }
 
-  // Verify that _num_cards is equal to the sum of actual cards
-  // in the completed buffers.
-  void verify_num_cards() const NOT_DEBUG_RETURN;
-
-  bool process_completed_buffers() { return _process_completed_buffers; }
-  void set_process_completed_buffers(bool x) { _process_completed_buffers = x; }
-
   // Get/Set the number of cards that triggers log processing.
   // Log processing should be done when the number of cards exceeds the
   // threshold.
   void set_process_cards_threshold(size_t sz) {
     _process_cards_threshold = sz;
< prev index next >