
Print this page
rev 2896 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by: brutisso

@@ -428,11 +428,10 @@
   // structures (local and global) have been re-initialised. When they
   // exit it, they are free to start working again.
   WorkGangBarrierSync     _first_overflow_barrier_sync;
   WorkGangBarrierSync     _second_overflow_barrier_sync;
   // this is set by any task, when an overflow on the global data
   // structures is detected.
   volatile bool           _has_overflown;
   // true: marking is concurrent, false: we're in remark
   volatile bool           _concurrent;

@@ -580,10 +579,31 @@
     } else {
       return force_overflow_stw();
+  // Live Data Counting data structures...
+  // These data structures are initialized at the start of
+  // marking. They are written to while marking is active.
+  // They are aggregated during remark; the aggregated values
+  // are then used to populate the _region_bm, _card_bm, and
+  // the total live bytes, which are then subsequently updated
+  // during cleanup.
+  // An array of bitmaps (one bit map per task). Each bitmap
+  // is used to record the cards spanned by the live objects
+  // marked by that task/worker.
+  BitMap*  _count_card_bitmaps;
+  // Used to record the number of marked live bytes
+  // (for each region, by worker thread).
+  size_t** _count_marked_bytes;
+  // Card index of the bottom of the G1 heap. Used for biasing indices into
+  // the card bitmaps.
+  intptr_t _heap_bottom_card_num;
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
   // two below are Mutex-based. This is OK since the first one is only
   // called during evacuation pauses and doesn't compete with the

@@ -701,14 +721,14 @@
     return _task_queues->steal(task_num, hash_seed, obj);
   // It grays an object by first marking it. Then, if it's behind the
   // global finger, it also pushes it on the global stack.
-  void deal_with_reference(oop obj);
+  void deal_with_reference(oop obj, int worker_i);
   ConcurrentMark(ReservedSpace rs, int max_regions);
-  ~ConcurrentMark();
   ConcurrentMarkThread* cmThread() { return _cmThread; }
   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
   CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }

@@ -725,25 +745,27 @@
   // This notifies CM that a root during initial-mark needs to be
   // grayed and it's MT-safe. Currently, we just mark it. But, in the
   // future, we can experiment with pushing it on the stack and we can
   // do this without changing G1CollectedHeap.
-  void grayRoot(oop p);
+  void grayRoot(oop p, int worker_i);
   // It's used during evacuation pauses to gray a region, if
   // necessary, and it's MT-safe. It assumes that the caller has
   // marked any objects on that region. If _should_gray_objects is
   // true and we're still doing concurrent marking, the region is
   // pushed on the region stack, if it is located below the global
   // finger, otherwise we do nothing.
   void grayRegionIfNecessary(MemRegion mr);
   // It's used during evacuation pauses to mark and, if necessary,
   // gray a single object and it's MT-safe. It assumes the caller did
   // not mark the object. If _should_gray_objects is true and we're
   // still doing concurrent marking, the object is pushed on the
   // global stack, if it is located below the global finger, otherwise
   // we do nothing.
-  void markAndGrayObjectIfNecessary(oop p);
+  void markAndGrayObjectIfNecessary(oop p, int worker_i);
   // It iterates over the heap and for each object it comes across it
   // will dump the contents of its reference fields, as well as
   // liveness information for the object and its referents. The dump
   // will be written to a file with the following name:

@@ -783,18 +805,20 @@
   // beginning of an evacuation pause.
   void drainAllSATBBuffers();
   void checkpointRootsFinal(bool clear_all_soft_refs);
   void checkpointRootsFinalWork();
-  void calcDesiredRegions();
   void cleanup();
   void completeCleanup();
   // Mark in the previous bitmap.  NB: this is usually read-only, so use
   // this carefully!
   void markPrev(oop p);
-  void clear(oop p);
+  // Clears the mark in the next bitmap for the given object.
+  void clear_mark(oop p);
   // Clears marks for all objects in the given range, for both prev and
   // next bitmaps.  NB: the previous bitmap is usually read-only, so use
   // this carefully!
   void clearRangeBothMaps(MemRegion mr);

@@ -899,10 +923,102 @@
     return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
   bool verbose_high() {
     return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
+  // Counting data structure accessors
+  // Returns the card number of the bottom of the G1 heap (_heap_bottom_card_num).
+  // Used in biasing indices into the per-task accounting card bitmaps.
+  intptr_t heap_bottom_card_num() const {
+    return _heap_bottom_card_num;
+  }
+  // Returns the card bitmap for a given task or worker id (0 <= worker_i < _max_task_num).
+  BitMap* count_card_bitmap_for(int worker_i) {
+    assert(0 <= worker_i && (size_t) worker_i < _max_task_num, "oob");
+    assert(_count_card_bitmaps != NULL, "uninitialized");
+    BitMap* task_card_bm = &_count_card_bitmaps[worker_i];  // one bitmap per task, indexed by worker id
+    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");  // each per-task bitmap must be the same size as _card_bm
+    return task_card_bm;
+  }
+  // Returns the array containing the marked bytes for each region,
+  // for the given worker or task id (0 <= worker_i < _max_task_num).
+  size_t* count_marked_bytes_array_for(int worker_i) {
+    assert(0 <= worker_i && (size_t) worker_i < _max_task_num, "oob");
+    assert(_count_marked_bytes != NULL, "uninitialized");
+    size_t* marked_bytes_array = _count_marked_bytes[worker_i];  // one array per worker, indexed by worker id
+    assert(marked_bytes_array != NULL, "uninitialized");
+    return marked_bytes_array;
+  }
+  // Counts the size of the given memory region in the given
+  // marked_bytes array slot for the given HeapRegion.
+  // Sets the bits in the given card bitmap that are associated with the
+  // cards that are spanned by the memory region.
+  inline void count_region(MemRegion mr, HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm);
+  // Counts the given memory region in the task/worker counting
+  // data structures for the given worker id.
+  inline void count_region(MemRegion mr, int worker_i);
+  // Counts the given object in the given task/worker counting
+  // data structures.
+  inline void count_object(oop obj, HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm);
+  // Counts the given object in the task/worker counting data
+  // structures for the given worker id.
+  inline void count_object(oop obj, HeapRegion* hr, int worker_i);
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the given task/worker counting structures.
+  inline bool par_mark_and_count(oop obj, HeapRegion* hr,
+                                 size_t* marked_bytes_array,
+                                 BitMap* task_card_bm);
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the task/worker counting structures for the
+  // given worker id.
+  inline bool par_mark_and_count(oop obj, HeapRegion* hr, int worker_i);
+  // Similar to the above routine but we don't know the heap region that
+  // contains the object to be marked/counted, which this routine looks up.
+  inline bool par_mark_and_count(oop obj, int worker_i);
+  // Unconditionally mark the given object, and unconditionally count
+  // the object in the counting structures for worker id 0.
+  // Should *not* be called from parallel code.
+  inline bool mark_and_count(oop obj, HeapRegion* hr);
+  // Similar to the above routine but we don't know the heap region that
+  // contains the object to be marked/counted, which this routine looks up.
+  // Should *not* be called from parallel code.
+  inline bool mark_and_count(oop obj);
+  // Clears the count data for the given region from _all_ of
+  // the per-task counting data structures.
+  void clear_count_data_for_heap_region(HeapRegion* hr);
+  // Clear all the per-task bitmaps and arrays used to store the
+  // counting data.
+  void clear_all_count_data();
+  // Aggregates the counting data for each worker/task
+  // that was constructed while marking. Also sets
+  // the amount of marked bytes for each region and
+  // the top at concurrent mark count.
+  void aggregate_and_clear_count_data();
+  // Verification routine
+  void verify_count_data();
 // A class representing a marking task.
 class CMTask : public TerminatorTerminator {

@@ -1017,10 +1133,16 @@
   // phase, i.e. SATB buffer availability...)
   bool                        _concurrent;
   TruncatedSeq                _marking_step_diffs_ms;
+  // Counting data structures. Embedding the task's marked_bytes_array
+  // and card bitmap into the actual task saves having to go through
+  // the ConcurrentMark object.
+  size_t*                     _marked_bytes_array;
+  BitMap*                     _card_bm;
   // LOTS of statistics related with this task
   NumberSeq                   _all_clock_intervals_ms;
   double                      _interval_start_time_ms;

@@ -1181,10 +1303,11 @@
     assert(new_finger < _cm->finger(), "invariant");
     _region_finger = new_finger;
   CMTask(int task_num, ConcurrentMark *cm,
+         size_t* marked_bytes, BitMap* card_bm,
          CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
   // it prints statistics associated with this task
   void print_stats();