Print this page
rev 2896 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by: brutisso

Split Close
Expand all
Collapse all
          --- old/src/share/vm/gc_implementation/g1/concurrentMark.hpp
          +++ new/src/share/vm/gc_implementation/g1/concurrentMark.hpp
↓ open down ↓ 422 lines elided ↑ open up ↑
 423  423    // the first one to ensure that they have all stopped manipulating
 424  424    // the global data structures. After they exit it, they re-initialise
 425  425    // their data structures and task 0 re-initialises the global data
 426  426    // structures. Then, they enter the second sync barrier. This
 427  427    // ensure, that no task starts doing work before all data
 428  428    // structures (local and global) have been re-initialised. When they
 429  429    // exit it, they are free to start working again.
 430  430    WorkGangBarrierSync     _first_overflow_barrier_sync;
 431  431    WorkGangBarrierSync     _second_overflow_barrier_sync;
 432  432  
 433      -
 434  433    // this is set by any task, when an overflow on the global data
 435  434    // structures is detected.
 436  435    volatile bool           _has_overflown;
 437  436    // true: marking is concurrent, false: we're in remark
 438  437    volatile bool           _concurrent;
 439  438    // set at the end of a Full GC so that marking aborts
 440  439    volatile bool           _has_aborted;
 441  440  
 442  441    // used when remark aborts due to an overflow to indicate that
 443  442    // another concurrent marking phase should start
↓ open down ↓ 131 lines elided ↑ open up ↑
 575  574    }
 576  575  
 577  576    ForceOverflowSettings* force_overflow() {
 578  577      if (concurrent()) {
 579  578        return force_overflow_conc();
 580  579      } else {
 581  580        return force_overflow_stw();
 582  581      }
 583  582    }
 584  583  
      584 +  // Live Data Counting data structures...
      585 +  // These data structures are initialized at the start of
      586 +  // marking. They are written to while marking is active.
      587 +  // They are aggregated during remark; the aggregated values
      588 +  // are then used to populate the _region_bm, _card_bm, and
      589 +  // the total live bytes, which are then subsequently updated
      590 +  // during cleanup.
      591 +
      592 +  // An array of bitmaps (one bit map per task). Each bitmap
      593 +  // is used to record the cards spanned by the live objects
      594 +  // marked by that task/worker.
      595 +  BitMap*  _count_card_bitmaps;
      596 +
      597 +  // Used to record the number of marked live bytes
      598 +  // (for each region, by worker thread).
      599 +  size_t** _count_marked_bytes;
      600 +
      601 +  // Card index of the bottom of the G1 heap. Used for biasing indices into
      602 +  // the card bitmaps.
      603 +  intptr_t _heap_bottom_card_num;
      604 +
 585  605  public:
 586  606    // Manipulation of the global mark stack.
 587  607    // Notice that the first mark_stack_push is CAS-based, whereas the
 588  608    // two below are Mutex-based. This is OK since the first one is only
 589  609    // called during evacuation pauses and doesn't compete with the
 590  610    // other two (which are called by the marking tasks during
 591  611    // concurrent marking or remark).
 592  612    bool mark_stack_push(oop p) {
 593  613      _markStack.par_push(p);
 594  614      if (_markStack.overflow()) {
↓ open down ↓ 101 lines elided ↑ open up ↑
 696  716      return ret;
 697  717    }
 698  718  
 699  719    // Attempts to steal an object from the task queues of other tasks
 700  720    bool try_stealing(int task_num, int* hash_seed, oop& obj) {
 701  721      return _task_queues->steal(task_num, hash_seed, obj);
 702  722    }
 703  723  
 704  724    // It grays an object by first marking it. Then, if it's behind the
 705  725    // global finger, it also pushes it on the global stack.
 706      -  void deal_with_reference(oop obj);
      726 +  void deal_with_reference(oop obj, int worker_i);
 707  727  
 708  728    ConcurrentMark(ReservedSpace rs, int max_regions);
 709      -  ~ConcurrentMark();
      729 +
 710  730    ConcurrentMarkThread* cmThread() { return _cmThread; }
 711  731  
 712  732    CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
 713  733    CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
 714  734  
 715  735    // Returns the number of GC threads to be used in a concurrent
 716  736    // phase based on the number of GC threads being used in a STW
 717  737    // phase.
 718  738    size_t scale_parallel_threads(size_t n_par_threads);
 719  739  
 720  740    // Calculates the number of GC threads to be used in a concurrent phase.
 721  741    size_t calc_parallel_marking_threads();
 722  742  
 723  743    // The following three are interaction between CM and
 724  744    // G1CollectedHeap
 725  745  
 726  746    // This notifies CM that a root during initial-mark needs to be
 727  747    // grayed and it's MT-safe. Currently, we just mark it. But, in the
 728  748    // future, we can experiment with pushing it on the stack and we can
 729  749    // do this without changing G1CollectedHeap.
 730      -  void grayRoot(oop p);
      750 +  void grayRoot(oop p, int worker_i);
      751 +
 731  752    // It's used during evacuation pauses to gray a region, if
 732  753    // necessary, and it's MT-safe. It assumes that the caller has
 733  754    // marked any objects on that region. If _should_gray_objects is
 734  755    // true and we're still doing concurrent marking, the region is
 735  756    // pushed on the region stack, if it is located below the global
 736  757    // finger, otherwise we do nothing.
 737  758    void grayRegionIfNecessary(MemRegion mr);
      759 +
 738  760    // It's used during evacuation pauses to mark and, if necessary,
 739  761    // gray a single object and it's MT-safe. It assumes the caller did
 740  762    // not mark the object. If _should_gray_objects is true and we're
 741  763    // still doing concurrent marking, the objects is pushed on the
 742  764    // global stack, if it is located below the global finger, otherwise
 743  765    // we do nothing.
 744      -  void markAndGrayObjectIfNecessary(oop p);
      766 +  void markAndGrayObjectIfNecessary(oop p, int worker_i);
 745  767  
 746  768    // It iterates over the heap and for each object it comes across it
 747  769    // will dump the contents of its reference fields, as well as
 748  770    // liveness information for the object and its referents. The dump
 749  771    // will be written to a file with the following name:
 750  772    // G1PrintReachableBaseFile + "." + str.
 751  773    // vo decides whether the prev (vo == UsePrevMarking), the next
 752  774    // (vo == UseNextMarking) marking information, or the mark word
 753  775    // (vo == UseMarkWord) will be used to determine the liveness of
 754  776    // each object / referent.
↓ open down ↓ 23 lines elided ↑ open up ↑
 778  800  
 779  801    // Do concurrent phase of marking, to a tentative transitive closure.
 780  802    void markFromRoots();
 781  803  
 782  804    // Process all unprocessed SATB buffers. It is called at the
 783  805    // beginning of an evacuation pause.
 784  806    void drainAllSATBBuffers();
 785  807  
 786  808    void checkpointRootsFinal(bool clear_all_soft_refs);
 787  809    void checkpointRootsFinalWork();
 788      -  void calcDesiredRegions();
 789  810    void cleanup();
 790  811    void completeCleanup();
 791  812  
 792  813    // Mark in the previous bitmap.  NB: this is usually read-only, so use
 793  814    // this carefully!
 794  815    void markPrev(oop p);
 795      -  void clear(oop p);
      816 +
      817 +  // Clears the mark in the next bitmap for the given object.
      818 +  void clear_mark(oop p);
      819 +
 796  820    // Clears marks for all objects in the given range, for both prev and
 797  821    // next bitmaps.  NB: the previous bitmap is usually read-only, so use
 798  822    // this carefully!
 799  823    void clearRangeBothMaps(MemRegion mr);
 800  824  
 801  825    // Record the current top of the mark and region stacks; a
 802  826    // subsequent oops_do() on the mark stack and
 803  827    // invalidate_entries_into_cset() on the region stack will iterate
 804  828    // only over indices valid at the time of this call.
 805  829    void set_oops_do_bound() {
↓ open down ↓ 88 lines elided ↑ open up ↑
 894  918    }
 895  919    bool verbose_low() {
 896  920      return _MARKING_VERBOSE_ && _verbose_level >= low_verbose;
 897  921    }
 898  922    bool verbose_medium() {
 899  923      return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
 900  924    }
 901  925    bool verbose_high() {
 902  926      return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
 903  927    }
      928 +
      929 +  // Counting data structure accessors
      930 +
      931 +  // Returns the card number of the bottom of the G1 heap.
      932 +  // Used in biasing indices into accounting card bitmaps.
      933 +  intptr_t heap_bottom_card_num() const {
      934 +    return _heap_bottom_card_num;
      935 +  }
      936 +
      937 +  // Returns the card bitmap for a given task or worker id.
      938 +  BitMap* count_card_bitmap_for(int worker_i) {
      939 +    assert(0 <= worker_i && (size_t) worker_i < _max_task_num, "oob");
      940 +    assert(_count_card_bitmaps != NULL, "uninitialized");
      941 +    BitMap* task_card_bm = &_count_card_bitmaps[worker_i];
      942 +    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
      943 +    return task_card_bm;
      944 +  }
      945 +
      946 +  // Returns the array containing the marked bytes for each region,
      947 +  // for the given worker or task id.
      948 +  size_t* count_marked_bytes_array_for(int worker_i) {
      949 +    assert(0 <= worker_i && (size_t) worker_i < _max_task_num, "oob");
      950 +    assert(_count_marked_bytes != NULL, "uninitialized");
      951 +    size_t* marked_bytes_array = _count_marked_bytes[worker_i];
      952 +    assert(marked_bytes_array != NULL, "uninitialized");
      953 +    return marked_bytes_array;
      954 +  }
      955 +
      956 +  // Counts the size of the given memory region in the the given
      957 +  // marked_bytes array slot for the given HeapRegion.
      958 +  // Sets the bits in the given card bitmap that are associated with the
      959 +  // cards that are spanned by the memory region.
      960 +  inline void count_region(MemRegion mr, HeapRegion* hr,
      961 +                           size_t* marked_bytes_array,
      962 +                           BitMap* task_card_bm);
      963 +  
      964 +  // Counts the given memory region in the ask/worker counting
      965 +  // data structures for the given worker id.
      966 +  inline void count_region(MemRegion mr, int worker_i);
      967 +  
      968 +  // Counts the given object in the given task/worker counting
      969 +  // data structures.
      970 +  inline void count_object(oop obj, HeapRegion* hr,
      971 +                           size_t* marked_bytes_array,
      972 +                           BitMap* task_card_bm);
      973 +  
      974 +  // Counts the given object in the task/worker counting data
      975 +  // structures for the given worker id.
      976 +  inline void count_object(oop obj, HeapRegion* hr, int worker_i);
      977 +  
      978 +  // Attempts to mark the given object and, if successful, counts
      979 +  // the object in the given task/worker counting structures.
      980 +  inline bool par_mark_and_count(oop obj, HeapRegion* hr,
      981 +                                 size_t* marked_bytes_array,
      982 +                                 BitMap* task_card_bm);
      983 +
      984 +  // Attempts to mark the given object and, if successful, counts
      985 +  // the object in the task/worker counting structures for the
      986 +  // given worker id.
      987 +  inline bool par_mark_and_count(oop obj, HeapRegion* hr, int worker_i);
      988 +
      989 +  // Similar to the above routine but we don't know the heap region that
      990 +  // contains the object to be marked/counted, which this routine looks up.
      991 +  inline bool par_mark_and_count(oop obj, int worker_i);
      992 +
      993 +  // Unconditionally mark the given object, and unconditinally count
      994 +  // the object in the counting structures for worker id 0.
      995 +  // Should *not* be called from parallel code.
      996 +  inline bool mark_and_count(oop obj, HeapRegion* hr);
      997 + 
      998 +  // Similar to the above routine but we don't know the heap region that
      999 +  // contains the object to be marked/counted, which this routine looks up.
     1000 +  // Should *not* be called from parallel code.
     1001 +  inline bool mark_and_count(oop obj);
     1002 +
     1003 +  // Clears the count data for the given region from _all_ of
     1004 +  // the per-task counting data structures.
     1005 +  void clear_count_data_for_heap_region(HeapRegion* hr);
     1006 +
     1007 +protected:
     1008 +  // Clear all the per-task bitmaps and arrays used to store the
     1009 +  // counting data.
     1010 +  void clear_all_count_data();
     1011 +
     1012 +  // Aggregates the counting data for each worker/task
     1013 +  // that was constructed while marking. Also sets
     1014 +  // the amount of marked bytes for each region and
     1015 +  // the top at concurrent mark count.
     1016 +  void aggregate_and_clear_count_data();
     1017 +
     1018 +  // Verification routine
     1019 +  void verify_count_data();
 904 1020  };
 905 1021  
 906 1022  // A class representing a marking task.
 907 1023  class CMTask : public TerminatorTerminator {
 908 1024  private:
 909 1025    enum PrivateConstants {
 910 1026      // the regular clock call is called once the scanned words reaches
 911 1027      // this limit
 912 1028      words_scanned_period          = 12*1024,
 913 1029      // the regular clock call is called once the number of visited
↓ open down ↓ 98 lines elided ↑ open up ↑
1012 1128    double                      _termination_start_time_ms;
1013 1129  
1014 1130    // true when the task is during a concurrent phase, false when it is
1015 1131    // in the remark phase (so, in the latter case, we do not have to
1016 1132    // check all the things that we have to check during the concurrent
1017 1133    // phase, i.e. SATB buffer availability...)
1018 1134    bool                        _concurrent;
1019 1135  
1020 1136    TruncatedSeq                _marking_step_diffs_ms;
1021 1137  
     1138 +  // Counting data structures. Embedding the task's marked_bytes_array
     1139 +  // and card bitmap into the actual task saves having to go through
     1140 +  // the ConcurrentMark object.
     1141 +  size_t*                     _marked_bytes_array;
     1142 +  BitMap*                     _card_bm;
     1143 +
1022 1144    // LOTS of statistics related with this task
1023 1145  #if _MARKING_STATS_
1024 1146    NumberSeq                   _all_clock_intervals_ms;
1025 1147    double                      _interval_start_time_ms;
1026 1148  
1027 1149    int                         _aborted;
1028 1150    int                         _aborted_overflow;
1029 1151    int                         _aborted_cm_aborted;
1030 1152    int                         _aborted_yield;
1031 1153    int                         _aborted_timed_out;
↓ open down ↓ 144 lines elided ↑ open up ↑
1176 1298      _finger = new_finger;
1177 1299    }
1178 1300  
1179 1301    // moves the region finger to a new location
1180 1302    inline void move_region_finger_to(HeapWord* new_finger) {
1181 1303      assert(new_finger < _cm->finger(), "invariant");
1182 1304      _region_finger = new_finger;
1183 1305    }
1184 1306  
1185 1307    CMTask(int task_num, ConcurrentMark *cm,
     1308 +         size_t* marked_bytes, BitMap* card_bm,
1186 1309           CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
1187 1310  
1188 1311    // it prints statistics associated with this task
1189 1312    void print_stats();
1190 1313  
1191 1314  #if _MARKING_STATS_
1192 1315    void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
1193 1316  #endif // _MARKING_STATS_
1194 1317  };
1195 1318  
↓ open down ↓ 51 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX