src/share/vm/gc_implementation/g1/concurrentMark.hpp

Print this page
rev 2896 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by: brutisso


 413 
 414   // marking tasks
 415   size_t                  _max_task_num; // maximum task number
 416   size_t                  _active_tasks; // task num currently active
 417   CMTask**                _tasks;        // task queue array (max_task_num len)
 418   CMTaskQueueSet*         _task_queues;  // task queue set
 419   ParallelTaskTerminator  _terminator;   // for termination
 420 
 421   // Two sync barriers that are used to synchronise tasks when an
 422   // overflow occurs. The algorithm is the following. All tasks enter
 423   // the first one to ensure that they have all stopped manipulating
 424   // the global data structures. After they exit it, they re-initialise
 425   // their data structures and task 0 re-initialises the global data
 426   // structures. Then, they enter the second sync barrier. This
 427   // ensures that no task starts doing work before all data
 428   // structures (local and global) have been re-initialised. When they
 429   // exit it, they are free to start working again.
 430   WorkGangBarrierSync     _first_overflow_barrier_sync;
 431   WorkGangBarrierSync     _second_overflow_barrier_sync;
 432 
 433 
 434   // this is set by any task, when an overflow on the global data
 435   // structures is detected.
 436   volatile bool           _has_overflown;
 437   // true: marking is concurrent, false: we're in remark
 438   volatile bool           _concurrent;
 439   // set at the end of a Full GC so that marking aborts
 440   volatile bool           _has_aborted;
 441 
 442   // used when remark aborts due to an overflow to indicate that
 443   // another concurrent marking phase should start
 444   volatile bool           _restart_for_overflow;
 445 
 446   // This is true from the very start of concurrent marking until the
 447   // point when all the tasks complete their work. It is really used
 448   // to determine the points between the end of concurrent marking and
 449   // time of remark.
 450   volatile bool           _concurrent_marking_in_progress;
 451 
 452   // verbose level
 453   CMVerboseLevel          _verbose_level;


 565   // Methods to enter the two overflow sync barriers
 566   void enter_first_sync_barrier(int task_num);
 567   void enter_second_sync_barrier(int task_num);
 568 
 569   ForceOverflowSettings* force_overflow_conc() {
         // Hands back the address of the _force_overflow_conc member.
 570     return &_force_overflow_conc;
 571   }
 572 
 573   ForceOverflowSettings* force_overflow_stw() {
         // Hands back the address of the _force_overflow_stw member.
 574     return &_force_overflow_stw;
 575   }
 576 
 577   ForceOverflowSettings* force_overflow() {
         // Picks the settings object for the current phase: the result of
         // force_overflow_conc() when concurrent() is true, otherwise the
         // result of force_overflow_stw().
 578     if (concurrent()) {
 579       return force_overflow_conc();
 580     } else {
 581       return force_overflow_stw();
 582     }
 583   }
 584 





















 585 public:
 586   // Manipulation of the global mark stack.
 587   // Notice that the first mark_stack_push is CAS-based, whereas the
 588   // two below are Mutex-based. This is OK since the first one is only
 589   // called during evacuation pauses and doesn't compete with the
 590   // other two (which are called by the marking tasks during
 591   // concurrent marking or remark).
 592   bool mark_stack_push(oop p) {
         // Pushes p with par_push() and then asks the stack whether it has
         // overflowed. On overflow the condition is recorded through
         // set_has_overflown() and false is returned; otherwise true.
 593     _markStack.par_push(p);
 594     if (_markStack.overflow()) {
 595       set_has_overflown();
 596       return false;
 597     }
 598     return true;
 599   }
 600   bool mark_stack_push(oop* arr, int n) {
 601     _markStack.par_push_arr(arr, n);
 602     if (_markStack.overflow()) {
 603       set_has_overflown();
 604       return false;


 686   }
 687 
 688   void update_accum_task_vtime(int i, double vtime) {
         // Adds vtime to the running total kept in slot i of
         // _accum_task_vtime. No range check is done on i here.
 689     _accum_task_vtime[i] += vtime;
 690   }
 691 
 692   double all_task_accum_vtime() {
         // Returns the sum of the first _max_task_num entries of
         // _accum_task_vtime.
 693     double ret = 0.0;
 694     for (int i = 0; i < (int)_max_task_num; ++i)
 695       ret += _accum_task_vtime[i];
 696     return ret;
 697   }
 698 
 699   // Attempts to steal an object from the task queues of other tasks
       // by forwarding task_num, hash_seed and obj to _task_queues->steal().
       // The boolean result of that call is returned unchanged.
 700   bool try_stealing(int task_num, int* hash_seed, oop& obj) {
 701     return _task_queues->steal(task_num, hash_seed, obj);
 702   }
 703 
 704   // It grays an object by first marking it. Then, if it's behind the
 705   // global finger, it also pushes it on the global stack.
 706   void deal_with_reference(oop obj);
 707 
 708   ConcurrentMark(ReservedSpace rs, int max_regions);
 709   ~ConcurrentMark();
 710   ConcurrentMarkThread* cmThread() { return _cmThread; }
 711 
 712   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
 713   CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
 714 
 715   // Returns the number of GC threads to be used in a concurrent
 716   // phase based on the number of GC threads being used in a STW
 717   // phase.
 718   size_t scale_parallel_threads(size_t n_par_threads);
 719 
 720   // Calculates the number of GC threads to be used in a concurrent phase.
 721   size_t calc_parallel_marking_threads();
 722 
 723   // The following three are interaction between CM and
 724   // G1CollectedHeap
 725 
 726   // This notifies CM that a root during initial-mark needs to be
 727   // grayed and it's MT-safe. Currently, we just mark it. But, in the
 728   // future, we can experiment with pushing it on the stack and we can
 729   // do this without changing G1CollectedHeap.
 730   void grayRoot(oop p);

 731   // It's used during evacuation pauses to gray a region, if
 732   // necessary, and it's MT-safe. It assumes that the caller has
 733   // marked any objects on that region. If _should_gray_objects is
 734   // true and we're still doing concurrent marking, the region is
 735   // pushed on the region stack, if it is located below the global
 736   // finger, otherwise we do nothing.
 737   void grayRegionIfNecessary(MemRegion mr);

 738   // It's used during evacuation pauses to mark and, if necessary,
 739   // gray a single object and it's MT-safe. It assumes the caller did
 740   // not mark the object. If _should_gray_objects is true and we're
 741   // still doing concurrent marking, the object is pushed on the
 742   // global stack, if it is located below the global finger, otherwise
 743   // we do nothing.
 744   void markAndGrayObjectIfNecessary(oop p);
 745 
 746   // It iterates over the heap and for each object it comes across it
 747   // will dump the contents of its reference fields, as well as
 748   // liveness information for the object and its referents. The dump
 749   // will be written to a file with the following name:
 750   // G1PrintReachableBaseFile + "." + str.
 751   // vo decides whether the prev (vo == UsePrevMarking), the next
 752   // (vo == UseNextMarking) marking information, or the mark word
 753   // (vo == UseMarkWord) will be used to determine the liveness of
 754   // each object / referent.
 755   // If all is true, all objects in the heap will be dumped, otherwise
 756   // only the live ones. In the dump the following symbols / abbreviations
 757   // are used:
 758   //   M : an explicitly live object (its bitmap bit is set)
 759   //   > : an implicitly live object (over tams)
 760   //   O : an object outside the G1 heap (typically: in the perm gen)
 761   //   NOT : a reference field whose referent is not live
 762   //   AND MARKED : indicates that an object is both explicitly and
 763   //   implicitly live (it should be one or the other, not both)
 764   void print_reachable(const char* str,


 768   void clearNextBitmap();
 769 
 770   // These two do the work that needs to be done before and after the
 771   // initial root checkpoint. Since this checkpoint can be done at two
 772   // different points (i.e. an explicit pause or piggy-backed on a
 773   // young collection), then it's nice to be able to easily share the
 774   // pre/post code. It might be the case that we can put everything in
 775   // the post method. TP
 776   void checkpointRootsInitialPre();
 777   void checkpointRootsInitialPost();
 778 
 779   // Do concurrent phase of marking, to a tentative transitive closure.
 780   void markFromRoots();
 781 
 782   // Process all unprocessed SATB buffers. It is called at the
 783   // beginning of an evacuation pause.
 784   void drainAllSATBBuffers();
 785 
 786   void checkpointRootsFinal(bool clear_all_soft_refs);
 787   void checkpointRootsFinalWork();
 788   void calcDesiredRegions();
 789   void cleanup();
 790   void completeCleanup();
 791 
 792   // Mark in the previous bitmap.  NB: this is usually read-only, so use
 793   // this carefully!
 794   void markPrev(oop p);
 795   void clear(oop p);



 796   // Clears marks for all objects in the given range, for both prev and
 797   // next bitmaps.  NB: the previous bitmap is usually read-only, so use
 798   // this carefully!
 799   void clearRangeBothMaps(MemRegion mr);
 800 
 801   // Record the current top of the mark and region stacks; a
 802   // subsequent oops_do() on the mark stack and
 803   // invalidate_entries_into_cset() on the region stack will iterate
 804   // only over indices valid at the time of this call.
 805   void set_oops_do_bound() {
         // Delegates to the mark stack first, then to the region stack.
 806     _markStack.set_oops_do_bound();
 807     _regionStack.set_oops_do_bound();
 808   }
 809   // Iterate over the oops in the mark stack and all local queues. It
 810   // also calls invalidate_entries_into_cset() on the region stack.
 811   void oops_do(OopClosure* f);
 812   // It is called at the end of an evacuation pause during marking so
 813   // that CM is notified of where the new end of the heap is. It
 814   // doesn't do anything if concurrent_marking_in_progress() is false,
 815   // unless the force parameter is true.


 884 
 885   void print_summary_info();
 886 
 887   void print_worker_threads_on(outputStream* st) const;
 888 
 889   // The following indicate whether a given verbose level has been
 890   // set. Notice that anything above stats is conditional on
 891   // _MARKING_VERBOSE_ having been set to 1.
 892   bool verbose_stats() {
         // True when _verbose_level is at least stats_verbose.
 893     return _verbose_level >= stats_verbose;
 894   }
 895   bool verbose_low() {
         // Needs _MARKING_VERBOSE_ to be non-zero and a level of at least
         // low_verbose.
 896     return _MARKING_VERBOSE_ && _verbose_level >= low_verbose;
 897   }
 898   bool verbose_medium() {
         // Needs _MARKING_VERBOSE_ to be non-zero and a level of at least
         // medium_verbose.
 899     return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
 900   }
 901   bool verbose_high() {
         // Needs _MARKING_VERBOSE_ to be non-zero and a level of at least
         // high_verbose.
 902     return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
 903   }




























































































 904 };
 905 
 906 // A class representing a marking task.
 907 class CMTask : public TerminatorTerminator {
 908 private:
 909   enum PrivateConstants {
 910     // the regular clock call is made once the number of scanned
 911     // words reaches this limit
 912     words_scanned_period          = 12*1024,
 913     // the regular clock call is made once the number of visited
 914     // references reaches this limit
 915     refs_reached_period           = 384,
 916     // initial value for the hash seed, used in the work stealing code
 917     init_hash_seed                = 17,
 918     // how many entries will be transferred between the global stack
 919     // and the local queues
 920     global_stack_transfer_size    = 16
 921   };
 922 
 923   int                         _task_id;


1002   // dealing with them)
1003   bool                        _draining_satb_buffers;
1004 
1005   // number sequence of past step times
1006   NumberSeq                   _step_times_ms;
1007   // elapsed time of this task
1008   double                      _elapsed_time_ms;
1009   // termination time of this task
1010   double                      _termination_time_ms;
1011   // when this task got into the termination protocol
1012   double                      _termination_start_time_ms;
1013 
1014   // true when the task is during a concurrent phase, false when it is
1015   // in the remark phase (so, in the latter case, we do not have to
1016   // check all the things that we have to check during the concurrent
1017   // phase, i.e. SATB buffer availability...)
1018   bool                        _concurrent;
1019 
1020   TruncatedSeq                _marking_step_diffs_ms;
1021 






1022   // LOTS of statistics related with this task
1023 #if _MARKING_STATS_
1024   NumberSeq                   _all_clock_intervals_ms;
1025   double                      _interval_start_time_ms;
1026 
1027   int                         _aborted;
1028   int                         _aborted_overflow;
1029   int                         _aborted_cm_aborted;
1030   int                         _aborted_yield;
1031   int                         _aborted_timed_out;
1032   int                         _aborted_satb;
1033   int                         _aborted_termination;
1034 
1035   int                         _steal_attempts;
1036   int                         _steals;
1037 
1038   int                         _clock_due_to_marking;
1039   int                         _clock_due_to_scanning;
1040 
1041   int                         _local_pushes;


1166   // It keeps picking SATB buffers and processing them until no SATB
1167   // buffers are available.
1168   void drain_satb_buffers();
1169   // It keeps popping regions from the region stack and processing
1170   // them until the region stack is empty.
1171   void drain_region_stack(BitMapClosure* closure);
1172 
1173   // moves the local finger to a new location
       // The assert requires that the finger does not move backwards
       // (new_finger >= _finger) and that it stays below _region_limit.
1174   inline void move_finger_to(HeapWord* new_finger) {
1175     assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
1176     _finger = new_finger;
1177   }
1178 
1179   // moves the region finger to a new location
       // The assert requires the new location to be strictly below the
       // value returned by _cm->finger().
1180   inline void move_region_finger_to(HeapWord* new_finger) {
1181     assert(new_finger < _cm->finger(), "invariant");
1182     _region_finger = new_finger;
1183   }
1184 
1185   CMTask(int task_num, ConcurrentMark *cm,

1186          CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
1187 
1188   // it prints statistics associated with this task
1189   void print_stats();
1190 
1191 #if _MARKING_STATS_
1192   void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
1193 #endif // _MARKING_STATS_
1194 };
1195 
1196 // Class that's used to print out per-region liveness
1197 // information. It's currently used at the end of marking and also
1198 // after we sort the old regions at the end of the cleanup operation.
1199 class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
1200 private:
1201   outputStream* _out;
1202 
1203   // Accumulators for these values.
1204   size_t _total_used_bytes;
1205   size_t _total_capacity_bytes;




 413 
 414   // marking tasks
 415   size_t                  _max_task_num; // maximum task number
 416   size_t                  _active_tasks; // task num currently active
 417   CMTask**                _tasks;        // task queue array (max_task_num len)
 418   CMTaskQueueSet*         _task_queues;  // task queue set
 419   ParallelTaskTerminator  _terminator;   // for termination
 420 
 421   // Two sync barriers that are used to synchronise tasks when an
 422   // overflow occurs. The algorithm is the following. All tasks enter
 423   // the first one to ensure that they have all stopped manipulating
 424   // the global data structures. After they exit it, they re-initialise
 425   // their data structures and task 0 re-initialises the global data
 426   // structures. Then, they enter the second sync barrier. This
 427   // ensures that no task starts doing work before all data
 428   // structures (local and global) have been re-initialised. When they
 429   // exit it, they are free to start working again.
 430   WorkGangBarrierSync     _first_overflow_barrier_sync;
 431   WorkGangBarrierSync     _second_overflow_barrier_sync;
 432 

 433   // this is set by any task, when an overflow on the global data
 434   // structures is detected.
 435   volatile bool           _has_overflown;
 436   // true: marking is concurrent, false: we're in remark
 437   volatile bool           _concurrent;
 438   // set at the end of a Full GC so that marking aborts
 439   volatile bool           _has_aborted;
 440 
 441   // used when remark aborts due to an overflow to indicate that
 442   // another concurrent marking phase should start
 443   volatile bool           _restart_for_overflow;
 444 
 445   // This is true from the very start of concurrent marking until the
 446   // point when all the tasks complete their work. It is really used
 447   // to determine the points between the end of concurrent marking and
 448   // time of remark.
 449   volatile bool           _concurrent_marking_in_progress;
 450 
 451   // verbose level
 452   CMVerboseLevel          _verbose_level;


 564   // Methods to enter the two overflow sync barriers
 565   void enter_first_sync_barrier(int task_num);
 566   void enter_second_sync_barrier(int task_num);
 567 
 568   ForceOverflowSettings* force_overflow_conc() {
         // Hands back the address of the _force_overflow_conc member.
 569     return &_force_overflow_conc;
 570   }
 571 
 572   ForceOverflowSettings* force_overflow_stw() {
         // Hands back the address of the _force_overflow_stw member.
 573     return &_force_overflow_stw;
 574   }
 575 
 576   ForceOverflowSettings* force_overflow() {
         // Picks the settings object for the current phase: the result of
         // force_overflow_conc() when concurrent() is true, otherwise the
         // result of force_overflow_stw().
 577     if (concurrent()) {
 578       return force_overflow_conc();
 579     } else {
 580       return force_overflow_stw();
 581     }
 582   }
 583 
 584   // Live Data Counting data structures...
 585   // These data structures are initialized at the start of
 586   // marking. They are written to while marking is active.
 587   // They are aggregated during remark; the aggregated values
 588   // are then used to populate the _region_bm, _card_bm, and
 589   // the total live bytes, which are then subsequently updated
 590   // during cleanup.
 591 
 592   // An array of bitmaps (one bit map per task). Each bitmap
 593   // is used to record the cards spanned by the live objects
 594   // marked by that task/worker.
 595   BitMap*  _count_card_bitmaps;
 596 
 597   // Used to record the number of marked live bytes
 598   // (for each region, by worker thread).
 599   size_t** _count_marked_bytes;
 600 
 601   // Card index of the bottom of the G1 heap. Used for biasing indices into
 602   // the card bitmaps.
 603   intptr_t _heap_bottom_card_num;
 604 
 605 public:
 606   // Manipulation of the global mark stack.
 607   // Notice that the first mark_stack_push is CAS-based, whereas the
 608   // two below are Mutex-based. This is OK since the first one is only
 609   // called during evacuation pauses and doesn't compete with the
 610   // other two (which are called by the marking tasks during
 611   // concurrent marking or remark).
 612   bool mark_stack_push(oop p) {
         // Pushes p with par_push() and then asks the stack whether it has
         // overflowed. On overflow the condition is recorded through
         // set_has_overflown() and false is returned; otherwise true.
 613     _markStack.par_push(p);
 614     if (_markStack.overflow()) {
 615       set_has_overflown();
 616       return false;
 617     }
 618     return true;
 619   }
 620   bool mark_stack_push(oop* arr, int n) {
 621     _markStack.par_push_arr(arr, n);
 622     if (_markStack.overflow()) {
 623       set_has_overflown();
 624       return false;


 706   }
 707 
 708   void update_accum_task_vtime(int i, double vtime) {
         // Adds vtime to the running total kept in slot i of
         // _accum_task_vtime. No range check is done on i here.
 709     _accum_task_vtime[i] += vtime;
 710   }
 711 
 712   double all_task_accum_vtime() {
         // Returns the sum of the first _max_task_num entries of
         // _accum_task_vtime.
 713     double ret = 0.0;
 714     for (int i = 0; i < (int)_max_task_num; ++i)
 715       ret += _accum_task_vtime[i];
 716     return ret;
 717   }
 718 
 719   // Attempts to steal an object from the task queues of other tasks
       // by forwarding task_num, hash_seed and obj to _task_queues->steal().
       // The boolean result of that call is returned unchanged.
 720   bool try_stealing(int task_num, int* hash_seed, oop& obj) {
 721     return _task_queues->steal(task_num, hash_seed, obj);
 722   }
 723 
 724   // It grays an object by first marking it. Then, if it's behind the
 725   // global finger, it also pushes it on the global stack.
 726   void deal_with_reference(oop obj, int worker_i);
 727 
 728   ConcurrentMark(ReservedSpace rs, int max_regions);
 729 
 730   ConcurrentMarkThread* cmThread() { return _cmThread; }
 731 
 732   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
 733   CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
 734 
 735   // Returns the number of GC threads to be used in a concurrent
 736   // phase based on the number of GC threads being used in a STW
 737   // phase.
 738   size_t scale_parallel_threads(size_t n_par_threads);
 739 
 740   // Calculates the number of GC threads to be used in a concurrent phase.
 741   size_t calc_parallel_marking_threads();
 742 
 743   // The following three are interaction between CM and
 744   // G1CollectedHeap
 745 
 746   // This notifies CM that a root during initial-mark needs to be
 747   // grayed and it's MT-safe. Currently, we just mark it. But, in the
 748   // future, we can experiment with pushing it on the stack and we can
 749   // do this without changing G1CollectedHeap.
 750   void grayRoot(oop p, int worker_i);
 751 
 752   // It's used during evacuation pauses to gray a region, if
 753   // necessary, and it's MT-safe. It assumes that the caller has
 754   // marked any objects on that region. If _should_gray_objects is
 755   // true and we're still doing concurrent marking, the region is
 756   // pushed on the region stack, if it is located below the global
 757   // finger, otherwise we do nothing.
 758   void grayRegionIfNecessary(MemRegion mr);
 759 
 760   // It's used during evacuation pauses to mark and, if necessary,
 761   // gray a single object and it's MT-safe. It assumes the caller did
 762   // not mark the object. If _should_gray_objects is true and we're
 763   // still doing concurrent marking, the object is pushed on the
 764   // global stack, if it is located below the global finger, otherwise
 765   // we do nothing.
 766   void markAndGrayObjectIfNecessary(oop p, int worker_i);
 767 
 768   // It iterates over the heap and for each object it comes across it
 769   // will dump the contents of its reference fields, as well as
 770   // liveness information for the object and its referents. The dump
 771   // will be written to a file with the following name:
 772   // G1PrintReachableBaseFile + "." + str.
 773   // vo decides whether the prev (vo == UsePrevMarking), the next
 774   // (vo == UseNextMarking) marking information, or the mark word
 775   // (vo == UseMarkWord) will be used to determine the liveness of
 776   // each object / referent.
 777   // If all is true, all objects in the heap will be dumped, otherwise
 778   // only the live ones. In the dump the following symbols / abbreviations
 779   // are used:
 780   //   M : an explicitly live object (its bitmap bit is set)
 781   //   > : an implicitly live object (over tams)
 782   //   O : an object outside the G1 heap (typically: in the perm gen)
 783   //   NOT : a reference field whose referent is not live
 784   //   AND MARKED : indicates that an object is both explicitly and
 785   //   implicitly live (it should be one or the other, not both)
 786   void print_reachable(const char* str,


 790   void clearNextBitmap();
 791 
 792   // These two do the work that needs to be done before and after the
 793   // initial root checkpoint. Since this checkpoint can be done at two
 794   // different points (i.e. an explicit pause or piggy-backed on a
 795   // young collection), then it's nice to be able to easily share the
 796   // pre/post code. It might be the case that we can put everything in
 797   // the post method. TP
 798   void checkpointRootsInitialPre();
 799   void checkpointRootsInitialPost();
 800 
 801   // Do concurrent phase of marking, to a tentative transitive closure.
 802   void markFromRoots();
 803 
 804   // Process all unprocessed SATB buffers. It is called at the
 805   // beginning of an evacuation pause.
 806   void drainAllSATBBuffers();
 807 
 808   void checkpointRootsFinal(bool clear_all_soft_refs);
 809   void checkpointRootsFinalWork();

 810   void cleanup();
 811   void completeCleanup();
 812 
 813   // Mark in the previous bitmap.  NB: this is usually read-only, so use
 814   // this carefully!
 815   void markPrev(oop p);
 816 
 817   // Clears the mark in the next bitmap for the given object.
 818   void clear_mark(oop p);
 819 
 820   // Clears marks for all objects in the given range, for both prev and
 821   // next bitmaps.  NB: the previous bitmap is usually read-only, so use
 822   // this carefully!
 823   void clearRangeBothMaps(MemRegion mr);
 824 
 825   // Record the current top of the mark and region stacks; a
 826   // subsequent oops_do() on the mark stack and
 827   // invalidate_entries_into_cset() on the region stack will iterate
 828   // only over indices valid at the time of this call.
 829   void set_oops_do_bound() {
         // Delegates to the mark stack first, then to the region stack.
 830     _markStack.set_oops_do_bound();
 831     _regionStack.set_oops_do_bound();
 832   }
 833   // Iterate over the oops in the mark stack and all local queues. It
 834   // also calls invalidate_entries_into_cset() on the region stack.
 835   void oops_do(OopClosure* f);
 836   // It is called at the end of an evacuation pause during marking so
 837   // that CM is notified of where the new end of the heap is. It
 838   // doesn't do anything if concurrent_marking_in_progress() is false,
 839   // unless the force parameter is true.


 908 
 909   void print_summary_info();
 910 
 911   void print_worker_threads_on(outputStream* st) const;
 912 
 913   // The following indicate whether a given verbose level has been
 914   // set. Notice that anything above stats is conditional on
 915   // _MARKING_VERBOSE_ having been set to 1.
 916   bool verbose_stats() {
         // True when _verbose_level is at least stats_verbose.
 917     return _verbose_level >= stats_verbose;
 918   }
 919   bool verbose_low() {
         // Needs _MARKING_VERBOSE_ to be non-zero and a level of at least
         // low_verbose.
 920     return _MARKING_VERBOSE_ && _verbose_level >= low_verbose;
 921   }
 922   bool verbose_medium() {
         // Needs _MARKING_VERBOSE_ to be non-zero and a level of at least
         // medium_verbose.
 923     return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
 924   }
 925   bool verbose_high() {
         // Needs _MARKING_VERBOSE_ to be non-zero and a level of at least
         // high_verbose.
 926     return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
 927   }
 928 
 929   // Counting data structure accessors
 930 
 931   // Returns the card number of the bottom of the G1 heap.
 932   // Used in biasing indices into accounting card bitmaps.
 933   intptr_t heap_bottom_card_num() const {
 934     return _heap_bottom_card_num;
 935   }
 936 
 937   // Returns the card bitmap for a given task or worker id.
       // worker_i must lie in [0, _max_task_num). The result is the address
       // of element worker_i of _count_card_bitmaps. Asserts also check that
       // the array has been set up and that the selected bitmap has the same
       // size as _card_bm.
 938   BitMap* count_card_bitmap_for(int worker_i) {
 939     assert(0 <= worker_i && (size_t) worker_i < _max_task_num, "oob");
 940     assert(_count_card_bitmaps != NULL, "uninitialized");
 941     BitMap* task_card_bm = &_count_card_bitmaps[worker_i];
 942     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
 943     return task_card_bm;
 944   }
 945 
 946   // Returns the array containing the marked bytes for each region,
 947   // for the given worker or task id.
       // worker_i must lie in [0, _max_task_num). Asserts check that both
       // the outer _count_marked_bytes table and the selected per-worker
       // array are non-NULL before the array is returned.
 948   size_t* count_marked_bytes_array_for(int worker_i) {
 949     assert(0 <= worker_i && (size_t) worker_i < _max_task_num, "oob");
 950     assert(_count_marked_bytes != NULL, "uninitialized");
 951     size_t* marked_bytes_array = _count_marked_bytes[worker_i];
 952     assert(marked_bytes_array != NULL, "uninitialized");
 953     return marked_bytes_array;
 954   }
 955 
 956   // Counts the size of the given memory region in the given
 957   // marked_bytes array slot for the given HeapRegion.
 958   // Sets the bits in the given card bitmap that are associated with the
 959   // cards that are spanned by the memory region.
 960   inline void count_region(MemRegion mr, HeapRegion* hr,
 961                            size_t* marked_bytes_array,
 962                            BitMap* task_card_bm);
 963   
 964   // Counts the given memory region in the task/worker counting
 965   // data structures for the given worker id.
 966   inline void count_region(MemRegion mr, int worker_i);
 967   
 968   // Counts the given object in the given task/worker counting
 969   // data structures.
 970   inline void count_object(oop obj, HeapRegion* hr,
 971                            size_t* marked_bytes_array,
 972                            BitMap* task_card_bm);
 973   
 974   // Counts the given object in the task/worker counting data
 975   // structures for the given worker id.
 976   inline void count_object(oop obj, HeapRegion* hr, int worker_i);
 977   
 978   // Attempts to mark the given object and, if successful, counts
 979   // the object in the given task/worker counting structures.
 980   inline bool par_mark_and_count(oop obj, HeapRegion* hr,
 981                                  size_t* marked_bytes_array,
 982                                  BitMap* task_card_bm);
 983 
 984   // Attempts to mark the given object and, if successful, counts
 985   // the object in the task/worker counting structures for the
 986   // given worker id.
 987   inline bool par_mark_and_count(oop obj, HeapRegion* hr, int worker_i);
 988 
 989   // Similar to the above routine but we don't know the heap region that
 990   // contains the object to be marked/counted, which this routine looks up.
 991   inline bool par_mark_and_count(oop obj, int worker_i);
 992 
 993   // Unconditionally mark the given object, and unconditionally count
 994   // the object in the counting structures for worker id 0.
 995   // Should *not* be called from parallel code.
 996   inline bool mark_and_count(oop obj, HeapRegion* hr);
 997  
 998   // Similar to the above routine but we don't know the heap region that
 999   // contains the object to be marked/counted, which this routine looks up.
1000   // Should *not* be called from parallel code.
1001   inline bool mark_and_count(oop obj);
1002 
1003   // Clears the count data for the given region from _all_ of
1004   // the per-task counting data structures.
1005   void clear_count_data_for_heap_region(HeapRegion* hr);
1006 
1007 protected:
1008   // Clear all the per-task bitmaps and arrays used to store the
1009   // counting data.
1010   void clear_all_count_data();
1011 
1012   // Aggregates the counting data for each worker/task
1013   // that was constructed while marking. Also sets
1014   // the amount of marked bytes for each region and
1015   // the top at concurrent mark count.
1016   void aggregate_and_clear_count_data();
1017 
1018   // Verification routine
1019   void verify_count_data();
1020 };
1021 
1022 // A class representing a marking task.
1023 class CMTask : public TerminatorTerminator {
1024 private:
1025   enum PrivateConstants {
1026     // the regular clock call is made once the number of scanned
1027     // words reaches this limit
1028     words_scanned_period          = 12*1024,
1029     // the regular clock call is made once the number of visited
1030     // references reaches this limit
1031     refs_reached_period           = 384,
1032     // initial value for the hash seed, used in the work stealing code
1033     init_hash_seed                = 17,
1034     // how many entries will be transferred between the global stack
1035     // and the local queues
1036     global_stack_transfer_size    = 16
1037   };
1038 
1039   int                         _task_id;


1118   // dealing with them)
1119   bool                        _draining_satb_buffers;
1120 
1121   // number sequence of past step times
1122   NumberSeq                   _step_times_ms;
1123   // elapsed time of this task
1124   double                      _elapsed_time_ms;
1125   // termination time of this task
1126   double                      _termination_time_ms;
1127   // when this task got into the termination protocol
1128   double                      _termination_start_time_ms;
1129 
1130   // true when the task is during a concurrent phase, false when it is
1131   // in the remark phase (so, in the latter case, we do not have to
1132   // check all the things that we have to check during the concurrent
1133   // phase, i.e. SATB buffer availability...)
1134   bool                        _concurrent;
1135 
1136   TruncatedSeq                _marking_step_diffs_ms;
1137 
1138   // Counting data structures. Embedding the task's marked_bytes_array
1139   // and card bitmap into the actual task saves having to go through
1140   // the ConcurrentMark object.
1141   size_t*                     _marked_bytes_array;
1142   BitMap*                     _card_bm;
1143 
1144   // LOTS of statistics related with this task
1145 #if _MARKING_STATS_
1146   NumberSeq                   _all_clock_intervals_ms;
1147   double                      _interval_start_time_ms;
1148 
1149   int                         _aborted;
1150   int                         _aborted_overflow;
1151   int                         _aborted_cm_aborted;
1152   int                         _aborted_yield;
1153   int                         _aborted_timed_out;
1154   int                         _aborted_satb;
1155   int                         _aborted_termination;
1156 
1157   int                         _steal_attempts;
1158   int                         _steals;
1159 
1160   int                         _clock_due_to_marking;
1161   int                         _clock_due_to_scanning;
1162 
1163   int                         _local_pushes;


1288   // It keeps picking SATB buffers and processing them until no SATB
1289   // buffers are available.
1290   void drain_satb_buffers();
1291   // It keeps popping regions from the region stack and processing
1292   // them until the region stack is empty.
1293   void drain_region_stack(BitMapClosure* closure);
1294 
1295   // moves the local finger to a new location
       // The assert requires that the finger does not move backwards
       // (new_finger >= _finger) and that it stays below _region_limit.
1296   inline void move_finger_to(HeapWord* new_finger) {
1297     assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
1298     _finger = new_finger;
1299   }
1300 
1301   // moves the region finger to a new location
       // The assert requires the new location to be strictly below the
       // value returned by _cm->finger().
1302   inline void move_region_finger_to(HeapWord* new_finger) {
1303     assert(new_finger < _cm->finger(), "invariant");
1304     _region_finger = new_finger;
1305   }
1306 
1307   CMTask(int task_num, ConcurrentMark *cm,
1308          size_t* marked_bytes, BitMap* card_bm,
1309          CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
1310 
1311   // it prints statistics associated with this task
1312   void print_stats();
1313 
1314 #if _MARKING_STATS_
1315   void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; }
1316 #endif // _MARKING_STATS_
1317 };
1318 
1319 // Class that's used to print out per-region liveness
1320 // information. It's currently used at the end of marking and also
1321 // after we sort the old regions at the end of the cleanup operation.
1322 class G1PrintRegionLivenessInfoClosure: public HeapRegionClosure {
1323 private:
1324   outputStream* _out;
1325 
1326   // Accumulators for these values.
1327   size_t _total_used_bytes;
1328   size_t _total_capacity_bytes;