--- old/src/share/vm/opto/macro.cpp 2016-04-28 11:48:50.252973685 +0200 +++ new/src/share/vm/opto/macro.cpp 2016-04-28 11:48:50.032973695 +0200 @@ -1897,7 +1897,7 @@ Node *prefetch_adr; Node *prefetch; - uint lines = AllocatePrefetchDistance / AllocatePrefetchStepSize; + uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines; uint step_size = AllocatePrefetchStepSize; uint distance = 0; @@ -1926,12 +1926,8 @@ contended_phi_rawmem = pf_phi_rawmem; i_o = pf_phi_abio; } else if( UseTLAB && AllocatePrefetchStyle == 3 ) { - // Insert a prefetch for each allocation. - // This code is used for Sparc with BIS. - Node *pf_region = new RegionNode(3); - Node *pf_phi_rawmem = new PhiNode( pf_region, Type::MEMORY, - TypeRawPtr::BOTTOM ); - transform_later(pf_region); + // Insert a prefetch instruction for each allocation. + // This code is used for SPARC with BIS. // Generate several prefetch instructions. uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines; @@ -1940,10 +1936,15 @@ // Next cache address. Node *cache_adr = new AddPNode(old_eden_top, old_eden_top, - _igvn.MakeConX(distance)); + _igvn.MakeConX(step_size + distance)); transform_later(cache_adr); cache_adr = new CastP2XNode(needgc_false, cache_adr); transform_later(cache_adr); + // For BIS instructions to be emitted, the address must be aligned at cache line size. + // (The VM sets AllocatePrefetchStepSize to the cache line size, unless a value is + // specified at the command line.) If the address is not aligned at cache line size + // boundary, a standard store instruction is triggered (instead of the BIS). For the + // latter, 8-byte alignment is necessary. 
Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1)); cache_adr = new AndXNode(cache_adr, mask); transform_later(cache_adr); --- old/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp 2016-04-28 11:48:50.340973681 +0200 +++ new/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp 2016-04-28 11:48:50.128973691 +0200 @@ -36,6 +36,7 @@ // static member initialization size_t ThreadLocalAllocBuffer::_max_size = 0; +int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0; unsigned ThreadLocalAllocBuffer::_target_refills = 0; GlobalTLABStats* ThreadLocalAllocBuffer::_global_stats = NULL; @@ -215,6 +216,23 @@ _global_stats = new GlobalTLABStats(); + // Need extra space at the end of TLAB, otherwise prefetching + // instructions will fault (due to accessing memory outside of heap). + // The amount of space is the max of the number of lines to + // prefetch for array and for instance allocations. (Extra space must be + // reserved to accommodate both types of allocations.) + // + // Only SPARC-specific BIS instructions are known to fault. (Those + // instructions are generated if AllocatePrefetchStyle==3 and + // AllocatePrefetchInstr==1). To be on the safe side, however, + // extra space is reserved for all combinations of + // AllocatePrefetchStyle and AllocatePrefetchInstr. + + // +1 for rounding up to next cache line, +1 to be safe + int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2; + _reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) / + (int)HeapWordSize; + // During jvm startup, the main (primordial) thread is initialized // before the heap is initialized. So reinitialize it now. 
guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread"); --- old/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp 2016-04-28 11:48:50.316973682 +0200 +++ new/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp 2016-04-28 11:48:50.088973693 +0200 @@ -49,8 +49,9 @@ size_t _refill_waste_limit; // hold onto tlab if free() is larger than this size_t _allocated_before_last_gc; // total bytes allocated up until the last gc - static size_t _max_size; // maximum size of any TLAB - static unsigned _target_refills; // expected number of refills between GCs + static size_t _max_size; // maximum size of any TLAB + static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB + static unsigned _target_refills; // expected number of refills between GCs unsigned _number_of_refills; unsigned _fast_refill_waste; @@ -129,7 +130,7 @@ // Reserve space at the end of TLAB static size_t end_reserve() { int reserve_size = typeArrayOopDesc::header_size(T_INT); - return MAX2(reserve_size, VM_Version::reserve_for_allocation_prefetch()); + return MAX2(reserve_size, _reserve_for_allocation_prefetch); } static size_t alignment_reserve() { return align_object_size(end_reserve()); } static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; } --- old/src/share/vm/runtime/vm_version.cpp 2016-04-28 11:48:50.368973680 +0200 +++ new/src/share/vm/runtime/vm_version.cpp 2016-04-28 11:48:50.108973692 +0200 @@ -43,7 +43,6 @@ bool Abstract_VM_Version::_supports_atomic_getadd8 = false; unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U; unsigned int Abstract_VM_Version::_L1_data_cache_line_size = 0; -int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0; #ifndef HOTSPOT_VERSION_STRING #error HOTSPOT_VERSION_STRING must be defined --- old/src/cpu/sparc/vm/vm_version_sparc.cpp 2016-04-28 11:48:50.400973678 +0200 +++ new/src/cpu/sparc/vm/vm_version_sparc.cpp 2016-04-28 11:48:50.132973691 +0200 @@ -49,9 
+49,11 @@ AllocatePrefetchDistance = allocate_prefetch_distance(); AllocatePrefetchStyle = allocate_prefetch_style(); - if (AllocatePrefetchStyle == 3 && !has_blk_init()) { - warning("BIS instructions are not available on this CPU"); - FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); + if (!has_blk_init()) { + if (AllocatePrefetchInstr == 1) { + warning("BIS instructions required for AllocatePrefetchInstr 1 unavailable"); + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); + } } UseSSE = 0; // Only on x86 and x64 @@ -88,11 +90,13 @@ if (has_blk_init() && UseTLAB && FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { // Use BIS instruction for TLAB allocation prefetch. - FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1); - if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { - FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3); - } - if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1); + } + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + if (AllocatePrefetchInstr == 0) { + // Use different prefetch distance without BIS + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + } else { // Use smaller prefetch distance with BIS FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); } @@ -107,25 +111,14 @@ FLAG_SET_ERGO(intx, AllocateInstancePrefetchLines, AllocateInstancePrefetchLines*2); } } - if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { - // Use different prefetch distance without BIS - FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); - } - if (AllocatePrefetchInstr == 1) { - // Need extra space at the end of TLAB for BIS, otherwise prefetching - // instructions will fault (due to accessing memory outside of heap). - // The amount of space is the max of the number of lines to - // prefetch for array and for instance allocations. (Extra space must be - // reserved to accomodate both types of allocations.) 
- - // +1 for rounding up to next cache line, +1 to be safe - int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2; - int step_size = AllocatePrefetchStepSize; - int distance = AllocatePrefetchDistance; - _reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize; - } } -#endif + + if (AllocatePrefetchInstr == 1) { + // Use allocation prefetch style 3 because BIS instructions + // require aligned memory addresses. + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3); + } +#endif /* COMPILER2 */ } // Use hardware population count instruction if available. --- old/src/share/vm/runtime/vmStructs.cpp 2016-04-28 11:48:50.492973674 +0200 +++ new/src/share/vm/runtime/vmStructs.cpp 2016-04-28 11:48:50.256973685 +0200 @@ -600,6 +600,7 @@ nonstatic_field(ThreadLocalAllocBuffer, _pf_top, HeapWord*) \ nonstatic_field(ThreadLocalAllocBuffer, _desired_size, size_t) \ nonstatic_field(ThreadLocalAllocBuffer, _refill_waste_limit, size_t) \ + static_field(ThreadLocalAllocBuffer, _reserve_for_allocation_prefetch, int) \ static_field(ThreadLocalAllocBuffer, _target_refills, unsigned) \ nonstatic_field(ThreadLocalAllocBuffer, _number_of_refills, unsigned) \ nonstatic_field(ThreadLocalAllocBuffer, _fast_refill_waste, unsigned) \ @@ -1318,7 +1319,6 @@ static_field(Abstract_VM_Version, _vm_minor_version, int) \ static_field(Abstract_VM_Version, _vm_security_version, int) \ static_field(Abstract_VM_Version, _vm_build_number, int) \ - static_field(Abstract_VM_Version, _reserve_for_allocation_prefetch, int) \ \ static_field(JDK_Version, _current, JDK_Version) \ nonstatic_field(JDK_Version, _major, unsigned char) \ --- old/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java 2016-04-28 11:48:50.488973674 +0200 +++ new/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java 2016-04-28 11:48:50.180973689 +0200 @@ -324,8 +324,9 @@ Address vmInternalInfoAddr = vmVersion.getAddressField("_s_internal_vm_info_string").getValue(); 
vmInternalInfo = CStringUtilities.getString(vmInternalInfoAddr); + Type threadLocalAllocBuffer = db.lookupType("ThreadLocalAllocBuffer"); CIntegerType intType = (CIntegerType) db.lookupType("int"); - CIntegerField reserveForAllocationPrefetchField = vmVersion.getCIntegerField("_reserve_for_allocation_prefetch"); + CIntegerField reserveForAllocationPrefetchField = threadLocalAllocBuffer.getCIntegerField("_reserve_for_allocation_prefetch"); reserveForAllocationPrefetch = (int)reserveForAllocationPrefetchField.getCInteger(intType); } catch (Exception exp) { throw new RuntimeException("can't determine target's VM version : " + exp.getMessage()); --- old/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java 2016-04-28 11:48:50.500973674 +0200 +++ new/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java 2016-04-28 11:48:50.184973688 +0200 @@ -90,13 +90,6 @@ excludeTestMaxRange("CICompilerCount"); /* - * JDK-8153340 - * Temporary exclude AllocatePrefetchDistance option from testing - */ - excludeTestRange("AllocatePrefetchDistance"); - - - /* * JDK-8136766 * Temporarily remove ThreadStackSize from testing because Windows can set it to 0 * (for default OS size) but other platforms insist it must be greater than 0 --- old/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp 2016-04-28 11:48:50.476973675 +0200 +++ new/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp 2016-04-28 11:48:50.136973691 +0200 @@ -90,16 +90,29 @@ } Flag::Error AllocatePrefetchDistanceConstraintFunc(intx value, bool verbose) { - if (value < 0) { + if (value < 0 || value > 512) { CommandLineError::print(verbose, - "Unable to determine system-specific value for AllocatePrefetchDistance. 
" - "Please provide appropriate value, if unsure, use 0 to disable prefetching\n"); + "AllocatePrefetchDistance (" INTX_FORMAT ") must be " + "between 0 and %d\n", + value, 512); return Flag::VIOLATES_CONSTRAINT; } return Flag::SUCCESS; } +Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) { + if (AllocatePrefetchStyle == 3) { + if (value % wordSize != 0) { + CommandLineError::print(verbose, + "AllocatePrefetchStepSize (" INTX_FORMAT ") must be multiple of %d\n", + value, wordSize); + return Flag::VIOLATES_CONSTRAINT; + } + } + return Flag::SUCCESS; +} + Flag::Error AllocatePrefetchInstrConstraintFunc(intx value, bool verbose) { intx max_value = max_intx; #if defined(SPARC) @@ -114,49 +127,6 @@ return Flag::VIOLATES_CONSTRAINT; } - return Flag::SUCCESS; -} - -Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) { - intx max_value = 512; - if (value < 1 || value > max_value) { - CommandLineError::print(verbose, - "AllocatePrefetchStepSize (" INTX_FORMAT ") " - "must be between 1 and %d\n", - AllocatePrefetchStepSize, - max_value); - return Flag::VIOLATES_CONSTRAINT; - } - - if (AllocatePrefetchDistance % AllocatePrefetchStepSize != 0) { - CommandLineError::print(verbose, - "AllocatePrefetchDistance (" INTX_FORMAT ") " - "%% AllocatePrefetchStepSize (" INTX_FORMAT ") " - "= " INTX_FORMAT " " - "must be 0\n", - AllocatePrefetchDistance, AllocatePrefetchStepSize, - AllocatePrefetchDistance % AllocatePrefetchStepSize); - return Flag::VIOLATES_CONSTRAINT; - } - - /* The limit of 64 for the quotient of AllocatePrefetchDistance and AllocatePrefetchSize - * originates from the limit of 64 for AllocatePrefetchLines/AllocateInstancePrefetchLines. - * If AllocatePrefetchStyle == 2, the quotient from above is used in PhaseMacroExpand::prefetch_allocation() - * to determine the number of lines to prefetch. 
For other values of AllocatePrefetchStyle, - * AllocatePrefetchDistance and AllocatePrefetchSize is used. For consistency, all these - * quantities must have the same limit (64 in this case). - */ - if (AllocatePrefetchDistance / AllocatePrefetchStepSize > 64) { - CommandLineError::print(verbose, - "AllocatePrefetchDistance (" INTX_FORMAT ") too large or " - "AllocatePrefetchStepSize (" INTX_FORMAT ") too small; " - "try decreasing/increasing values so that " - "AllocatePrefetchDistance / AllocatePrefetchStepSize <= 64\n", - AllocatePrefetchDistance, AllocatePrefetchStepSize, - AllocatePrefetchDistance % AllocatePrefetchStepSize); - return Flag::VIOLATES_CONSTRAINT; - } - return Flag::SUCCESS; } --- old/src/share/vm/runtime/vm_version.hpp 2016-04-28 11:48:50.504973673 +0200 +++ new/src/share/vm/runtime/vm_version.hpp 2016-04-28 11:48:50.140973690 +0200 @@ -57,7 +57,6 @@ static int _vm_build_number; static unsigned int _parallel_worker_threads; static bool _parallel_worker_threads_initialized; - static int _reserve_for_allocation_prefetch; static unsigned int nof_parallel_worker_threads(unsigned int num, unsigned int dem, @@ -139,12 +138,6 @@ return _L1_data_cache_line_size; } - // Need a space at the end of TLAB for prefetch instructions - // which may fault when accessing memory outside of heap. 
- static int reserve_for_allocation_prefetch() { - return _reserve_for_allocation_prefetch; - } - // ARCH specific policy for the BiasedLocking static bool use_biased_locking() { return true; } --- old/src/share/vm/runtime/globals.hpp 2016-04-28 11:48:50.536973672 +0200 +++ new/src/share/vm/runtime/globals.hpp 2016-04-28 11:48:50.204973687 +0200 @@ -2901,9 +2901,9 @@ \ product(intx, AllocatePrefetchStyle, 1, \ "0 = no prefetch, " \ - "1 = prefetch instructions for each allocation, " \ + "1 = generate prefetch instructions for each allocation, " \ "2 = use TLAB watermark to gate allocation prefetch, " \ - "3 = use BIS instruction on Sparc for allocation prefetch") \ + "3 = generate one prefetch instruction per cache line") \ range(0, 3) \ \ product(intx, AllocatePrefetchDistance, -1, \ @@ -2926,8 +2926,8 @@ constraint(AllocatePrefetchStepSizeConstraintFunc,AfterMemoryInit)\ \ product(intx, AllocatePrefetchInstr, 0, \ - "Prefetch instruction to prefetch ahead of allocation pointer") \ - constraint(AllocatePrefetchInstrConstraintFunc, AfterErgo) \ + "Select instruction to prefetch ahead of allocation pointer") \ + constraint(AllocatePrefetchInstrConstraintFunc, AfterMemoryInit) \ \ /* deoptimization */ \ develop(bool, TraceDeoptimization, false, \