5677 __ enter(); // required for proper stackwalking of RuntimeStub frame 5678 5679 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx 5680 // len => rcx, k => r8 5681 // r9 and r10 may be used to save non-volatile registers 5682 #ifdef _WIN64 5683 // last argument is on stack on Win64 5684 __ movl(k, Address(rsp, 6 * wordSize)); 5685 #endif 5686 __ movptr(r11, rdx); // move offset in rdx to offset(r11) 5687 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); 5688 5689 restore_arg_regs(); 5690 5691 __ leave(); // required for proper stackwalking of RuntimeStub frame 5692 __ ret(0); 5693 5694 return start; 5695 } 5696 5697 address generate_libmExp() { 5698 StubCodeMark mark(this, "StubRoutines", "libmExp"); 5699 5700 address start = __ pc(); 5701 5702 const XMMRegister x0 = xmm0; 5703 const XMMRegister x1 = xmm1; 5704 const XMMRegister x2 = xmm2; 5705 const XMMRegister x3 = xmm3; 5706 5707 const XMMRegister x4 = xmm4; 5708 const XMMRegister x5 = xmm5; 5709 const XMMRegister x6 = xmm6; 5710 const XMMRegister x7 = xmm7; 5711 5712 const Register tmp = r11; 5713 5714 BLOCK_COMMENT("Entry:"); 5715 __ enter(); // required for proper stackwalking of RuntimeStub frame 5716 6297 &StubRoutines::_safefetch32_fault_pc, 6298 &StubRoutines::_safefetch32_continuation_pc); 6299 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, 6300 &StubRoutines::_safefetchN_fault_pc, 6301 &StubRoutines::_safefetchN_continuation_pc); 6302 6303 BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); 6304 if (bs_nm != NULL) { 6305 StubRoutines::x86::_method_entry_barrier = generate_method_entry_barrier(); 6306 } 6307 #ifdef COMPILER2 6308 if (UseMultiplyToLenIntrinsic) { 6309 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 6310 } 6311 if (UseSquareToLenIntrinsic) { 6312 StubRoutines::_squareToLen = generate_squareToLen(); 6313 } 6314 if (UseMulAddIntrinsic) { 6315 StubRoutines::_mulAdd = 
generate_mulAdd(); 6316 } 6317 #ifndef _WINDOWS 6318 if (UseMontgomeryMultiplyIntrinsic) { 6319 StubRoutines::_montgomeryMultiply 6320 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); 6321 } 6322 if (UseMontgomerySquareIntrinsic) { 6323 StubRoutines::_montgomerySquare 6324 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); 6325 } 6326 #endif // WINDOWS 6327 #endif // COMPILER2 6328 6329 if (UseVectorizedMismatchIntrinsic) { 6330 StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch(); 6331 } 6332 } 6333 6334 public: 6335 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { 6336 if (all) { | 5677 __ enter(); // required for proper stackwalking of RuntimeStub frame 5678 5679 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx 5680 // len => rcx, k => r8 5681 // r9 and r10 may be used to save non-volatile registers 5682 #ifdef _WIN64 5683 // last argument is on stack on Win64 5684 __ movl(k, Address(rsp, 6 * wordSize)); 5685 #endif 5686 __ movptr(r11, rdx); // move offset in rdx to offset(r11) 5687 __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); 5688 5689 restore_arg_regs(); 5690 5691 __ leave(); // required for proper stackwalking of RuntimeStub frame 5692 __ ret(0); 5693 5694 return start; 5695 } 5696 5697 address generate_bigIntegerRightShift() { 5698 __ align(CodeEntryAlignment); 5699 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); 5700 5701 address start = __ pc(); 5702 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit; 5703 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8. 5704 const Register newArr = rdi; 5705 const Register oldArr = rsi; 5706 const Register newIdx = rdx; 5707 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift. 5708 const Register totalNumIter = r8; 5709 5710 // For windows, we use r9 and r10 as temps to save rdi and rsi. 
Thus we cannot allocate them for our temps. 5711 // For everything else, we prefer using r9 and r10 since we do not have to save them before use. 5712 const Register tmp1 = r11; // Caller save. 5713 const Register tmp2 = rax; // Caller save. 5714 const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9); // Windows: Callee save. Linux: Caller save. 5715 const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save. 5716 const Register tmp5 = r14; // Callee save. 5717 const Register tmp6 = r15; 5718 5719 const XMMRegister x0 = xmm0; 5720 const XMMRegister x1 = xmm1; 5721 const XMMRegister x2 = xmm2; 5722 5723 BLOCK_COMMENT("Entry:"); 5724 __ enter(); // required for proper stackwalking of RuntimeStub frame 5725 5726 #ifdef _WINDOWS 5727 setup_arg_regs(4); 5728 // For windows, since last argument is on stack, we need to move it to the appropriate register. 5729 __ movl(totalNumIter, Address(rsp, 6 * wordSize)); 5730 // Save callee save registers. 5731 __ push(tmp3); 5732 __ push(tmp4); 5733 #endif 5734 __ push(tmp5); 5735 5736 // Rename temps used throughout the code. 5737 const Register idx = tmp1; 5738 const Register nIdx = tmp2; 5739 5740 __ cmpl(totalNumIter, 1); 5741 __ jcc(Assembler::less, Exit); 5742 5743 __ xorl(idx, idx); 5744 5745 // Start right shift from end of the array. 
5746 // For example, if #iteration = 4 and newIdx = 1 5747 // then dest[4] = src[4] >> shiftCount | src[3] <<< (shiftCount - 32) 5748 // if #iteration = 4 and newIdx = 0 5749 // then dest[3] = src[4] >> shiftCount | src[3] <<< (shiftCount - 32) 5750 __ movl(idx, totalNumIter); 5751 __ movl(nIdx, idx); 5752 __ addl(nIdx, newIdx); 5753 5754 // If vectorization is enabled, check if the number of iterations is greater than 63 5755 // If not, then go to ShifTwo processing 2 iterations 5756 if (UseAVX > 2 && UseVBMI2) { 5757 __ cmpl(totalNumIter, 63); 5758 __ jcc(Assembler::less, ShiftTwo); 5759 __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit); 5760 __ subl(idx, 16); 5761 __ subl(nIdx, 16); 5762 __ BIND(Shift512Loop); 5763 __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit); 5764 __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit); 5765 __ vpshrdvd(x2, x1, x0, Assembler::AVX_512bit); 5766 __ evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit); 5767 __ subl(nIdx, 16); 5768 __ subl(idx, 16); 5769 __ jcc(Assembler::greaterEqual, Shift512Loop); 5770 __ addl(idx, 16); 5771 __ addl(nIdx, 16); 5772 } 5773 __ BIND(ShiftTwo); 5774 __ cmpl(idx, 2); 5775 __ jcc(Assembler::less, ShiftOne); 5776 __ subl(idx, 2); 5777 __ subl(nIdx, 2); 5778 __ BIND(ShiftTwoLoop); 5779 __ movl(tmp5, Address(oldArr, idx, Address::times_4, 8)); 5780 __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4)); 5781 __ movl(tmp3, Address(oldArr, idx, Address::times_4)); 5782 __ shrdl(tmp5, tmp4); 5783 __ shrdl(tmp4, tmp3); 5784 __ movl(Address(newArr, nIdx, Address::times_4, 4), tmp5); 5785 __ movl(Address(newArr, nIdx, Address::times_4), tmp4); 5786 __ subl(nIdx, 2); 5787 __ subl(idx, 2); 5788 __ jcc(Assembler::greaterEqual, ShiftTwoLoop); 5789 __ addl(idx, 2); 5790 __ addl(nIdx, 2); 5791 5792 // Do the last iteration 5793 __ BIND(ShiftOne); 5794 __ cmpl(idx, 1); 5795 __ jcc(Assembler::less, Exit); 5796 __ subl(idx, 
/**
 * Intrinsic stub for java.math.BigInteger::shiftLeftImplWorker.
 *
 * Arguments:
 *
 * Input:
 *   c_rarg0   - newArr address
 *   c_rarg1   - oldArr address
 *   c_rarg2   - newIdx
 *   c_rarg3   - shiftCount
 * not Win64
 *   c_rarg4   - numIter
 * Win64
 *   rsp + 40  - numIter
 */
address generate_bigIntegerLeftShift() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
  address start = __ pc();
  Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
  // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
  const Register newArr = rdi;
  const Register oldArr = rsi;
  const Register newIdx = rdx;
  const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
  const Register totalNumIter = r8;
  // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
  // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
  const Register tmp1 = r11;                    // Caller save.
  const Register tmp2 = rax;                    // Caller save.
  const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: Callee save. Linux: Caller save.
  const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: Callee save. Linux: Caller save.
  const Register tmp5 = r14;                    // Callee save.

  const XMMRegister x0 = xmm0;
  const XMMRegister x1 = xmm1;
  const XMMRegister x2 = xmm2;
  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WINDOWS
  setup_arg_regs(4);
  // For windows, since last argument is on stack, we need to move it to the appropriate register.
  __ movl(totalNumIter, Address(rsp, 6 * wordSize));
  // Save callee save registers.
  __ push(tmp3);
  __ push(tmp4);
#endif
  __ push(tmp5);

  // Rename temps used throughout the code
  const Register idx = tmp1;
  const Register numIterTmp = tmp2;

  // Nothing to do for zero-length input.
  __ cmpl(totalNumIter, 1);
  __ jcc(Assembler::less, Exit);

  // Start idx from zero.
  __ xorl(idx, idx);
  // Compute interior pointer for new array. We do this so that we can use same index for both old and new arrays.
  __ lea(newArr, Address(newArr, newIdx, Address::times_4));
  __ movl(numIterTmp, totalNumIter);

  // If vectorization is enabled, check if the number of iterations is greater than 63.
  // If not, then go to ShiftTwo shifting two numbers at a time
  if (UseAVX > 2 && UseVBMI2) {
    __ cmpl(totalNumIter, 63);
    __ jcc(Assembler::less, ShiftTwo);
    // Broadcast the shift count; VPSHLDVD concatenates adjacent elements and
    // shifts left, producing 16 result ints per iteration.
    __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
    __ subl(numIterTmp, 16);
    __ BIND(Shift512Loop);
    __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
    __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit);
    __ vpshldvd(x1, x2, x0, Assembler::AVX_512bit);
    __ evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit);
    __ addl(idx, 16);
    __ subl(numIterTmp, 16);
    __ jcc(Assembler::greaterEqual, Shift512Loop);
    // Undo the last (failed) decrement so the scalar tail sees the true remainder.
    __ addl(numIterTmp, 16);
  }
  __ BIND(ShiftTwo);
  // tmp3 carries oldArr[idx] into each iteration; the loop rotates tmp5 -> tmp3
  // at the bottom so each element is loaded only once.
  __ movl(tmp3, Address(oldArr, idx, Address::times_4));
  __ subl(numIterTmp, 2);
  __ jcc(Assembler::less, ShiftOne);

  __ BIND(ShiftTwoLoop);
  // shldl(dst, src) computes dst = dst << cl | src >> (32 - cl), cl = shiftCount.
  __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
  __ movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8));
  __ shldl(tmp3, tmp4);
  __ shldl(tmp4, tmp5);
  __ movl(Address(newArr, idx, Address::times_4), tmp3);
  __ movl(Address(newArr, idx, Address::times_4, 0x4), tmp4);
  __ movl(tmp3, tmp5);
  __ addl(idx, 2);
  __ subl(numIterTmp, 2);
  __ jcc(Assembler::greaterEqual, ShiftTwoLoop);

  // Do the last iteration
  __ BIND(ShiftOne);
  __ addl(numIterTmp, 2);
  __ cmpl(numIterTmp, 1);
  __ jcc(Assembler::less, Exit);
  __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
  __ shldl(tmp3, tmp4);
  __ movl(Address(newArr, idx, Address::times_4), tmp3);

  __ BIND(Exit);
  // Restore callee save registers.
  __ pop(tmp5);
#ifdef _WINDOWS
  __ pop(tmp4);
  __ pop(tmp3);
  restore_arg_regs();
#endif
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);
  return start;
}
StubRoutines::_montgomerySquare 6561 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); 6562 } 6563 #endif // WINDOWS 6564 #endif // COMPILER2 6565 6566 if (UseVectorizedMismatchIntrinsic) { 6567 StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch(); 6568 } 6569 } 6570 6571 public: 6572 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { 6573 if (all) { |