src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
                       // len => rcx, k => r8
                       // r9 and r10 may be used to save non-volatile registers
#ifdef _WIN64
    // last argument is on stack on Win64
    __ movl(k, Address(rsp, 6 * wordSize));
#endif
    __ movptr(r11, rdx);  // save offset (rdx) in r11; rdx:rax are clobbered by mul below
    __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
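    // (mul_add implements the BigInteger::mulAdd step: in[0..len-1] is multiplied by
    // the 32-bit scalar k and accumulated into out; the final carry is left in rax as
    // the stub's return value.)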

    restore_arg_regs();

    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

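  /**
   *  Arguments:
   *
   *  Input:
   *    c_rarg0   - newArr address
   *    c_rarg1   - oldArr address
   *    c_rarg2   - newIdx
   *    c_rarg3   - shiftCount
   * not Win64
   *    c_rarg4   - numIter
   * Win64
   *    rsp + 40  - numIter
   */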
  address generate_bigIntegerRightShift() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");

    address start = __ pc();
    Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
    // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
    const Register newArr = rdi;
    const Register oldArr = rsi;
    const Register newIdx = rdx;
    const Register shiftCount = rcx;  // shiftCount is deliberately placed in rcx: the shift instructions take their count implicitly from cl.
    const Register totalNumIter = r8;

    // On Windows, r9 and r10 are used as temps to save rdi and rsi, so we cannot allocate them as our temps.
    // Everywhere else, we prefer r9 and r10 since they do not have to be saved before use.
    const Register tmp1 = r11;                    // Caller save.
    const Register tmp2 = rax;                    // Caller save.
    const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: callee save. Linux: caller save.
    const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: callee save. Linux: caller save.
    const Register tmp5 = r14;                    // Callee save.
    const Register tmp6 = r15;

    const XMMRegister x0 = xmm0;
    const XMMRegister x1 = xmm1;
    const XMMRegister x2 = xmm2;

    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WINDOWS
    setup_arg_regs(4);
    // On Windows the last argument is passed on the stack, so move it into its register.
    __ movl(totalNumIter, Address(rsp, 6 * wordSize));
    // Save callee save registers.
    __ push(tmp3);
    __ push(tmp4);
#endif
    __ push(tmp5);

    // Rename temps used throughout the code.
    const Register idx = tmp1;
    const Register nIdx = tmp2;

    __ cmpl(totalNumIter, 1);
    __ jcc(Assembler::less, Exit);

    __ xorl(idx, idx);

    // Start the right shift from the end of the array.
    // For example, if #iterations = 4 and newIdx = 1,
    // then dest[4] = src[4] >> shiftCount | src[3] << (32 - shiftCount);
    // if #iterations = 4 and newIdx = 0,
    // then dest[3] = src[4] >> shiftCount | src[3] << (32 - shiftCount).
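    // In scalar terms the code below computes (a sketch; >>> denotes an unsigned shift):
    //   for (i = totalNumIter - 1; i >= 0; i--)
    //     newArr[newIdx + i] = (oldArr[i + 1] >>> shiftCount) | (oldArr[i] << (32 - shiftCount));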
    __ movl(idx, totalNumIter);
    __ movl(nIdx, idx);
    __ addl(nIdx, newIdx);

    // If vectorization is enabled, check whether the number of iterations is at least 63.
    // If not, fall back to ShiftTwo, which processes two elements per iteration.
    if (UseAVX > 2 && UseVBMI2) {
      __ cmpl(totalNumIter, 63);
      __ jcc(Assembler::less, ShiftTwo);
      __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
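      // x0 now holds shiftCount in every 32-bit lane. Each Shift512Loop iteration loads
      // 16 element pairs (x2 = src[i+1..i+16], x1 = src[i..i+15]); vpshrdvd yields, per
      // lane, the low half of (x1:x2) >> shiftCount, i.e. 16 results of the scalar
      // formula above at a time.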
      __ subl(idx, 16);
      __ subl(nIdx, 16);
      __ BIND(Shift512Loop);
      __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit);
      __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
      __ vpshrdvd(x2, x1, x0, Assembler::AVX_512bit);
      __ evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit);
      __ subl(nIdx, 16);
      __ subl(idx, 16);
      __ jcc(Assembler::greaterEqual, Shift512Loop);
      __ addl(idx, 16);
      __ addl(nIdx, 16);
    }
    __ BIND(ShiftTwo);
    __ cmpl(idx, 2);
    __ jcc(Assembler::less, ShiftOne);
    __ subl(idx, 2);
    __ subl(nIdx, 2);
    __ BIND(ShiftTwoLoop);
    __ movl(tmp5, Address(oldArr, idx, Address::times_4, 8));
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
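    // shrdl(a, b): a = (a >> shiftCount) | (b << (32 - shiftCount)); the count is taken
    // implicitly from cl, which is why shiftCount was placed in rcx.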
    __ shrdl(tmp5, tmp4);
    __ shrdl(tmp4, tmp3);
    __ movl(Address(newArr, nIdx, Address::times_4, 4), tmp5);
    __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
    __ subl(nIdx, 2);
    __ subl(idx, 2);
    __ jcc(Assembler::greaterEqual, ShiftTwoLoop);
    __ addl(idx, 2);
    __ addl(nIdx, 2);

    // Do the last iteration.
    __ BIND(ShiftOne);
    __ cmpl(idx, 1);
    __ jcc(Assembler::less, Exit);
    __ subl(idx, 1);
    __ subl(nIdx, 1);
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
    __ shrdl(tmp4, tmp3);
    __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
    __ BIND(Exit);
    // Restore callee save registers.
    __ pop(tmp5);
#ifdef _WINDOWS
    __ pop(tmp4);
    __ pop(tmp3);
    restore_arg_regs();
#endif
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }

  /**
   *  Arguments:
   *
   *  Input:
   *    c_rarg0   - newArr address
   *    c_rarg1   - oldArr address
   *    c_rarg2   - newIdx
   *    c_rarg3   - shiftCount
   * not Win64
   *    c_rarg4   - numIter
   * Win64
   *    rsp + 40  - numIter
   */
  address generate_bigIntegerLeftShift() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
    address start = __ pc();
    Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
    // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
    const Register newArr = rdi;
    const Register oldArr = rsi;
    const Register newIdx = rdx;
    const Register shiftCount = rcx;  // shiftCount is deliberately placed in rcx: the shift instructions take their count implicitly from cl.
    const Register totalNumIter = r8;
    // On Windows, r9 and r10 are used as temps to save rdi and rsi, so we cannot allocate them as our temps.
    // Everywhere else, we prefer r9 and r10 since they do not have to be saved before use.
    const Register tmp1 = r11;                    // Caller save.
    const Register tmp2 = rax;                    // Caller save.
    const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: callee save. Linux: caller save.
    const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: callee save. Linux: caller save.
    const Register tmp5 = r14;                    // Callee save.

    const XMMRegister x0 = xmm0;
    const XMMRegister x1 = xmm1;
    const XMMRegister x2 = xmm2;
    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WINDOWS
    setup_arg_regs(4);
    // On Windows the last argument is passed on the stack, so move it into its register.
    __ movl(totalNumIter, Address(rsp, 6 * wordSize));
    // Save callee save registers.
    __ push(tmp3);
    __ push(tmp4);
#endif
    __ push(tmp5);

    // Rename temps used throughout the code.
    const Register idx = tmp1;
    const Register numIterTmp = tmp2;

    __ cmpl(totalNumIter, 1);
    __ jcc(Assembler::less, Exit);

    // Start idx from zero.
    __ xorl(idx, idx);
    // Compute an interior pointer into the new array so that the same index can be used
    // for both the old and the new array.
    __ lea(newArr, Address(newArr, newIdx, Address::times_4));
    __ movl(numIterTmp, totalNumIter);
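    // In scalar terms the code below computes (a sketch; >>> denotes an unsigned shift):
    //   for (i = 0; i < totalNumIter; i++)
    //     newArr[i] = (oldArr[i] << shiftCount) | (oldArr[i + 1] >>> (32 - shiftCount));
    // (newArr already points at element newIdx, so the write lands at newIdx + i.)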

    // If vectorization is enabled, check whether the number of iterations is at least 63.
    // If not, fall back to ShiftTwo, which shifts two numbers at a time.
    if (UseAVX > 2 && UseVBMI2) {
      __ cmpl(totalNumIter, 63);
      __ jcc(Assembler::less, ShiftTwo);
      __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
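      // x0 now holds shiftCount in every 32-bit lane. Each Shift512Loop iteration loads
      // 16 element pairs (x1 = src[i..i+15], x2 = src[i+1..i+16]); vpshldvd yields, per
      // lane, the high half of (x1:x2) << shiftCount, i.e. 16 results of the scalar
      // formula above at a time.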
      __ subl(numIterTmp, 16);
      __ BIND(Shift512Loop);
      __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
      __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit);
      __ vpshldvd(x1, x2, x0, Assembler::AVX_512bit);
      __ evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit);
      __ addl(idx, 16);
      __ subl(numIterTmp, 16);
      __ jcc(Assembler::greaterEqual, Shift512Loop);
      __ addl(numIterTmp, 16);
    }
    __ BIND(ShiftTwo);
    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
    __ subl(numIterTmp, 2);
    __ jcc(Assembler::less, ShiftOne);

    __ BIND(ShiftTwoLoop);
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
    __ movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8));
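    // shldl(a, b): a = (a << shiftCount) | (b >> (32 - shiftCount)); the count is taken
    // implicitly from cl, which is why shiftCount was placed in rcx.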
    __ shldl(tmp3, tmp4);
    __ shldl(tmp4, tmp5);
    __ movl(Address(newArr, idx, Address::times_4), tmp3);
    __ movl(Address(newArr, idx, Address::times_4, 0x4), tmp4);
    __ movl(tmp3, tmp5);
    __ addl(idx, 2);
    __ subl(numIterTmp, 2);
    __ jcc(Assembler::greaterEqual, ShiftTwoLoop);

    // Do the last iteration.
    __ BIND(ShiftOne);
    __ addl(numIterTmp, 2);
    __ cmpl(numIterTmp, 1);
    __ jcc(Assembler::less, Exit);
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
    __ shldl(tmp3, tmp4);
    __ movl(Address(newArr, idx, Address::times_4), tmp3);

    __ BIND(Exit);
    // Restore callee save registers.
    __ pop(tmp5);
#ifdef _WINDOWS
    __ pop(tmp4);
    __ pop(tmp3);
    restore_arg_regs();
#endif
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }

  address generate_libmExp() {
    StubCodeMark mark(this, "StubRoutines", "libmExp");

    address start = __ pc();

    const XMMRegister x0  = xmm0;
    const XMMRegister x1  = xmm1;
    const XMMRegister x2  = xmm2;
    const XMMRegister x3  = xmm3;

    const XMMRegister x4  = xmm4;
    const XMMRegister x5  = xmm5;
    const XMMRegister x6  = xmm6;
    const XMMRegister x7  = xmm7;

    const Register tmp   = r11;

    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame



                                                       &StubRoutines::_safefetch32_fault_pc,
                                                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                       &StubRoutines::_safefetchN_fault_pc,
                                                       &StubRoutines::_safefetchN_continuation_pc);

    BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
    if (bs_nm != NULL) {
      StubRoutines::x86::_method_entry_barrier = generate_method_entry_barrier();
    }
#ifdef COMPILER2
    if (UseMultiplyToLenIntrinsic) {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }
    if (UseSquareToLenIntrinsic) {
      StubRoutines::_squareToLen = generate_squareToLen();
    }
    if (UseMulAddIntrinsic) {
      StubRoutines::_mulAdd = generate_mulAdd();
    }
    StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
    StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
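    // Note: unlike the flag-guarded intrinsics above, the shift workers are installed
    // unconditionally; the generated stubs only contain the AVX-512 path when
    // UseAVX > 2 && UseVBMI2 held at generation time, and otherwise use the scalar loops.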
#ifndef _WINDOWS
    if (UseMontgomeryMultiplyIntrinsic) {
      StubRoutines::_montgomeryMultiply
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
    }
    if (UseMontgomerySquareIntrinsic) {
      StubRoutines::_montgomerySquare
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
    }
#endif // !_WINDOWS
#endif // COMPILER2

    if (UseVectorizedMismatchIntrinsic) {
      StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
    }
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {