< prev index next >

src/cpu/aarch64/vm/macroAssembler_aarch64.cpp

Print this page
rev 12409 : 8169177: aarch64: SIGSEGV when "-XX:+ZeroTLAB" is specified along with GC options
Summary: Add zero-initialization to C1 for fast TLAB refills
Reviewed-by: aph, drwhite
Contributed-by: kavitha.natarajan@linaro.org


3927     Label ok;
3928     Register tsize = r4;
3929     assert_different_registers(tsize, rthread, t1);
3930     str(tsize, Address(pre(sp, -16)));
3931     ldr(tsize, Address(rthread, in_bytes(JavaThread::tlab_size_offset())));
3932     lsl(tsize, tsize, LogHeapWordSize);
3933     cmp(t1, tsize);
3934     br(Assembler::EQ, ok);
3935     STOP("assert(t1 != tlab size)");
3936     should_not_reach_here();
3937 
3938     bind(ok);
3939     ldr(tsize, Address(post(sp, 16)));
3940   }
3941 #endif
3942   str(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
3943   str(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
3944   add(top, top, t1);
3945   sub(top, top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
3946   str(top, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));








3947   verify_tlab();
3948   b(retry);
3949 
3950   return rthread; // for use by caller






























































3951 }
3952 
3953 // Defines obj, preserves var_size_in_bytes
3954 void MacroAssembler::eden_allocate(Register obj,
3955                                    Register var_size_in_bytes,
3956                                    int con_size_in_bytes,
3957                                    Register t1,
3958                                    Label& slow_case) {
3959   assert_different_registers(obj, var_size_in_bytes, t1);
3960   if (!Universe::heap()->supports_inline_contig_alloc()) {
3961     b(slow_case);
3962   } else {
3963     Register end = t1;
3964     Register heap_end = rscratch2;
3965     Label retry;
3966     bind(retry);
3967     {
3968       unsigned long offset;
3969       adrp(rscratch1, ExternalAddress((address) Universe::heap()->end_addr()), offset);
3970       ldr(heap_end, Address(rscratch1, offset));




3927     Label ok;
3928     Register tsize = r4;
3929     assert_different_registers(tsize, rthread, t1);
3930     str(tsize, Address(pre(sp, -16)));
3931     ldr(tsize, Address(rthread, in_bytes(JavaThread::tlab_size_offset())));
3932     lsl(tsize, tsize, LogHeapWordSize);
3933     cmp(t1, tsize);
3934     br(Assembler::EQ, ok);
3935     STOP("assert(t1 != tlab size)");
3936     should_not_reach_here();
3937 
3938     bind(ok);
3939     ldr(tsize, Address(post(sp, 16)));
3940   }
3941 #endif
3942   str(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
3943   str(top, Address(rthread, in_bytes(JavaThread::tlab_top_offset())));
3944   add(top, top, t1);
3945   sub(top, top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
3946   str(top, Address(rthread, in_bytes(JavaThread::tlab_end_offset())));
3947 
3948   if (ZeroTLAB) {
3949     // This is a fast TLAB refill, therefore the GC is not notified of it.
3950     // So compiled code must fill the new TLAB with zeroes.
3951     ldr(top, Address(rthread, in_bytes(JavaThread::tlab_start_offset())));
3952     zero_memory(top,t1,t2);
3953   }
3954 
3955   verify_tlab();
3956   b(retry);
3957 
3958   return rthread; // for use by caller
3959 }
3960 
3961 // Emit code that zeroes len bytes starting at addr, one word per store; len is in bytes
3962 // Clobbers len, t1, rscratch1 and rscratch2; addr itself is only read
3963 // len must be a nonzero multiple of wordSize (ASSERT builds check only the multiple-of-word part)
3964 void MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
3965   assert_different_registers(addr, len, t1, rscratch1, rscratch2); // both scratch registers are used as temporaries below
3966 
3967 #ifdef ASSERT
3968   { Label L;
3969     tst(len, BytesPerWord - 1);   // any low bit set => len is not a whole number of words
3970     br(Assembler::EQ, L);
3971     stop("len is not a multiple of BytesPerWord");
3972     bind(L);
3973   }
3974 #endif
3975 
3976 #ifndef PRODUCT
3977   block_comment("zero memory");
3978 #endif
3979 
3980   Label loop;
3981   Label entry;
3982 
3983 //  Algorithm (cnt is the word count, p the moving end pointer held in t1):
3984 //
3985 //    scratch1 = cnt & 7;
3986 //    cnt -= scratch1;
3987 //    p += scratch1;
3988 //    switch (scratch1) {
3989 //      do {
3990 //        cnt -= 8;
3991 //          p[-8] = 0;
3992 //        case 7:
3993 //          p[-7] = 0;
3994 //        case 6:
3995 //          p[-6] = 0;
3996 //          // ...
3997 //        case 1:
3998 //          p[-1] = 0;
3999 //        case 0:
4000 //          p += 8;
4001 //      } while (cnt);
4002 //    }
4003 
4004   const int unroll = 8; // Number of str(zr) instructions we'll unroll
4005 
4006   lsr(len, len, LogBytesPerWord);    // cnt = len >> LogBytesPerWord (bytes -> words)
4007   andr(rscratch1, len, unroll - 1);  // rscratch1 = cnt % unroll
4008   sub(len, len, rscratch1);      // cnt -= cnt % unroll, leaving a multiple of unroll
4009   // t1 always points to the end of the region we're about to zero
4010   add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord); // t1 = addr + (cnt % unroll) words
4011   adr(rscratch2, entry);             // rscratch2 = address of the entry label
4012   sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2); // back up 4 bytes per leftover word; relies on each str below being one 4-byte instruction
4013   br(rscratch2);                     // jump into the unrolled stores so only the last (cnt % unroll) of them run
4014   bind(loop);
4015   sub(len, len, unroll);             // a full pass zeroes unroll words
4016   for (int i = -unroll; i < 0; i++)
4017     str(zr, Address(t1, i * wordSize)); // zero the word at t1 + i * wordSize (i is negative)
4018   bind(entry);
4019   add(t1, t1, unroll * wordSize);    // move t1 to the end of the next group of unroll words
4020   cbnz(len, loop);                   // keep going while whole groups remain
4021 }
4022 
4023 // Defines obj, preserves var_size_in_bytes
4024 void MacroAssembler::eden_allocate(Register obj,
4025                                    Register var_size_in_bytes,
4026                                    int con_size_in_bytes,
4027                                    Register t1,
4028                                    Label& slow_case) {
4029   assert_different_registers(obj, var_size_in_bytes, t1);
4030   if (!Universe::heap()->supports_inline_contig_alloc()) {
4031     b(slow_case);
4032   } else {
4033     Register end = t1;
4034     Register heap_end = rscratch2;
4035     Label retry;
4036     bind(retry);
4037     {
4038       unsigned long offset;
4039       adrp(rscratch1, ExternalAddress((address) Universe::heap()->end_addr()), offset);
4040       ldr(heap_end, Address(rscratch1, offset));


< prev index next >