--- old/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp 2019-09-02 10:59:13.592460729 +0000 +++ new/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp 2019-09-02 10:59:12.388406655 +0000 @@ -167,10 +167,9 @@ } // Implementation of SubstitutabilityCheckStub -SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, LIR_Opr result, CodeEmitInfo* info) { +SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, CodeEmitInfo* info) { _left = left; _right = right; - _result = result; _scratch_reg = FrameMap::r0_oop_opr; _info = new CodeEmitInfo(info); } @@ -183,9 +182,6 @@ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::substitutability_check_id))); ce->add_call_info_here(_info); ce->verify_oop_map(_info); - if (_result->as_register() != r0) { - __ mov(_result->as_register(), r0); - } __ b(_continuation); } --- old/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2019-09-02 10:59:15.728556659 +0000 +++ new/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2019-09-02 10:59:14.640507796 +0000 @@ -34,6 +34,7 @@ #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" +#include "ci/ciValueKlass.hpp" #include "code/compiledIC.hpp" #include "gc/shared/barrierSet.hpp" #include "gc/shared/cardTableBarrierSet.hpp" @@ -505,6 +506,21 @@ assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,"); ciMethod* method = compilation()->method(); + + if (ValueTypeReturnedAsFields && method->signature()->returns_never_null()) { + ciType* return_type = method->return_type(); + if (return_type->is_valuetype()) { + ciValueKlass* vk = return_type->as_value_klass(); + if (vk->can_be_returned_as_fields()) { + address unpack_handler = vk->unpack_handler(); + assert(unpack_handler != NULL, "must be"); + __ far_call(RuntimeAddress(unpack_handler)); + // At this point, r0 points to the value object (for interpreter or C1 caller). + // The fields of the object are copied into registers (for C2 caller). + } + } + } + // Pop the stack before the safepoint code __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); @@ -517,8 +533,8 @@ __ ret(lr); } -void LIR_Assembler::store_value_type_fields_to_buf(ciValueKlass* vk) { - __ store_value_type_fields_to_buf(vk); +int LIR_Assembler::store_value_type_fields_to_buf(ciValueKlass* vk) { + return (__ store_value_type_fields_to_buf(vk, false)); } int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { @@ -682,9 +698,11 @@ assert(c->as_jint() == 0, "should be"); insn = &Assembler::strw; break; - case T_VALUETYPE: // DMS CHECK: the code is significantly differ from x86 + case T_VALUETYPE: case T_OBJECT: case T_ARRAY: + // Non-null case is not handled on aarch64 but is handled on x86 + // FIXME: do we need to add it here?
assert(c->as_jobject() == 0, "should be"); if (UseCompressedOops && !wide) { insn = &Assembler::strw; @@ -1640,10 +1658,16 @@ Register left_klass_op = op->left_klass_op()->as_register(); Register right_klass_op = op->right_klass_op()->as_register(); - // DMS CHECK, likely x86 bug, make aarch64 implementation correct - __ load_klass(left_klass_op, left); - __ load_klass(right_klass_op, right); - __ cmp(left_klass_op, right_klass_op); + if (UseCompressedOops) { + __ ldrw(left_klass_op, Address(left, oopDesc::klass_offset_in_bytes())); + __ ldrw(right_klass_op, Address(right, oopDesc::klass_offset_in_bytes())); + __ cmpw(left_klass_op, right_klass_op); + } else { + __ ldr(left_klass_op, Address(left, oopDesc::klass_offset_in_bytes())); + __ ldr(right_klass_op, Address(right, oopDesc::klass_offset_in_bytes())); + __ cmp(left_klass_op, right_klass_op); + } + __ br(Assembler::EQ, *op->stub()->entry()); // same klass -> do slow check + // fall through to L_oops_not_equal } --- old/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp 2019-09-02 10:59:17.992658336 +0000 +++ new/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp 2019-09-02 10:59:16.884608575 +0000 @@ -385,14 +385,86 @@ } // build frame - // DMS CHECK: is it nop? // verify_FPU(0, "method_entry"); - } int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, Label& verified_value_entry_label, bool is_value_ro_entry) { - guarantee(false, "Support for ValueTypePassFieldsAsArgs and ValueTypeReturnedAsFields is not implemented"); - return 0; + // This function is required to support ValueTypePassFieldsAsArgs + if (C1Breakpoint || VerifyFPU || !UseStackBanging) { + // Verified Entry first instruction should be 5 bytes long for correct + // patching by patch_verified_entry(). + // + // C1Breakpoint and VerifyFPU have one byte first instruction. + // Also first instruction will be one byte "push(rbp)" if stack banging + // code is not generated (see build_frame() above). + // For all these cases generate long instruction first. + nop(); + } + + // verify_FPU(0, "method_entry"); + + assert(ValueTypePassFieldsAsArgs, "sanity"); + + GrowableArray* sig = &ces->sig(); + GrowableArray* sig_cc = is_value_ro_entry ? &ces->sig_cc_ro() : &ces->sig_cc(); + VMRegPair* regs = ces->regs(); + VMRegPair* regs_cc = is_value_ro_entry ? ces->regs_cc_ro() : ces->regs_cc(); + int args_on_stack = ces->args_on_stack(); + int args_on_stack_cc = is_value_ro_entry ? ces->args_on_stack_cc_ro() : ces->args_on_stack_cc(); + + assert(sig->length() <= sig_cc->length(), "Zero-sized value class not allowed!"); + BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, sig_cc->length()); + int args_passed = sig->length(); + int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt); + + int extra_stack_offset = wordSize; // tos is return address. + + // Create a temp frame so we can call into runtime. It must be properly set up to accommodate GC.
+ int sp_inc = (args_on_stack - args_on_stack_cc) * VMRegImpl::stack_slot_size; + if (sp_inc > 0) { + sp_inc = align_up(sp_inc, StackAlignmentInBytes); + sub(sp, sp, sp_inc); + } else { + sp_inc = 0; + } + + sub(sp, sp, frame_size_in_bytes); + if (sp_inc > 0) { + int real_frame_size = frame_size_in_bytes + + + wordSize // pushed rbp + + wordSize // return address pushed by the stack extension code + + sp_inc; // stack extension + mov(rscratch1, real_frame_size); + str(rscratch1, Address(sp, frame_size_in_bytes - wordSize)); + } + + // FIXME -- call runtime only if we cannot in-line allocate all the incoming value args. + mov(r1, (intptr_t) ces->method()); + if (is_value_ro_entry) { + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::buffer_value_args_no_receiver_id))); + } else { + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::buffer_value_args_id))); + } + int rt_call_offset = offset(); + + // Remove the temp frame + add(sp, sp, frame_size_in_bytes); + + int n = shuffle_value_args(true, is_value_ro_entry, extra_stack_offset, sig_bt, sig_cc, + args_passed_cc, args_on_stack_cc, regs_cc, // from + args_passed, args_on_stack, regs); // to + assert(sp_inc == n, "must be"); + + if (sp_inc != 0) { + // Do the stack banging here, and skip over the stack repair code in the + // verified_value_entry (which has a different real_frame_size). + assert(sp_inc > 0, "stack should not shrink"); + generate_stack_overflow_check(bang_size_in_bytes); + decrement(sp, frame_size_in_bytes); + } + + b(verified_value_entry_label); + return rt_call_offset; } --- old/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp 2019-09-02 10:59:20.284761268 +0000 +++ new/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp 2019-09-02 10:59:19.028704862 +0000 @@ -1219,9 +1219,8 @@ break; default: - // DMS CHECK: This code should be fixed in JDK workspace, because it fails - // with assert during vm intialization rather than insert a call - // to unimplemented_entry + // FIXME: For unhandled trap_id this code fails with assert during vm initialization + // rather than inserting a call to unimplemented_entry { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); __ mov(r0, (int)id); __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0); --- old/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp 2019-09-02 10:59:22.504860964 +0000 +++ new/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp 2019-09-02 10:59:21.392811026 +0000 @@ -275,44 +275,64 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool as_normal = (decorators & AS_NORMAL) != 0; + assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); + + bool needs_pre_barrier = as_normal; + bool needs_post_barrier = (val != noreg && in_heap); + + if (tmp3 == noreg) { - tmp3 = r8; + tmp3 = rscratch2; } + // assert_different_registers(val, tmp1, tmp2, tmp3, rscratch1, rscratch2); + assert_different_registers(val, tmp1, tmp2, tmp3); // flatten object address if needed if (dst.index() == noreg && dst.offset() == 0) { - if (dst.base() != r3) { - __ mov(r3, dst.base()); + if (dst.base() != tmp1) { + __ mov(tmp1, dst.base()); } } else { - __ lea(r3, dst); + __ lea(tmp1, dst); } - g1_write_barrier_pre(masm, - r3 /* obj */, - tmp2 /* pre_val */, + + if (needs_pre_barrier) { + g1_write_barrier_pre(masm, + tmp1 /* obj */, + tmp2 /* pre_val */, rthread
/* thread */, - tmp1 /* tmp */, + tmp3 /* tmp */, val != noreg /* tosca_live */, false /* expand_call */); + } if (val == noreg) { - BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), noreg, noreg, noreg, noreg); + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), noreg, noreg, noreg, noreg); } else { // G1 barrier needs uncompressed oop for region cross check. Register new_val = val; - if (UseCompressedOops) { - new_val = rscratch2; - __ mov(new_val, val); - } - BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), val, noreg, noreg, noreg); - g1_write_barrier_post(masm, - r3 /* store_adr */, + if (needs_post_barrier) { + if (UseCompressedOops) { + // FIXME: Refactor the code to avoid usage of r19 and stay within tmpX + new_val = r19; + __ mov(new_val, val); + } + } + + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg); + + if (needs_post_barrier) { + g1_write_barrier_post(masm, + tmp1 /* store_adr */, new_val /* new_val */, rthread /* thread */, - tmp1 /* tmp */, - tmp2 /* tmp2 */); - } + tmp2 /* tmp */, + tmp3 /* tmp2 */); + } + } } --- old/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp 2019-09-02 10:59:24.652957425 +0000 +++ new/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp 2019-09-02 10:59:23.540907488 +0000 @@ -76,22 +76,35 @@ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { bool in_heap = (decorators & IN_HEAP) != 0; bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + switch (type) { case T_OBJECT: case T_ARRAY: { - val = val == noreg ? zr : val; - if (in_heap) { - if (UseCompressedOops) { - assert(!dst.uses(val), "not enough registers"); - if (val != zr) { - __ encode_heap_oop(val); + if (in_heap) { + if (val == noreg) { + assert(!is_not_null, "inconsistent access"); + if (UseCompressedOops) { + __ strw(zr, dst); + } else { + __ str(zr, dst); } - __ strw(val, dst); } else { - __ str(val, dst); + if (UseCompressedOops) { + assert(!dst.uses(val), "not enough registers"); + if (is_not_null) { + __ encode_heap_oop_not_null(val); + } else { + __ encode_heap_oop(val); + } + __ strw(val, dst); + } else { + __ str(val, dst); + } } } else { assert(in_native, "why else?"); + assert(val != noreg, "not supported"); __ str(val, dst); } break; @@ -231,7 +244,7 @@ } void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { -// DMS CHECK: 8210498: nmethod entry barriers is not implemented +// FIXME: 8210498: nmethod entry barriers is not implemented #if 0 BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); if (bs_nm == NULL) { --- old/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp 2019-09-02 10:59:26.885057656 +0000 +++ new/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp 2019-09-02 10:59:25.681003589 +0000 @@ -116,4 +116,5 @@ store_check(masm, r3, dst); } } + } --- old/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp 2019-09-02 10:59:28.993152317 +0000 +++ new/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp 2019-09-02 10:59:27.893102921 +0000 @@ -675,6 +675,32 @@ bind(no_reserved_zone_enabling); } + + if (state == atos && ValueTypeReturnedAsFields) { + Label skip; + // Test if the return type is a value type + ldr(rscratch1, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); + ldr(rscratch1, Address(rscratch1, Method::const_offset())); + ldrb(rscratch1, Address(rscratch1, 
ConstMethod::result_type_offset())); + cmpw(rscratch1, (u1) T_VALUETYPE); + br(Assembler::NE, skip); + + // We are returning a value type, load its fields into registers + // Load fields from a buffered value with a value class specific handler + + load_klass(rscratch1 /*dst*/, r0 /*src*/); + ldr(rscratch1, Address(rscratch1, InstanceKlass::adr_valueklass_fixed_block_offset())); + ldr(rscratch1, Address(rscratch1, ValueKlass::unpack_handler_offset())); + cbz(rscratch1, skip); + + blr(rscratch1); + + // call above kills the value in r1. Reload it. + ldr(r1, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); + bind(skip); + } + + // remove frame anchor leave(); // If we're returning to interpreted code we will shortly be --- old/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp 2019-09-02 10:59:31.177250389 +0000 +++ new/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp 2019-09-02 10:59:30.069200635 +0000 @@ -46,6 +46,7 @@ #include "runtime/interfaceSupport.inline.hpp" #include "runtime/jniHandles.inline.hpp" #include "runtime/sharedRuntime.hpp" +#include "runtime/signature_cc.hpp" #include "runtime/thread.hpp" #ifdef COMPILER1 #include "c1/c1_LIRAssembler.hpp" @@ -4088,6 +4089,7 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, Register tmp1, Register thread_tmp, Register tmp3) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); decorators = AccessInternal::decorator_fixup(decorators); bool as_raw = (decorators & AS_RAW) != 0; @@ -5956,11 +5958,369 @@ } } +int MacroAssembler::store_value_type_fields_to_buf(ciValueKlass* vk, bool from_interpreter) { + // A value type might be returned. If fields are in registers we + // need to allocate a value type instance and initialize it with + // the value of the fields. + Label skip; + // We only need a new buffered value if a new one is not returned + cmp(r0, (u1) 1); + br(Assembler::EQ, skip); + int call_offset = -1; + + Label slow_case; + + // Try to allocate a new buffered value (from the heap) + if (UseTLAB) { + + if (vk != NULL) { + // Called from C1, where the return type is statically known. + mov(r1, (intptr_t)vk->get_ValueKlass()); + jint lh = vk->layout_helper(); + assert(lh != Klass::_lh_neutral_value, "inline class in return type must have been resolved"); + mov(r14, lh); + } else { + // Call from interpreter. R0 contains ((the ValueKlass* of the return type) | 0x01) + andr(r1, r0, -2); + // get obj size + ldrw(r14, Address(rscratch1 /*klass*/, Klass::layout_helper_offset())); + } + + ldr(r13, Address(rthread, in_bytes(JavaThread::tlab_top_offset()))); + + // check whether we have space in TLAB, + // rscratch1 contains pointer to just allocated obj + lea(r14, Address(r13, r14)); + ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_end_offset()))); + + cmp(r14, rscratch1); + br(Assembler::GT, slow_case); + + // OK we have room in TLAB, + // Set new TLAB top + str(r14, Address(rthread, in_bytes(JavaThread::tlab_top_offset()))); + + // Set new class always locked + mov(rscratch1, (uint64_t) markOopDesc::always_locked_prototype()); + str(rscratch1, Address(r13, oopDesc::mark_offset_in_bytes())); + + store_klass_gap(r13, zr); // zero klass gap for compressed oops + if (vk == NULL) { + // store_klass corrupts rbx, so save it in rax for later use (interpreter case only). 
+ mov(r0, r1); + } + + store_klass(r13, r1); // klass + + if (vk != NULL) { + // FIXME -- do the packing in-line to avoid the runtime call + mov(r0, r13); + far_call(RuntimeAddress(vk->pack_handler())); // no need for call info as this will not safepoint. + } else { + + // We have our new buffered value, initialize its fields with a + // value class specific handler + ldr(r1, Address(r0, InstanceKlass::adr_valueklass_fixed_block_offset())); + ldr(r1, Address(r1, ValueKlass::pack_handler_offset())); + + // Mov new class to r0 and call pack_handler + mov(r0, r13); + blr(r1); + } + b(skip); + } + + bind(slow_case); + // We failed to allocate a new value, fall back to a runtime + // call. Some oop field may be live in some registers but we can't + // tell. That runtime call will take care of preserving them + // across a GC if there's one. + + + if (from_interpreter) { + super_call_VM_leaf(StubRoutines::store_value_type_fields_to_buf()); + } else { + ldr(rscratch1, RuntimeAddress(StubRoutines::store_value_type_fields_to_buf())); + blr(rscratch1); + call_offset = offset(); + } + + bind(skip); + return call_offset; +} + +// Move a value between registers/stack slots and update the reg_state +bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[], int ret_off, int extra_stack_offset) { + if (reg_state[to->value()] == reg_written) { + return true; // Already written + } + + if (from != to && bt != T_VOID) { + if (reg_state[to->value()] == reg_readonly) { + return false; // Not yet writable + } + if (from->is_reg()) { + if (to->is_reg()) { + mov(to->as_Register(), from->as_Register()); + } else { + int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset; + Address to_addr = Address(sp, st_off); + if (from->is_FloatRegister()) { + if (bt == T_DOUBLE) { + strd(from->as_FloatRegister(), to_addr); + } else { + assert(bt == T_FLOAT, "must be float"); + strs(from->as_FloatRegister(), to_addr); + } + } else { + str(from->as_Register(), to_addr); + } + } + } else { + Address from_addr = Address(sp, from->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset); + if (to->is_reg()) { + if (to->is_FloatRegister()) { + if (bt == T_DOUBLE) { + ldrd(to->as_FloatRegister(), from_addr); + } else { + assert(bt == T_FLOAT, "must be float"); + ldrs(to->as_FloatRegister(), from_addr); + } + } else { + ldr(to->as_Register(), from_addr); + } + } else { + int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset; + ldr(rscratch1, from_addr); + str(rscratch1, Address(sp, st_off)); + } + } + } + + // Update register states + reg_state[from->value()] = reg_writable; + reg_state[to->value()] = reg_written; + return true; +} + +// Read all fields from a value type oop and store the values in registers/stack slots +bool MacroAssembler::unpack_value_helper(const GrowableArray* sig, int& sig_index, VMReg from, VMRegPair* regs_to, + int& to_index, RegState reg_state[], int ret_off, int extra_stack_offset) { + Register fromReg = from->is_reg() ? 
from->as_Register() : noreg; + assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter"); + + + int vt = 1; + bool done = true; + bool mark_done = true; + do { + sig_index--; + BasicType bt = sig->at(sig_index)._bt; + if (bt == T_VALUETYPE) { + vt--; + } else if (bt == T_VOID && + sig->at(sig_index-1)._bt != T_LONG && + sig->at(sig_index-1)._bt != T_DOUBLE) { + vt++; + } else if (SigEntry::is_reserved_entry(sig, sig_index)) { + to_index--; // Ignore this + } else { + assert(to_index >= 0, "invalid to_index"); + VMRegPair pair_to = regs_to[to_index--]; + VMReg to = pair_to.first(); + + if (bt == T_VOID) continue; + + int idx = (int) to->value(); + if (reg_state[idx] == reg_readonly) { + if (idx != from->value()) { + mark_done = false; + } + done = false; + continue; + } else if (reg_state[idx] == reg_written) { + continue; + } else { + assert(reg_state[idx] == reg_writable, "must be writable"); + reg_state[idx] = reg_written; + } + + if (fromReg == noreg) { + int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset; + ldr(rscratch2, Address(sp, st_off)); + fromReg = rscratch2; + } + + int off = sig->at(sig_index)._offset; + assert(off > 0, "offset in object should be positive"); + bool is_oop = (bt == T_OBJECT || bt == T_ARRAY); + + Address fromAddr = Address(fromReg, off); + bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN); + + if (!to->is_FloatRegister()) { + + Register dst = to->is_stack() ? rscratch1 : to->as_Register(); + + if (is_oop) { + load_heap_oop(dst, fromAddr); + } else { + load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed); + } + if (to->is_stack()) { + int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset; + str(dst, Address(sp, st_off)); + } + } else { + if (bt == T_DOUBLE) { + ldrd(to->as_FloatRegister(), fromAddr); + } else { + assert(bt == T_FLOAT, "must be float"); + ldrs(to->as_FloatRegister(), fromAddr); + } + } + + } + + } while (vt != 0); + + if (mark_done && reg_state[from->value()] != reg_written) { + // This is okay because no one else will write to that slot + reg_state[from->value()] = reg_writable; + } + return done; +} + +// Pack fields back into a value type oop +bool MacroAssembler::pack_value_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, + VMReg to, VMRegPair* regs_from, int regs_from_count, int& from_index, RegState reg_state[], + int ret_off, int extra_stack_offset) { + assert(sig->at(sig_index)._bt == T_VALUETYPE, "should be at end delimiter"); + assert(to->is_valid(), "must be"); + + if (reg_state[to->value()] == reg_written) { + skip_unpacked_fields(sig, sig_index, regs_from, regs_from_count, from_index); + return true; // Already written + } + + Register val_array = r0; + Register val_obj_tmp = r11; + Register from_reg_tmp = r10; + Register tmp1 = r14; + Register tmp2 = r13; + Register tmp3 = r1; + Register val_obj = to->is_stack() ? 
val_obj_tmp : to->as_Register(); + + if (reg_state[to->value()] == reg_readonly) { + if (!is_reg_in_unpacked_fields(sig, sig_index, to, regs_from, regs_from_count, from_index)) { + skip_unpacked_fields(sig, sig_index, regs_from, regs_from_count, from_index); + return false; // Not yet writable + } + val_obj = val_obj_tmp; + } + + int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + vtarg_index * type2aelembytes(T_VALUETYPE); + load_heap_oop(val_obj, Address(val_array, index)); + + ScalarizedValueArgsStream stream(sig, sig_index, regs_from, regs_from_count, from_index); + VMRegPair from_pair; + BasicType bt; + + while (stream.next(from_pair, bt)) { + int off = sig->at(stream.sig_cc_index())._offset; + assert(off > 0, "offset in object should be positive"); + bool is_oop = (bt == T_OBJECT || bt == T_ARRAY); + size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; + + VMReg from_r1 = from_pair.first(); + VMReg from_r2 = from_pair.second(); + + // Pack the scalarized field into the value object. + Address dst(val_obj, off); + + if (!from_r1->is_FloatRegister()) { + Register from_reg; + if (from_r1->is_stack()) { + from_reg = from_reg_tmp; + int ld_off = from_r1->reg2stack() * VMRegImpl::stack_slot_size + extra_stack_offset; + load_sized_value(from_reg, Address(sp, ld_off), size_in_bytes, /* is_signed */ false); + } else { + from_reg = from_r1->as_Register(); + } + + if (is_oop) { + DecoratorSet decorators = IN_HEAP | ACCESS_WRITE; + store_heap_oop(dst, from_reg, tmp1, tmp2, tmp3, decorators); + } else { + store_sized_value(dst, from_reg, size_in_bytes); + } + } else { + if (from_r2->is_valid()) { + strd(from_r1->as_FloatRegister(), dst); + } else { + strs(from_r1->as_FloatRegister(), dst); + } + } + + reg_state[from_r1->value()] = reg_writable; + } + sig_index = stream.sig_cc_index(); + from_index = stream.regs_cc_index(); + + assert(reg_state[to->value()] == reg_writable, "must have already been read"); + bool success = move_helper(val_obj->as_VMReg(), to, T_OBJECT, reg_state, ret_off, extra_stack_offset); + assert(success, "to register must be writeable"); + + return true; +} + +// Unpack all value type arguments passed as oops void MacroAssembler::unpack_value_args(Compile* C, bool receiver_only) { - // Called from MachVEP node - unimplemented("Support for ValueTypePassFieldsAsArgs and ValueTypeReturnedAsFields is not implemented"); + int sp_inc = unpack_value_args_common(C, receiver_only); + // Emit code for verified entry and save increment for stack repair on return + verified_entry(C, sp_inc); +} + +int MacroAssembler::shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset, + BasicType* sig_bt, const GrowableArray* sig_cc, + int args_passed, int args_on_stack, VMRegPair* regs, // from + int args_passed_to, int args_on_stack_to, VMRegPair* regs_to) { // to + // Check if we need to extend the stack for packing/unpacking + int sp_inc = (args_on_stack_to - args_on_stack) * VMRegImpl::stack_slot_size; + if (sp_inc > 0) { + sp_inc = align_up(sp_inc, StackAlignmentInBytes); + if (!is_packing) { + // Save the return address, adjust the stack (make sure it is properly + // 16-byte aligned) and copy the return address to the new top of the stack. + // (Note: C1 does this in C1_MacroAssembler::scalarized_entry). + // FIXME: We need not to preserve return address on aarch64 + pop(rscratch1); + sub(sp, sp, sp_inc); + push(rscratch1); + } + } else { + // The scalarized calling convention needs less stack space than the unscalarized one. 
+ // No need to extend the stack, the caller will take care of these adjustments. + sp_inc = 0; + } + + int ret_off; // make sure we don't overwrite the return address + if (is_packing) { + // For C1 code, the VVEP doesn't have reserved slots, so we store the returned address at + // rsp[0] during shuffling. + ret_off = 0; + } else { + // C2 code ensures that sp_inc is a reserved slot. + ret_off = sp_inc; + } + + return shuffle_value_args_common(is_packing, receiver_only, extra_stack_offset, + sig_bt, sig_cc, + args_passed, args_on_stack, regs, + args_passed_to, args_on_stack_to, regs_to, + sp_inc, ret_off); } -void MacroAssembler::store_value_type_fields_to_buf(ciValueKlass* vk) { - super_call_VM_leaf(StubRoutines::store_value_type_fields_to_buf()); +VMReg MacroAssembler::spill_reg_for(VMReg reg) { + return (reg->is_FloatRegister()) ? v0->as_VMReg() : r14->as_VMReg(); } --- old/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp 2019-09-02 10:59:33.617359954 +0000 +++ new/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp 2019-09-02 10:59:32.477308764 +0000 @@ -28,6 +28,9 @@ #include "asm/assembler.hpp" #include "oops/compressedOops.hpp" +#include "utilities/macros.hpp" +#include "runtime/signature.hpp" + class ciValueKlass; @@ -1167,9 +1170,27 @@ void verified_entry(Compile* C, int sp_inc); + int store_value_type_fields_to_buf(ciValueKlass* vk, bool from_interpreter = true); + // Unpack all value type arguments passed as oops void unpack_value_args(Compile* C, bool receiver_only); - void store_value_type_fields_to_buf(ciValueKlass* vk); + bool move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[], int ret_off, int extra_stack_offset); + bool unpack_value_helper(const GrowableArray* sig, int& sig_index, VMReg from, VMRegPair* regs_to, int& to_index, + RegState reg_state[], int ret_off, int extra_stack_offset); + bool pack_value_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, + VMReg to, VMRegPair* regs_from, int regs_from_count, int& from_index, RegState reg_state[], + int ret_off, int extra_stack_offset); + void restore_stack(Compile* C); + + int shuffle_value_args(bool is_packing, bool receiver_only, int extra_stack_offset, + BasicType* sig_bt, const GrowableArray* sig_cc, + int args_passed, int args_on_stack, VMRegPair* regs, + int args_passed_to, int args_on_stack_to, VMRegPair* regs_to); + bool shuffle_value_args_spill(bool is_packing, const GrowableArray* sig_cc, int sig_cc_index, + VMRegPair* regs_from, int from_index, int regs_from_count, + RegState* reg_state, int sp_inc, int extra_stack_offset); + VMReg spill_reg_for(VMReg reg); + void tableswitch(Register index, jint lowbound, jint highbound, Label &jumptable, Label &jumptable_end, int stride = 1) { @@ -1392,6 +1413,9 @@ spill(tmp1, true, dst_offset+8); } } + + #include "asm/macroAssembler_common.hpp" + }; #ifdef ASSERT --- old/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp 2019-09-02 10:59:35.857460536 +0000 +++ new/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp 2019-09-02 10:59:34.729409886 +0000 @@ -352,7 +352,7 @@ case T_BYTE: case T_SHORT: case T_INT: - if (int_args < Argument::n_int_register_parameters_j) { + if (int_args < SharedRuntime::java_return_convention_max_int) { regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); int_args ++; } else { @@ -374,7 +374,7 @@ // Should T_METADATA be added to java_calling_convention as well ? 
case T_METADATA: case T_VALUETYPE: - if (int_args < Argument::n_int_register_parameters_j) { + if (int_args < SharedRuntime::java_return_convention_max_int) { regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); int_args ++; } else { @@ -382,7 +382,7 @@ } break; case T_FLOAT: - if (fp_args < Argument::n_float_register_parameters_j) { + if (fp_args < SharedRuntime::java_return_convention_max_float) { regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); fp_args ++; } else { @@ -442,14 +442,52 @@ // arguments for the call if value types are passed by reference (the // calling convention the interpreter expects). static int compute_total_args_passed_int(const GrowableArray* sig_extended) { - int total_args_passed = 0; - total_args_passed = sig_extended->length(); - return total_args_passed; + int total_args_passed = 0; + if (ValueTypePassFieldsAsArgs) { + for (int i = 0; i < sig_extended->length(); i++) { + BasicType bt = sig_extended->at(i)._bt; + if (SigEntry::is_reserved_entry(sig_extended, i)) { + // Ignore reserved entry + } else if (bt == T_VALUETYPE) { + // In sig_extended, a value type argument starts with: + // T_VALUETYPE, followed by the types of the fields of the + // value type and T_VOID to mark the end of the value + // type. Value types are flattened so, for instance, in the + // case of a value type with an int field and a value type + // field that itself has 2 fields, an int and a long: + // T_VALUETYPE T_INT T_VALUETYPE T_INT T_LONG T_VOID (second + // slot for the T_LONG) T_VOID (inner T_VALUETYPE) T_VOID + // (outer T_VALUETYPE) + total_args_passed++; + int vt = 1; + do { + i++; + BasicType bt = sig_extended->at(i)._bt; + BasicType prev_bt = sig_extended->at(i-1)._bt; + if (bt == T_VALUETYPE) { + vt++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + } + } while (vt != 0); + } else { + total_args_passed++; + } + } + } else { + total_args_passed = sig_extended->length(); + } + + return total_args_passed; } static void gen_c2i_adapter_helper(MacroAssembler* masm, BasicType bt, const VMRegPair& reg_pair, int extraspace, const Address& to) { + assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here"); + // Say 4 args: // i st_off // 0 32 T_LONG @@ -475,9 +513,8 @@ if (r_1->is_stack()) { // memory to memory use rscratch1 - // DMS CHECK: words_pushed is always 0 and can be removed? - // int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace + words_pushed * wordSize); - int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace); + // words_pushed is always 0 so we don't use it. + int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace /* + word_pushed * wordSize */); if (!r_2->is_valid()) { // sign extend?? __ ldrw(rscratch1, Address(sp, ld_off)); @@ -521,6 +558,51 @@ __ bind(skip_fixup); bool has_value_argument = false; + + if (ValueTypePassFieldsAsArgs) { + // Is there a value type argument? + for (int i = 0; i < sig_extended->length() && !has_value_argument; i++) { + has_value_argument = (sig_extended->at(i)._bt == T_VALUETYPE); + } + if (has_value_argument) { + // There is at least a value type argument: we're coming from + // compiled code so we have no buffers to back the value + // types. Allocate the buffers here with a runtime call. 
+ OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + frame_complete = __ offset(); + address the_pc = __ pc(); + + __ set_last_Java_frame(noreg, noreg, the_pc, rscratch1); + + __ mov(c_rarg0, rthread); + __ mov(c_rarg1, r1); + __ mov(c_rarg2, (int64_t)alloc_value_receiver); + + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_value_types))); + __ blrt(rscratch1, 3, 0, 1); + + oop_maps->add_gc_map((int)(__ pc() - start), map); + __ reset_last_Java_frame(false); + + RegisterSaver::restore_live_registers(masm); + + Label no_exception; + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ cbz(r0, no_exception); + + __ str(zr, Address(rthread, JavaThread::vm_result_offset())); + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ b(RuntimeAddress(StubRoutines::forward_exception_entry())); + + __ bind(no_exception); + + // We get an array of objects from the runtime call + __ get_vm_result(r10, rthread); + __ get_vm_result_2(r1, rthread); // TODO: required to keep the callee Method live? + } + } + int words_pushed = 0; // Since all args are passed on the stack, total_args_passed * @@ -547,21 +629,63 @@ // offset to start parameters int st_off = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize; - if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) { - continue; // Ignore reserved entry - } + if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) { - if (bt == T_VOID) { - assert(next_arg_comp > 0 && (sig_extended->at(next_arg_comp - 1)._bt == T_LONG || sig_extended->at(next_arg_comp - 1)._bt == T_DOUBLE), "missing half"); - next_arg_int ++; - continue; - } - - int next_off = st_off - Interpreter::stackElementSize; - int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; - - gen_c2i_adapter_helper(masm, bt, regs[next_arg_comp], extraspace, Address(sp, offset)); - next_arg_int ++; + if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) { + continue; // Ignore reserved entry + } + + if (bt == T_VOID) { + assert(next_arg_comp > 0 && (sig_extended->at(next_arg_comp - 1)._bt == T_LONG || sig_extended->at(next_arg_comp - 1)._bt == T_DOUBLE), "missing half"); + next_arg_int ++; + continue; + } + + int next_off = st_off - Interpreter::stackElementSize; + int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; + + gen_c2i_adapter_helper(masm, bt, regs[next_arg_comp], extraspace, Address(sp, offset)); + next_arg_int ++; + } else { + ignored++; + // get the buffer from the just allocated pool of buffers + int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_VALUETYPE); + __ load_heap_oop(rscratch1, Address(r10, index)); + next_vt_arg++; + next_arg_int++; + int vt = 1; + // write fields we get from compiled code in registers/stack + // slots to the buffer: we know we are done with that value type + // argument when we hit the T_VOID that acts as an end of value + // type delimiter for this value type. Value types are flattened + // so we might encounter embedded value types. Each entry in + // sig_extended contains a field offset in the buffer. 
+ do { + next_arg_comp++; + BasicType bt = sig_extended->at(next_arg_comp)._bt; + BasicType prev_bt = sig_extended->at(next_arg_comp - 1)._bt; + if (bt == T_VALUETYPE) { + vt++; + ignored++; + } else if (bt == T_VOID && prev_bt != T_LONG && prev_bt != T_DOUBLE) { + vt--; + ignored++; + } else if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) { + // Ignore reserved entry + } else { + int off = sig_extended->at(next_arg_comp)._offset; + assert(off > 0, "offset in object should be positive"); + + bool is_oop = (bt == T_OBJECT || bt == T_ARRAY); + has_oop_field = has_oop_field || is_oop; + + gen_c2i_adapter_helper(masm, bt, regs[next_arg_comp - ignored], extraspace, Address(r11, off)); + } + } while (vt != 0); + // pass the buffer to the interpreter + __ str(rscratch1, Address(sp, st_off)); + } + } // If a value type was allocated and initialized, apply post barrier to all oop fields --- old/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp 2019-09-02 10:59:38.145563271 +0000 +++ new/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp 2019-09-02 10:59:37.033513341 +0000 @@ -324,12 +324,12 @@ // n.b. this assumes Java returns an integral result in r0 // and a floating result in j_farg0 __ ldr(j_rarg2, result); - Label is_long, is_float, is_double, exit; + Label is_long, is_float, is_double, is_value, exit; __ ldr(j_rarg1, result_type); __ cmp(j_rarg1, (u1)T_OBJECT); __ br(Assembler::EQ, is_long); __ cmp(j_rarg1, (u1)T_VALUETYPE); - __ br(Assembler::EQ, is_long); + __ br(Assembler::EQ, is_value); __ cmp(j_rarg1, (u1)T_LONG); __ br(Assembler::EQ, is_long); __ cmp(j_rarg1, (u1)T_FLOAT); @@ -390,6 +390,19 @@ __ ret(lr); // handle return types different from T_INT + __ BIND(is_value); + if (ValueTypeReturnedAsFields) { + // Check for flattened return value + __ cbz(r0, is_long); + // Initialize pre-allocated buffer + __ mov(r1, r0); + __ andr(r1, r1, -2); + __ ldr(r1, Address(r1, InstanceKlass::adr_valueklass_fixed_block_offset())); + __ ldr(r1, Address(r1, ValueKlass::pack_handler_offset())); + __ ldr(r0, Address(j_rarg2, 0)); + __ blr(r1); + __ b(exit); + } __ BIND(is_long); __ str(r0, Address(j_rarg2, 0)); --- old/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp 2019-09-02 10:59:40.581672649 +0000 +++ new/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp 2019-09-02 10:59:39.429620924 +0000 @@ -442,6 +442,10 @@ // and NULL it as marker that esp is now tos until next java call __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + if (state == atos && ValueTypeReturnedAsFields) { + __ store_value_type_fields_to_buf(NULL, true); + } + __ restore_bcp(); __ restore_locals(); __ restore_constant_pool_cache(); --- old/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp 2019-09-02 10:59:42.849774482 +0000 +++ new/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp 2019-09-02 10:59:41.729724194 +0000 @@ -1121,7 +1121,7 @@ index_check(r3, r2); // kills r1 - // DMS CHECK: what does line below do? + // FIXME: Could we remove the line below? __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); // do array store check - check for NULL value first @@ -1178,7 +1178,7 @@ } // Store a NULL - do_oop_store(_masm, element_address, noreg, IS_ARRAY); + do_oop_store(_masm, element_address, noreg, IS_ARRAY); __ b(done); if (EnableValhalla) { @@ -1211,8 +1211,8 @@ __ bind(is_type_ok); - // DMS CHECK: Reload from TOS to be safe, because of profile_typecheck that blows r2 and r0. - // Should we really do it? 
+ // Reload from TOS to be safe, because of profile_typecheck that blows r2 and r0. + // FIXME: Should we really do it? __ ldr(r1, at_tos()); // value __ mov(r2, r3); // array, ldr(r2, at_tos_p2()); __ ldr(r3, at_tos_p1()); // index @@ -2104,7 +2104,7 @@ Register is_value_mask = rscratch1; __ mov(is_value_mask, markOopDesc::always_locked_pattern); - if (EnableValhalla && ACmpOnValues == 3) { + if (EnableValhalla) { __ cmp(r1, r0); __ br(Assembler::EQ, (cc == equal) ? taken : not_taken); @@ -2136,28 +2136,7 @@ __ stop("Not reachable"); } - if (EnableValhalla && ACmpOnValues == 1) { - Label is_null; - __ cbz(r1, is_null); - __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes())); - __ andr(r2, r2, is_value_mask); - __ cmp(r2, is_value_mask); - __ cset(r2, Assembler::EQ); - __ orr(r1, r1, r2); - __ bind(is_null); - } - __ cmpoop(r1, r0); - - if (EnableValhalla && ACmpOnValues == 2) { - __ br(Assembler::NE, (cc == not_equal) ? taken : not_taken); - __ cbz(r1, (cc == equal) ? taken : not_taken); - __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes())); - __ andr(r2, r2, is_value_mask); - __ cmp(r2, is_value_mask); - cc = (cc == equal) ? not_equal : equal; - } - __ br(j_not(cc), not_taken); __ bind(taken); branch(false, false); @@ -3347,7 +3326,7 @@ case Bytecodes::_fast_qgetfield: { Label isFlattened, isInitialized, Done; - // DMS CHECK: We don't need to reload multiple times, but stay close to original code + // FIXME: We don't need to reload registers multiple times, but stay close to x86 code __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); __ test_field_is_flattened(r9, r8 /* temp */, isFlattened); // Non-flattened field case
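A note on the convention the ValueTypeReturnedAsFields code in this patch relies on: per the comment in MacroAssembler::store_value_type_fields_to_buf, when a value's fields come back in registers, r0 carries the ValueKlass* of the return type with its low bit set (hence the andr(r1, r0, -2) masking), whereas an already-buffered value is returned as an untagged oop. A minimal C++ sketch of that tagging check follows; the function names are illustrative only and not part of the patch.

#include <cstdint>

// Sketch only: mirrors the ((ValueKlass* of the return type) | 0x01) tagging
// described in the patch comments.
static inline bool r0_has_fields_in_registers(uintptr_t r0_value) {
  return (r0_value & 0x1) != 0;            // low bit set -> tagged ValueKlass*
}

static inline uintptr_t r0_untagged_value_klass(uintptr_t r0_value) {
  return r0_value & ~uintptr_t(0x1);       // same masking as andr(r1, r0, -2)
}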