8300197: Freeze/thaw an interpreter frame using a single copy_to_chunk() call

Reviewed-by: rrich, pchilanomate, fyang
This commit is contained in:
Fredrik Bredberg
2023-05-02 12:14:12 +00:00
committed by Richard Reingruber
parent 1532a1b0bd
commit a8d16dea8e
8 changed files with 46 additions and 97 deletions

View File

@@ -84,11 +84,11 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) {
if (FKind::interpreted) {
assert((intptr_t*)f.at(frame::interpreter_frame_last_sp_offset) == nullptr
|| f.unextended_sp() == (intptr_t*)f.at(frame::interpreter_frame_last_sp_offset), "");
int locals = f.interpreter_frame_method()->max_locals();
intptr_t locals_offset = *f.addr_at(frame::interpreter_frame_locals_offset);
// If the caller.is_empty(), i.e. we're freezing into an empty chunk, then we set
// the chunk's argsize in finalize_freeze and make room for it above the unextended_sp
bool overlap_caller = caller.is_interpreted_frame() || caller.is_empty();
fp = caller.unextended_sp() - (locals + frame::sender_sp_offset) + (overlap_caller ? ContinuationHelper::InterpretedFrame::stack_argsize(f) : 0);
fp = caller.unextended_sp() - 1 - locals_offset + (overlap_caller ? ContinuationHelper::InterpretedFrame::stack_argsize(f) : 0);
sp = fp - (f.fp() - f.unextended_sp());
assert(sp <= fp, "");
assert(fp <= caller.unextended_sp(), "");
@@ -97,7 +97,8 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) {
assert(_cont.tail()->is_in_chunk(sp), "");
frame hf(sp, sp, fp, f.pc(), nullptr, nullptr, true /* on_heap */);
*hf.addr_at(frame::interpreter_frame_locals_offset) = frame::sender_sp_offset + locals - 1;
// copy relativized locals from the stack frame
*hf.addr_at(frame::interpreter_frame_locals_offset) = locals_offset;
return hf;
} else {
// We need to re-read fp out of the frame because it may be an oop and we might have
@@ -145,13 +146,11 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
// on AARCH64, we may insert padding between the locals and the rest of the frame
// (see TemplateInterpreterGenerator::generate_normal_entry, and AbstractInterpreter::layout_activation)
// so we compute locals "from scratch" rather than relativizing the value in the stack frame, which might include padding,
// since we don't freeze the padding word (see recurse_freeze_interpreted_frame).
// because we freeze the padding word (see recurse_freeze_interpreted_frame) in order to keep the same relativized
// locals value, we don't need to change the locals value here.
// at(frame::interpreter_frame_last_sp_offset) can be NULL at safepoint preempts
*hf.addr_at(frame::interpreter_frame_last_sp_offset) = hf.unextended_sp() - hf.fp();
// This line can be changed into an assert when we have fixed the "frame padding problem", see JDK-8300197
*hf.addr_at(frame::interpreter_frame_locals_offset) = frame::sender_sp_offset + f.interpreter_frame_method()->max_locals() - 1;
relativize_one(vfp, hfp, frame::interpreter_frame_initial_sp_offset); // == block_top == block_bottom
relativize_one(vfp, hfp, frame::interpreter_frame_extended_sp_offset);
@@ -222,11 +221,9 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
const int locals = hf.interpreter_frame_method()->max_locals();
intptr_t* frame_sp = caller.unextended_sp() - fsize;
intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
int padding = 0;
if ((intptr_t)fp % frame::frame_alignment != 0) {
fp--;
frame_sp--;
padding++;
log_develop_trace(continuations)("Adding internal interpreted frame alignment");
}
DEBUG_ONLY(intptr_t* unextended_sp = fp + *hf.addr_at(frame::interpreter_frame_last_sp_offset);)
@@ -235,10 +232,8 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
frame f(frame_sp, frame_sp, fp, hf.pc());
// we need to set the locals so that the caller of new_stack_frame() can call
// ContinuationHelper::InterpretedFrame::frame_bottom
intptr_t offset = *hf.addr_at(frame::interpreter_frame_locals_offset);
assert((int)offset == frame::sender_sp_offset + locals - 1, "");
// set relativized locals
*f.addr_at(frame::interpreter_frame_locals_offset) = padding + offset;
// copy relativized locals from the heap frame
*f.addr_at(frame::interpreter_frame_locals_offset) = *hf.addr_at(frame::interpreter_frame_locals_offset);
assert((intptr_t)f.fp() % frame::frame_alignment == 0, "");
return f;
} else {
@@ -300,10 +295,4 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
derelativize_one(vfp, frame::interpreter_frame_extended_sp_offset);
}
// Stores the fp-relative position of the locals for frame f: the value
// (bottom - 1) - f.fp() is written into the slot addressed by
// f.addr_at(frame::interpreter_frame_locals_offset).
//   f      - frame whose locals slot is written
//   bottom - frame bottom pointer; the stored value is based on the word just below it
inline void ThawBase::set_interpreter_frame_bottom(const frame& f, intptr_t* bottom) {
// set relativized locals
// this line can be changed into an assert when we have fixed the "frame padding problem", see JDK-8300197
*f.addr_at(frame::interpreter_frame_locals_offset) = (bottom - 1) - f.fp();
}
#endif // CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP

View File

@@ -70,10 +70,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
return frame();
}
// Placeholder: this variant has no implementation and only calls Unimplemented().
// Both parameters are unused.
inline void ThawBase::set_interpreter_frame_bottom(const frame& f, intptr_t* bottom) {
Unimplemented();
}
// Placeholder: this variant has no implementation and only calls Unimplemented().
// Both frame parameters are unused.
inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, const frame& f) {
Unimplemented();
}

View File

@@ -84,9 +84,9 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
assert(f.fp() > (intptr_t*)f.interpreter_frame_esp(), "");
// There is alignment padding between vfp and f's locals array in the original
// frame, therefore we cannot use it to relativize the locals pointer.
// This line can be changed into an assert when we have fixed the "frame padding problem", see JDK-8300197
*hf.addr_at(ijava_idx(locals)) = frame::metadata_words + f.interpreter_frame_method()->max_locals() - 1;
// frame, because we freeze the padding (see recurse_freeze_interpreted_frame)
// in order to keep the same relativized locals pointer, we don't need to change it here.
relativize_one(vfp, hfp, ijava_idx(monitors));
relativize_one(vfp, hfp, ijava_idx(esp));
relativize_one(vfp, hfp, ijava_idx(top_frame_sp));
@@ -264,7 +264,7 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) {
intptr_t *sp, *fp;
if (FKind::interpreted) {
int locals = f.interpreter_frame_method()->max_locals();
intptr_t locals_offset = *f.addr_at(ijava_idx(locals));
// If the caller.is_empty(), i.e. we're freezing into an empty chunk, then we set
// the chunk's argsize in finalize_freeze and make room for it above the unextended_sp
// See also comment on StackChunkFrameStream<frame_kind>::interpreter_frame_size()
@@ -272,7 +272,7 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) {
(caller.is_interpreted_frame() || caller.is_empty())
? ContinuationHelper::InterpretedFrame::stack_argsize(f) + frame::metadata_words_at_top
: 0;
fp = caller.unextended_sp() + overlap - locals - frame::metadata_words_at_top;
fp = caller.unextended_sp() - 1 - locals_offset + overlap;
// esp points one slot below the last argument
intptr_t* x86_64_like_unextended_sp = f.interpreter_frame_esp() + 1 - frame::metadata_words_at_top;
sp = fp - (f.fp() - x86_64_like_unextended_sp);
@@ -286,7 +286,7 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) {
frame hf(sp, sp, fp, f.pc(), nullptr, nullptr, true /* on_heap */);
// frame_top() and frame_bottom() read these before relativize_interpreted_frame_metadata() is called
*hf.addr_at(ijava_idx(locals)) = frame::metadata_words + locals - 1;
*hf.addr_at(ijava_idx(locals)) = locals_offset;
*hf.addr_at(ijava_idx(esp)) = f.interpreter_frame_esp() - f.fp();
return hf;
} else {
@@ -507,10 +507,8 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
frame f(frame_sp, hf.pc(), frame_sp, fp);
// we need to set the locals so that the caller of new_stack_frame() can call
// ContinuationHelper::InterpretedFrame::frame_bottom
intptr_t offset = *hf.addr_at(ijava_idx(locals)) + padding;
assert((int)offset == hf.interpreter_frame_method()->max_locals() + frame::metadata_words_at_top + padding - 1, "");
// set relativized locals
*f.addr_at(ijava_idx(locals)) = offset;
// copy relativized locals from the heap frame
*f.addr_at(ijava_idx(locals)) = *hf.addr_at(ijava_idx(locals));
return f;
} else {
@@ -549,12 +547,6 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
derelativize_one(vfp, ijava_idx(top_frame_sp));
}
// Stores the fp-relative position of the locals for frame f: the value
// (bottom - 1) - f.fp() is written into the slot addressed by
// f.addr_at(ijava_idx(locals)).
//   f      - frame whose locals slot is written
//   bottom - frame bottom pointer; the stored value is based on the word just below it
inline void ThawBase::set_interpreter_frame_bottom(const frame& f, intptr_t* bottom) {
// set relativized locals
// This line can be changed into an assert when we have fixed the "frame padding problem", see JDK-8300197
*f.addr_at(ijava_idx(locals)) = (bottom - 1) - f.fp();
}
inline void ThawBase::patch_pd(frame& f, const frame& caller) {
patch_callee_link(caller, caller.fp());
// Prevent assertion if f gets deoptimized right away before it's fully initialized

View File

@@ -83,11 +83,11 @@ template<typename FKind> frame FreezeBase::new_heap_frame(frame& f, frame& calle
if (FKind::interpreted) {
assert((intptr_t*)f.at(frame::interpreter_frame_last_sp_offset) == nullptr
|| f.unextended_sp() == (intptr_t*)f.at(frame::interpreter_frame_last_sp_offset), "");
int locals = f.interpreter_frame_method()->max_locals();
intptr_t locals_offset = *f.addr_at(frame::interpreter_frame_locals_offset);
// If the caller.is_empty(), i.e. we're freezing into an empty chunk, then we set
// the chunk's argsize in finalize_freeze and make room for it above the unextended_sp
bool overlap_caller = caller.is_interpreted_frame() || caller.is_empty();
fp = caller.unextended_sp() - (locals + frame::sender_sp_offset) + (overlap_caller ? ContinuationHelper::InterpretedFrame::stack_argsize(f) : 0);
fp = caller.unextended_sp() - 1 - locals_offset + (overlap_caller ? ContinuationHelper::InterpretedFrame::stack_argsize(f) : 0);
sp = fp - (f.fp() - f.unextended_sp());
assert(sp <= fp, "");
assert(fp <= caller.unextended_sp(), "");
@@ -96,7 +96,7 @@ template<typename FKind> frame FreezeBase::new_heap_frame(frame& f, frame& calle
assert(_cont.tail()->is_in_chunk(sp), "");
frame hf(sp, sp, fp, f.pc(), nullptr, nullptr, true /* on_heap */);
*hf.addr_at(frame::interpreter_frame_locals_offset) = frame::sender_sp_offset + locals - 1;
*hf.addr_at(frame::interpreter_frame_locals_offset) = locals_offset;
return hf;
} else {
// We need to re-read fp out of the frame because it may be an oop and we might have
@@ -144,13 +144,11 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
// On RISCV, we may insert padding between the locals and the rest of the frame
// (see TemplateInterpreterGenerator::generate_normal_entry, and AbstractInterpreter::layout_activation)
// so we compute locals "from scratch" rather than relativizing the value in the stack frame, which might include padding,
// since we don't freeze the padding word (see recurse_freeze_interpreted_frame).
// because we freeze the padding word (see recurse_freeze_interpreted_frame) in order to keep the same relativized
// locals value, we don't need to change the locals value here.
// at(frame::interpreter_frame_last_sp_offset) can be null at safepoint preempts
*hf.addr_at(frame::interpreter_frame_last_sp_offset) = hf.unextended_sp() - hf.fp();
// this line can be changed into an assert when we have fixed the "frame padding problem", see JDK-8300197
*hf.addr_at(frame::interpreter_frame_locals_offset) = frame::sender_sp_offset + f.interpreter_frame_method()->max_locals() - 1;
relativize_one(vfp, hfp, frame::interpreter_frame_initial_sp_offset); // == block_top == block_bottom
relativize_one(vfp, hfp, frame::interpreter_frame_extended_sp_offset);
@@ -225,11 +223,9 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
const int locals = hf.interpreter_frame_method()->max_locals();
intptr_t* frame_sp = caller.unextended_sp() - fsize;
intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
int padding = 0;
if ((intptr_t)fp % frame::frame_alignment != 0) {
fp--;
frame_sp--;
padding++;
log_develop_trace(continuations)("Adding internal interpreted frame alignment");
}
DEBUG_ONLY(intptr_t* unextended_sp = fp + *hf.addr_at(frame::interpreter_frame_last_sp_offset);)
@@ -238,10 +234,8 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
frame f(frame_sp, frame_sp, fp, hf.pc());
// we need to set the locals so that the caller of new_stack_frame() can call
// ContinuationHelper::InterpretedFrame::frame_bottom
intptr_t offset = *hf.addr_at(frame::interpreter_frame_locals_offset);
assert((int)offset == frame::sender_sp_offset + locals - 1, "");
// set relativized locals
*f.addr_at(frame::interpreter_frame_locals_offset) = padding + offset;
// copy relativized locals from the heap frame
*f.addr_at(frame::interpreter_frame_locals_offset) = *hf.addr_at(frame::interpreter_frame_locals_offset);
assert((intptr_t)f.fp() % frame::frame_alignment == 0, "");
return f;
} else {
@@ -303,10 +297,4 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
derelativize_one(vfp, frame::interpreter_frame_extended_sp_offset);
}
// Stores the fp-relative position of the locals for frame f: the value
// (bottom - 1) - f.fp() is written into the slot addressed by
// f.addr_at(frame::interpreter_frame_locals_offset).
//   f      - frame whose locals slot is written
//   bottom - frame bottom pointer; the stored value is based on the word just below it
inline void ThawBase::set_interpreter_frame_bottom(const frame& f, intptr_t* bottom) {
// set relativized locals
// This line can be changed into an assert when we have fixed the "frame padding problem", see JDK-8300197
*f.addr_at(frame::interpreter_frame_locals_offset) = (bottom - 1) - f.fp();
}
#endif // CPU_RISCV_CONTINUATIONFREEZETHAW_RISCV_INLINE_HPP

View File

@@ -70,10 +70,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
return frame();
}
// Placeholder: this variant has no implementation and only calls Unimplemented().
// Both parameters are unused.
inline void ThawBase::set_interpreter_frame_bottom(const frame& f, intptr_t* bottom) {
Unimplemented();
}
// Placeholder: this variant has no implementation and only calls Unimplemented().
// Both frame parameters are unused.
inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, const frame& f) {
Unimplemented();
}

View File

@@ -81,11 +81,11 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) {
if (FKind::interpreted) {
assert((intptr_t*)f.at(frame::interpreter_frame_last_sp_offset) == nullptr
|| f.unextended_sp() == (intptr_t*)f.at(frame::interpreter_frame_last_sp_offset), "");
int locals = f.interpreter_frame_method()->max_locals();
intptr_t locals_offset = *f.addr_at(frame::interpreter_frame_locals_offset);
// If the caller.is_empty(), i.e. we're freezing into an empty chunk, then we set
// the chunk's argsize in finalize_freeze and make room for it above the unextended_sp
bool overlap_caller = caller.is_interpreted_frame() || caller.is_empty();
fp = caller.unextended_sp() - (locals + frame::sender_sp_offset) + (overlap_caller ? ContinuationHelper::InterpretedFrame::stack_argsize(f) : 0);
fp = caller.unextended_sp() - 1 - locals_offset + (overlap_caller ? ContinuationHelper::InterpretedFrame::stack_argsize(f) : 0);
sp = fp - (f.fp() - f.unextended_sp());
assert(sp <= fp, "");
assert(fp <= caller.unextended_sp(), "");
@@ -94,7 +94,8 @@ frame FreezeBase::new_heap_frame(frame& f, frame& caller) {
assert(_cont.tail()->is_in_chunk(sp), "");
frame hf(sp, sp, fp, f.pc(), nullptr, nullptr, true /* on_heap */);
*hf.addr_at(frame::interpreter_frame_locals_offset) = frame::sender_sp_offset + locals - 1;
// copy relativized locals from the stack frame
*hf.addr_at(frame::interpreter_frame_locals_offset) = locals_offset;
return hf;
} else {
// We need to re-read fp out of the frame because it may be an oop and we might have
@@ -223,10 +224,10 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
frame f(frame_sp, frame_sp, fp, hf.pc());
// we need to set the locals so that the caller of new_stack_frame() can call
// ContinuationHelper::InterpretedFrame::frame_bottom
intptr_t offset = *hf.addr_at(frame::interpreter_frame_locals_offset);
assert((int)offset == frame::sender_sp_offset + locals - 1, "");
// set relativized locals
*f.addr_at(frame::interpreter_frame_locals_offset) = offset;
intptr_t locals_offset = *hf.addr_at(frame::interpreter_frame_locals_offset);
assert((int)locals_offset == frame::sender_sp_offset + locals - 1, "");
// copy relativized locals from the heap frame
*f.addr_at(frame::interpreter_frame_locals_offset) = locals_offset;
return f;
} else {
int fsize = FKind::size(hf);
@@ -285,8 +286,4 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
derelativize_one(vfp, frame::interpreter_frame_initial_sp_offset);
}
// Performs no store. It only asserts that the slot addressed by
// f.addr_at(frame::interpreter_frame_locals_offset) already equals
// (bottom - 1) - f.fp().
//   f      - frame whose locals slot is checked
//   bottom - frame bottom pointer used to compute the expected slot value
inline void ThawBase::set_interpreter_frame_bottom(const frame& f, intptr_t* bottom) {
// Nothing to do. Just make sure the relativized locals is already set.
assert((*f.addr_at(frame::interpreter_frame_locals_offset) == (bottom - 1) - f.fp()), "");
}
#endif // CPU_X86_CONTINUATIONFREEZE_THAW_X86_INLINE_HPP

View File

@@ -70,10 +70,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
return frame();
}
// Placeholder: this variant has no implementation and only calls Unimplemented().
// Both parameters are unused.
inline void ThawBase::set_interpreter_frame_bottom(const frame& f, intptr_t* bottom) {
Unimplemented();
}
// Placeholder: this variant has no implementation and only calls Unimplemented().
// Both frame parameters are unused.
inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, const frame& f) {
Unimplemented();
}

View File

@@ -1058,12 +1058,8 @@ NOINLINE freeze_result FreezeBase::recurse_freeze_interpreted_frame(frame& f, fr
// The frame's top never includes the stack arguments to the callee
intptr_t* const stack_frame_top = ContinuationHelper::InterpretedFrame::frame_top(f, callee_argsize, callee_interpreted);
intptr_t* const callers_sp = ContinuationHelper::InterpretedFrame::callers_sp(f);
const int locals = f.interpreter_frame_method()->max_locals();
const int fsize = callers_sp + frame::metadata_words_at_top + locals - stack_frame_top;
intptr_t* const stack_frame_bottom = ContinuationHelper::InterpretedFrame::frame_bottom(f);
assert(stack_frame_bottom - stack_frame_top >= fsize, ""); // == on x86
const int fsize = stack_frame_bottom - stack_frame_top;
DEBUG_ONLY(verify_frame_top(f, stack_frame_top));
@@ -1093,9 +1089,9 @@ NOINLINE freeze_result FreezeBase::recurse_freeze_interpreted_frame(frame& f, fr
intptr_t* heap_frame_bottom = ContinuationHelper::InterpretedFrame::frame_bottom(hf);
assert(heap_frame_bottom == heap_frame_top + fsize, "");
// on AArch64 we add padding between the locals and the rest of the frame to keep the fp 16-byte-aligned
copy_to_chunk(stack_frame_bottom - locals, heap_frame_bottom - locals, locals); // copy locals
copy_to_chunk(stack_frame_top, heap_frame_top, fsize - locals); // copy rest
// Some architectures (like AArch64/PPC64/RISC-V) add padding between the locals and the fixed_frame to keep the fp 16-byte-aligned.
// On those architectures we freeze the padding in order to keep the same fp-relative offsets in the fixed_frame.
copy_to_chunk(stack_frame_top, heap_frame_top, fsize);
assert(!is_bottom_frame || !caller.is_interpreted_frame() || (heap_frame_top + fsize) == (caller.unextended_sp() + argsize), "");
relativize_interpreted_frame_metadata(f, hf);
@@ -1754,7 +1750,6 @@ private:
// Raises _fastpath to sp when sp compares greater than the current _fastpath;
// otherwise leaves _fastpath unchanged.
void maybe_set_fastpath(intptr_t* sp) { if (sp > _fastpath) _fastpath = sp; }
static inline void derelativize_interpreted_frame_metadata(const frame& hf, const frame& f);
static inline void set_interpreter_frame_bottom(const frame& f, intptr_t* bottom);
public:
CONT_JFR_ONLY(FreezeThawJfrInfo& jfr_info() { return _jfr_info; })
@@ -2149,21 +2144,19 @@ NOINLINE void ThawBase::recurse_thaw_interpreted_frame(const frame& hf, frame& c
intptr_t* const heap_frame_top = hf.unextended_sp() + frame::metadata_words_at_top;
intptr_t* const heap_frame_bottom = ContinuationHelper::InterpretedFrame::frame_bottom(hf);
assert(hf.is_heap_frame(), "should be");
const int fsize = heap_frame_bottom - heap_frame_top;
assert((stack_frame_bottom >= stack_frame_top + fsize) &&
(stack_frame_bottom <= stack_frame_top + fsize + 1), ""); // internal alignment on aarch64
// on AArch64/PPC64 we add padding between the locals and the rest of the frame to keep the fp 16-byte-aligned
const int locals = hf.interpreter_frame_method()->max_locals();
assert(hf.is_heap_frame(), "should be");
assert(!f.is_heap_frame(), "should not be");
copy_from_chunk(heap_frame_bottom - locals, stack_frame_bottom - locals, locals); // copy locals
copy_from_chunk(heap_frame_top, stack_frame_top, fsize - locals); // copy rest
const int fsize = heap_frame_bottom - heap_frame_top;
assert((stack_frame_bottom == stack_frame_top + fsize), "");
// Some architectures (like AArch64/PPC64/RISC-V) add padding between the locals and the fixed_frame to keep the fp 16-byte-aligned.
// On those architectures we freeze the padding in order to keep the same fp-relative offsets in the fixed_frame.
copy_from_chunk(heap_frame_top, stack_frame_top, fsize);
// Make sure the relativized locals is already set.
assert(f.interpreter_frame_local_at(0) == stack_frame_bottom - 1, "invalid frame bottom");
set_interpreter_frame_bottom(f, stack_frame_bottom); // the copy overwrites the metadata
derelativize_interpreted_frame_metadata(hf, f);
patch(f, caller, is_bottom_frame);
@@ -2174,6 +2167,8 @@ NOINLINE void ThawBase::recurse_thaw_interpreted_frame(const frame& hf, frame& c
maybe_set_fastpath(f.sp());
const int locals = hf.interpreter_frame_method()->max_locals();
if (!is_bottom_frame) {
// can only fix caller once this frame is thawed (due to callee saved regs)
_cont.tail()->fix_thawed_frame(caller, SmallRegisterMap::instance);