mirror of
https://github.com/JetBrains/JetBrainsRuntime.git
synced 2025-12-13 21:09:41 +01:00
Compare commits
101 Commits
Commit SHAs:

23c39757ec, f2e56e4c18, 4e9525ef36, b6319f5b42, 6e2ab84154, 9b12c0bb19, e65e06867e, 0eb2bcd260,
6ec36d348b, a99f340e1b, d854a04231, 410014377c, a05d5d2514, 180d8c1b57, dc6255261f, 650de99fc6,
325cdb7fc5, c46bed7292, ae85d899d0, 66d7b0ce8f, 431dcf84e9, 692edc4879, 2a1c676e0a, b0bd0c398e,
e1d1d53cd1, aa986be752, 6a6ff876c5, 4b774cb46d, b46aef88b3, 920a99faeb, 74dca863c2, 52aa7fe1c9,
413f852bdb, 11aa6e10c0, 54430a8722, 655e9cda3f, b58e3b600b, 8eaeb6990b, b60ac710be, 00068a8030,
1bbbce75c5, a5968f9364, d36a234c12, b6732d6048, a26221299e, eef9813ad4, 7f9951a934, 1ae4a6c43e,
b2daf9de30, b99be505a5, 831fe94c75, 8c8d21db6f, a4eb57c5ec, 830c4d3b19, 0a557890a5, 1f49edd978,
786833cd1b, 9c91c68d1d, 24244e4121, 3a8a6e07f2, cba09cd10d, 020e3f9591, 35fe0b1101, c9ab330b7b,
3ea82b9ff9, b1c9550182, c03d445a8c, b86b2cbc7d, 8df3f3d341, b118caf677, d34ef196c2, 811591c5c3,
355755d35d, ac81ce51fa, ed5fc9ad2d, 6700baa505, b83bf0717e, a659479483, 3500150882, 7da91533aa,
5f083abafc, b0f59f6021, 2596608ba1, be8cbfa612, f3dd8daaa9, 4378789029, a20b7eb943, 520c092a65,
4d696d0d0e, ee0b8a72c6, c09167df60, 674cc3eeca, 7e91d34f3e, 15f2538943, 13e32bf166, 6db1c4f5b9,
c8b30da7ef, 5ec5a6ea6c, 8e653d394e, ef7532e7e6, b19163b107
@@ -1,7 +1,7 @@
# Welcome to the JDK!

For build instructions please see the
[online documentation](https://openjdk.org/groups/build/doc/building.html),
[online documentation](https://git.openjdk.org/jdk/blob/master/doc/building.md),
or either of these files:

- [doc/building.html](doc/building.html) (html version)
@@ -873,7 +873,7 @@ define SetupRunJtregTestBody
$1_JTREG_BASIC_OPTIONS += -testThreadFactoryPath:$$(JTREG_TEST_THREAD_FACTORY_JAR)
$1_JTREG_BASIC_OPTIONS += -testThreadFactory:$$(JTREG_TEST_THREAD_FACTORY)
$1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-$$(JTREG_TEST_THREAD_FACTORY).txt) \
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-$$(JTREG_TEST_THREAD_FACTORY).txt) \
))
endif

@@ -881,8 +881,8 @@ define SetupRunJtregTestBody
AGENT := $$(LIBRARY_PREFIX)JvmtiStressAgent$$(SHARED_LIBRARY_SUFFIX)=$$(JTREG_JVMTI_STRESS_AGENT)
$1_JTREG_BASIC_OPTIONS += -javaoption:'-agentpath:$(TEST_IMAGE_DIR)/hotspot/jtreg/native/$$(AGENT)'
$1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-jvmti-stress-agent.txt) \
))
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-jvmti-stress-agent.txt) \
))
endif

@@ -1092,7 +1092,7 @@ define SetupRunJtregTestBody
$$(call MakeDir, $$($1_TEST_RESULTS_DIR) $$($1_TEST_SUPPORT_DIR) \
$$($1_TEST_TMP_DIR))
$$(call ExecuteWithLog, $$($1_TEST_SUPPORT_DIR)/jtreg, \
$$(COV_ENVIRONMENT) $$($1_COMMAND_LINE) \
$$(COV_ENVIRONMENT) $$($1_COMMAND_LINE) \
)

$1_RESULT_FILE := $$($1_TEST_RESULTS_DIR)/text/stats.txt

@@ -1102,11 +1102,11 @@ define SetupRunJtregTestBody
$$(call LogWarn, Test report is stored in $$(strip \
$$(subst $$(TOPDIR)/, , $$($1_TEST_RESULTS_DIR))))

# Read jtreg documentation to learn on the test stats categories:
# https://github.com/openjdk/jtreg/blob/master/src/share/doc/javatest/regtest/faq.md#what-do-all-those-numbers-in-the-test-results-line-mean
# In jtreg, "skipped:" category accounts for tests that threw jtreg.SkippedException at runtime.
# At the same time these tests contribute to "passed:" tests.
# In here we don't want that and so we subtract number of "skipped:" from "passed:".
# Read jtreg documentation to learn on the test stats categories:
# https://github.com/openjdk/jtreg/blob/master/src/share/doc/javatest/regtest/faq.md#what-do-all-those-numbers-in-the-test-results-line-mean
# In jtreg, "skipped:" category accounts for tests that threw jtreg.SkippedException at runtime.
# At the same time these tests contribute to "passed:" tests.
# In here we don't want that and so we subtract number of "skipped:" from "passed:".

$$(if $$(wildcard $$($1_RESULT_FILE)), \
$$(eval $1_PASSED_AND_RUNTIME_SKIPPED := $$(shell $$(AWK) '{ gsub(/[,;]/, ""); \
@@ -63,7 +63,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
fi

BASIC_LDFLAGS_JVM_ONLY=""
LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing"
LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing $DEBUG_PREFIX_CFLAGS"

LDFLAGS_CXX_PARTIAL_LINKING="$MACHINE_FLAG -r"

@@ -71,7 +71,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
BASIC_LDFLAGS_JVM_ONLY="-mno-omit-leaf-frame-pointer -mstack-alignment=16 \
-fPIC"

LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing"
LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing $DEBUG_PREFIX_CFLAGS"
LDFLAGS_CXX_PARTIAL_LINKING="$MACHINE_FLAG -r"

if test "x$OPENJDK_TARGET_OS" = xlinux; then

@@ -234,6 +234,9 @@ define SetupLinkerFlags
ifeq ($(call isTargetOs, macosx), true)
$1_EXTRA_LDFLAGS += -Wl,-object_path_lto,$$($1_OBJECT_DIR)/$$($1_NAME)_lto_helper.o
endif
ifeq ($(TOOLCHAIN_TYPE), microsoft)
$1_EXTRA_LDFLAGS += -LTCGOUT:$$($1_OBJECT_DIR)/$$($1_NAME).iobj
endif
endif

$1_EXTRA_LDFLAGS += $$($1_LDFLAGS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LDFLAGS_$(OPENJDK_TARGET_OS)) \
@@ -1192,8 +1192,8 @@ var getJibProfilesDependencies = function (input, common) {
server: "jpg",
product: "jcov",
version: "3.0",
build_number: "3",
file: "bundles/jcov-3.0+3.zip",
build_number: "5",
file: "bundles/jcov-3.0+5.zip",
environment_name: "JCOV_HOME",
},

@@ -346,8 +346,14 @@ source %{
}

bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
// Only SVE has partial vector operations
if (UseSVE == 0) {
// 1. Only SVE requires partial vector operations.
// 2. The vector size in bytes must be smaller than MaxVectorSize.
// 3. Predicated vectors have a mask input, which guarantees that
// out-of-bounds lanes remain inactive.
int length_in_bytes = vt->length_in_bytes();
if (UseSVE == 0 ||
length_in_bytes == MaxVectorSize ||
node->is_predicated_vector()) {
return false;
}

@@ -370,21 +376,22 @@ source %{
return !node->in(1)->is_Con();
case Op_LoadVector:
case Op_StoreVector:
// We use NEON load/store instructions if the vector length is <= 128 bits.
return vt->length_in_bytes() > 16;
case Op_AddReductionVI:
case Op_AddReductionVL:
// We may prefer using NEON instructions rather than SVE partial operations.
return !VM_Version::use_neon_for_vector(vt->length_in_bytes());
// For these ops, we prefer using NEON instructions rather than SVE
// predicated instructions for better performance.
return !VM_Version::use_neon_for_vector(length_in_bytes);
case Op_MinReductionV:
case Op_MaxReductionV:
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON
// instructions rather than SVE partial operations.
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
// instructions rather than SVE predicated instructions for
// better performance.
return vt->element_basic_type() == T_LONG ||
!VM_Version::use_neon_for_vector(vt->length_in_bytes());
!VM_Version::use_neon_for_vector(length_in_bytes);
default:
// For other ops whose vector size is smaller than the max vector size, a
// full-sized unpredicated operation does not impact the final vector result.
// For other ops whose vector size is smaller than the max vector
// size, a full-sized unpredicated operation does not impact the
// vector result.
return false;
}
}

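The comments in this hunk explain when a vector node needs an SVE partial (predicated) operation: only when SVE is in use, the node's vector is narrower than MaxVectorSize, the node does not already carry a predicate, and even then NEON is preferred for vectors of 128 bits or less. The sketch below restates that decision for the load/store and reduction cases as plain C++; the names and the 64-byte MaxVectorSize are illustrative assumptions, not HotSpot APIs.

```cpp
#include <iostream>

constexpr int kMaxVectorSize = 64;   // assumed SVE register width in bytes (512-bit)

bool use_neon_for_vector(int length_in_bytes) {
  return length_in_bytes <= 16;      // NEON covers vectors of <= 128 bits
}

bool needs_partial_operations(bool use_sve, int length_in_bytes, bool predicated) {
  // 1. Only SVE requires partial vector operations.
  // 2. The vector must be narrower than the full SVE register.
  // 3. Predicated vectors already carry a mask, so nothing extra is needed.
  if (!use_sve || length_in_bytes == kMaxVectorSize || predicated) {
    return false;
  }
  // Loads/stores and most reductions of <= 128 bits go to NEON instead.
  return !use_neon_for_vector(length_in_bytes);
}

int main() {
  std::cout << needs_partial_operations(true, 16, false) << "\n";  // 0: NEON path
  std::cout << needs_partial_operations(true, 32, false) << "\n";  // 1: SVE predicated op
  std::cout << needs_partial_operations(true, 64, false) << "\n";  // 0: full-width SVE op
}
```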
@@ -336,8 +336,14 @@ source %{
|
||||
}
|
||||
|
||||
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
|
||||
// Only SVE has partial vector operations
|
||||
if (UseSVE == 0) {
|
||||
// 1. Only SVE requires partial vector operations.
|
||||
// 2. The vector size in bytes must be smaller than MaxVectorSize.
|
||||
// 3. Predicated vectors have a mask input, which guarantees that
|
||||
// out-of-bounds lanes remain inactive.
|
||||
int length_in_bytes = vt->length_in_bytes();
|
||||
if (UseSVE == 0 ||
|
||||
length_in_bytes == MaxVectorSize ||
|
||||
node->is_predicated_vector()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -360,21 +366,22 @@ source %{
|
||||
return !node->in(1)->is_Con();
|
||||
case Op_LoadVector:
|
||||
case Op_StoreVector:
|
||||
// We use NEON load/store instructions if the vector length is <= 128 bits.
|
||||
return vt->length_in_bytes() > 16;
|
||||
case Op_AddReductionVI:
|
||||
case Op_AddReductionVL:
|
||||
// We may prefer using NEON instructions rather than SVE partial operations.
|
||||
return !VM_Version::use_neon_for_vector(vt->length_in_bytes());
|
||||
// For these ops, we prefer using NEON instructions rather than SVE
|
||||
// predicated instructions for better performance.
|
||||
return !VM_Version::use_neon_for_vector(length_in_bytes);
|
||||
case Op_MinReductionV:
|
||||
case Op_MaxReductionV:
|
||||
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON
|
||||
// instructions rather than SVE partial operations.
|
||||
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
|
||||
// instructions rather than SVE predicated instructions for
|
||||
// better performance.
|
||||
return vt->element_basic_type() == T_LONG ||
|
||||
!VM_Version::use_neon_for_vector(vt->length_in_bytes());
|
||||
!VM_Version::use_neon_for_vector(length_in_bytes);
|
||||
default:
|
||||
// For other ops whose vector size is smaller than the max vector size, a
|
||||
// full-sized unpredicated operation does not impact the final vector result.
|
||||
// For other ops whose vector size is smaller than the max vector
|
||||
// size, a full-sized unpredicated operation does not impact the
|
||||
// vector result.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5379,7 +5379,6 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int index = oop_recorder()->find_index(k);
assert(! Universe::heap()->is_in(k), "should not be an oop");

InstructionMark im(this);
RelocationHolder rspec = metadata_Relocation::spec(index);

@@ -6335,8 +6335,36 @@ instruct loadConD_Ex(regD dst, immD src) %{
|
||||
// Prefetch instructions.
|
||||
// Must be safe to execute with invalid address (cannot fault).
|
||||
|
||||
// Special prefetch versions which use the dcbz instruction.
|
||||
instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
|
||||
match(PrefetchAllocation (AddP mem src));
|
||||
predicate(AllocatePrefetchStyle == 3);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ dcbz($src$$Register, $mem$$base$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
|
||||
match(PrefetchAllocation mem);
|
||||
predicate(AllocatePrefetchStyle == 3);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ dcbz($mem$$base$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
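The two instructs above are selected when AllocatePrefetchStyle == 3 and use dcbz, which establishes the addressed cache block in the cache and fills it with zeros instead of merely prefetching it. A rough C++ picture of the effect follows; the 128-byte block size is an assumption, and this is an illustration rather than the generated code.

```cpp
#include <cstdint>
#include <cstring>

// Conceptual effect of the dcbz-based allocation prefetch: the cache block
// containing the effective address ends up owned by this core and zeroed,
// without first being read from memory.
constexpr std::size_t kCacheBlock = 128;   // assumed cache block size

void prefetch_alloc_zero(void* base, std::ptrdiff_t offset) {
  auto addr  = reinterpret_cast<std::uintptr_t>(base) + offset;
  auto block = addr & ~(kCacheBlock - 1);                  // align down to the block
  std::memset(reinterpret_cast<void*>(block), 0, kCacheBlock);
}
```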
|
||||
|
||||
instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
|
||||
match(PrefetchAllocation (AddP mem src));
|
||||
predicate(AllocatePrefetchStyle != 3);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
|
||||
@@ -6349,6 +6377,7 @@ instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
|
||||
|
||||
instruct prefetch_alloc_no_offset(indirectMemory mem) %{
|
||||
match(PrefetchAllocation mem);
|
||||
predicate(AllocatePrefetchStyle != 3);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
|
||||
|
||||
@@ -2067,6 +2067,83 @@ void C2_MacroAssembler::enc_cmove_cmp_fp(int cmpFlag, FloatRegister op1, FloatRe
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::enc_cmove_fp_cmp(int cmpFlag, Register op1, Register op2,
|
||||
FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
bool is_unsigned = (cmpFlag & unsigned_branch_mask) == unsigned_branch_mask;
|
||||
int op_select = cmpFlag & (~unsigned_branch_mask);
|
||||
|
||||
switch (op_select) {
|
||||
case BoolTest::eq:
|
||||
cmov_fp_eq(op1, op2, dst, src, is_single);
|
||||
break;
|
||||
case BoolTest::ne:
|
||||
cmov_fp_ne(op1, op2, dst, src, is_single);
|
||||
break;
|
||||
case BoolTest::le:
|
||||
if (is_unsigned) {
|
||||
cmov_fp_leu(op1, op2, dst, src, is_single);
|
||||
} else {
|
||||
cmov_fp_le(op1, op2, dst, src, is_single);
|
||||
}
|
||||
break;
|
||||
case BoolTest::ge:
|
||||
if (is_unsigned) {
|
||||
cmov_fp_geu(op1, op2, dst, src, is_single);
|
||||
} else {
|
||||
cmov_fp_ge(op1, op2, dst, src, is_single);
|
||||
}
|
||||
break;
|
||||
case BoolTest::lt:
|
||||
if (is_unsigned) {
|
||||
cmov_fp_ltu(op1, op2, dst, src, is_single);
|
||||
} else {
|
||||
cmov_fp_lt(op1, op2, dst, src, is_single);
|
||||
}
|
||||
break;
|
||||
case BoolTest::gt:
|
||||
if (is_unsigned) {
|
||||
cmov_fp_gtu(op1, op2, dst, src, is_single);
|
||||
} else {
|
||||
cmov_fp_gt(op1, op2, dst, src, is_single);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(false, "unsupported compare condition");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::enc_cmove_fp_cmp_fp(int cmpFlag,
|
||||
FloatRegister op1, FloatRegister op2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
int op_select = cmpFlag & (~unsigned_branch_mask);
|
||||
|
||||
switch (op_select) {
|
||||
case BoolTest::eq:
|
||||
cmov_fp_cmp_fp_eq(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::ne:
|
||||
cmov_fp_cmp_fp_ne(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::le:
|
||||
cmov_fp_cmp_fp_le(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::ge:
|
||||
cmov_fp_cmp_fp_ge(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::lt:
|
||||
cmov_fp_cmp_fp_lt(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::gt:
|
||||
cmov_fp_cmp_fp_gt(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
default:
|
||||
assert(false, "unsupported compare condition");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
// Set dst to NaN if any NaN input.
|
||||
void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2,
|
||||
FLOAT_TYPE ft, bool is_min) {
|
||||
|
||||
@@ -132,6 +132,13 @@
FloatRegister op1, FloatRegister op2,
Register dst, Register src, bool is_single);

void enc_cmove_fp_cmp(int cmpFlag, Register op1, Register op2,
FloatRegister dst, FloatRegister src, bool is_single);

void enc_cmove_fp_cmp_fp(int cmpFlag, FloatRegister op1, FloatRegister op2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single);

void spill(Register r, bool is64, int offset) {
is64 ? sd(r, Address(sp, offset))
: sw(r, Address(sp, offset));

@@ -1233,7 +1233,119 @@ void MacroAssembler::cmov_gtu(Register cmp1, Register cmp2, Register dst, Regist
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// ----------- cmove, compare float -----------
|
||||
// ----------- cmove float/double -----------
|
||||
|
||||
void MacroAssembler::cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bne(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
beq(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bgt(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bgtu(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
blt(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bltu(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bge(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bgeu(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
ble(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bleu(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// ----------- cmove, compare float/double -----------
|
||||
//
|
||||
// For CmpF/D + CMoveI/L, the ordered cases are straightforward,
// so only the behaviour of the unordered cases is listed below.
|
||||
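Each cmov_fp_* helper above emits the conditional move as a short branch around a single fmv: the branch skips the move when the condition fails, otherwise src is copied into dst. A hedged C++ sketch of the pattern for the "le" flavour (illustrative only, not HotSpot code):

```cpp
// Branch-around pattern used by the cmov_fp_* helpers, shown for "le":
// move src into dst unless cmp1 > cmp2 (the bgt branch skips the fmv).
void cmov_fp_le_sketch(long cmp1, long cmp2, double& dst, double src) {
  if (!(cmp1 > cmp2)) {   // bgt cmp1, cmp2, no_set
    dst = src;            // fmv.d dst, src
  }
}                         // bind(no_set)
```

The unsigned variants (cmov_fp_leu, cmov_fp_gtu, and so on) differ only in the branch instruction that guards the move.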
@@ -1391,6 +1503,148 @@ void MacroAssembler::cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Regi
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// ----------- cmove float/double, compare float/double -----------
|
||||
|
||||
// Move src to dst only if cmp1 == cmp2,
|
||||
// otherwise leave dst unchanged, including the case where one of them is NaN.
|
||||
// Clarification:
|
||||
// java code : cmp1 != cmp2 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 eq cmp2), dst, src
|
||||
void MacroAssembler::cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 != cmp2, including the case of NaN
|
||||
// not jump (i.e. move src to dst) if cmp1 == cmp2
|
||||
float_bne(cmp1, cmp2, no_set);
|
||||
} else {
|
||||
double_bne(cmp1, cmp2, no_set);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// Keep dst unchanged only if cmp1 == cmp2,
|
||||
// otherwise move src to dst, including the case where one of them is NaN.
|
||||
// Clarification:
|
||||
// java code : cmp1 == cmp2 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 ne cmp2), dst, src
|
||||
void MacroAssembler::cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 == cmp2
|
||||
// not jump (i.e. move src to dst) if cmp1 != cmp2, including the case of NaN
|
||||
float_beq(cmp1, cmp2, no_set);
|
||||
} else {
|
||||
double_beq(cmp1, cmp2, no_set);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// When cmp1 <= cmp2 or any of them is NaN then dst = src, otherwise, dst = dst
|
||||
// Clarification
|
||||
// scenario 1:
|
||||
// java code : cmp2 < cmp1 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 le cmp2), dst, src
|
||||
// scenario 2:
|
||||
// java code : cmp1 > cmp2 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 le cmp2), dst, src
|
||||
void MacroAssembler::cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 > cmp2
|
||||
// not jump (i.e. move src to dst) if cmp1 <= cmp2 or either is NaN
|
||||
float_bgt(cmp1, cmp2, no_set);
|
||||
} else {
|
||||
double_bgt(cmp1, cmp2, no_set);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 < cmp2 or either is NaN
|
||||
// not jump (i.e. move src to dst) if cmp1 >= cmp2
|
||||
float_blt(cmp1, cmp2, no_set, false, true);
|
||||
} else {
|
||||
double_blt(cmp1, cmp2, no_set, false, true);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// When cmp1 < cmp2 or any of them is NaN then dst = src, otherwise, dst = dst
|
||||
// Clarification
|
||||
// scenario 1:
|
||||
// java code : cmp2 <= cmp1 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 lt cmp2), dst, src
|
||||
// scenario 2:
|
||||
// java code : cmp1 >= cmp2 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 lt cmp2), dst, src
|
||||
void MacroAssembler::cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 >= cmp2
|
||||
// not jump (i.e. move src to dst) if cmp1 < cmp2 or either is NaN
|
||||
float_bge(cmp1, cmp2, no_set);
|
||||
} else {
|
||||
double_bge(cmp1, cmp2, no_set);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 <= cmp2 or either is NaN
|
||||
// not jump (i.e. move src to dst) if cmp1 > cmp2
|
||||
float_ble(cmp1, cmp2, no_set, false, true);
|
||||
} else {
|
||||
double_ble(cmp1, cmp2, no_set, false, true);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// Float compare branch instructions
|
||||
|
||||
#define INSN(NAME, FLOATCMP, BRANCH) \
|
||||
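The clarification comments above pin down the unordered (NaN) behaviour of CMove with a floating-point compare: an eq compare never moves when either input is NaN, while le/lt style compares do move, because the guarding branch only fires on a definite opposite result. A small C++ sketch of those two documented cases; the function names are illustrative, not HotSpot APIs.

```cpp
// cmov_fp_cmp_fp_eq: dst = src only when cmp1 == cmp2; a NaN on either side
// compares unequal, so dst is left unchanged.
void cmove_eq(double cmp1, double cmp2, double& dst, double src) {
  if (cmp1 == cmp2) {      // false whenever either input is NaN
    dst = src;
  }
}

// cmov_fp_cmp_fp_le: dst = src when cmp1 <= cmp2 or either input is NaN,
// i.e. the move is skipped only on a definite cmp1 > cmp2.
void cmove_le(double cmp1, double cmp2, double& dst, double src) {
  if (!(cmp1 > cmp2)) {    // NaN makes the comparison false, so the move happens
    dst = src;
  }
}
```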
@@ -4933,7 +5187,6 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int index = oop_recorder()->find_index(k);
assert(!Universe::heap()->is_in(k), "should not be an oop");

narrowKlass nk = CompressedKlassPointers::encode(k);
relocate(metadata_Relocation::spec(index), [&] {

@@ -665,6 +665,24 @@ class MacroAssembler: public Assembler {
void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
void cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);

void cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);

void cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);

public:
// We try to follow risc-v asm mnemonics.
// But as we don't layout a reachable GOT,

@@ -1924,8 +1924,6 @@ bool Matcher::match_rule_supported(int opcode) {
|
||||
case Op_SubHF:
|
||||
return UseZfh;
|
||||
|
||||
case Op_CMoveF:
|
||||
case Op_CMoveD:
|
||||
case Op_CMoveP:
|
||||
case Op_CMoveN:
|
||||
return false;
|
||||
@@ -10466,6 +10464,286 @@ instruct cmovL_cmpP(iRegLNoSp dst, iRegL src, iRegP op1, iRegP op2, cmpOpU cop)
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
// --------- CMoveF ---------
|
||||
|
||||
instruct cmovF_cmpI(fRegF dst, fRegF src, iRegI op1, iRegI op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpI op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpI\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpU(fRegF dst, fRegF src, iRegI op1, iRegI op2, cmpOpU cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpU op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpU\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpL(fRegF dst, fRegF src, iRegL op1, iRegL op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpL op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpL\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpUL(fRegF dst, fRegF src, iRegL op1, iRegL op2, cmpOpU cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpUL op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpUL\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpF(fRegF dst, fRegF src, fRegF op1, fRegF op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpF op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpF\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp_fp($cop$$cmpcode,
|
||||
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
true /* cmp_single */, true /* cmov_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpD(fRegF dst, fRegF src, fRegD op1, fRegD op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpD op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpD\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp_fp($cop$$cmpcode | C2_MacroAssembler::double_branch_mask,
|
||||
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
false /* cmp_single */, true /* cmov_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpN(fRegF dst, fRegF src, iRegN op1, iRegN op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpN op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpN\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpP(fRegF dst, fRegF src, iRegP op1, iRegP op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpP op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpP\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
// --------- CMoveD ---------
|
||||
|
||||
instruct cmovD_cmpI(fRegD dst, fRegD src, iRegI op1, iRegI op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpI op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpI\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpU(fRegD dst, fRegD src, iRegI op1, iRegI op2, cmpOpU cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpU op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpU\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpL(fRegD dst, fRegD src, iRegL op1, iRegL op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpL op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpL\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpUL(fRegD dst, fRegD src, iRegL op1, iRegL op2, cmpOpU cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpUL op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpUL\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpF(fRegD dst, fRegD src, fRegF op1, fRegF op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpF op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpF\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp_fp($cop$$cmpcode,
|
||||
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
true /* cmp_single */, false /* cmov_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpD(fRegD dst, fRegD src, fRegD op1, fRegD op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpD op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpD\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp_fp($cop$$cmpcode | C2_MacroAssembler::double_branch_mask,
|
||||
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
false /* cmp_single */, false /* cmov_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpN(fRegD dst, fRegD src, iRegN op1, iRegN op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpN op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpN\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpP(fRegD dst, fRegD src, iRegP op1, iRegP op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpP op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpP\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
// ============================================================================
|
||||
// Procedure Call/Return Instructions
|
||||
|
||||
|
||||
@@ -2493,8 +2493,8 @@ class StubGenerator: public StubCodeGenerator {
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(res, from);

__ mv(t2, 52);
__ blt(keylen, t2, L_aes128);
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t2, L_aes128);
__ beq(keylen, t2, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)

@@ -2572,8 +2572,8 @@ class StubGenerator: public StubCodeGenerator {
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(res, from);

__ mv(t2, 52);
__ blt(keylen, t2, L_aes128);
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t2, L_aes128);
__ beq(keylen, t2, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)

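The key-length dispatch in these hunks relies on the expanded AES key being 4 * (rounds + 1) 32-bit words, so 10, 12, and 14 rounds give 44, 52, and 60 ints, which is what the {11, 13, 15} * 4 comment refers to; the switch from blt to bltu presumably matches the unsigned lwu load of keylen. A quick check of that arithmetic:

```cpp
#include <cstdio>

// Expanded-key length in 32-bit words for AES: one 16-byte round key per
// round plus the initial whitening key, i.e. 4 * (rounds + 1) words.
int expanded_key_words(int rounds) { return 4 * (rounds + 1); }

int main() {
  printf("AES-128: %d words\n", expanded_key_words(10));  // 44
  printf("AES-192: %d words\n", expanded_key_words(12));  // 52
  printf("AES-256: %d words\n", expanded_key_words(14));  // 60
}
```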
@@ -2606,6 +2606,401 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
void cipherBlockChaining_encryptAESCrypt(int round, Register from, Register to, Register key,
|
||||
Register rvec, Register input_len) {
|
||||
const Register len = x29;
|
||||
|
||||
VectorRegister working_vregs[] = {
|
||||
v1, v2, v3, v4, v5, v6, v7, v8,
|
||||
v9, v10, v11, v12, v13, v14, v15
|
||||
};
|
||||
|
||||
const unsigned int BLOCK_SIZE = 16;
|
||||
|
||||
__ mv(len, input_len);
|
||||
// load init rvec
|
||||
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
|
||||
__ vle32_v(v16, rvec);
|
||||
|
||||
generate_aes_loadkeys(key, working_vregs, round);
|
||||
Label L_enc_loop;
|
||||
__ bind(L_enc_loop);
|
||||
// Encrypt from source by block size
|
||||
__ vle32_v(v17, from);
|
||||
__ addi(from, from, BLOCK_SIZE);
|
||||
__ vxor_vv(v16, v16, v17);
|
||||
generate_aes_encrypt(v16, working_vregs, round);
|
||||
__ vse32_v(v16, to);
|
||||
__ addi(to, to, BLOCK_SIZE);
|
||||
__ subi(len, len, BLOCK_SIZE);
|
||||
__ bnez(len, L_enc_loop);
|
||||
|
||||
// save current rvec and return
|
||||
__ vse32_v(v16, rvec);
|
||||
__ mv(x10, input_len);
|
||||
__ leave();
|
||||
__ ret();
|
||||
}
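The helper above is standard CBC encryption: each plaintext block is XORed with the previous ciphertext block (initially the IV held in rvec), encrypted, written out, and kept as the new chaining value, which is stored back at the end. A minimal C++ sketch of that loop, with the block cipher left as an assumed callback; this is an illustration of the chaining, not the stub itself.

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>

constexpr std::size_t kBlock = 16;  // AES block size in bytes

// CBC encryption over whole blocks: iv is updated in place so a caller can
// continue the chain across calls, mirroring how the stub stores rvec back.
void cbc_encrypt(const uint8_t* in, uint8_t* out, std::size_t len, uint8_t iv[kBlock],
                 const std::function<void(uint8_t block[kBlock])>& encrypt_block) {
  for (std::size_t off = 0; off < len; off += kBlock) {
    for (std::size_t i = 0; i < kBlock; i++) {
      iv[i] ^= in[off + i];            // chain: plaintext XOR previous ciphertext
    }
    encrypt_block(iv);                 // iv now holds the new ciphertext block
    for (std::size_t i = 0; i < kBlock; i++) {
      out[off + i] = iv[i];
    }
  }
}
```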
|
||||
|
||||
// Arguments:
|
||||
//
|
||||
// Inputs:
|
||||
// c_rarg0 - source byte array address
|
||||
// c_rarg1 - destination byte array address
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
// c_rarg3 - r vector byte array address
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
// Output:
|
||||
// x10 - input length
|
||||
//
|
||||
address generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
assert(UseAESIntrinsics, "Must be");
|
||||
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubId stub_id = StubId::stubgen_cipherBlockChaining_encryptAESCrypt_id;
|
||||
StubCodeMark mark(this, stub_id);
|
||||
|
||||
const Register from = c_rarg0;
|
||||
const Register to = c_rarg1;
|
||||
const Register key = c_rarg2;
|
||||
const Register rvec = c_rarg3;
|
||||
const Register input_len = c_rarg4;
|
||||
|
||||
const Register keylen = x28;
|
||||
|
||||
address start = __ pc();
|
||||
__ enter();
|
||||
|
||||
Label L_aes128, L_aes192;
|
||||
// Compute #rounds for AES based on the length of the key array
|
||||
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
__ mv(t0, 52);
|
||||
__ bltu(keylen, t0, L_aes128);
|
||||
__ beq(keylen, t0, L_aes192);
|
||||
// Else we fallthrough to the biggest case (256-bit key size)
|
||||
|
||||
// Note: the following function performs key += 15*16
|
||||
cipherBlockChaining_encryptAESCrypt(15, from, to, key, rvec, input_len);
|
||||
|
||||
// Note: the following function performs key += 11*16
|
||||
__ bind(L_aes128);
|
||||
cipherBlockChaining_encryptAESCrypt(11, from, to, key, rvec, input_len);
|
||||
|
||||
// Note: the following function performs key += 13*16
|
||||
__ bind(L_aes192);
|
||||
cipherBlockChaining_encryptAESCrypt(13, from, to, key, rvec, input_len);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
void cipherBlockChaining_decryptAESCrypt(int round, Register from, Register to, Register key,
|
||||
Register rvec, Register input_len) {
|
||||
const Register len = x29;
|
||||
|
||||
VectorRegister working_vregs[] = {
|
||||
v1, v2, v3, v4, v5, v6, v7, v8,
|
||||
v9, v10, v11, v12, v13, v14, v15
|
||||
};
|
||||
|
||||
const unsigned int BLOCK_SIZE = 16;
|
||||
|
||||
__ mv(len, input_len);
|
||||
// load init rvec
|
||||
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
|
||||
__ vle32_v(v16, rvec);
|
||||
|
||||
generate_aes_loadkeys(key, working_vregs, round);
|
||||
Label L_dec_loop;
|
||||
// Decrypt from source by block size
|
||||
__ bind(L_dec_loop);
|
||||
__ vle32_v(v17, from);
|
||||
__ addi(from, from, BLOCK_SIZE);
|
||||
__ vmv_v_v(v18, v17);
|
||||
generate_aes_decrypt(v17, working_vregs, round);
|
||||
__ vxor_vv(v17, v17, v16);
|
||||
__ vse32_v(v17, to);
|
||||
__ vmv_v_v(v16, v18);
|
||||
__ addi(to, to, BLOCK_SIZE);
|
||||
__ subi(len, len, BLOCK_SIZE);
|
||||
__ bnez(len, L_dec_loop);
|
||||
|
||||
// save current rvec and return
|
||||
__ vse32_v(v16, rvec);
|
||||
__ mv(x10, input_len);
|
||||
__ leave();
|
||||
__ ret();
|
||||
}
|
||||
|
||||
// Arguments:
|
||||
//
|
||||
// Inputs:
|
||||
// c_rarg0 - source byte array address
|
||||
// c_rarg1 - destination byte array address
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
// c_rarg3 - r vector byte array address
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
// Output:
|
||||
// x10 - input length
|
||||
//
|
||||
address generate_cipherBlockChaining_decryptAESCrypt() {
|
||||
assert(UseAESIntrinsics, "Must be");
|
||||
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubId stub_id = StubId::stubgen_cipherBlockChaining_decryptAESCrypt_id;
|
||||
StubCodeMark mark(this, stub_id);
|
||||
|
||||
const Register from = c_rarg0;
|
||||
const Register to = c_rarg1;
|
||||
const Register key = c_rarg2;
|
||||
const Register rvec = c_rarg3;
|
||||
const Register input_len = c_rarg4;
|
||||
|
||||
const Register keylen = x28;
|
||||
|
||||
address start = __ pc();
|
||||
__ enter();
|
||||
|
||||
Label L_aes128, L_aes192, L_aes128_loop, L_aes192_loop, L_aes256_loop;
|
||||
// Compute #rounds for AES based on the length of the key array
|
||||
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
__ mv(t0, 52);
|
||||
__ bltu(keylen, t0, L_aes128);
|
||||
__ beq(keylen, t0, L_aes192);
|
||||
// Else we fallthrough to the biggest case (256-bit key size)
|
||||
|
||||
// Note: the following function performs key += 15*16
|
||||
cipherBlockChaining_decryptAESCrypt(15, from, to, key, rvec, input_len);
|
||||
|
||||
// Note: the following function performs key += 11*16
|
||||
__ bind(L_aes128);
|
||||
cipherBlockChaining_decryptAESCrypt(11, from, to, key, rvec, input_len);
|
||||
|
||||
// Note: the following function performs key += 13*16
|
||||
__ bind(L_aes192);
|
||||
cipherBlockChaining_decryptAESCrypt(13, from, to, key, rvec, input_len);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// Load big-endian 128-bit from memory.
|
||||
void be_load_counter_128(Register counter_hi, Register counter_lo, Register counter) {
|
||||
__ ld(counter_lo, Address(counter, 8)); // Load 128-bits from counter
|
||||
__ ld(counter_hi, Address(counter));
|
||||
__ rev8(counter_lo, counter_lo); // Convert big-endian to little-endian
|
||||
__ rev8(counter_hi, counter_hi);
|
||||
}
|
||||
|
||||
// Little-endian 128-bit + 64-bit -> 128-bit addition.
|
||||
void add_counter_128(Register counter_hi, Register counter_lo) {
|
||||
assert_different_registers(counter_hi, counter_lo, t0);
|
||||
__ addi(counter_lo, counter_lo, 1);
|
||||
__ seqz(t0, counter_lo); // Check for result overflow
|
||||
__ add(counter_hi, counter_hi, t0); // Add 1 if overflow otherwise 0
|
||||
}
|
||||
|
||||
// Store big-endian 128-bit to memory.
|
||||
void be_store_counter_128(Register counter_hi, Register counter_lo, Register counter) {
|
||||
assert_different_registers(counter_hi, counter_lo, t0, t1);
|
||||
__ rev8(t0, counter_lo); // Convert little-endian to big-endian
|
||||
__ rev8(t1, counter_hi);
|
||||
__ sd(t0, Address(counter, 8)); // Store 128-bits to counter
|
||||
__ sd(t1, Address(counter));
|
||||
}
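These helpers treat the 16-byte counter as a big-endian 128-bit integer: load and byte-swap into two 64-bit halves, increment the low half and carry into the high half only when it wraps to zero (the seqz trick), then byte-swap and store back. The same carry logic in C++, with a wrap-around check:

```cpp
#include <cassert>
#include <cstdint>

// 128-bit increment on (hi, lo): after lo wraps to 0, add the carry into hi.
// Mirrors add_counter_128 above (addi; seqz t0, lo; add hi, hi, t0).
void add_counter_128(uint64_t& hi, uint64_t& lo) {
  lo += 1;
  uint64_t carry = (lo == 0) ? 1 : 0;   // seqz
  hi += carry;
}

int main() {
  uint64_t hi = 0, lo = UINT64_MAX;     // low half about to overflow
  add_counter_128(hi, lo);
  assert(hi == 1 && lo == 0);           // carry propagated into the high half
}
```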
|
||||
|
||||
void counterMode_AESCrypt(int round, Register in, Register out, Register key, Register counter,
|
||||
Register input_len, Register saved_encrypted_ctr, Register used_ptr) {
|
||||
// Algorithm:
|
||||
//
|
||||
// generate_aes_loadkeys();
|
||||
// load_counter_128(counter_hi, counter_lo, counter);
|
||||
//
|
||||
// L_next:
|
||||
// if (used >= BLOCK_SIZE) goto L_main_loop;
|
||||
//
|
||||
// L_encrypt_next:
|
||||
// *out = *in ^ saved_encrypted_ctr[used]);
|
||||
// out++; in++; used++; len--;
|
||||
// if (len == 0) goto L_exit;
|
||||
// goto L_next;
|
||||
//
|
||||
// L_main_loop:
|
||||
// if (len == 0) goto L_exit;
|
||||
// saved_encrypted_ctr = generate_aes_encrypt(counter);
|
||||
//
|
||||
// add_counter_128(counter_hi, counter_lo);
|
||||
// be_store_counter_128(counter_hi, counter_lo, counter);
|
||||
// used = 0;
|
||||
//
|
||||
// if(len < BLOCK_SIZE) goto L_encrypt_next;
|
||||
//
|
||||
// v_in = load_16Byte(in);
|
||||
// v_out = load_16Byte(out);
|
||||
// v_saved_encrypted_ctr = load_16Byte(saved_encrypted_ctr);
|
||||
// v_out = v_in ^ v_saved_encrypted_ctr;
|
||||
// out += BLOCK_SIZE;
|
||||
// in += BLOCK_SIZE;
|
||||
// len -= BLOCK_SIZE;
|
||||
// used = BLOCK_SIZE;
|
||||
// goto L_main_loop;
|
||||
//
|
||||
//
|
||||
// L_exit:
|
||||
// store(used);
|
||||
// result = input_len
|
||||
// return result;
|
||||
|
||||
const Register used = x28;
|
||||
const Register len = x29;
|
||||
const Register counter_hi = x30;
|
||||
const Register counter_lo = x31;
|
||||
const Register block_size = t2;
|
||||
|
||||
const unsigned int BLOCK_SIZE = 16;
|
||||
|
||||
VectorRegister working_vregs[] = {
|
||||
v1, v2, v3, v4, v5, v6, v7, v8,
|
||||
v9, v10, v11, v12, v13, v14, v15
|
||||
};
|
||||
|
||||
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
|
||||
|
||||
__ lwu(used, Address(used_ptr));
|
||||
__ mv(len, input_len);
|
||||
__ mv(block_size, BLOCK_SIZE);
|
||||
|
||||
// load keys to working_vregs according to round
|
||||
generate_aes_loadkeys(key, working_vregs, round);
|
||||
|
||||
// 128-bit big-endian load
|
||||
be_load_counter_128(counter_hi, counter_lo, counter);
|
||||
|
||||
Label L_next, L_encrypt_next, L_main_loop, L_exit;
|
||||
// Check how much of the last saved_encrypted_ctr block was used; we fall through
// to L_encrypt_next when the used value is lower than block_size.
|
||||
__ bind(L_next);
|
||||
__ bgeu(used, block_size, L_main_loop);
|
||||
|
||||
// There may still be less than block_size of data left after L_main_loop,
// or after the last partially used block; encrypt it one byte at a time.
|
||||
__ bind(L_encrypt_next);
|
||||
__ add(t0, saved_encrypted_ctr, used);
|
||||
__ lbu(t1, Address(t0));
|
||||
__ lbu(t0, Address(in));
|
||||
__ xorr(t1, t1, t0);
|
||||
__ sb(t1, Address(out));
|
||||
__ addi(in, in, 1);
|
||||
__ addi(out, out, 1);
|
||||
__ addi(used, used, 1);
|
||||
__ subi(len, len, 1);
|
||||
__ beqz(len, L_exit);
|
||||
__ j(L_next);
|
||||
|
||||
// Calculate the next saved_encrypted_ctr and encrypt the data block by block
// until less than a full block remains (as long as len is not zero).
|
||||
__ bind(L_main_loop);
|
||||
__ beqz(len, L_exit);
|
||||
__ vle32_v(v16, counter);
|
||||
|
||||
// encrypt counter according to round
|
||||
generate_aes_encrypt(v16, working_vregs, round);
|
||||
|
||||
__ vse32_v(v16, saved_encrypted_ctr);
|
||||
|
||||
// 128-bit little-endian increment
|
||||
add_counter_128(counter_hi, counter_lo);
|
||||
// 128-bit big-endian store
|
||||
be_store_counter_128(counter_hi, counter_lo, counter);
|
||||
|
||||
__ mv(used, 0);
|
||||
// Check if we have a full block_size
|
||||
__ bltu(len, block_size, L_encrypt_next);
|
||||
|
||||
// We have one full block to encrypt at least
|
||||
__ vle32_v(v17, in);
|
||||
__ vxor_vv(v16, v16, v17);
|
||||
__ vse32_v(v16, out);
|
||||
__ add(out, out, block_size);
|
||||
__ add(in, in, block_size);
|
||||
__ sub(len, len, block_size);
|
||||
__ mv(used, block_size);
|
||||
__ j(L_main_loop);
|
||||
|
||||
__ bind(L_exit);
|
||||
__ sw(used, Address(used_ptr));
|
||||
__ mv(x10, input_len);
|
||||
__ leave();
|
||||
__ ret();
|
||||
};
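The Algorithm comment inside this helper describes CTR mode with a partially consumed keystream block: leftover bytes of the last encrypted counter are used first, and a fresh keystream block is produced by encrypting and incrementing the counter whenever they run out. Below is a compact C++ rendering of that pseudocode, byte at a time for clarity where the stub processes whole 16-byte blocks with vector instructions; the callbacks are assumptions, not the stub's interface.

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>

constexpr std::size_t kBlock = 16;

// CTR crypt: 'used' says how many bytes of saved_ctr (the last encrypted
// counter) were already consumed; it is returned updated, just as the stub
// writes it back through used_ptr.
std::size_t ctr_crypt(const uint8_t* in, uint8_t* out, std::size_t len,
                      uint8_t counter[kBlock], uint8_t saved_ctr[kBlock], std::size_t used,
                      const std::function<void(const uint8_t ctr[kBlock], uint8_t ks[kBlock])>& encrypt_block,
                      const std::function<void(uint8_t ctr[kBlock])>& increment_be128) {
  for (std::size_t i = 0; i < len; i++) {
    if (used == kBlock) {                // keystream exhausted: make a new block
      encrypt_block(counter, saved_ctr);
      increment_be128(counter);
      used = 0;
    }
    out[i] = in[i] ^ saved_ctr[used++];  // XOR each byte with the keystream
  }
  return used;
}
```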
|
||||
|
||||
// CTR AES crypt.
|
||||
// Arguments:
|
||||
//
|
||||
// Inputs:
|
||||
// c_rarg0 - source byte array address
|
||||
// c_rarg1 - destination byte array address
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
// c_rarg3 - counter vector byte array address
|
||||
// c_rarg4 - input length
|
||||
// c_rarg5 - saved encryptedCounter start
|
||||
// c_rarg6 - saved used length
|
||||
//
|
||||
// Output:
|
||||
// x10 - input length
|
||||
//
|
||||
address generate_counterMode_AESCrypt() {
|
||||
assert(UseAESCTRIntrinsics, "Must be");
|
||||
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
|
||||
assert(UseZbb, "need basic bit manipulation (Zbb extension) support");
|
||||
|
||||
__ align(CodeEntryAlignment);
|
||||
StubId stub_id = StubId::stubgen_counterMode_AESCrypt_id;
|
||||
StubCodeMark mark(this, stub_id);
|
||||
|
||||
const Register in = c_rarg0;
|
||||
const Register out = c_rarg1;
|
||||
const Register key = c_rarg2;
|
||||
const Register counter = c_rarg3;
|
||||
const Register input_len = c_rarg4;
|
||||
const Register saved_encrypted_ctr = c_rarg5;
|
||||
const Register used_len_ptr = c_rarg6;
|
||||
|
||||
const Register keylen = c_rarg7; // temporary register
|
||||
|
||||
const address start = __ pc();
|
||||
__ enter();
|
||||
|
||||
Label L_exit;
|
||||
__ beqz(input_len, L_exit);
|
||||
|
||||
Label L_aes128, L_aes192;
|
||||
// Compute #rounds for AES based on the length of the key array
|
||||
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
__ mv(t0, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ bltu(keylen, t0, L_aes128);
|
||||
__ beq(keylen, t0, L_aes192);
|
||||
// Else we fallthrough to the biggest case (256-bit key size)
|
||||
|
||||
// Note: the following function performs crypt with key += 15*16
|
||||
counterMode_AESCrypt(15, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
|
||||
|
||||
// Note: the following function performs crypt with key += 13*16
|
||||
__ bind(L_aes192);
|
||||
counterMode_AESCrypt(13, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
|
||||
|
||||
// Note: the following function performs crypt with key += 11*16
|
||||
__ bind(L_aes128);
|
||||
counterMode_AESCrypt(11, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
|
||||
|
||||
__ bind(L_exit);
|
||||
__ mv(x10, input_len);
|
||||
__ leave();
|
||||
__ ret();
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
|
||||
void compare_string_8_x_LU(Register tmpL, Register tmpU,
|
||||
Register strL, Register strU, Label& DIFF) {
|
||||
@@ -6824,6 +7219,12 @@ static const int64_t right_3_bits = right_n_bits(3);
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
}

if (UseAESCTRIntrinsics) {
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
}

if (UsePoly1305Intrinsics) {

@@ -434,6 +434,15 @@ void VM_Version::c2_initialize() {
warning("UseAESIntrinsics enabled, but UseAES not, enabling");
UseAES = true;
}

if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics) && UseZbb) {
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
}

if (UseAESCTRIntrinsics && !UseZbb) {
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZbb support.");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
} else {
if (UseAES) {
warning("AES instructions are not available on this CPU");

@@ -443,11 +452,10 @@ void VM_Version::c2_initialize() {
warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
}

if (UseAESCTRIntrinsics) {
warning("AES/CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
if (UseAESCTRIntrinsics) {
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZvkn support.");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
}
}

@@ -1715,6 +1715,8 @@ bool Matcher::match_rule_supported(int opcode) {
switch (opcode) {
case Op_ReverseBytesI:
case Op_ReverseBytesL:
case Op_ReverseBytesS:
case Op_ReverseBytesUS:
return UseByteReverseInstruction;
case Op_PopCountI:
case Op_PopCountL:
@@ -11615,6 +11617,38 @@ instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{

// Byte reverse

instruct bytes_reverse_short(iRegI dst, iRegI src) %{
match(Set dst (ReverseBytesS src));
predicate(UseByteReverseInstruction);
ins_cost(2 * DEFAULT_COST);
size(8);

format %{ "LRVR $dst, $src\n\t # byte reverse int"
"SRA $dst, 0x0010\t # right shift by 16, sign extended" %}

ins_encode %{
__ z_lrvr($dst$$Register, $src$$Register);
__ z_sra($dst$$Register, 0x0010);
%}
ins_pipe(pipe_class_dummy);
%}

instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{
match(Set dst (ReverseBytesUS src));
predicate(UseByteReverseInstruction);
ins_cost(2 * DEFAULT_COST);
size(8);

format %{ "LRVR $dst, $src\n\t # byte reverse int"
"SRL $dst, 0x0010\t # right shift by 16, zero extended" %}

ins_encode %{
__ z_lrvr($dst$$Register, $src$$Register);
__ z_srl($dst$$Register, 0x0010);
%}
ins_pipe(pipe_class_dummy);
%}

instruct bytes_reverse_int(iRegI dst, iRegI src) %{
match(Set dst (ReverseBytesI src));
predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported

@@ -2333,8 +2333,8 @@ int os::open(const char *path, int oflag, int mode) {

if (ret != -1) {
if ((st_mode & S_IFMT) == S_IFDIR) {
errno = EISDIR;
::close(fd);
errno = EISDIR;
return -1;
}
} else {

@@ -2277,8 +2277,8 @@ int os::open(const char *path, int oflag, int mode) {

if (ret != -1) {
if ((st_mode & S_IFMT) == S_IFDIR) {
errno = EISDIR;
::close(fd);
errno = EISDIR;
return -1;
}
} else {

@@ -4932,8 +4932,8 @@ int os::open(const char *path, int oflag, int mode) {

if (ret != -1) {
if ((st_mode & S_IFMT) == S_IFDIR) {
errno = EISDIR;
::close(fd);
errno = EISDIR;
return -1;
}
} else {

@@ -1028,6 +1028,7 @@ char* os::realpath(const char* filename, char* outbuf, size_t outbuflen) {
} else {
errno = ENAMETOOLONG;
}
ErrnoPreserver ep;
permit_forbidden_function::free(p); // *not* os::free
} else {
// Fallback for platforms struggling with modern Posix standards (AIX 5.3, 6.1). If realpath

@@ -1645,7 +1645,7 @@ static void SR_handler(int sig, siginfo_t* siginfo, void* context) {

// Save and restore errno to avoid confusing native code with EINTR
// after sigsuspend.
int old_errno = errno;
ErrnoPreserver ep;

PosixSignals::unblock_error_signals();

@@ -1727,7 +1727,6 @@ static void SR_handler(int sig, siginfo_t* siginfo, void* context) {
// ignore
}

errno = old_errno;
}

static int SR_initialize() {

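The hunks above all follow one rule: the errno a caller observes must be decided after, or kept alive across, the cleanup call that could overwrite it (::close, free, sigsuspend). A minimal standalone sketch of the RAII form of that rule (ErrnoGuard is illustrative; the JDK code uses its own ErrnoPreserver helper):

    #include <cerrno>
    #include <unistd.h>

    // Saves errno on construction and restores it at scope exit, so intervening
    // cleanup calls cannot clobber the value the caller will read.
    class ErrnoGuard {
      int _saved;
     public:
      ErrnoGuard() : _saved(errno) {}
      ~ErrnoGuard() { errno = _saved; }
    };

    int reject_directory(int fd) {
      errno = EISDIR;    // decide the error first ...
      ErrnoGuard guard;  // ... then keep it alive across the cleanup
      ::close(fd);
      return -1;
    }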
@@ -4782,8 +4782,8 @@ int os::stat(const char *path, struct stat *sbuf) {
|
||||
path_to_target = get_path_to_target(wide_path);
|
||||
if (path_to_target == nullptr) {
|
||||
// it is a symbolic link, but we failed to resolve it
|
||||
errno = ENOENT;
|
||||
os::free(wide_path);
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -4794,14 +4794,14 @@ int os::stat(const char *path, struct stat *sbuf) {
|
||||
// if getting attributes failed, GetLastError should be called immediately after that
|
||||
if (!bret) {
|
||||
DWORD errcode = ::GetLastError();
|
||||
log_debug(os)("os::stat() failed to GetFileAttributesExW: GetLastError->%lu.", errcode);
|
||||
os::free(wide_path);
|
||||
os::free(path_to_target);
|
||||
if (errcode == ERROR_FILE_NOT_FOUND || errcode == ERROR_PATH_NOT_FOUND) {
|
||||
errno = ENOENT;
|
||||
} else {
|
||||
errno = 0;
|
||||
}
|
||||
log_debug(os)("os::stat() failed to GetFileAttributesExW: GetLastError->%lu.", errcode);
|
||||
os::free(wide_path);
|
||||
os::free(path_to_target);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -5000,8 +5000,8 @@ int os::open(const char *path, int oflag, int mode) {
|
||||
path_to_target = get_path_to_target(wide_path);
|
||||
if (path_to_target == nullptr) {
|
||||
// it is a symbolic link, but we failed to resolve it
|
||||
errno = ENOENT;
|
||||
os::free(wide_path);
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -5275,6 +5275,7 @@ char* os::realpath(const char* filename, char* outbuf, size_t outbuflen) {
|
||||
} else {
|
||||
errno = ENAMETOOLONG;
|
||||
}
|
||||
ErrnoPreserver ep;
|
||||
permit_forbidden_function::free(p); // *not* os::free
|
||||
}
|
||||
return result;
|
||||
|
||||
@@ -96,6 +96,7 @@
|
||||
#include "runtime/vmOperations.hpp"
|
||||
#include "runtime/vmThread.hpp"
|
||||
#include "sanitizers/leak.hpp"
|
||||
#include "services/management.hpp"
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/bitMap.inline.hpp"
|
||||
#include "utilities/defaultStream.hpp"
|
||||
@@ -2204,7 +2205,7 @@ void AOTMetaspace::initialize_shared_spaces() {
|
||||
CountSharedSymbols cl;
|
||||
SymbolTable::shared_symbols_do(&cl);
|
||||
tty->print_cr("Number of shared symbols: %zu", cl.total());
|
||||
if (HeapShared::is_loading_mapping_mode()) {
|
||||
if (HeapShared::is_loading() && HeapShared::is_loading_mapping_mode()) {
|
||||
tty->print_cr("Number of shared strings: %zu", StringTable::shared_entry_count());
|
||||
}
|
||||
tty->print_cr("VM version: %s\r\n", static_mapinfo->vm_version());
|
||||
|
||||
@@ -149,6 +149,10 @@ public:
|
||||
assert(is_loaded(), "must be loaded");
|
||||
return _flags;
|
||||
}
|
||||
|
||||
// Fetch Klass::access_flags.
|
||||
jint access_flags() { return flags().as_int(); }
|
||||
|
||||
bool has_finalizer() {
|
||||
assert(is_loaded(), "must be loaded");
|
||||
return _has_finalizer; }
|
||||
|
||||
@@ -216,15 +216,6 @@ jint ciKlass::modifier_flags() {
|
||||
)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciKlass::access_flags
|
||||
jint ciKlass::access_flags() {
|
||||
assert(is_loaded(), "not loaded");
|
||||
GUARDED_VM_ENTRY(
|
||||
return get_Klass()->access_flags().as_unsigned_short();
|
||||
)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciKlass::misc_flags
|
||||
klass_flags_t ciKlass::misc_flags() {
|
||||
|
||||
@@ -122,9 +122,6 @@ public:
|
||||
// Fetch modifier flags.
|
||||
jint modifier_flags();
|
||||
|
||||
// Fetch Klass::access_flags.
|
||||
jint access_flags();
|
||||
|
||||
// Fetch Klass::misc_flags.
|
||||
klass_flags_t misc_flags();
|
||||
|
||||
|
||||
@@ -439,7 +439,7 @@ class MethodFamily : public ResourceObj {
|
||||
StreamIndentor si(str, indent * 2);
|
||||
str->print("Selected method: ");
|
||||
print_method(str, _selected_target);
|
||||
Klass* method_holder = _selected_target->method_holder();
|
||||
InstanceKlass* method_holder = _selected_target->method_holder();
|
||||
if (!method_holder->is_interface()) {
|
||||
str->print(" : in superclass");
|
||||
}
|
||||
|
||||
@@ -1091,10 +1091,6 @@ void java_lang_Class::allocate_mirror(Klass* k, bool is_scratch, Handle protecti
|
||||
// Set the modifiers flag.
|
||||
u2 computed_modifiers = k->compute_modifier_flags();
|
||||
set_modifiers(mirror(), computed_modifiers);
|
||||
// Set the raw access_flags, this is used by reflection instead of modifier flags.
|
||||
// The Java code for array classes gets the access flags from the element type.
|
||||
assert(!k->is_array_klass() || k->access_flags().as_unsigned_short() == 0, "access flags are not set for arrays");
|
||||
set_raw_access_flags(mirror(), k->access_flags().as_unsigned_short());
|
||||
|
||||
InstanceMirrorKlass* mk = InstanceMirrorKlass::cast(mirror->klass());
|
||||
assert(oop_size(mirror()) == mk->instance_size(k), "should have been set");
|
||||
@@ -1103,6 +1099,8 @@ void java_lang_Class::allocate_mirror(Klass* k, bool is_scratch, Handle protecti
|
||||
|
||||
// It might also have a component mirror. This mirror must already exist.
|
||||
if (k->is_array_klass()) {
|
||||
// The Java code for array classes gets the access flags from the element type.
|
||||
set_raw_access_flags(mirror(), 0);
|
||||
if (k->is_typeArray_klass()) {
|
||||
BasicType type = TypeArrayKlass::cast(k)->element_type();
|
||||
if (is_scratch) {
|
||||
@@ -1129,6 +1127,8 @@ void java_lang_Class::allocate_mirror(Klass* k, bool is_scratch, Handle protecti
|
||||
// and java_mirror in this klass.
|
||||
} else {
|
||||
assert(k->is_instance_klass(), "Must be");
|
||||
// Set the raw access_flags, this is used by reflection instead of modifier flags.
|
||||
set_raw_access_flags(mirror(), InstanceKlass::cast(k)->access_flags().as_unsigned_short());
|
||||
initialize_mirror_fields(InstanceKlass::cast(k), mirror, protection_domain, classData, THREAD);
|
||||
if (HAS_PENDING_EXCEPTION) {
|
||||
// If any of the fields throws an exception like OOM remove the klass field
|
||||
|
||||
@@ -2172,9 +2172,10 @@ static bool is_always_visible_class(oop mirror) {
|
||||
return true; // primitive array
|
||||
}
|
||||
assert(klass->is_instance_klass(), "%s", klass->external_name());
|
||||
return klass->is_public() &&
|
||||
(InstanceKlass::cast(klass)->is_same_class_package(vmClasses::Object_klass()) || // java.lang
|
||||
InstanceKlass::cast(klass)->is_same_class_package(vmClasses::MethodHandle_klass())); // java.lang.invoke
|
||||
InstanceKlass* ik = InstanceKlass::cast(klass);
|
||||
return ik->is_public() &&
|
||||
(ik->is_same_class_package(vmClasses::Object_klass()) || // java.lang
|
||||
ik->is_same_class_package(vmClasses::MethodHandle_klass())); // java.lang.invoke
|
||||
}
|
||||
|
||||
// Find or construct the Java mirror (java.lang.Class instance) for
|
||||
|
||||
@@ -227,11 +227,6 @@ void CodeCache::initialize_heaps() {

if (!non_nmethod.set) {
non_nmethod.size += compiler_buffer_size;
// Further down, just before FLAG_SET_ERGO(), all segment sizes are
// aligned down to the next lower multiple of min_size. For large page
// sizes, this may result in (non_nmethod.size == 0) which is not acceptable.
// Therefore, force non_nmethod.size to at least min_size.
non_nmethod.size = MAX2(non_nmethod.size, min_size);
}

if (!profiled.set && !non_profiled.set) {
@@ -307,11 +302,10 @@ void CodeCache::initialize_heaps() {

// Note: if large page support is enabled, min_size is at least the large
// page size. This ensures that the code cache is covered by large pages.
non_profiled.size += non_nmethod.size & alignment_mask(min_size);
non_profiled.size += profiled.size & alignment_mask(min_size);
non_nmethod.size = align_down(non_nmethod.size, min_size);
profiled.size = align_down(profiled.size, min_size);
non_profiled.size = align_down(non_profiled.size, min_size);
non_nmethod.size = align_up(non_nmethod.size, min_size);
profiled.size = align_up(profiled.size, min_size);
non_profiled.size = align_up(non_profiled.size, min_size);
cache_size = non_nmethod.size + profiled.size + non_profiled.size;

FLAG_SET_ERGO(NonNMethodCodeHeapSize, non_nmethod.size);
FLAG_SET_ERGO(ProfiledCodeHeapSize, profiled.size);

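For reference, the rounding these lines rely on is plain power-of-two arithmetic; a standalone sketch (min_size stands in for the large-page-derived minimum, assumed to be a power of two):

    #include <cassert>
    #include <cstddef>

    static size_t align_down(size_t x, size_t a) { assert((a & (a - 1)) == 0); return x & ~(a - 1); }
    static size_t align_up  (size_t x, size_t a) { assert((a & (a - 1)) == 0); return (x + a - 1) & ~(a - 1); }

    int main() {
      const size_t min_size = 2u * 1024 * 1024;       // e.g. a 2M large page
      const size_t segment  = 5u * 1024 * 1024 + 123;
      assert(align_down(segment, min_size) == 4u * 1024 * 1024);  // can shave off almost min_size
      assert(align_up(segment, min_size)   == 6u * 1024 * 1024);  // never rounds a segment to zero
    }

Rounding each segment up rather than down means nothing is shaved off, so the remainder no longer has to be folded back into the non-profiled segment; the trade-off is that the summed cache_size can grow by just under min_size per segment.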
@@ -561,6 +561,20 @@ bool DirectiveSet::should_not_inline(ciMethod* inlinee) {
return false;
}

bool DirectiveSet::should_delay_inline(ciMethod* inlinee) {
inlinee->check_is_loaded();
VM_ENTRY_MARK;
methodHandle mh(THREAD, inlinee->get_Method());

if (_inlinematchers != nullptr) {
return matches_inline(mh, InlineMatcher::delay_inline);
}
if (!CompilerDirectivesIgnoreCompileCommandsOption) {
return CompilerOracle::should_delay_inline(mh);
}
return false;
}

bool DirectiveSet::parse_and_add_inline(char* str, const char*& error_msg) {
InlineMatcher* m = InlineMatcher::parse_inline_pattern(str, error_msg);
if (m != nullptr) {

@@ -142,6 +142,7 @@
void append_inline(InlineMatcher* m);
bool should_inline(ciMethod* inlinee);
bool should_not_inline(ciMethod* inlinee);
bool should_delay_inline(ciMethod* inlinee);
void print_inline(outputStream* st);
DirectiveSet* compilecommand_compatibility_init(const methodHandle& method);
bool is_exclusive_copy() { return _directive == nullptr; }
|
||||
@@ -480,6 +480,10 @@ bool CompilerOracle::should_not_inline(const methodHandle& method) {
return check_predicate(CompileCommandEnum::DontInline, method) || check_predicate(CompileCommandEnum::Exclude, method);
}

bool CompilerOracle::should_delay_inline(const methodHandle& method) {
return (check_predicate(CompileCommandEnum::DelayInline, method));
}

bool CompilerOracle::should_print(const methodHandle& method) {
return check_predicate(CompileCommandEnum::Print, method);
}

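Assuming the new DelayInline predicate is reachable through the normal CompileCommand front end (the "delayinline" option name added in the next hunk points that way), usage would look like the following; the class and method names are hypothetical and shown only for the syntax:

    java -XX:CompileCommand=delayinline,com.example.Foo::bar -jar app.jar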
@@ -51,6 +51,7 @@ class methodHandle;
option(Log, "log", Bool) \
option(Print, "print", Bool) \
option(Inline, "inline", Bool) \
option(DelayInline, "delayinline", Bool) \
option(DontInline, "dontinline", Bool) \
option(Blackhole, "blackhole", Bool) \
option(CompileOnly, "compileonly", Bool)\
@@ -150,6 +151,9 @@ class CompilerOracle : AllStatic {
// Tells whether we want to disallow inlining of this method
static bool should_not_inline(const methodHandle& method);

// Tells whether we want to delay inlining of this method
static bool should_delay_inline(const methodHandle& method);

// Tells whether this method changes Thread.currentThread()
static bool changes_current_thread(const methodHandle& method);
|
||||
|
||||
@@ -100,6 +100,7 @@ public:
|
||||
enum InlineType {
|
||||
unknown_inline,
|
||||
dont_inline,
|
||||
delay_inline,
|
||||
force_inline
|
||||
};
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#include "gc/g1/g1HeapSizingPolicy.hpp"
|
||||
#include "gc/g1/jvmFlagConstraintsG1.hpp"
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "gc/shared/ptrQueue.hpp"
|
||||
#include "gc/shared/satbMarkQueue.hpp"
|
||||
#include "runtime/globals_extension.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
|
||||
|
||||
@@ -70,8 +70,8 @@
|
||||
nonstatic_field(G1HeapRegionSetBase, _length, uint) \
|
||||
\
|
||||
nonstatic_field(SATBMarkQueue, _active, bool) \
|
||||
nonstatic_field(PtrQueue, _buf, void**) \
|
||||
nonstatic_field(PtrQueue, _index, size_t)
|
||||
nonstatic_field(SATBMarkQueue, _buf, void**) \
|
||||
nonstatic_field(SATBMarkQueue, _index, size_t)
|
||||
|
||||
#define VM_INT_CONSTANTS_G1GC(declare_constant, declare_constant_with_value) \
|
||||
declare_constant(G1HeapRegionType::FreeTag) \
|
||||
@@ -96,7 +96,6 @@
|
||||
declare_toplevel_type(G1HeapRegionManager) \
|
||||
declare_toplevel_type(G1HeapRegionSetBase) \
|
||||
declare_toplevel_type(G1MonitoringSupport) \
|
||||
declare_toplevel_type(PtrQueue) \
|
||||
declare_toplevel_type(G1HeapRegionType) \
|
||||
declare_toplevel_type(SATBMarkQueue) \
|
||||
\
|
||||
|
||||
@@ -58,8 +58,6 @@
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/vmError.hpp"
|
||||
|
||||
PSYoungGen* ParallelScavengeHeap::_young_gen = nullptr;
|
||||
PSOldGen* ParallelScavengeHeap::_old_gen = nullptr;
|
||||
PSAdaptiveSizePolicy* ParallelScavengeHeap::_size_policy = nullptr;
|
||||
GCPolicyCounters* ParallelScavengeHeap::_gc_policy_counters = nullptr;
|
||||
size_t ParallelScavengeHeap::_desired_page_size = 0;
|
||||
@@ -134,18 +132,18 @@ jint ParallelScavengeHeap::initialize() {
|
||||
|
||||
void ParallelScavengeHeap::initialize_serviceability() {
|
||||
|
||||
_eden_pool = new EdenMutableSpacePool(_young_gen,
|
||||
_young_gen->eden_space(),
|
||||
"PS Eden Space",
|
||||
false /* support_usage_threshold */);
|
||||
_eden_pool = new PSEdenSpacePool(_young_gen,
|
||||
_young_gen->eden_space(),
|
||||
"PS Eden Space",
|
||||
false /* support_usage_threshold */);
|
||||
|
||||
_survivor_pool = new SurvivorMutableSpacePool(_young_gen,
|
||||
"PS Survivor Space",
|
||||
false /* support_usage_threshold */);
|
||||
_survivor_pool = new PSSurvivorSpacePool(_young_gen,
|
||||
"PS Survivor Space",
|
||||
false /* support_usage_threshold */);
|
||||
|
||||
_old_pool = new PSGenerationPool(_old_gen,
|
||||
"PS Old Gen",
|
||||
true /* support_usage_threshold */);
|
||||
_old_pool = new PSOldGenerationPool(_old_gen,
|
||||
"PS Old Gen",
|
||||
true /* support_usage_threshold */);
|
||||
|
||||
_young_manager = new GCMemoryManager("PS Scavenge");
|
||||
_old_manager = new GCMemoryManager("PS MarkSweep");
|
||||
|
||||
@@ -69,8 +69,8 @@ class ReservedSpace;
|
||||
class ParallelScavengeHeap : public CollectedHeap {
|
||||
friend class VMStructs;
|
||||
private:
|
||||
static PSYoungGen* _young_gen;
|
||||
static PSOldGen* _old_gen;
|
||||
PSYoungGen* _young_gen;
|
||||
PSOldGen* _old_gen;
|
||||
|
||||
// Sizing policy for entire heap
|
||||
static PSAdaptiveSizePolicy* _size_policy;
|
||||
@@ -160,8 +160,8 @@ public:
|
||||
GrowableArray<GCMemoryManager*> memory_managers() override;
|
||||
GrowableArray<MemoryPool*> memory_pools() override;
|
||||
|
||||
static PSYoungGen* young_gen() { return _young_gen; }
|
||||
static PSOldGen* old_gen() { return _old_gen; }
|
||||
PSYoungGen* young_gen() const { return _young_gen; }
|
||||
PSOldGen* old_gen() const { return _old_gen; }
|
||||
|
||||
PSAdaptiveSizePolicy* size_policy() { return _size_policy; }
|
||||
|
||||
|
||||
@@ -24,14 +24,14 @@
|
||||
|
||||
#include "gc/parallel/psMemoryPool.hpp"
|
||||
|
||||
PSGenerationPool::PSGenerationPool(PSOldGen* old_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
PSOldGenerationPool::PSOldGenerationPool(PSOldGen* old_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
CollectedMemoryPool(name, old_gen->capacity_in_bytes(),
|
||||
old_gen->reserved().byte_size(), support_usage_threshold), _old_gen(old_gen) {
|
||||
}
|
||||
|
||||
MemoryUsage PSGenerationPool::get_memory_usage() {
|
||||
MemoryUsage PSOldGenerationPool::get_memory_usage() {
|
||||
size_t maxSize = (available_for_allocation() ? max_size() : 0);
|
||||
size_t used = used_in_bytes();
|
||||
size_t committed = _old_gen->capacity_in_bytes();
|
||||
@@ -39,16 +39,16 @@ MemoryUsage PSGenerationPool::get_memory_usage() {
|
||||
return MemoryUsage(initial_size(), used, committed, maxSize);
|
||||
}
|
||||
|
||||
// The max size of EdenMutableSpacePool =
|
||||
// The max size of PSEdenSpacePool =
|
||||
// max size of the PSYoungGen - capacity of two survivor spaces
|
||||
//
|
||||
// Max size of PS eden space is changing due to ergonomic.
|
||||
// PSYoungGen, PSOldGen, Eden, Survivor spaces are all resizable.
|
||||
//
|
||||
EdenMutableSpacePool::EdenMutableSpacePool(PSYoungGen* young_gen,
|
||||
MutableSpace* space,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
PSEdenSpacePool::PSEdenSpacePool(PSYoungGen* young_gen,
|
||||
MutableSpace* space,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
CollectedMemoryPool(name, space->capacity_in_bytes(),
|
||||
(young_gen->max_gen_size() -
|
||||
young_gen->from_space()->capacity_in_bytes() -
|
||||
@@ -58,7 +58,7 @@ EdenMutableSpacePool::EdenMutableSpacePool(PSYoungGen* young_gen,
|
||||
_space(space) {
|
||||
}
|
||||
|
||||
MemoryUsage EdenMutableSpacePool::get_memory_usage() {
|
||||
MemoryUsage PSEdenSpacePool::get_memory_usage() {
|
||||
size_t maxSize = (available_for_allocation() ? max_size() : 0);
|
||||
size_t used = used_in_bytes();
|
||||
size_t committed = _space->capacity_in_bytes();
|
||||
@@ -66,20 +66,20 @@ MemoryUsage EdenMutableSpacePool::get_memory_usage() {
|
||||
return MemoryUsage(initial_size(), used, committed, maxSize);
|
||||
}
|
||||
|
||||
// The max size of SurvivorMutableSpacePool =
|
||||
// The max size of PSSurvivorSpacePool =
|
||||
// current capacity of the from-space
|
||||
//
|
||||
// PS from and to survivor spaces could have different sizes.
|
||||
//
|
||||
SurvivorMutableSpacePool::SurvivorMutableSpacePool(PSYoungGen* young_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
PSSurvivorSpacePool::PSSurvivorSpacePool(PSYoungGen* young_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
CollectedMemoryPool(name, young_gen->from_space()->capacity_in_bytes(),
|
||||
young_gen->from_space()->capacity_in_bytes(),
|
||||
support_usage_threshold), _young_gen(young_gen) {
|
||||
}
|
||||
|
||||
MemoryUsage SurvivorMutableSpacePool::get_memory_usage() {
|
||||
MemoryUsage PSSurvivorSpacePool::get_memory_usage() {
|
||||
size_t maxSize = (available_for_allocation() ? max_size() : 0);
|
||||
size_t used = used_in_bytes();
|
||||
size_t committed = committed_in_bytes();
|
||||
|
||||
@@ -31,28 +31,28 @@
|
||||
#include "services/memoryPool.hpp"
|
||||
#include "services/memoryUsage.hpp"
|
||||
|
||||
class PSGenerationPool : public CollectedMemoryPool {
|
||||
class PSOldGenerationPool : public CollectedMemoryPool {
|
||||
private:
|
||||
PSOldGen* _old_gen;
|
||||
|
||||
public:
|
||||
PSGenerationPool(PSOldGen* pool, const char* name, bool support_usage_threshold);
|
||||
PSOldGenerationPool(PSOldGen* pool, const char* name, bool support_usage_threshold);
|
||||
|
||||
MemoryUsage get_memory_usage();
|
||||
size_t used_in_bytes() { return _old_gen->used_in_bytes(); }
|
||||
size_t max_size() const { return _old_gen->reserved().byte_size(); }
|
||||
};
|
||||
|
||||
class EdenMutableSpacePool : public CollectedMemoryPool {
|
||||
class PSEdenSpacePool : public CollectedMemoryPool {
|
||||
private:
|
||||
PSYoungGen* _young_gen;
|
||||
MutableSpace* _space;
|
||||
|
||||
public:
|
||||
EdenMutableSpacePool(PSYoungGen* young_gen,
|
||||
MutableSpace* space,
|
||||
const char* name,
|
||||
bool support_usage_threshold);
|
||||
PSEdenSpacePool(PSYoungGen* young_gen,
|
||||
MutableSpace* space,
|
||||
const char* name,
|
||||
bool support_usage_threshold);
|
||||
|
||||
MutableSpace* space() { return _space; }
|
||||
MemoryUsage get_memory_usage();
|
||||
@@ -65,14 +65,14 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class SurvivorMutableSpacePool : public CollectedMemoryPool {
|
||||
class PSSurvivorSpacePool : public CollectedMemoryPool {
|
||||
private:
|
||||
PSYoungGen* _young_gen;
|
||||
|
||||
public:
|
||||
SurvivorMutableSpacePool(PSYoungGen* young_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold);
|
||||
PSSurvivorSpacePool(PSYoungGen* young_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold);
|
||||
|
||||
MemoryUsage get_memory_usage();
|
||||
|
||||
|
||||
@@ -115,7 +115,7 @@ class PSScavenge: AllStatic {
|
||||
}
|
||||
|
||||
static bool is_obj_in_to_space(oop o) {
|
||||
return ParallelScavengeHeap::young_gen()->to_space()->contains(o);
|
||||
return ParallelScavengeHeap::heap()->young_gen()->to_space()->contains(o);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -64,8 +64,8 @@
|
||||
nonstatic_field(PSOldGen, _max_gen_size, const size_t) \
|
||||
\
|
||||
\
|
||||
static_field(ParallelScavengeHeap, _young_gen, PSYoungGen*) \
|
||||
static_field(ParallelScavengeHeap, _old_gen, PSOldGen*) \
|
||||
nonstatic_field(ParallelScavengeHeap, _young_gen, PSYoungGen*) \
|
||||
nonstatic_field(ParallelScavengeHeap, _old_gen, PSOldGen*) \
|
||||
\
|
||||
|
||||
#define VM_TYPES_PARALLELGC(declare_type, \
|
||||
|
||||
@@ -91,14 +91,16 @@ SerialHeap::SerialHeap() :
|
||||
CollectedHeap(),
|
||||
_young_gen(nullptr),
|
||||
_old_gen(nullptr),
|
||||
_young_gen_saved_top(nullptr),
|
||||
_old_gen_saved_top(nullptr),
|
||||
_rem_set(nullptr),
|
||||
_gc_policy_counters(new GCPolicyCounters("Copy:MSC", 2, 2)),
|
||||
_young_manager(nullptr),
|
||||
_old_manager(nullptr),
|
||||
_is_heap_almost_full(false),
|
||||
_eden_pool(nullptr),
|
||||
_survivor_pool(nullptr),
|
||||
_old_pool(nullptr) {
|
||||
_old_pool(nullptr),
|
||||
_is_heap_almost_full(false) {
|
||||
_young_manager = new GCMemoryManager("Copy");
|
||||
_old_manager = new GCMemoryManager("MarkSweepCompact");
|
||||
GCLocker::initialize();
|
||||
@@ -630,6 +632,14 @@ bool SerialHeap::requires_barriers(stackChunkOop obj) const {
|
||||
|
||||
// Returns "TRUE" iff "p" points into the committed areas of the heap.
|
||||
bool SerialHeap::is_in(const void* p) const {
|
||||
// precondition
|
||||
verify_not_in_native_if_java_thread();
|
||||
|
||||
if (!is_in_reserved(p)) {
|
||||
// If it's not even in reserved.
|
||||
return false;
|
||||
}
|
||||
|
||||
return _young_gen->is_in(p) || _old_gen->is_in(p);
|
||||
}
|
||||
|
||||
@@ -797,3 +807,12 @@ void SerialHeap::gc_epilogue(bool full) {
|
||||
|
||||
MetaspaceCounters::update_performance_counters();
|
||||
};
|
||||
|
||||
#ifdef ASSERT
|
||||
void SerialHeap::verify_not_in_native_if_java_thread() {
|
||||
if (Thread::current()->is_Java_thread()) {
|
||||
JavaThread* thread = JavaThread::current();
|
||||
assert(thread->thread_state() != _thread_in_native, "precondition");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -76,6 +76,8 @@ class SerialHeap : public CollectedHeap {
|
||||
private:
|
||||
DefNewGeneration* _young_gen;
|
||||
TenuredGeneration* _old_gen;
|
||||
|
||||
// Used during young-gc
|
||||
HeapWord* _young_gen_saved_top;
|
||||
HeapWord* _old_gen_saved_top;
|
||||
|
||||
@@ -94,6 +96,10 @@ private:
|
||||
GCMemoryManager* _young_manager;
|
||||
GCMemoryManager* _old_manager;
|
||||
|
||||
MemoryPool* _eden_pool;
|
||||
MemoryPool* _survivor_pool;
|
||||
MemoryPool* _old_pool;
|
||||
|
||||
// Indicate whether heap is almost or approaching full.
|
||||
// Usually, there is some memory headroom for application/gc to run properly.
|
||||
// However, in extreme cases, e.g. young-gen is non-empty after a full gc, we
|
||||
@@ -111,6 +117,21 @@ private:
|
||||
void print_tracing_info() const override;
|
||||
void stop() override {};
|
||||
|
||||
static void verify_not_in_native_if_java_thread() NOT_DEBUG_RETURN;
|
||||
|
||||
// Try to allocate space by expanding the heap.
|
||||
HeapWord* expand_heap_and_allocate(size_t size, bool is_tlab);
|
||||
|
||||
HeapWord* mem_allocate_cas_noexpand(size_t size, bool is_tlab);
|
||||
HeapWord* mem_allocate_work(size_t size, bool is_tlab);
|
||||
|
||||
void initialize_serviceability() override;
|
||||
|
||||
// Set the saved marks of generations, if that makes sense.
|
||||
// In particular, if any generation might iterate over the oops
|
||||
// in other generations, it should call this method.
|
||||
void save_marks();
|
||||
|
||||
public:
|
||||
// Returns JNI_OK on success
|
||||
jint initialize() override;
|
||||
@@ -209,26 +230,6 @@ public:
|
||||
// generations in a fully generational heap.
|
||||
CardTableRS* rem_set() { return _rem_set; }
|
||||
|
||||
public:
|
||||
// Set the saved marks of generations, if that makes sense.
|
||||
// In particular, if any generation might iterate over the oops
|
||||
// in other generations, it should call this method.
|
||||
void save_marks();
|
||||
|
||||
private:
|
||||
// Try to allocate space by expanding the heap.
|
||||
HeapWord* expand_heap_and_allocate(size_t size, bool is_tlab);
|
||||
|
||||
HeapWord* mem_allocate_cas_noexpand(size_t size, bool is_tlab);
|
||||
HeapWord* mem_allocate_work(size_t size, bool is_tlab);
|
||||
|
||||
MemoryPool* _eden_pool;
|
||||
MemoryPool* _survivor_pool;
|
||||
MemoryPool* _old_pool;
|
||||
|
||||
void initialize_serviceability() override;
|
||||
|
||||
public:
|
||||
static SerialHeap* heap();
|
||||
|
||||
SerialHeap();
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "gc/shared/ptrQueue.hpp"
|
||||
|
||||
PtrQueue::PtrQueue(PtrQueueSet* qset) :
|
||||
_index(0),
|
||||
_buf(nullptr)
|
||||
{}
|
||||
|
||||
PtrQueue::~PtrQueue() {
|
||||
assert(_buf == nullptr, "queue must be flushed before delete");
|
||||
}
|
||||
|
||||
size_t PtrQueue::current_capacity() const {
|
||||
if (_buf == nullptr) {
|
||||
return 0;
|
||||
} else {
|
||||
return BufferNode::make_node_from_buffer(_buf)->capacity();
|
||||
}
|
||||
}
|
||||
|
||||
PtrQueueSet::PtrQueueSet(BufferNode::Allocator* allocator) :
|
||||
_allocator(allocator)
|
||||
{}
|
||||
|
||||
PtrQueueSet::~PtrQueueSet() {}
|
||||
|
||||
void PtrQueueSet::reset_queue(PtrQueue& queue) {
|
||||
queue.set_index(queue.current_capacity());
|
||||
}
|
||||
|
||||
void PtrQueueSet::flush_queue(PtrQueue& queue) {
|
||||
void** buffer = queue.buffer();
|
||||
if (buffer != nullptr) {
|
||||
size_t index = queue.index();
|
||||
queue.set_buffer(nullptr);
|
||||
queue.set_index(0);
|
||||
BufferNode* node = BufferNode::make_node_from_buffer(buffer, index);
|
||||
if (index == node->capacity()) {
|
||||
deallocate_buffer(node);
|
||||
} else {
|
||||
enqueue_completed_buffer(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool PtrQueueSet::try_enqueue(PtrQueue& queue, void* value) {
|
||||
size_t index = queue.index();
|
||||
if (index == 0) return false;
|
||||
void** buffer = queue.buffer();
|
||||
assert(buffer != nullptr, "no buffer but non-zero index");
|
||||
buffer[--index] = value;
|
||||
queue.set_index(index);
|
||||
return true;
|
||||
}
|
||||
|
||||
void PtrQueueSet::retry_enqueue(PtrQueue& queue, void* value) {
|
||||
assert(queue.index() != 0, "precondition");
|
||||
assert(queue.buffer() != nullptr, "precondition");
|
||||
size_t index = queue.index();
|
||||
queue.buffer()[--index] = value;
|
||||
queue.set_index(index);
|
||||
}
|
||||
|
||||
BufferNode* PtrQueueSet::exchange_buffer_with_new(PtrQueue& queue) {
|
||||
BufferNode* node = nullptr;
|
||||
void** buffer = queue.buffer();
|
||||
if (buffer != nullptr) {
|
||||
node = BufferNode::make_node_from_buffer(buffer, queue.index());
|
||||
}
|
||||
install_new_buffer(queue);
|
||||
return node;
|
||||
}
|
||||
|
||||
void PtrQueueSet::install_new_buffer(PtrQueue& queue) {
|
||||
BufferNode* node = _allocator->allocate();
|
||||
queue.set_buffer(BufferNode::make_buffer_from_node(node));
|
||||
queue.set_index(node->capacity());
|
||||
}
|
||||
|
||||
void** PtrQueueSet::allocate_buffer() {
|
||||
BufferNode* node = _allocator->allocate();
|
||||
return BufferNode::make_buffer_from_node(node);
|
||||
}
|
||||
|
||||
void PtrQueueSet::deallocate_buffer(BufferNode* node) {
|
||||
_allocator->release(node);
|
||||
}
|
||||
@@ -1,168 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_GC_SHARED_PTRQUEUE_HPP
|
||||
#define SHARE_GC_SHARED_PTRQUEUE_HPP
|
||||
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/sizes.hpp"
|
||||
|
||||
// There are various techniques that require threads to be able to log
|
||||
// addresses. For example, a generational write barrier might log
|
||||
// the addresses of modified old-generation objects. This type supports
|
||||
// this operation.
|
||||
|
||||
class PtrQueueSet;
|
||||
class PtrQueue {
|
||||
friend class VMStructs;
|
||||
|
||||
NONCOPYABLE(PtrQueue);
|
||||
|
||||
// The (byte) index at which an object was last enqueued. Starts at
|
||||
// capacity (in bytes) (indicating an empty buffer) and goes towards zero.
|
||||
// Value is always pointer-size aligned.
|
||||
size_t _index;
|
||||
|
||||
static const size_t _element_size = sizeof(void*);
|
||||
|
||||
static size_t byte_index_to_index(size_t ind) {
|
||||
assert(is_aligned(ind, _element_size), "precondition");
|
||||
return ind / _element_size;
|
||||
}
|
||||
|
||||
static size_t index_to_byte_index(size_t ind) {
|
||||
return ind * _element_size;
|
||||
}
|
||||
|
||||
protected:
|
||||
// The buffer.
|
||||
void** _buf;
|
||||
|
||||
// Initialize this queue to contain a null buffer, and be part of the
|
||||
// given PtrQueueSet.
|
||||
PtrQueue(PtrQueueSet* qset);
|
||||
|
||||
// Requires queue flushed.
|
||||
~PtrQueue();
|
||||
|
||||
public:
|
||||
|
||||
void** buffer() const { return _buf; }
|
||||
void set_buffer(void** buffer) { _buf = buffer; }
|
||||
|
||||
size_t index() const {
|
||||
return byte_index_to_index(_index);
|
||||
}
|
||||
|
||||
void set_index(size_t new_index) {
|
||||
assert(new_index <= current_capacity(), "precondition");
|
||||
_index = index_to_byte_index(new_index);
|
||||
}
|
||||
|
||||
// Returns the capacity of the buffer, or 0 if the queue doesn't currently
|
||||
// have a buffer.
|
||||
size_t current_capacity() const;
|
||||
|
||||
bool is_empty() const { return index() == current_capacity(); }
|
||||
size_t size() const { return current_capacity() - index(); }
|
||||
|
||||
protected:
|
||||
// To support compiler.
|
||||
template<typename Derived>
|
||||
static ByteSize byte_offset_of_index() {
|
||||
return byte_offset_of(Derived, _index);
|
||||
}
|
||||
|
||||
static constexpr ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); }
|
||||
|
||||
template<typename Derived>
|
||||
static ByteSize byte_offset_of_buf() {
|
||||
return byte_offset_of(Derived, _buf);
|
||||
}
|
||||
|
||||
static ByteSize byte_width_of_buf() { return in_ByteSize(_element_size); }
|
||||
};
|
||||
|
||||
// A PtrQueueSet represents resources common to a set of pointer queues.
|
||||
// In particular, the individual queues allocate buffers from this shared
|
||||
// set, and return completed buffers to the set.
|
||||
class PtrQueueSet {
|
||||
BufferNode::Allocator* _allocator;
|
||||
|
||||
NONCOPYABLE(PtrQueueSet);
|
||||
|
||||
protected:
|
||||
// Create an empty ptr queue set.
|
||||
PtrQueueSet(BufferNode::Allocator* allocator);
|
||||
~PtrQueueSet();
|
||||
|
||||
// Discard any buffered enqueued data.
|
||||
void reset_queue(PtrQueue& queue);
|
||||
|
||||
// If queue has any buffered enqueued data, transfer it to this qset.
|
||||
// Otherwise, deallocate queue's buffer.
|
||||
void flush_queue(PtrQueue& queue);
|
||||
|
||||
// Add value to queue's buffer, returning true. If buffer is full
|
||||
// or if queue doesn't have a buffer, does nothing and returns false.
|
||||
bool try_enqueue(PtrQueue& queue, void* value);
|
||||
|
||||
// Add value to queue's buffer. The queue must have a non-full buffer.
|
||||
// Used after an initial try_enqueue has failed and the situation resolved.
|
||||
void retry_enqueue(PtrQueue& queue, void* value);
|
||||
|
||||
// Installs a new buffer into queue.
|
||||
// Returns the old buffer, or null if queue didn't have a buffer.
|
||||
BufferNode* exchange_buffer_with_new(PtrQueue& queue);
|
||||
|
||||
// Installs a new buffer into queue.
|
||||
void install_new_buffer(PtrQueue& queue);
|
||||
|
||||
public:
|
||||
|
||||
// Return the associated BufferNode allocator.
|
||||
BufferNode::Allocator* allocator() const { return _allocator; }
|
||||
|
||||
// Return the buffer for a BufferNode of size buffer_capacity().
|
||||
void** allocate_buffer();
|
||||
|
||||
// Return an empty buffer to the free list. The node is required
|
||||
// to have been allocated with a size of buffer_capacity().
|
||||
void deallocate_buffer(BufferNode* node);
|
||||
|
||||
// A completed buffer is a buffer the mutator is finished with, and
|
||||
// is ready to be processed by the collector. It need not be full.
|
||||
|
||||
// Adds node to the completed buffer list.
|
||||
virtual void enqueue_completed_buffer(BufferNode* node) = 0;
|
||||
|
||||
size_t buffer_capacity() const {
|
||||
return _allocator->buffer_capacity();
|
||||
}
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_SHARED_PTRQUEUE_HPP
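Both the deleted PtrQueue above and the SATBMarkQueue it is folded into keep an index that starts at the buffer capacity and counts down, so "full" is simply index == 0 and the offset can be handed straight to compiled-code barriers. A standalone sketch of that enqueue discipline (illustrative type, element-indexed where the real queue stores a byte index):

    #include <cstddef>
    #include <vector>

    struct DownwardQueue {
      std::vector<void*> buf;
      size_t index;                      // free slots remaining, counted down from capacity
      explicit DownwardQueue(size_t cap) : buf(cap), index(cap) {}
      bool try_enqueue(void* v) {
        if (index == 0) return false;    // full: caller must flush or install a new buffer
        buf[--index] = v;
        return true;
      }
      bool is_empty() const { return index == buf.size(); }
      size_t size() const { return buf.size() - index; }
    };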
|
||||
@@ -36,14 +36,19 @@
|
||||
#include "utilities/globalCounter.inline.hpp"
|
||||
|
||||
SATBMarkQueue::SATBMarkQueue(SATBMarkQueueSet* qset) :
|
||||
PtrQueue(qset),
|
||||
_buf(nullptr),
|
||||
_index(0),
|
||||
// SATB queues are only active during marking cycles. We create them
|
||||
// with their active field set to false. If a thread is created
|
||||
// during a cycle, its SATB queue needs to be activated before the
|
||||
// thread starts running. This is handled by the collector-specific
|
||||
// BarrierSet thread attachment protocol.
|
||||
_active(false)
|
||||
{ }
|
||||
{}
|
||||
|
||||
SATBMarkQueue::~SATBMarkQueue() {
|
||||
assert(_buf == nullptr, "queue must be flushed before delete");
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
// Helpful for debugging
|
||||
@@ -64,7 +69,7 @@ void SATBMarkQueue::print(const char* name) {
|
||||
#endif // PRODUCT
|
||||
|
||||
SATBMarkQueueSet::SATBMarkQueueSet(BufferNode::Allocator* allocator) :
|
||||
PtrQueueSet(allocator),
|
||||
_allocator(allocator),
|
||||
_list(),
|
||||
_count_and_process_flag(0),
|
||||
_process_completed_buffers_threshold(SIZE_MAX),
|
||||
@@ -214,13 +219,6 @@ bool SATBMarkQueueSet::apply_closure_to_completed_buffer(SATBBufferClosure* cl)
|
||||
}
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::flush_queue(SATBMarkQueue& queue) {
|
||||
// Filter now to possibly save work later. If filtering empties the
|
||||
// buffer then flush_queue can deallocate the buffer.
|
||||
filter(queue);
|
||||
PtrQueueSet::flush_queue(queue);
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::enqueue_known_active(SATBMarkQueue& queue, oop obj) {
|
||||
assert(queue.is_active(), "precondition");
|
||||
void* value = cast_from_oop<void*>(obj);
|
||||
@@ -355,3 +353,76 @@ void SATBMarkQueueSet::abandon_partial_marking() {
|
||||
} closure(*this);
|
||||
Threads::threads_do(&closure);
|
||||
}
|
||||
|
||||
size_t SATBMarkQueue::current_capacity() const {
|
||||
if (_buf == nullptr) {
|
||||
return 0;
|
||||
} else {
|
||||
return BufferNode::make_node_from_buffer(_buf)->capacity();
|
||||
}
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::reset_queue(SATBMarkQueue& queue) {
|
||||
queue.set_index(queue.current_capacity());
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::flush_queue(SATBMarkQueue& queue) {
|
||||
// Filter now to possibly save work later. If filtering empties the
|
||||
// buffer then flush_queue can deallocate the buffer.
|
||||
filter(queue);
|
||||
void** buffer = queue.buffer();
|
||||
if (buffer != nullptr) {
|
||||
size_t index = queue.index();
|
||||
queue.set_buffer(nullptr);
|
||||
queue.set_index(0);
|
||||
BufferNode* node = BufferNode::make_node_from_buffer(buffer, index);
|
||||
if (index == node->capacity()) {
|
||||
deallocate_buffer(node);
|
||||
} else {
|
||||
enqueue_completed_buffer(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SATBMarkQueueSet::try_enqueue(SATBMarkQueue& queue, void* value) {
|
||||
size_t index = queue.index();
|
||||
if (index == 0) return false;
|
||||
void** buffer = queue.buffer();
|
||||
assert(buffer != nullptr, "no buffer but non-zero index");
|
||||
buffer[--index] = value;
|
||||
queue.set_index(index);
|
||||
return true;
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::retry_enqueue(SATBMarkQueue& queue, void* value) {
|
||||
assert(queue.index() != 0, "precondition");
|
||||
assert(queue.buffer() != nullptr, "precondition");
|
||||
size_t index = queue.index();
|
||||
queue.buffer()[--index] = value;
|
||||
queue.set_index(index);
|
||||
}
|
||||
|
||||
BufferNode* SATBMarkQueueSet::exchange_buffer_with_new(SATBMarkQueue& queue) {
|
||||
BufferNode* node = nullptr;
|
||||
void** buffer = queue.buffer();
|
||||
if (buffer != nullptr) {
|
||||
node = BufferNode::make_node_from_buffer(buffer, queue.index());
|
||||
}
|
||||
install_new_buffer(queue);
|
||||
return node;
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::install_new_buffer(SATBMarkQueue& queue) {
|
||||
BufferNode* node = _allocator->allocate();
|
||||
queue.set_buffer(BufferNode::make_buffer_from_node(node));
|
||||
queue.set_index(node->capacity());
|
||||
}
|
||||
|
||||
void** SATBMarkQueueSet::allocate_buffer() {
|
||||
BufferNode* node = _allocator->allocate();
|
||||
return BufferNode::make_buffer_from_node(node);
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::deallocate_buffer(BufferNode* node) {
|
||||
_allocator->release(node);
|
||||
}
|
||||
|
||||
@@ -25,11 +25,15 @@
|
||||
#ifndef SHARE_GC_SHARED_SATBMARKQUEUE_HPP
|
||||
#define SHARE_GC_SHARED_SATBMARKQUEUE_HPP
|
||||
|
||||
#include "gc/shared/ptrQueue.hpp"
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "memory/allocation.hpp"
|
||||
#include "memory/padded.hpp"
|
||||
#include "oops/oopsHierarchy.hpp"
|
||||
#include "runtime/atomic.hpp"
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/sizes.hpp"
|
||||
|
||||
class Thread;
|
||||
class Monitor;
|
||||
@@ -45,12 +49,33 @@ public:
|
||||
virtual void do_buffer(void** buffer, size_t size) = 0;
|
||||
};
|
||||
|
||||
// A PtrQueue whose elements are (possibly stale) pointers to object heads.
|
||||
class SATBMarkQueue: public PtrQueue {
|
||||
// A queue whose elements are (possibly stale) pointers to object heads.
|
||||
class SATBMarkQueue {
|
||||
friend class VMStructs;
|
||||
friend class SATBMarkQueueSet;
|
||||
|
||||
private:
|
||||
NONCOPYABLE(SATBMarkQueue);
|
||||
|
||||
// The buffer.
|
||||
void** _buf;
|
||||
|
||||
// The (byte) index at which an object was last enqueued. Starts at
|
||||
// capacity (in bytes) (indicating an empty buffer) and goes towards zero.
|
||||
// Value is always pointer-size aligned.
|
||||
size_t _index;
|
||||
|
||||
static const size_t _element_size = sizeof(void*);
|
||||
|
||||
static size_t byte_index_to_index(size_t ind) {
|
||||
assert(is_aligned(ind, _element_size), "precondition");
|
||||
return ind / _element_size;
|
||||
}
|
||||
|
||||
static size_t index_to_byte_index(size_t ind) {
|
||||
return ind * _element_size;
|
||||
}
|
||||
|
||||
// Per-queue (so thread-local) cache of the SATBMarkQueueSet's
|
||||
// active state, to support inline barriers in compiled code.
|
||||
bool _active;
|
||||
@@ -58,6 +83,29 @@ private:
|
||||
public:
|
||||
SATBMarkQueue(SATBMarkQueueSet* qset);
|
||||
|
||||
// Queue must be flushed
|
||||
~SATBMarkQueue();
|
||||
|
||||
void** buffer() const { return _buf; }
|
||||
|
||||
void set_buffer(void** buffer) { _buf = buffer; }
|
||||
|
||||
size_t index() const {
|
||||
return byte_index_to_index(_index);
|
||||
}
|
||||
|
||||
void set_index(size_t new_index) {
|
||||
assert(new_index <= current_capacity(), "precondition");
|
||||
_index = index_to_byte_index(new_index);
|
||||
}
|
||||
|
||||
// Returns the capacity of the buffer, or 0 if the queue doesn't currently
|
||||
// have a buffer.
|
||||
size_t current_capacity() const;
|
||||
|
||||
bool is_empty() const { return index() == current_capacity(); }
|
||||
size_t size() const { return current_capacity() - index(); }
|
||||
|
||||
bool is_active() const { return _active; }
|
||||
void set_active(bool value) { _active = value; }
|
||||
|
||||
@@ -68,14 +116,16 @@ public:
|
||||
|
||||
// Compiler support.
|
||||
static ByteSize byte_offset_of_index() {
|
||||
return PtrQueue::byte_offset_of_index<SATBMarkQueue>();
|
||||
return byte_offset_of(SATBMarkQueue, _index);
|
||||
}
|
||||
using PtrQueue::byte_width_of_index;
|
||||
|
||||
static constexpr ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); }
|
||||
|
||||
static ByteSize byte_offset_of_buf() {
|
||||
return PtrQueue::byte_offset_of_buf<SATBMarkQueue>();
|
||||
return byte_offset_of(SATBMarkQueue, _buf);
|
||||
}
|
||||
using PtrQueue::byte_width_of_buf;
|
||||
|
||||
static ByteSize byte_width_of_buf() { return in_ByteSize(_element_size); }
|
||||
|
||||
static ByteSize byte_offset_of_active() {
|
||||
return byte_offset_of(SATBMarkQueue, _active);
|
||||
@@ -84,7 +134,18 @@ public:
|
||||
static ByteSize byte_width_of_active() { return in_ByteSize(sizeof(bool)); }
|
||||
};
|
||||
|
||||
class SATBMarkQueueSet: public PtrQueueSet {
|
||||
|
||||
// A SATBMarkQueueSet represents resources common to a set of SATBMarkQueues.
|
||||
// In particular, the individual queues allocate buffers from this shared
|
||||
// set, and return completed buffers to the set.
|
||||
// A completed buffer is a buffer the mutator is finished with, and
|
||||
// is ready to be processed by the collector. It need not be full.
|
||||
|
||||
class SATBMarkQueueSet {
|
||||
|
||||
BufferNode::Allocator* _allocator;
|
||||
|
||||
NONCOPYABLE(SATBMarkQueueSet);
|
||||
|
||||
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, 0);
|
||||
PaddedEnd<BufferNode::Stack> _list;
|
||||
@@ -99,6 +160,24 @@ class SATBMarkQueueSet: public PtrQueueSet {
|
||||
BufferNode* get_completed_buffer();
|
||||
void abandon_completed_buffers();
|
||||
|
||||
// Discard any buffered enqueued data.
|
||||
void reset_queue(SATBMarkQueue& queue);
|
||||
|
||||
// Add value to queue's buffer, returning true. If buffer is full
|
||||
// or if queue doesn't have a buffer, does nothing and returns false.
|
||||
bool try_enqueue(SATBMarkQueue& queue, void* value);
|
||||
|
||||
// Add value to queue's buffer. The queue must have a non-full buffer.
|
||||
// Used after an initial try_enqueue has failed and the situation resolved.
|
||||
void retry_enqueue(SATBMarkQueue& queue, void* value);
|
||||
|
||||
// Installs a new buffer into queue.
|
||||
// Returns the old buffer, or null if queue didn't have a buffer.
|
||||
BufferNode* exchange_buffer_with_new(SATBMarkQueue& queue);
|
||||
|
||||
// Installs a new buffer into queue.
|
||||
void install_new_buffer(SATBMarkQueue& queue);
|
||||
|
||||
#ifdef ASSERT
|
||||
void dump_active_states(bool expected_active);
|
||||
void verify_active_states(bool expected_active);
|
||||
@@ -106,6 +185,7 @@ class SATBMarkQueueSet: public PtrQueueSet {
|
||||
|
||||
protected:
|
||||
SATBMarkQueueSet(BufferNode::Allocator* allocator);
|
||||
|
||||
~SATBMarkQueueSet();
|
||||
|
||||
void handle_zero_index(SATBMarkQueue& queue);
|
||||
@@ -131,6 +211,7 @@ public:
|
||||
void set_process_completed_buffers_threshold(size_t value);
|
||||
|
||||
size_t buffer_enqueue_threshold() const { return _buffer_enqueue_threshold; }
|
||||
|
||||
void set_buffer_enqueue_threshold_percentage(uint value);
|
||||
|
||||
// If there exists some completed buffer, pop and process it, and
|
||||
@@ -144,7 +225,7 @@ public:
|
||||
// Add obj to queue. This qset and the queue must be active.
|
||||
void enqueue_known_active(SATBMarkQueue& queue, oop obj);
|
||||
virtual void filter(SATBMarkQueue& queue) = 0;
|
||||
virtual void enqueue_completed_buffer(BufferNode* node);
|
||||
void enqueue_completed_buffer(BufferNode* node);
|
||||
|
||||
// The number of buffers in the list. Racy and not updated atomically
|
||||
// with the set of completed buffers.
|
||||
@@ -157,6 +238,20 @@ public:
|
||||
return (_count_and_process_flag.load_relaxed() & 1) != 0;
|
||||
}
|
||||
|
||||
// Return the associated BufferNode allocator.
|
||||
BufferNode::Allocator* allocator() const { return _allocator; }
|
||||
|
||||
// Return the buffer for a BufferNode of size buffer_capacity().
|
||||
void** allocate_buffer();
|
||||
|
||||
// Return an empty buffer to the free list. The node is required
|
||||
// to have been allocated with a size of buffer_capacity().
|
||||
void deallocate_buffer(BufferNode* node);
|
||||
|
||||
size_t buffer_capacity() const {
|
||||
return _allocator->buffer_capacity();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
// Helpful for debugging
|
||||
void print_all(const char* msg);
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "utilities/copy.hpp"
|
||||
|
||||
size_t ThreadLocalAllocBuffer::_max_size = 0;
|
||||
int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0;
|
||||
unsigned int ThreadLocalAllocBuffer::_target_refills = 0;
|
||||
|
||||
ThreadLocalAllocBuffer::ThreadLocalAllocBuffer() :
|
||||
@@ -224,6 +225,30 @@ void ThreadLocalAllocBuffer::startup_initialization() {
|
||||
// abort during VM initialization.
|
||||
_target_refills = MAX2(_target_refills, 2U);
|
||||
|
||||
#ifdef COMPILER2
|
||||
// If the C2 compiler is present, extra space is needed at the end of
|
||||
// TLABs, otherwise prefetching instructions generated by the C2
|
||||
// compiler will fault (due to accessing memory outside of heap).
|
||||
// The amount of space is the max of the number of lines to
|
||||
// prefetch for array and for instance allocations. (Extra space must be
|
||||
// reserved to accommodate both types of allocations.)
|
||||
//
|
||||
// Only SPARC-specific BIS instructions are known to fault. (Those
|
||||
// instructions are generated if AllocatePrefetchStyle==3 and
|
||||
// AllocatePrefetchInstr==1). To be on the safe side, however,
|
||||
// extra space is reserved for all combinations of
|
||||
// AllocatePrefetchStyle and AllocatePrefetchInstr.
|
||||
//
|
||||
// If the C2 compiler is not present, no space is reserved.
|
||||
|
||||
// +1 for rounding up to next cache line, +1 to be safe
|
||||
if (CompilerConfig::is_c2_or_jvmci_compiler_enabled()) {
|
||||
int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
|
||||
_reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) /
|
||||
(int)HeapWordSize;
|
||||
}
|
||||
#endif
|
||||
|
||||
// During jvm startup, the main thread is initialized
|
||||
// before the heap is initialized. So reinitialize it now.
|
||||
guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
|
||||
@@ -429,7 +454,8 @@ void ThreadLocalAllocStats::publish() {
|
||||
}
|
||||
|
||||
size_t ThreadLocalAllocBuffer::end_reserve() {
|
||||
return CollectedHeap::lab_alignment_reserve();
|
||||
size_t reserve_size = CollectedHeap::lab_alignment_reserve();
|
||||
return MAX2(reserve_size, (size_t)_reserve_for_allocation_prefetch);
|
||||
}
|
||||
|
||||
const HeapWord* ThreadLocalAllocBuffer::start_relaxed() const {
|
||||
|
||||
@@ -58,6 +58,7 @@ private:
|
||||
size_t _allocated_before_last_gc; // total bytes allocated up until the last gc
|
||||
|
||||
static size_t _max_size; // maximum size of any TLAB
|
||||
static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB
|
||||
static unsigned _target_refills; // expected number of refills between GCs
|
||||
|
||||
unsigned _number_of_refills;
|
||||
|
||||
@@ -1394,7 +1394,7 @@ void ShenandoahBarrierC2Support::pin_and_expand(PhaseIdealLoop* phase) {
|
||||
}
|
||||
if (addr->Opcode() == Op_AddP) {
|
||||
Node* orig_base = addr->in(AddPNode::Base);
|
||||
Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), ConstraintCastNode::StrongDependency);
|
||||
Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), ConstraintCastNode::DependencyType::NonFloatingNarrowing);
|
||||
phase->register_new_node(base, ctrl);
|
||||
if (addr->in(AddPNode::Base) == addr->in((AddPNode::Address))) {
|
||||
// Field access
|
||||
|
||||
@@ -104,7 +104,7 @@ void ShenandoahGenerationalHeuristics::choose_collection_set(ShenandoahCollectio
|
||||
// Note that for GLOBAL GC, region may be OLD, and OLD regions do not qualify for pre-selection
|
||||
|
||||
// This region is old enough to be promoted but it was not preselected, either because its garbage is below
|
||||
// ShenandoahOldGarbageThreshold so it will be promoted in place, or because there is not sufficient room
|
||||
// old garbage threshold so it will be promoted in place, or because there is not sufficient room
|
||||
// in old gen to hold the evacuated copies of this region's live data. In both cases, we choose not to
|
||||
// place this region into the collection set.
|
||||
if (region->get_top_before_promote() != nullptr) {
|
||||
|
||||
@@ -71,7 +71,8 @@ ShenandoahOldHeuristics::ShenandoahOldHeuristics(ShenandoahOldGeneration* genera
|
||||
_growth_trigger(false),
|
||||
_fragmentation_density(0.0),
|
||||
_fragmentation_first_old_region(0),
|
||||
_fragmentation_last_old_region(0)
|
||||
_fragmentation_last_old_region(0),
|
||||
_old_garbage_threshold(ShenandoahOldGarbageThreshold)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -373,7 +374,8 @@ void ShenandoahOldHeuristics::prepare_for_old_collections() {
|
||||
}
|
||||
}
|
||||
|
||||
_old_generation->set_live_bytes_after_last_mark(live_data);
|
||||
// TODO: subtract from live_data bytes promoted during concurrent GC.
|
||||
_old_generation->set_live_bytes_at_last_mark(live_data);
|
||||
|
||||
// Unlike young, we are more interested in efficiently packing OLD-gen than in reclaiming garbage first. We sort by live-data.
|
||||
// Some regular regions may have been promoted in place with no garbage but also with very little live data. When we "compact"
|
||||
@@ -385,7 +387,7 @@ void ShenandoahOldHeuristics::prepare_for_old_collections() {
|
||||
const size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes();
|
||||
|
||||
// The convention is to collect regions that have more than this amount of garbage.
|
||||
const size_t garbage_threshold = region_size_bytes * ShenandoahOldGarbageThreshold / 100;
|
||||
const size_t garbage_threshold = region_size_bytes * get_old_garbage_threshold() / 100;
|
||||
|
||||
// Enlightened interpretation: collect regions that have less than this amount of live.
|
||||
const size_t live_threshold = region_size_bytes - garbage_threshold;
|
||||
@@ -655,6 +657,7 @@ bool ShenandoahOldHeuristics::should_start_gc() {
|
||||
const double percent = percent_of(old_gen_capacity, heap_capacity);
|
||||
log_trigger("Expansion failure, current size: %zu%s which is %.1f%% of total heap size",
|
||||
byte_size_in_proper_unit(old_gen_capacity), proper_unit_for_byte_size(old_gen_capacity), percent);
|
||||
adjust_old_garbage_threshold();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -677,6 +680,7 @@ bool ShenandoahOldHeuristics::should_start_gc() {
|
||||
"%zu to %zu (%zu), density: %.1f%%",
|
||||
byte_size_in_proper_unit(fragmented_free), proper_unit_for_byte_size(fragmented_free),
|
||||
first_old_region, last_old_region, span_of_old_regions, density * 100);
|
||||
adjust_old_garbage_threshold();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -699,12 +703,13 @@ bool ShenandoahOldHeuristics::should_start_gc() {
|
||||
consecutive_young_cycles);
|
||||
_growth_trigger = false;
|
||||
} else if (current_usage > trigger_threshold) {
|
||||
const size_t live_at_previous_old = _old_generation->get_live_bytes_after_last_mark();
|
||||
const size_t live_at_previous_old = _old_generation->get_live_bytes_at_last_mark();
|
||||
const double percent_growth = percent_of(current_usage - live_at_previous_old, live_at_previous_old);
|
||||
log_trigger("Old has overgrown, live at end of previous OLD marking: "
|
||||
"%zu%s, current usage: %zu%s, percent growth: %.1f%%",
|
||||
byte_size_in_proper_unit(live_at_previous_old), proper_unit_for_byte_size(live_at_previous_old),
|
||||
byte_size_in_proper_unit(current_usage), proper_unit_for_byte_size(current_usage), percent_growth);
|
||||
adjust_old_garbage_threshold();
|
||||
return true;
|
||||
} else {
|
||||
// Mixed evacuations have decreased current_usage such that old-growth trigger is no longer relevant.
|
||||
@@ -713,7 +718,41 @@ bool ShenandoahOldHeuristics::should_start_gc() {
|
||||
}
|
||||
|
||||
// Otherwise, defer to inherited heuristic for gc trigger.
|
||||
return this->ShenandoahHeuristics::should_start_gc();
|
||||
bool result = this->ShenandoahHeuristics::should_start_gc();
|
||||
if (result) {
|
||||
adjust_old_garbage_threshold();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void ShenandoahOldHeuristics::adjust_old_garbage_threshold() {
|
||||
const uintx MinimumOldGarbageThreshold = 10;
|
||||
const uintx InterventionPercentage = 50;
|
||||
|
||||
const ShenandoahHeap* heap = ShenandoahHeap::heap();
|
||||
size_t old_regions_size = _old_generation->used_regions_size();
|
||||
size_t soft_max_size = heap->soft_max_capacity();
|
||||
uintx percent_used = (uintx) ((old_regions_size * 100) / soft_max_size);
|
||||
_old_garbage_threshold = ShenandoahOldGarbageThreshold;
|
||||
if (percent_used > InterventionPercentage) {
|
||||
uintx severity = percent_used - InterventionPercentage; // ranges from 0 to 50
|
||||
if (MinimumOldGarbageThreshold < ShenandoahOldGarbageThreshold) {
|
||||
uintx adjustment_potential = ShenandoahOldGarbageThreshold - MinimumOldGarbageThreshold;
|
||||
// With default values:
|
||||
// if percent_used > 80, garbage_threshold is 10
|
||||
// else if percent_used > 65, garbage_threshold is 15
|
||||
// else if percent_used > 50, garbage_threshold is 20
|
||||
if (severity > 30) {
|
||||
_old_garbage_threshold = ShenandoahOldGarbageThreshold - adjustment_potential;
|
||||
} else if (severity > 15) {
|
||||
_old_garbage_threshold = ShenandoahOldGarbageThreshold - 2 * adjustment_potential / 3;
|
||||
} else {
|
||||
_old_garbage_threshold = ShenandoahOldGarbageThreshold - adjustment_potential / 3;
|
||||
}
|
||||
log_info(gc)("Adjusting old garbage threshold to %lu because Old Generation used regions represents %lu%% of heap",
|
||||
_old_garbage_threshold, percent_used);
|
||||
}
|
||||
}
|
||||
}
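
// Illustrative restatement (not part of this patch): the tiering implemented above, as a
// self-contained function. With the default ShenandoahOldGarbageThreshold of 25 and the
// local MinimumOldGarbageThreshold of 10, adjustment_potential is 15, so:
//   percent_used > 80  ->  25 - 15      = 10
//   percent_used > 65  ->  25 - 2*15/3  = 15
//   percent_used > 50  ->  25 - 15/3    = 20
//   otherwise          ->  25 (unchanged)
#include <cstdint>

static uint64_t adjusted_old_garbage_threshold_sketch(uint64_t percent_used,
                                                      uint64_t default_threshold = 25,
                                                      uint64_t minimum_threshold = 10) {
  uint64_t threshold = default_threshold;
  if (percent_used > 50 && minimum_threshold < default_threshold) {
    const uint64_t severity  = percent_used - 50;                      // mirrors 'severity' above
    const uint64_t potential = default_threshold - minimum_threshold;  // 15 with defaults
    if (severity > 30) {
      threshold = default_threshold - potential;
    } else if (severity > 15) {
      threshold = default_threshold - 2 * potential / 3;
    } else {
      threshold = default_threshold - potential / 3;
    }
  }
  return threshold;
}
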
void ShenandoahOldHeuristics::record_success_concurrent() {
|
||||
|
||||
@@ -102,6 +102,17 @@ private:
|
||||
size_t _fragmentation_first_old_region;
|
||||
size_t _fragmentation_last_old_region;
|
||||
|
||||
// The value of command-line argument ShenandoahOldGarbageThreshold represents the percent of garbage that must
// be present within an old-generation region before that region is considered a good candidate for inclusion in
// the collection set under normal circumstances. For our purposes, normal circumstances are when the memory consumed
// by the old generation is less than 50% of the soft heap capacity. When the old generation grows beyond the 50%
// threshold, we dynamically adjust the old garbage threshold, allowing us to invest in packing the old generation
// more tightly so that more memory can be made available to the more frequent young GC cycles. This variable
// is used in place of ShenandoahOldGarbageThreshold. Under normal circumstances, its value is equal to
// ShenandoahOldGarbageThreshold. When the GC is under duress, this value may be adjusted to a smaller value,
// as scaled according to the severity of duress that we are experiencing.
uintx _old_garbage_threshold;

// Compare by live is used to prioritize compaction of old-gen regions. With old-gen compaction, the goal is
|
||||
// to tightly pack long-lived objects into available regions. In most cases, there has not been an accumulation
|
||||
// of garbage within old-gen regions. The more likely opportunity will be to combine multiple sparsely populated
|
||||
@@ -200,9 +211,28 @@ public:
|
||||
|
||||
bool is_experimental() override;
|
||||
|
||||
// Returns the current value of a dynamically adjusted threshold percentage of garbage above which an old region is
|
||||
// deemed eligible for evacuation.
|
||||
inline uintx get_old_garbage_threshold() { return _old_garbage_threshold; }
|
||||
|
||||
private:
|
||||
void slide_pinned_regions_to_front();
|
||||
bool all_candidates_are_pinned();
|
||||
|
||||
// The normal old_garbage_threshold is specified by ShenandoahOldGarbageThreshold command-line argument, with default
// value 25, denoting that a region that has at least 25% garbage is eligible for evacuation. With default values for
// all command-line arguments, we make the following adjustments:
// 1. If the old generation has grown to consume more than 80% of the soft max capacity, adjust threshold to 10%
// 2. Otherwise, if the old generation has grown to consume more than 65%, adjust threshold to 15%
// 3. Otherwise, if the old generation has grown to consume more than 50%, adjust threshold to 20%
// The effect is to compact the old generation more aggressively as the old generation consumes larger percentages
// of the available heap memory. In these circumstances, we pack the old generation more tightly in order to make
// more memory available to the young generation so that the more frequent young collections can operate more
// efficiently.
//
// If the ShenandoahOldGarbageThreshold is specified on the command line, the effect of adjusting the old garbage
// threshold is scaled linearly.
void adjust_old_garbage_threshold();
};

#endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHOLDHEURISTICS_HPP
|
||||
|
||||
@@ -83,16 +83,15 @@ public:
|
||||
return "PLAB";
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// When ShenandoahElasticTLAB is enabled, the request cannot be made smaller than _min_size.
|
||||
size_t _min_size;
|
||||
size_t const _min_size;
|
||||
|
||||
// The size of the request in words.
|
||||
size_t _requested_size;
|
||||
size_t const _requested_size;
|
||||
|
||||
// The allocation may be increased for padding or decreased to fit in the remaining space of a region.
|
||||
size_t _actual_size;
|
||||
@@ -104,7 +103,7 @@ private:
|
||||
size_t _waste;
|
||||
|
||||
// This is the type of the request.
|
||||
Type _alloc_type;
|
||||
Type const _alloc_type;
|
||||
|
||||
#ifdef ASSERT
|
||||
// Check that this is set before being read.
|
||||
@@ -209,6 +208,10 @@ public:
|
||||
return (_alloc_type & bit_old_alloc) == 0;
|
||||
}
|
||||
|
||||
inline bool is_cds() const {
|
||||
return _alloc_type == _alloc_cds;
|
||||
}
|
||||
|
||||
inline ShenandoahAffiliation affiliation() const {
|
||||
return (_alloc_type & bit_old_alloc) == 0 ? YOUNG_GENERATION : OLD_GENERATION ;
|
||||
}
|
||||
|
||||
@@ -128,8 +128,8 @@ public:
|
||||
void write_ref_array(HeapWord* start, size_t count);
|
||||
|
||||
private:
|
||||
template <class T>
|
||||
inline void arraycopy_marking(T* dst, size_t count);
|
||||
template <bool IS_GENERATIONAL, class T>
|
||||
void arraycopy_marking(T* dst, size_t count);
|
||||
template <class T>
|
||||
inline void arraycopy_evacuation(T* src, size_t count);
|
||||
template <class T>
|
||||
|
||||
@@ -429,7 +429,11 @@ void ShenandoahBarrierSet::arraycopy_barrier(T* src, T* dst, size_t count) {
|
||||
// If marking old or young, we must evaluate the SATB barrier. This will be the only
|
||||
// action if we are not marking old. If we are marking old, we must still evaluate the
|
||||
// load reference barrier for a young collection.
|
||||
arraycopy_marking(dst, count);
|
||||
if (_heap->mode()->is_generational()) {
|
||||
arraycopy_marking<true>(dst, count);
|
||||
} else {
|
||||
arraycopy_marking<false>(dst, count);
|
||||
}
|
||||
}
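
// Minimal sketch (an assumption, not this file's code) of why the call above dispatches on a
// template bool: the generational check is made once, outside the copy loop, and inside the
// templated body `if (IS_GENERATIONAL)` is a compile-time constant, so the unused branch is
// removed by the compiler. Names below are invented for illustration.
#include <cstddef>

template <bool IS_GENERATIONAL>
static void copy_barrier_sketch(int* dst, size_t count) {
  for (size_t i = 0; i < count; i++) {
    if (IS_GENERATIONAL) {
      // old-gen specific work would go here; compiled away when IS_GENERATIONAL is false
    }
    // common SATB-style work on dst[i] would go here
    (void) dst[i];
  }
}
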
|
||||
|
||||
if ((gc_state & ShenandoahHeap::EVACUATION) != 0) {
|
||||
@@ -441,11 +445,12 @@ void ShenandoahBarrierSet::arraycopy_barrier(T* src, T* dst, size_t count) {
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
template <bool IS_GENERATIONAL, class T>
|
||||
void ShenandoahBarrierSet::arraycopy_marking(T* dst, size_t count) {
|
||||
assert(_heap->is_concurrent_mark_in_progress(), "only during marking");
|
||||
if (ShenandoahSATBBarrier) {
|
||||
if (!_heap->marking_context()->allocated_after_mark_start(reinterpret_cast<HeapWord*>(dst))) {
|
||||
if (!_heap->marking_context()->allocated_after_mark_start(reinterpret_cast<HeapWord*>(dst)) ||
|
||||
(IS_GENERATIONAL && _heap->heap_region_containing(dst)->is_old() && _heap->is_concurrent_young_mark_in_progress())) {
|
||||
arraycopy_work<T, false, false, true>(dst, count);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,13 +144,12 @@ public:
|
||||
{
|
||||
ShenandoahReentrantLocker locker(nm_data->lock());
|
||||
|
||||
// Heal oops and disarm
|
||||
// Heal oops
|
||||
if (_bs->is_armed(nm)) {
|
||||
ShenandoahEvacOOMScope oom_evac_scope;
|
||||
ShenandoahNMethod::heal_nmethod_metadata(nm_data);
|
||||
// Code cache unloading needs to know about on-stack nmethods. Arm the nmethods to get
|
||||
// mark_as_maybe_on_stack() callbacks when they are used again.
|
||||
_bs->arm(nm);
|
||||
// Must remain armed to complete remaining work in nmethod entry barrier
|
||||
assert(_bs->is_armed(nm), "Should remain armed");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1369,7 +1369,7 @@ template<typename Iter>
|
||||
HeapWord* ShenandoahFreeSet::allocate_from_regions(Iter& iterator, ShenandoahAllocRequest &req, bool &in_new_region) {
|
||||
for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) {
|
||||
ShenandoahHeapRegion* r = _heap->get_region(idx);
|
||||
size_t min_size = (req.type() == ShenandoahAllocRequest::_alloc_tlab) ? req.min_size() : req.size();
|
||||
size_t min_size = req.is_lab_alloc() ? req.min_size() : req.size();
|
||||
if (alloc_capacity(r) >= min_size * HeapWordSize) {
|
||||
HeapWord* result = try_allocate_in(r, req, in_new_region);
|
||||
if (result != nullptr) {
|
||||
@@ -1501,7 +1501,7 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
|
||||
|
||||
if (in_new_region) {
|
||||
log_debug(gc, free)("Using new region (%zu) for %s (" PTR_FORMAT ").",
|
||||
r->index(), ShenandoahAllocRequest::alloc_type_to_string(req.type()), p2i(&req));
|
||||
r->index(), req.type_string(), p2i(&req));
|
||||
assert(!r->is_affiliated(), "New region %zu should be unaffiliated", r->index());
|
||||
r->set_affiliation(req.affiliation());
|
||||
if (r->is_old()) {
|
||||
@@ -1520,7 +1520,7 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
|
||||
assert(ctx->is_bitmap_range_within_region_clear(ctx->top_bitmap(r), r->end()), "Bitmap above top_bitmap() must be clear");
|
||||
#endif
|
||||
log_debug(gc, free)("Using new region (%zu) for %s (" PTR_FORMAT ").",
|
||||
r->index(), ShenandoahAllocRequest::alloc_type_to_string(req.type()), p2i(&req));
|
||||
r->index(), req.type_string(), p2i(&req));
|
||||
} else {
|
||||
assert(r->is_affiliated(), "Region %zu that is not new should be affiliated", r->index());
|
||||
if (r->affiliation() != req.affiliation()) {
|
||||
@@ -1534,8 +1534,8 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
|
||||
if (req.is_lab_alloc()) {
|
||||
size_t adjusted_size = req.size();
|
||||
size_t free = r->free(); // free represents bytes available within region r
|
||||
if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
|
||||
// This is a PLAB allocation
|
||||
if (req.is_old()) {
|
||||
// This is a PLAB allocation (lab alloc in old gen)
|
||||
assert(_heap->mode()->is_generational(), "PLABs are only for generational mode");
|
||||
assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::OldCollector, r->index()),
|
||||
"PLABS must be allocated in old_collector_free regions");
|
||||
@@ -1596,8 +1596,6 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
|
||||
r->set_update_watermark(r->top());
|
||||
if (r->is_old()) {
|
||||
_partitions.increase_used(ShenandoahFreeSetPartitionId::OldCollector, (req.actual_size() + req.waste()) * HeapWordSize);
|
||||
assert(req.type() != ShenandoahAllocRequest::_alloc_gclab, "old-gen allocations use PLAB or shared allocation");
|
||||
// for plabs, we'll sort the difference between evac and promotion usage when we retire the plab
|
||||
} else {
|
||||
_partitions.increase_used(ShenandoahFreeSetPartitionId::Collector, (req.actual_size() + req.waste()) * HeapWordSize);
|
||||
}
|
||||
|
||||
@@ -505,10 +505,10 @@ inline void assert_no_in_place_promotions() {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Preselect for inclusion into the collection set regions whose age is at or above tenure age which contain more than
|
||||
// ShenandoahOldGarbageThreshold amounts of garbage. We identify these regions by setting the appropriate entry of
|
||||
// the collection set's preselected regions array to true. All entries are initialized to false before calling this
|
||||
// function.
|
||||
// Preselect for inclusion into the collection set all regions whose age is at or above tenure age and for which the
|
||||
// garbage percentage exceeds a dynamically adjusted threshold (known as the old-garbage threshold percentage). We
|
||||
// identify these regions by setting the appropriate entry of the collection set's preselected regions array to true.
|
||||
// All entries are initialized to false before calling this function.
|
||||
//
|
||||
// During the subsequent selection of the collection set, we give priority to these promotion set candidates.
|
||||
// Without this prioritization, we found that the aged regions tend to be ignored because they typically have
|
||||
@@ -531,7 +531,8 @@ size_t ShenandoahGeneration::select_aged_regions(const size_t old_promotion_rese
|
||||
bool* const candidate_regions_for_promotion_by_copy = heap->collection_set()->preselected_regions();
|
||||
ShenandoahMarkingContext* const ctx = heap->marking_context();
|
||||
|
||||
const size_t old_garbage_threshold = (ShenandoahHeapRegion::region_size_bytes() * ShenandoahOldGarbageThreshold) / 100;
|
||||
const size_t old_garbage_threshold =
|
||||
(ShenandoahHeapRegion::region_size_bytes() * heap->old_generation()->heuristics()->get_old_garbage_threshold()) / 100;
|
||||
|
||||
const size_t pip_used_threshold = (ShenandoahHeapRegion::region_size_bytes() * ShenandoahGenerationalMinPIPUsage) / 100;
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ private:
|
||||
// garbage-dense regions, including those that satisfy criteria 1 & 2 below,
|
||||
// and whose live bytes will fit within old_available budget:
|
||||
// Criterion 1. region age >= tenuring threshold
|
||||
// Criterion 2. region garbage percentage > ShenandoahOldGarbageThreshold
|
||||
// Criterion 2. region garbage percentage > old garbage threshold
|
||||
//
|
||||
// Identifies regions eligible for promotion in place,
|
||||
// being those of at least tenuring_threshold age that have lower garbage
|
||||
|
||||
@@ -145,7 +145,7 @@ void ShenandoahGenerationalEvacuationTask::maybe_promote_region(ShenandoahHeapRe
|
||||
// triggers the load-reference barrier (LRB) to copy on reference fetch.
|
||||
//
|
||||
// Aged humongous continuation regions are handled with their start region. If an aged regular region has
|
||||
// more garbage than ShenandoahOldGarbageThreshold, we'll promote by evacuation. If there is room for evacuation
|
||||
// more garbage than the old garbage threshold, we'll promote by evacuation. If there is room for evacuation
|
||||
// in this cycle, the region will be in the collection set. If there is not room, the region will be promoted
|
||||
// by evacuation in some future GC cycle.
|
||||
|
||||
@@ -177,7 +177,8 @@ void ShenandoahGenerationalEvacuationTask::promote_in_place(ShenandoahHeapRegion
|
||||
size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes();
|
||||
|
||||
{
|
||||
const size_t old_garbage_threshold = (region_size_bytes * ShenandoahOldGarbageThreshold) / 100;
|
||||
const size_t old_garbage_threshold =
|
||||
(region_size_bytes * _heap->old_generation()->heuristics()->get_old_garbage_threshold()) / 100;
|
||||
assert(!_heap->is_concurrent_old_mark_in_progress(), "Cannot promote in place during old marking");
|
||||
assert(region->garbage_before_padded_for_promote() < old_garbage_threshold,
|
||||
"Region %zu has too much garbage for promotion", region->index());
|
||||
|
||||
@@ -83,7 +83,7 @@ void ShenandoahGenerationalFullGC::handle_completion(ShenandoahHeap* heap) {
|
||||
assert_usage_not_more_than_regions_used(young);
|
||||
|
||||
// Establish baseline for next old-has-grown trigger.
|
||||
old->set_live_bytes_after_last_mark(old->used());
|
||||
old->set_live_bytes_at_last_mark(old->used());
|
||||
}
|
||||
|
||||
void ShenandoahGenerationalFullGC::rebuild_remembered_set(ShenandoahHeap* heap) {
|
||||
|
||||
@@ -985,7 +985,7 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
|
||||
|
||||
assert (req.is_lab_alloc() || (requested == actual),
|
||||
"Only LAB allocations are elastic: %s, requested = %zu, actual = %zu",
|
||||
ShenandoahAllocRequest::alloc_type_to_string(req.type()), requested, actual);
|
||||
req.type_string(), requested, actual);
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -1014,8 +1014,9 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
|
||||
|
||||
// Record the plab configuration for this result and register the object.
|
||||
if (result != nullptr && req.is_old()) {
|
||||
old_generation()->configure_plab_for_current_thread(req);
|
||||
if (!req.is_lab_alloc()) {
|
||||
if (req.is_lab_alloc()) {
|
||||
old_generation()->configure_plab_for_current_thread(req);
|
||||
} else {
|
||||
// Register the newly allocated object while we're holding the global lock since there's no synchronization
|
||||
// built in to the implementation of register_object(). There are potential races when multiple independent
|
||||
// threads are allocating objects, some of which might span the same card region. For example, consider
|
||||
@@ -1035,6 +1036,13 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
|
||||
// last-start representing object b while first-start represents object c. This is why we need to require all
|
||||
// register_object() invocations to be "mutually exclusive" with respect to each card's memory range.
|
||||
old_generation()->card_scan()->register_object(result);
|
||||
|
||||
if (req.is_promotion()) {
|
||||
// Shared promotion.
|
||||
const size_t actual_size = req.actual_size() * HeapWordSize;
|
||||
log_debug(gc, plab)("Expend shared promotion of %zu bytes", actual_size);
|
||||
old_generation()->expend_promoted(actual_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -447,7 +447,7 @@ public:
|
||||
return (bottom() <= p) && (p < top());
|
||||
}
|
||||
|
||||
inline void adjust_alloc_metadata(ShenandoahAllocRequest::Type type, size_t);
|
||||
inline void adjust_alloc_metadata(const ShenandoahAllocRequest &req, size_t);
|
||||
void reset_alloc_metadata();
|
||||
size_t get_shared_allocs() const;
|
||||
size_t get_tlab_allocs() const;
|
||||
|
||||
@@ -71,7 +71,7 @@ HeapWord* ShenandoahHeapRegion::allocate_aligned(size_t size, ShenandoahAllocReq
|
||||
}
|
||||
|
||||
make_regular_allocation(req.affiliation());
|
||||
adjust_alloc_metadata(req.type(), size);
|
||||
adjust_alloc_metadata(req, size);
|
||||
|
||||
HeapWord* new_top = aligned_obj + size;
|
||||
assert(new_top <= end(), "PLAB cannot span end of heap region");
|
||||
@@ -111,7 +111,7 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque
|
||||
HeapWord* obj = top();
|
||||
if (pointer_delta(end(), obj) >= size) {
|
||||
make_regular_allocation(req.affiliation());
|
||||
adjust_alloc_metadata(req.type(), size);
|
||||
adjust_alloc_metadata(req, size);
|
||||
|
||||
HeapWord* new_top = obj + size;
|
||||
set_top(new_top);
|
||||
@@ -125,26 +125,16 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque
|
||||
}
|
||||
}
|
||||
|
||||
inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::Type type, size_t size) {
|
||||
switch (type) {
|
||||
case ShenandoahAllocRequest::_alloc_shared:
|
||||
case ShenandoahAllocRequest::_alloc_shared_gc:
|
||||
case ShenandoahAllocRequest::_alloc_shared_gc_old:
|
||||
case ShenandoahAllocRequest::_alloc_shared_gc_promotion:
|
||||
case ShenandoahAllocRequest::_alloc_cds:
|
||||
// Counted implicitly by tlab/gclab allocs
|
||||
break;
|
||||
case ShenandoahAllocRequest::_alloc_tlab:
|
||||
inline void ShenandoahHeapRegion::adjust_alloc_metadata(const ShenandoahAllocRequest &req, size_t size) {
|
||||
// Only need to update alloc metadata for lab alloc, shared alloc is counted implicitly by tlab/gclab allocs
|
||||
if (req.is_lab_alloc()) {
|
||||
if (req.is_mutator_alloc()) {
|
||||
_tlab_allocs += size;
|
||||
break;
|
||||
case ShenandoahAllocRequest::_alloc_gclab:
|
||||
_gclab_allocs += size;
|
||||
break;
|
||||
case ShenandoahAllocRequest::_alloc_plab:
|
||||
} else if (req.is_old()) {
|
||||
_plab_allocs += size;
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
} else {
|
||||
_gclab_allocs += size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -116,11 +116,10 @@ ShenandoahOldGeneration::ShenandoahOldGeneration(uint max_queues)
|
||||
_is_parsable(true),
|
||||
_card_scan(nullptr),
|
||||
_state(WAITING_FOR_BOOTSTRAP),
|
||||
_growth_before_compaction(INITIAL_GROWTH_BEFORE_COMPACTION),
|
||||
_min_growth_before_compaction ((ShenandoahMinOldGenGrowthPercent * FRACTIONAL_DENOMINATOR) / 100)
|
||||
_growth_percent_before_collection(INITIAL_GROWTH_PERCENT_BEFORE_COLLECTION)
|
||||
{
|
||||
assert(type() == ShenandoahGenerationType::OLD, "OO sanity");
|
||||
_live_bytes_after_last_mark = ShenandoahHeap::heap()->capacity() * INITIAL_LIVE_FRACTION / FRACTIONAL_DENOMINATOR;
|
||||
_live_bytes_at_last_mark = (ShenandoahHeap::heap()->soft_max_capacity() * INITIAL_LIVE_PERCENT) / 100;
|
||||
// Always clear references for old generation
|
||||
ref_processor()->set_soft_reference_policy(true);
|
||||
|
||||
@@ -168,7 +167,7 @@ size_t ShenandoahOldGeneration::get_promoted_expended() const {
|
||||
}
|
||||
|
||||
bool ShenandoahOldGeneration::can_allocate(const ShenandoahAllocRequest &req) const {
|
||||
assert(req.type() != ShenandoahAllocRequest::_alloc_gclab, "GCLAB pertains only to young-gen memory");
|
||||
assert(req.is_old(), "Must be old allocation request");
|
||||
|
||||
const size_t requested_bytes = req.size() * HeapWordSize;
|
||||
// The promotion reserve may also be used for evacuations. If we can promote this object,
|
||||
@@ -180,7 +179,7 @@ bool ShenandoahOldGeneration::can_allocate(const ShenandoahAllocRequest &req) co
|
||||
return true;
|
||||
}
|
||||
|
||||
if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
|
||||
if (req.is_lab_alloc()) {
|
||||
// The promotion reserve cannot accommodate this plab request. Check if we still have room for
|
||||
// evacuations. Note that we cannot really know how much of the plab will be used for evacuations,
|
||||
// so here we only check that some evacuation reserve still exists.
|
||||
@@ -195,54 +194,46 @@ bool ShenandoahOldGeneration::can_allocate(const ShenandoahAllocRequest &req) co
|
||||
|
||||
void
|
||||
ShenandoahOldGeneration::configure_plab_for_current_thread(const ShenandoahAllocRequest &req) {
|
||||
// Note: Even when a mutator is performing a promotion outside a LAB, we use a 'shared_gc' request.
|
||||
if (req.is_gc_alloc()) {
|
||||
const size_t actual_size = req.actual_size() * HeapWordSize;
|
||||
if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
|
||||
// We've created a new plab. Now we configure it whether it will be used for promotions
|
||||
// and evacuations - or just evacuations.
|
||||
Thread* thread = Thread::current();
|
||||
ShenandoahThreadLocalData::reset_plab_promoted(thread);
|
||||
assert(req.is_gc_alloc() && req.is_old() && req.is_lab_alloc(), "Must be a plab alloc request");
|
||||
const size_t actual_size = req.actual_size() * HeapWordSize;
|
||||
// We've created a new plab. Now we configure it whether it will be used for promotions
|
||||
// and evacuations - or just evacuations.
|
||||
Thread* thread = Thread::current();
|
||||
ShenandoahThreadLocalData::reset_plab_promoted(thread);
|
||||
|
||||
// The actual size of the allocation may be larger than the requested bytes (due to alignment on card boundaries).
|
||||
// If this puts us over our promotion budget, we need to disable future PLAB promotions for this thread.
|
||||
if (can_promote(actual_size)) {
|
||||
// Assume the entirety of this PLAB will be used for promotion. This prevents promotion from overreach.
|
||||
// When we retire this plab, we'll unexpend what we don't really use.
|
||||
log_debug(gc, plab)("Thread can promote using PLAB of %zu bytes. Expended: %zu, available: %zu",
|
||||
actual_size, get_promoted_expended(), get_promoted_reserve());
|
||||
expend_promoted(actual_size);
|
||||
ShenandoahThreadLocalData::enable_plab_promotions(thread);
|
||||
ShenandoahThreadLocalData::set_plab_actual_size(thread, actual_size);
|
||||
} else {
|
||||
// Disable promotions in this thread because entirety of this PLAB must be available to hold old-gen evacuations.
|
||||
ShenandoahThreadLocalData::disable_plab_promotions(thread);
|
||||
ShenandoahThreadLocalData::set_plab_actual_size(thread, 0);
|
||||
log_debug(gc, plab)("Thread cannot promote using PLAB of %zu bytes. Expended: %zu, available: %zu, mixed evacuations? %s",
|
||||
actual_size, get_promoted_expended(), get_promoted_reserve(), BOOL_TO_STR(ShenandoahHeap::heap()->collection_set()->has_old_regions()));
|
||||
}
|
||||
} else if (req.is_promotion()) {
|
||||
// Shared promotion.
|
||||
log_debug(gc, plab)("Expend shared promotion of %zu bytes", actual_size);
|
||||
expend_promoted(actual_size);
|
||||
}
|
||||
// The actual size of the allocation may be larger than the requested bytes (due to alignment on card boundaries).
|
||||
// If this puts us over our promotion budget, we need to disable future PLAB promotions for this thread.
|
||||
if (can_promote(actual_size)) {
|
||||
// Assume the entirety of this PLAB will be used for promotion. This prevents promotion from overreach.
|
||||
// When we retire this plab, we'll unexpend what we don't really use.
|
||||
log_debug(gc, plab)("Thread can promote using PLAB of %zu bytes. Expended: %zu, available: %zu",
|
||||
actual_size, get_promoted_expended(), get_promoted_reserve());
|
||||
expend_promoted(actual_size);
|
||||
ShenandoahThreadLocalData::enable_plab_promotions(thread);
|
||||
ShenandoahThreadLocalData::set_plab_actual_size(thread, actual_size);
|
||||
} else {
|
||||
// Disable promotions in this thread because entirety of this PLAB must be available to hold old-gen evacuations.
|
||||
ShenandoahThreadLocalData::disable_plab_promotions(thread);
|
||||
ShenandoahThreadLocalData::set_plab_actual_size(thread, 0);
|
||||
log_debug(gc, plab)("Thread cannot promote using PLAB of %zu bytes. Expended: %zu, available: %zu, mixed evacuations? %s",
|
||||
actual_size, get_promoted_expended(), get_promoted_reserve(), BOOL_TO_STR(ShenandoahHeap::heap()->collection_set()->has_old_regions()));
|
||||
}
|
||||
}
|
||||
|
||||
size_t ShenandoahOldGeneration::get_live_bytes_after_last_mark() const {
|
||||
return _live_bytes_after_last_mark;
|
||||
size_t ShenandoahOldGeneration::get_live_bytes_at_last_mark() const {
|
||||
return _live_bytes_at_last_mark;
|
||||
}
|
||||
|
||||
void ShenandoahOldGeneration::set_live_bytes_after_last_mark(size_t bytes) {
|
||||
void ShenandoahOldGeneration::set_live_bytes_at_last_mark(size_t bytes) {
|
||||
if (bytes == 0) {
|
||||
// Restart search for best old-gen size to the initial state
|
||||
_live_bytes_after_last_mark = ShenandoahHeap::heap()->capacity() * INITIAL_LIVE_FRACTION / FRACTIONAL_DENOMINATOR;
|
||||
_growth_before_compaction = INITIAL_GROWTH_BEFORE_COMPACTION;
|
||||
_live_bytes_at_last_mark = (ShenandoahHeap::heap()->soft_max_capacity() * INITIAL_LIVE_PERCENT) / 100;
|
||||
_growth_percent_before_collection = INITIAL_GROWTH_PERCENT_BEFORE_COLLECTION;
|
||||
} else {
|
||||
_live_bytes_after_last_mark = bytes;
|
||||
_growth_before_compaction /= 2;
|
||||
if (_growth_before_compaction < _min_growth_before_compaction) {
|
||||
_growth_before_compaction = _min_growth_before_compaction;
|
||||
_live_bytes_at_last_mark = bytes;
|
||||
_growth_percent_before_collection /= 2;
|
||||
if (_growth_percent_before_collection < ShenandoahMinOldGenGrowthPercent) {
|
||||
_growth_percent_before_collection = ShenandoahMinOldGenGrowthPercent;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -252,7 +243,19 @@ void ShenandoahOldGeneration::handle_failed_transfer() {
|
||||
}
|
||||
|
||||
size_t ShenandoahOldGeneration::usage_trigger_threshold() const {
|
||||
size_t result = _live_bytes_after_last_mark + (_live_bytes_after_last_mark * _growth_before_compaction) / FRACTIONAL_DENOMINATOR;
|
||||
size_t threshold_by_relative_growth =
|
||||
_live_bytes_at_last_mark + (_live_bytes_at_last_mark * _growth_percent_before_collection) / 100;
|
||||
size_t soft_max_capacity = ShenandoahHeap::heap()->soft_max_capacity();
|
||||
size_t threshold_by_growth_into_percent_remaining;
|
||||
if (_live_bytes_at_last_mark < soft_max_capacity) {
|
||||
threshold_by_growth_into_percent_remaining = (size_t)
|
||||
(_live_bytes_at_last_mark + ((soft_max_capacity - _live_bytes_at_last_mark)
|
||||
* ShenandoahMinOldGenGrowthRemainingHeapPercent / 100.0));
|
||||
} else {
|
||||
// we're already consuming more than soft max capacity, so we should start old GC right away.
|
||||
threshold_by_growth_into_percent_remaining = soft_max_capacity;
|
||||
}
|
||||
size_t result = MIN2(threshold_by_relative_growth, threshold_by_growth_into_percent_remaining);
|
||||
return result;
|
||||
}
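
// Illustrative sketch (not part of this patch): the trigger threshold computed above is the
// smaller of two candidates. With assumed inputs of a 1000 MB soft max capacity, 200 MB live
// at the last old mark, a 50% growth trigger, and ShenandoahMinOldGenGrowthRemainingHeapPercent
// of 35, the relative-growth candidate is 200 + 200 * 50 / 100 = 300 MB, the remaining-heap
// candidate is 200 + (1000 - 200) * 35 / 100 = 480 MB, and the threshold is min(300, 480) = 300 MB.
#include <algorithm>
#include <cstddef>

static size_t usage_trigger_threshold_sketch(size_t live_at_last_mark,
                                             size_t soft_max_capacity,
                                             size_t growth_percent,           // e.g. 50
                                             double remaining_heap_percent) { // e.g. 35.0
  const size_t by_relative_growth =
      live_at_last_mark + (live_at_last_mark * growth_percent) / 100;
  const size_t by_remaining_heap = (live_at_last_mark < soft_max_capacity)
      ? (size_t)(live_at_last_mark +
                 (soft_max_capacity - live_at_last_mark) * remaining_heap_percent / 100.0)
      : soft_max_capacity;
  return std::min(by_relative_growth, by_remaining_heap);
}
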
@@ -287,28 +287,23 @@ public:
|
||||
private:
|
||||
State _state;
|
||||
|
||||
static const size_t FRACTIONAL_DENOMINATOR = 65536;
|
||||
|
||||
// During initialization of the JVM, we search for the correct old-gen size by initially performing old-gen
|
||||
// collection when old-gen usage is 50% more (INITIAL_GROWTH_BEFORE_COMPACTION) than the initial old-gen size
|
||||
// estimate (3.125% of heap). The next old-gen trigger occurs when old-gen grows 25% larger than its live
|
||||
// memory at the end of the first old-gen collection. Then we trigger again when old-gen grows 12.5%
|
||||
// more than its live memory at the end of the previous old-gen collection. Thereafter, we trigger each time
|
||||
// old-gen grows more than 12.5% following the end of its previous old-gen collection.
|
||||
static const size_t INITIAL_GROWTH_BEFORE_COMPACTION = FRACTIONAL_DENOMINATOR / 2; // 50.0%
|
||||
// collection when old-gen usage is 50% more (INITIAL_GROWTH_PERCENT_BEFORE_COLLECTION) than the initial old-gen size
|
||||
// estimate (16% of heap). With each successive old-gen collection, we divide the growth trigger by two, but
|
||||
// never use a growth trigger smaller than ShenandoahMinOldGenGrowthPercent.
|
||||
static const size_t INITIAL_GROWTH_PERCENT_BEFORE_COLLECTION = 50;
|
||||
|
||||
// INITIAL_LIVE_FRACTION represents the initial guess of how large old-gen should be. We estimate that old-gen
|
||||
// needs to consume 6.25% of the total heap size. And we "pretend" that we start out with this amount of live
|
||||
// INITIAL_LIVE_PERCENT represents the initial guess of how large old-gen should be. We estimate that old gen
|
||||
// needs to consume 16% of the total heap size. And we "pretend" that we start out with this amount of live
|
||||
// old-gen memory. The first old-collection trigger will occur when old-gen occupies 50% more than this initial
|
||||
// approximation of the old-gen memory requirement, in other words when old-gen usage is 150% of 6.25%, which
|
||||
// is 9.375% of the total heap size.
|
||||
static const uint16_t INITIAL_LIVE_FRACTION = FRACTIONAL_DENOMINATOR / 16; // 6.25%
|
||||
// approximation of the old-gen memory requirement, in other words when old-gen usage is 150% of 16%, which
|
||||
// is 24% of the heap size.
|
||||
static const size_t INITIAL_LIVE_PERCENT = 16;
|
||||
|
||||
size_t _live_bytes_after_last_mark;
|
||||
size_t _live_bytes_at_last_mark;
|
||||
|
||||
// How much growth in usage before we trigger old collection, per FRACTIONAL_DENOMINATOR (65_536)
|
||||
size_t _growth_before_compaction;
|
||||
const size_t _min_growth_before_compaction; // Default is 12.5%
|
||||
// How much growth in usage before we trigger old collection as a percent of soft_max_capacity
|
||||
size_t _growth_percent_before_collection;
|
||||
|
||||
void validate_transition(State new_state) NOT_DEBUG_RETURN;
|
||||
|
||||
@@ -323,8 +318,8 @@ public:
|
||||
|
||||
void transition_to(State new_state);
|
||||
|
||||
size_t get_live_bytes_after_last_mark() const;
|
||||
void set_live_bytes_after_last_mark(size_t new_live);
|
||||
size_t get_live_bytes_at_last_mark() const;
|
||||
void set_live_bytes_at_last_mark(size_t new_live);
|
||||
|
||||
size_t usage_trigger_threshold() const;
|
||||
|
||||
|
||||
@@ -335,7 +335,6 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
|
||||
if (ctx->is_marked(p)) {
|
||||
oop obj = cast_to_oop(p);
|
||||
assert(oopDesc::is_oop(obj), "Should be an object");
|
||||
assert(Klass::is_valid(obj->klass()), "Not a valid klass ptr");
|
||||
assert(p + obj->size() > left, "This object should span start of card");
|
||||
assert(p < right, "Result must precede right");
|
||||
return p;
|
||||
@@ -362,15 +361,15 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
|
||||
|
||||
// Recall that we already dealt with the co-initial object case above
|
||||
assert(p < left, "obj should start before left");
|
||||
// While it is safe to ask an object its size in the loop that
|
||||
// follows, the (ifdef'd out) loop should never be needed.
|
||||
// While it is safe to ask an object its size in the block that
|
||||
// follows, the (ifdef'd out) block should never be needed.
|
||||
// 1. we ask this question only for regions in the old generation, and those
|
||||
// that are not humongous regions
|
||||
// 2. there is no direct allocation ever by mutators in old generation
|
||||
// regions walked by this code. Only GC will ever allocate in old regions,
|
||||
// and then too only during promotion/evacuation phases. Thus there is no danger
|
||||
// of races between reading from and writing to the object start array,
|
||||
// or of asking partially initialized objects their size (in the loop below).
|
||||
// or of asking partially initialized objects their size (in the ifdef below).
|
||||
// Furthermore, humongous regions (and their dirty cards) are never processed
|
||||
// by this code.
|
||||
// 3. only GC asks this question during phases when it is not concurrently
|
||||
@@ -382,15 +381,6 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
|
||||
#ifdef ASSERT
|
||||
oop obj = cast_to_oop(p);
|
||||
assert(oopDesc::is_oop(obj), "Should be an object");
|
||||
while (p + obj->size() < left) {
|
||||
p += obj->size();
|
||||
obj = cast_to_oop(p);
|
||||
assert(oopDesc::is_oop(obj), "Should be an object");
|
||||
assert(Klass::is_valid(obj->klass()), "Not a valid klass ptr");
|
||||
// Check assumptions in previous block comment if this assert fires
|
||||
fatal("Should never need forward walk in block start");
|
||||
}
|
||||
assert(p <= left, "p should start at or before left end of card");
|
||||
assert(p + obj->size() > left, "obj should end after left end of card");
|
||||
#endif // ASSERT
|
||||
return p;
|
||||
|
||||
@@ -29,11 +29,7 @@
|
||||
#include "memory/allocation.hpp"
|
||||
#include "runtime/atomicAccess.hpp"
|
||||
|
||||
typedef jbyte ShenandoahSharedValue;
|
||||
|
||||
// Needed for cooperation with generated code.
|
||||
STATIC_ASSERT(sizeof(ShenandoahSharedValue) == 1);
|
||||
|
||||
typedef int32_t ShenandoahSharedValue;
|
||||
typedef struct ShenandoahSharedFlag {
|
||||
enum {
|
||||
UNSET = 0,
|
||||
|
||||
@@ -59,15 +59,29 @@
|
||||
"fail, resulting in stop-the-world full GCs.") \
|
||||
range(0,100) \
|
||||
\
|
||||
product(double, ShenandoahMinOldGenGrowthPercent, 12.5, EXPERIMENTAL, \
|
||||
product(double, ShenandoahMinOldGenGrowthPercent, 50, EXPERIMENTAL, \
|
||||
"(Generational mode only) If the usage within old generation " \
|
||||
"has grown by at least this percent of its live memory size " \
|
||||
"at completion of the most recent old-generation marking " \
|
||||
"effort, heuristics may trigger the start of a new old-gen " \
|
||||
"collection.") \
|
||||
"at the start of the previous old-generation marking effort, " \
|
||||
"heuristics may trigger the start of a new old-gen collection.") \
|
||||
range(0.0,100.0) \
|
||||
\
|
||||
product(uintx, ShenandoahIgnoreOldGrowthBelowPercentage,10, EXPERIMENTAL, \
|
||||
product(double, ShenandoahMinOldGenGrowthRemainingHeapPercent, \
|
||||
35, EXPERIMENTAL, \
|
||||
"(Generational mode only) If the usage within old generation " \
|
||||
"has grown to exceed this percent of the remaining heap that " \
|
||||
"was not marked live within the old generation at the time " \
|
||||
"of the last old-generation marking effort, heuristics may " \
|
||||
"trigger the start of a new old-gen collection. Setting " \
|
||||
"this value to a smaller value may cause back-to-back old " \
|
||||
"generation marking triggers, since the typical memory used " \
|
||||
"by the old generation is about 30% larger than the live " \
|
||||
"memory contained within the old generation (because default " \
|
||||
"value of ShenandoahOldGarbageThreshold is 25.") \
|
||||
range(0.0,100.0) \
|
||||
\
|
||||
product(uintx, ShenandoahIgnoreOldGrowthBelowPercentage, \
|
||||
40, EXPERIMENTAL, \
|
||||
"(Generational mode only) If the total usage of the old " \
|
||||
"generation is smaller than this percent, we do not trigger " \
|
||||
"old gen collections even if old has grown, except when " \
|
||||
@@ -77,12 +91,13 @@
|
||||
range(0,100) \
|
||||
\
|
||||
product(uintx, ShenandoahDoNotIgnoreGrowthAfterYoungCycles, \
|
||||
50, EXPERIMENTAL, \
|
||||
"(Generational mode only) Even if the usage of old generation " \
|
||||
"is below ShenandoahIgnoreOldGrowthBelowPercentage, " \
|
||||
"trigger an old-generation mark if old has grown and this " \
|
||||
"many consecutive young-gen collections have been " \
|
||||
"completed following the preceding old-gen collection.") \
|
||||
100, EXPERIMENTAL, \
|
||||
"(Generational mode only) Trigger an old-generation mark " \
|
||||
"if old has grown and this many consecutive young-gen " \
|
||||
"collections have been completed following the preceding " \
|
||||
"old-gen collection. We perform this old-generation mark " \
|
||||
"evvort even if the usage of old generation is below " \
|
||||
"ShenandoahIgnoreOldGrowthBelowPercentage.") \
|
||||
\
|
||||
product(bool, ShenandoahGenerationalAdaptiveTenuring, true, EXPERIMENTAL, \
|
||||
"(Generational mode only) Dynamically adapt tenuring age.") \
|
||||
|
||||
@@ -87,6 +87,9 @@ JVM_InternString(JNIEnv *env, jstring str);
|
||||
/*
|
||||
* java.lang.System
|
||||
*/
|
||||
JNIEXPORT jboolean JNICALL
|
||||
JVM_AOTEndRecording(JNIEnv *env);
|
||||
|
||||
JNIEXPORT jlong JNICALL
|
||||
JVM_CurrentTimeMillis(JNIEnv *env, jclass ignored);
|
||||
|
||||
|
||||
@@ -24,11 +24,11 @@
|
||||
*/
|
||||
|
||||
#include "jfr/recorder/service/jfrEventThrottler.hpp"
|
||||
#include "jfr/utilities/jfrSpinlockHelper.hpp"
|
||||
#include "jfrfiles/jfrEventIds.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/spinCriticalSection.hpp"
|
||||
|
||||
constexpr static const JfrSamplerParams _disabled_params = {
|
||||
0, // sample points per window
|
||||
@@ -128,7 +128,7 @@ JfrEventThrottler* JfrEventThrottler::create_throttler(JfrEventId id) {
|
||||
* - period_ms time period expressed in milliseconds
|
||||
*/
|
||||
void JfrEventThrottler::configure(int64_t sample_size, int64_t period_ms) {
|
||||
JfrSpinlockHelper mutex(&_lock);
|
||||
SpinCriticalSection scs(&_lock);
|
||||
_sample_size = sample_size;
|
||||
_period_ms = period_ms;
|
||||
_update = true;
|
||||
|
||||
@@ -25,13 +25,13 @@
|
||||
|
||||
#include "jfr/support/jfrAdaptiveSampler.hpp"
|
||||
#include "jfr/utilities/jfrRandom.inline.hpp"
|
||||
#include "jfr/utilities/jfrSpinlockHelper.hpp"
|
||||
#include "jfr/utilities/jfrTime.hpp"
|
||||
#include "jfr/utilities/jfrTimeConverter.hpp"
|
||||
#include "jfr/utilities/jfrTryLock.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "runtime/atomicAccess.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/spinCriticalSection.hpp"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
@@ -342,7 +342,7 @@ JfrGTestFixedRateSampler::JfrGTestFixedRateSampler(size_t sample_points_per_wind
|
||||
|
||||
bool JfrGTestFixedRateSampler::initialize() {
|
||||
const bool result = JfrAdaptiveSampler::initialize();
|
||||
JfrSpinlockHelper mutex(&_lock);
|
||||
SpinCriticalSection scs(&_lock);
|
||||
reconfigure();
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -36,7 +36,6 @@
|
||||
#include "jfr/recorder/storage/jfrStorage.hpp"
|
||||
#include "jfr/support/jfrThreadId.inline.hpp"
|
||||
#include "jfr/support/jfrThreadLocal.hpp"
|
||||
#include "jfr/utilities/jfrSpinlockHelper.hpp"
|
||||
#include "jfr/writers/jfrJavaEventWriter.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
|
||||
@@ -223,6 +223,7 @@
|
||||
volatile_nonstatic_field(InstanceKlass, _init_state, InstanceKlass::ClassState) \
|
||||
volatile_nonstatic_field(InstanceKlass, _init_thread, JavaThread*) \
|
||||
nonstatic_field(InstanceKlass, _misc_flags._flags, u2) \
|
||||
nonstatic_field(InstanceKlass, _access_flags, AccessFlags) \
|
||||
nonstatic_field(InstanceKlass, _annotations, Annotations*) \
|
||||
\
|
||||
volatile_nonstatic_field(JavaFrameAnchor, _last_Java_sp, intptr_t*) \
|
||||
@@ -282,7 +283,6 @@
|
||||
nonstatic_field(Klass, _name, Symbol*) \
|
||||
volatile_nonstatic_field(Klass, _next_sibling, Klass*) \
|
||||
nonstatic_field(Klass, _java_mirror, OopHandle) \
|
||||
nonstatic_field(Klass, _access_flags, AccessFlags) \
|
||||
nonstatic_field(Klass, _class_loader_data, ClassLoaderData*) \
|
||||
nonstatic_field(Klass, _secondary_supers_bitmap, uintx) \
|
||||
nonstatic_field(Klass, _hash_slot, uint8_t) \
|
||||
|
||||
@@ -544,7 +544,7 @@ void Universe::genesis(TRAPS) {
|
||||
// Only modify the global variable inside the mutex.
|
||||
// If we had a race to here, the other dummy_array instances
|
||||
// and their elements just get dropped on the floor, which is fine.
|
||||
MutexLocker ml(THREAD, FullGCALot_lock);
|
||||
MutexLocker ml(THREAD, FullGCALot_lock, Mutex::_no_safepoint_check_flag);
|
||||
if (_fullgc_alot_dummy_array.is_empty()) {
|
||||
_fullgc_alot_dummy_array = OopHandle(vm_global(), dummy_array());
|
||||
}
|
||||
@@ -1458,7 +1458,7 @@ uintptr_t Universe::verify_mark_bits() {
|
||||
#ifdef ASSERT
|
||||
// Release dummy object(s) at bottom of heap
|
||||
bool Universe::release_fullgc_alot_dummy() {
|
||||
MutexLocker ml(FullGCALot_lock);
|
||||
MutexLocker ml(FullGCALot_lock, Mutex::_no_safepoint_check_flag);
|
||||
objArrayOop fullgc_alot_dummy_array = (objArrayOop)_fullgc_alot_dummy_array.resolve();
|
||||
if (fullgc_alot_dummy_array != nullptr) {
|
||||
if (_fullgc_alot_dummy_next >= fullgc_alot_dummy_array->length()) {
|
||||
|
||||
@@ -99,7 +99,8 @@ ArrayKlass::ArrayKlass(Symbol* name, KlassKind kind) :
|
||||
set_name(name);
|
||||
set_super(Universe::is_bootstrapping() ? nullptr : vmClasses::Object_klass());
|
||||
set_layout_helper(Klass::_lh_neutral_value);
|
||||
set_is_cloneable(); // All arrays are considered to be cloneable (See JLS 20.1.5)
|
||||
// All arrays are considered to be cloneable (See JLS 20.1.5)
|
||||
set_is_cloneable_fast();
|
||||
JFR_ONLY(INIT_ID(this);)
|
||||
log_array_class_load(this);
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
#include "memory/allocation.hpp"
|
||||
#include "oops/typeArrayOop.hpp"
|
||||
#include "utilities/accessFlags.hpp"
|
||||
#include "utilities/unsigned5.hpp"
|
||||
#include "utilities/vmEnums.hpp"
|
||||
|
||||
|
||||
@@ -552,6 +552,17 @@ InstanceKlass::InstanceKlass(const ClassFileParser& parser, KlassKind kind, Refe
|
||||
assert(size_helper() == parser.layout_size(), "incorrect size_helper?");
|
||||
}
|
||||
|
||||
void InstanceKlass::set_is_cloneable() {
|
||||
if (name() == vmSymbols::java_lang_invoke_MemberName()) {
|
||||
assert(is_final(), "no subclasses allowed");
|
||||
// MemberName cloning should not be intrinsified and always happen in JVM_Clone.
|
||||
} else if (reference_type() != REF_NONE) {
|
||||
// Reference cloning should not be intrinsified and always happen in JVM_Clone.
|
||||
} else {
|
||||
set_is_cloneable_fast();
|
||||
}
|
||||
}
|
||||
|
||||
void InstanceKlass::deallocate_methods(ClassLoaderData* loader_data,
|
||||
Array<Method*>* methods) {
|
||||
if (methods != nullptr && methods != Universe::the_empty_method_array() &&
|
||||
|
||||
@@ -229,7 +229,9 @@ class InstanceKlass: public Klass {
|
||||
// _idnum_allocated_count.
|
||||
volatile ClassState _init_state; // state of class
|
||||
|
||||
u1 _reference_type; // reference type
|
||||
u1 _reference_type; // reference type
|
||||
|
||||
AccessFlags _access_flags; // Access flags. The class/interface distinction is stored here.
|
||||
|
||||
// State is set either at parse time or while executing, atomically to not disturb other state
|
||||
InstanceKlassFlags _misc_flags;
|
||||
@@ -305,6 +307,22 @@ class InstanceKlass: public Klass {
|
||||
// Sets finalization state
|
||||
static void set_finalization_enabled(bool val) { _finalization_enabled = val; }
|
||||
|
||||
// Access flags
|
||||
AccessFlags access_flags() const { return _access_flags; }
|
||||
void set_access_flags(AccessFlags flags) { _access_flags = flags; }
|
||||
|
||||
bool is_public() const { return _access_flags.is_public(); }
|
||||
bool is_final() const { return _access_flags.is_final(); }
|
||||
bool is_interface() const { return _access_flags.is_interface(); }
|
||||
bool is_abstract() const { return _access_flags.is_abstract(); }
|
||||
bool is_super() const { return _access_flags.is_super(); }
|
||||
bool is_synthetic() const { return _access_flags.is_synthetic(); }
|
||||
void set_is_synthetic() { _access_flags.set_is_synthetic(); }
|
||||
|
||||
static ByteSize access_flags_offset() { return byte_offset_of(InstanceKlass, _access_flags); }
|
||||
|
||||
void set_is_cloneable();
|
||||
|
||||
// Quick checks for the loader that defined this class (without switching on this->class_loader())
|
||||
bool defined_by_boot_loader() const { return _misc_flags.defined_by_boot_loader(); }
|
||||
bool defined_by_platform_loader() const { return _misc_flags.defined_by_platform_loader(); }
|
||||
|
||||
@@ -72,17 +72,6 @@ bool Klass::is_cloneable() const {
|
||||
is_subtype_of(vmClasses::Cloneable_klass());
|
||||
}
|
||||
|
||||
void Klass::set_is_cloneable() {
|
||||
if (name() == vmSymbols::java_lang_invoke_MemberName()) {
|
||||
assert(is_final(), "no subclasses allowed");
|
||||
// MemberName cloning should not be intrinsified and always happen in JVM_Clone.
|
||||
} else if (is_instance_klass() && InstanceKlass::cast(this)->reference_type() != REF_NONE) {
|
||||
// Reference cloning should not be intrinsified and always happen in JVM_Clone.
|
||||
} else {
|
||||
_misc_flags.set_is_cloneable_fast(true);
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t Klass::compute_hash_slot(Symbol* n) {
|
||||
uint hash_code;
|
||||
// Special cases for the two superclasses of all Array instances.
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
#include "oops/metadata.hpp"
|
||||
#include "oops/oop.hpp"
|
||||
#include "oops/oopHandle.hpp"
|
||||
#include "utilities/accessFlags.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#if INCLUDE_JFR
|
||||
#include "jfr/support/jfrTraceIdExtension.hpp"
|
||||
@@ -120,9 +119,8 @@ class Klass : public Metadata {
|
||||
// - Various type checking in the JVM
|
||||
const KlassKind _kind;
|
||||
|
||||
AccessFlags _access_flags; // Access flags. The class/interface distinction is stored here.
|
||||
// Some flags created by the JVM, not in the class file itself,
|
||||
// are in _misc_flags below.
|
||||
// Some flags created by the JVM, not in the class file itself,
|
||||
// are in _misc_flags below.
|
||||
KlassFlags _misc_flags;
|
||||
|
||||
// The fields _super_check_offset, _secondary_super_cache, _secondary_supers
|
||||
@@ -453,7 +451,6 @@ protected:
|
||||
static ByteSize java_mirror_offset() { return byte_offset_of(Klass, _java_mirror); }
|
||||
static ByteSize class_loader_data_offset() { return byte_offset_of(Klass, _class_loader_data); }
|
||||
static ByteSize layout_helper_offset() { return byte_offset_of(Klass, _layout_helper); }
|
||||
static ByteSize access_flags_offset() { return byte_offset_of(Klass, _access_flags); }
|
||||
#if INCLUDE_JVMCI
|
||||
static ByteSize subklass_offset() { return byte_offset_of(Klass, _subklass); }
|
||||
static ByteSize next_sibling_offset() { return byte_offset_of(Klass, _next_sibling); }
|
||||
@@ -707,17 +704,10 @@ public:
|
||||
bool is_typeArray_klass() const { return assert_same_query( _kind == TypeArrayKlassKind, is_typeArray_klass_slow()); }
|
||||
#undef assert_same_query
|
||||
|
||||
// Access flags
|
||||
AccessFlags access_flags() const { return _access_flags; }
|
||||
void set_access_flags(AccessFlags flags) { _access_flags = flags; }
|
||||
|
||||
bool is_public() const { return _access_flags.is_public(); }
|
||||
bool is_final() const { return _access_flags.is_final(); }
|
||||
bool is_interface() const { return _access_flags.is_interface(); }
|
||||
bool is_abstract() const { return _access_flags.is_abstract(); }
|
||||
bool is_super() const { return _access_flags.is_super(); }
|
||||
bool is_synthetic() const { return _access_flags.is_synthetic(); }
|
||||
void set_is_synthetic() { _access_flags.set_is_synthetic(); }
|
||||
virtual bool is_interface() const { return false; }
|
||||
virtual bool is_abstract() const { return false; }
|
||||
|
||||
bool has_finalizer() const { return _misc_flags.has_finalizer(); }
|
||||
void set_has_finalizer() { _misc_flags.set_has_finalizer(true); }
|
||||
bool is_hidden() const { return _misc_flags.is_hidden_class(); }
|
||||
@@ -730,7 +720,7 @@ public:
|
||||
inline bool is_non_strong_hidden() const;
|
||||
|
||||
bool is_cloneable() const;
|
||||
void set_is_cloneable();
|
||||
void set_is_cloneable_fast() { _misc_flags.set_is_cloneable_fast(true); }
|
||||
|
||||
inline markWord prototype_header() const;
|
||||
inline void set_prototype_header(markWord header);
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "castnode.hpp"
|
||||
#include "opto/addnode.hpp"
|
||||
#include "opto/callnode.hpp"
|
||||
#include "opto/castnode.hpp"
|
||||
@@ -35,12 +34,22 @@
|
||||
#include "opto/type.hpp"
|
||||
#include "utilities/checkedCast.hpp"
|
||||
|
||||
const ConstraintCastNode::DependencyType ConstraintCastNode::DependencyType::FloatingNarrowing(true, true, "floating narrowing dependency"); // not pinned, narrows type
|
||||
const ConstraintCastNode::DependencyType ConstraintCastNode::DependencyType::FloatingNonNarrowing(true, false, "floating non-narrowing dependency"); // not pinned, doesn't narrow type
|
||||
const ConstraintCastNode::DependencyType ConstraintCastNode::DependencyType::NonFloatingNarrowing(false, true, "non-floating narrowing dependency"); // pinned, narrows type
|
||||
const ConstraintCastNode::DependencyType ConstraintCastNode::DependencyType::NonFloatingNonNarrowing(false, false, "non-floating non-narrowing dependency"); // pinned, doesn't narrow type
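
// Minimal sketch (an assumption, not the declaration added by this patch): from the four
// constants above and the call sites later in this diff (is_floating(), narrows_type(),
// with_pinned_dependency(), hash(), cmp()), DependencyType appears to pair a "floating" flag
// (may the cast move independently of its control?) with a "narrows type" flag (does the cast
// carry a type assertion?). The class below only illustrates that shape.
class DependencyTypeSketch {
  bool        _floating;
  bool        _narrows_type;
  const char* _description;
public:
  constexpr DependencyTypeSketch(bool floating, bool narrows_type, const char* description)
    : _floating(floating), _narrows_type(narrows_type), _description(description) {}
  bool is_floating() const  { return _floating; }
  bool narrows_type() const { return _narrows_type; }
  DependencyTypeSketch with_pinned_dependency() const {
    return DependencyTypeSketch(false, _narrows_type, _description);
  }
  unsigned hash() const { return (_floating ? 2u : 0u) + (_narrows_type ? 1u : 0u); }
  bool cmp(const DependencyTypeSketch& other) const {
    return _floating == other._floating && _narrows_type == other._narrows_type;
  }
};
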
//=============================================================================
|
||||
// If input is already higher or equal to cast type, then this is an identity.
|
||||
Node* ConstraintCastNode::Identity(PhaseGVN* phase) {
|
||||
if (_dependency == UnconditionalDependency) {
|
||||
if (!_dependency.narrows_type()) {
|
||||
// If this cast doesn't carry a type dependency (i.e. not used for type narrowing), we cannot optimize it.
|
||||
return this;
|
||||
}
|
||||
|
||||
// This cast node carries a type dependency. We can remove it if:
|
||||
// - Its input has a narrower type
|
||||
// - There's a dominating cast with same input but narrower type
|
||||
Node* dom = dominating_cast(phase, phase);
|
||||
if (dom != nullptr) {
|
||||
return dom;
|
||||
@@ -109,7 +118,7 @@ Node* ConstraintCastNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
}
|
||||
|
||||
uint ConstraintCastNode::hash() const {
|
||||
return TypeNode::hash() + (int)_dependency + (_extra_types != nullptr ? _extra_types->hash() : 0);
|
||||
return TypeNode::hash() + _dependency.hash() + (_extra_types != nullptr ? _extra_types->hash() : 0);
|
||||
}
|
||||
|
||||
bool ConstraintCastNode::cmp(const Node &n) const {
|
||||
@@ -117,7 +126,7 @@ bool ConstraintCastNode::cmp(const Node &n) const {
|
||||
return false;
|
||||
}
|
||||
ConstraintCastNode& cast = (ConstraintCastNode&) n;
|
||||
if (cast._dependency != _dependency) {
|
||||
if (!cast._dependency.cmp(_dependency)) {
|
||||
return false;
|
||||
}
|
||||
if (_extra_types == nullptr || cast._extra_types == nullptr) {
|
||||
@@ -130,7 +139,7 @@ uint ConstraintCastNode::size_of() const {
|
||||
return sizeof(*this);
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::make_cast_for_basic_type(Node* c, Node* n, const Type* t, DependencyType dependency, BasicType bt) {
|
||||
Node* ConstraintCastNode::make_cast_for_basic_type(Node* c, Node* n, const Type* t, const DependencyType& dependency, BasicType bt) {
|
||||
switch(bt) {
|
||||
case T_INT:
|
||||
return new CastIINode(c, n, t, dependency);
|
||||
@@ -143,9 +152,9 @@ Node* ConstraintCastNode::make_cast_for_basic_type(Node* c, Node* n, const Type*
|
||||
}
|
||||
|
||||
TypeNode* ConstraintCastNode::dominating_cast(PhaseGVN* gvn, PhaseTransform* pt) const {
|
||||
if (_dependency == UnconditionalDependency) {
|
||||
return nullptr;
|
||||
}
|
||||
// See discussion at definition of ConstraintCastNode::DependencyType: replacing this cast with a dominating one is
|
||||
// not safe if _dependency.narrows_type() is not true.
|
||||
assert(_dependency.narrows_type(), "cast can't be replaced by dominating one");
|
||||
Node* val = in(1);
|
||||
Node* ctl = in(0);
|
||||
int opc = Opcode();
|
||||
@@ -205,30 +214,21 @@ void ConstraintCastNode::dump_spec(outputStream *st) const {
|
||||
st->print(" extra types: ");
|
||||
_extra_types->dump_on(st);
|
||||
}
|
||||
if (_dependency != RegularDependency) {
|
||||
st->print(" %s dependency", _dependency == StrongDependency ? "strong" : "unconditional");
|
||||
}
|
||||
st->print(" ");
|
||||
_dependency.dump_on(st);
|
||||
}
|
||||
#endif
|
||||
|
||||
const Type* CastIINode::Value(PhaseGVN* phase) const {
|
||||
const Type *res = ConstraintCastNode::Value(phase);
|
||||
if (res == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
assert(res->isa_int(), "res must be int");
|
||||
|
||||
// Similar to ConvI2LNode::Value() for the same reasons
|
||||
// see if we can remove type assertion after loop opts
|
||||
res = widen_type(phase, res, T_INT);
|
||||
|
||||
return res;
|
||||
CastIINode* CastIINode::make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const {
|
||||
return new CastIINode(in(0), parent, type, dependency, _range_check_dependency, _extra_types);
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::find_or_make_integer_cast(PhaseIterGVN* igvn, Node* parent, const TypeInteger* type) const {
|
||||
Node* n = clone();
|
||||
n->set_req(1, parent);
|
||||
n->as_ConstraintCast()->set_type(type);
|
||||
CastLLNode* CastLLNode::make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const {
|
||||
return new CastLLNode(in(0), parent, type, dependency, _extra_types);
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::find_or_make_integer_cast(PhaseIterGVN* igvn, Node* parent, const TypeInteger* type, const DependencyType& dependency) const {
|
||||
Node* n = make_with(parent, type, dependency);
|
||||
Node* existing = igvn->hash_find_insert(n);
|
||||
if (existing != nullptr) {
|
||||
n->destruct(igvn);
|
||||
@@ -242,14 +242,13 @@ Node *CastIINode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
if (progress != nullptr) {
|
||||
return progress;
|
||||
}
|
||||
if (can_reshape && !phase->C->post_loop_opts_phase()) {
|
||||
// makes sure we run ::Value to potentially remove type assertion after loop opts
|
||||
if (!phase->C->post_loop_opts_phase()) {
|
||||
// makes sure we run widen_type() to potentially common type assertions after loop opts
|
||||
phase->C->record_for_post_loop_opts_igvn(this);
|
||||
}
|
||||
if (!_range_check_dependency || phase->C->post_loop_opts_phase()) {
|
||||
return optimize_integer_cast(phase, T_INT);
|
||||
}
|
||||
phase->C->record_for_post_loop_opts_igvn(this);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -279,9 +278,9 @@ void CastIINode::dump_spec(outputStream* st) const {
|
||||
#endif
|
||||
|
||||
CastIINode* CastIINode::pin_array_access_node() const {
|
||||
assert(_dependency == RegularDependency, "already pinned");
|
||||
assert(_dependency.is_floating(), "already pinned");
|
||||
if (has_range_check()) {
|
||||
return new CastIINode(in(0), in(1), bottom_type(), StrongDependency, has_range_check());
|
||||
return new CastIINode(in(0), in(1), bottom_type(), _dependency.with_pinned_dependency(), has_range_check());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -315,16 +314,6 @@ void CastIINode::remove_range_check_cast(Compile* C) {
|
||||
}
|
||||
|
||||
|
||||
const Type* CastLLNode::Value(PhaseGVN* phase) const {
|
||||
const Type* res = ConstraintCastNode::Value(phase);
|
||||
if (res == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
assert(res->isa_long(), "res must be long");
|
||||
|
||||
return widen_type(phase, res, T_LONG);
|
||||
}
|
||||
|
||||
bool CastLLNode::is_inner_loop_backedge(ProjNode* proj) {
|
||||
if (proj != nullptr) {
|
||||
Node* ctrl_use = proj->unique_ctrl_out_or_null();
|
||||
@@ -392,7 +381,7 @@ Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
return progress;
|
||||
}
|
||||
if (!phase->C->post_loop_opts_phase()) {
|
||||
// makes sure we run ::Value to potentially remove type assertion after loop opts
|
||||
// makes sure we run widen_type() to potentially common type assertions after loop opts
|
||||
phase->C->record_for_post_loop_opts_igvn(this);
|
||||
}
|
||||
// transform (CastLL (ConvI2L ..)) into (ConvI2L (CastII ..)) if the type of the CastLL is narrower than the type of
|
||||
@@ -543,7 +532,7 @@ Node* CastP2XNode::Identity(PhaseGVN* phase) {
|
||||
return this;
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::make_cast_for_type(Node* c, Node* in, const Type* type, DependencyType dependency,
|
||||
Node* ConstraintCastNode::make_cast_for_type(Node* c, Node* in, const Type* type, const DependencyType& dependency,
|
||||
const TypeTuple* types) {
|
||||
if (type->isa_int()) {
|
||||
return new CastIINode(c, in, type, dependency, false, types);
|
||||
@@ -564,7 +553,7 @@ Node* ConstraintCastNode::make_cast_for_type(Node* c, Node* in, const Type* type
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::optimize_integer_cast(PhaseGVN* phase, BasicType bt) {
|
||||
Node* ConstraintCastNode::optimize_integer_cast_of_add(PhaseGVN* phase, BasicType bt) {
|
||||
PhaseIterGVN *igvn = phase->is_IterGVN();
|
||||
const TypeInteger* this_type = this->type()->isa_integer(bt);
|
||||
if (this_type == nullptr) {
|
||||
@@ -586,8 +575,42 @@ Node* ConstraintCastNode::optimize_integer_cast(PhaseGVN* phase, BasicType bt) {
Node* x = z->in(1);
Node* y = z->in(2);

Node* cx = find_or_make_integer_cast(igvn, x, rx);
Node* cy = find_or_make_integer_cast(igvn, y, ry);
const TypeInteger* tx = phase->type(x)->is_integer(bt);
const TypeInteger* ty = phase->type(y)->is_integer(bt);

// (Cast (Add x y) tz) is transformed into (Add (Cast x rx) (Cast y ry))
//
// tz = [tzlo, tzhi]
// rx = [rxlo, rxhi]
// ry = [rylo, ryhi]
// with type of x, tx = [txlo, txhi]
// with type of y, ty = [tylo, tyhi]
//
// From Compile::push_thru_add():
// rxlo = max(tzlo - tyhi, txlo)
// rxhi = min(tzhi - tylo, txhi)
// rylo = max(tzlo - txhi, tylo)
// ryhi = min(tzhi - txlo, tyhi)
//
// If x is a constant, then txlo = txhi
// rxlo = txlo, rxhi = txhi
// The bounds of the type of the Add after transformation then are:
// rxlo + rylo >= txlo + tzlo - txhi >= tzlo
// rxhi + ryhi <= txhi + tzhi - txlo <= tzhi
// The resulting type is not wider than the type of the Cast
// before transformation
//
// If neither x nor y is constant, then the type of the resulting
// Add can be wider than the type of the Cast before
// transformation.
// For instance, tx = [0, 10], ty = [0, 10], tz = [0, 10]
// then rx = [0, 10], ry = [0, 10]
// and rx + ry = [0, 20] which is wider than tz
//
// The same reasoning applies to (Cast (Sub x y) tz)
const DependencyType& dependency = (!tx->is_con() && !ty->is_con()) ? _dependency.with_non_narrowing() : _dependency;
Node* cx = find_or_make_integer_cast(igvn, x, rx, dependency);
Node* cy = find_or_make_integer_cast(igvn, y, ry, dependency);
if (op == Op_Add(bt)) {
return AddNode::make(cx, cy, bt);
} else {
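A minimal standalone check of the interval reasoning above (Interval and push_thru_add are illustrative stand-ins, not the HotSpot API):

#include <algorithm>
#include <cassert>

struct Interval { long lo, hi; };   // stand-in for a C2 integer type

// rx (or ry) as computed by the push_thru_add() formulas quoted in the comment above:
// the range for one addend, given the cast's range tz and the other addend's range.
Interval push_thru_add(Interval tz, Interval tthis, Interval tother) {
  return { std::max(tz.lo - tother.hi, tthis.lo),
           std::min(tz.hi - tother.lo, tthis.hi) };
}

int main() {
  Interval tx{0, 10}, ty{0, 10}, tz{0, 10};
  Interval rx = push_thru_add(tz, tx, ty);   // [0, 10]
  Interval ry = push_thru_add(tz, ty, tx);   // [0, 10]
  // Neither input is a constant: the pushed-through Add covers [0, 20], wider than tz.
  assert(rx.lo + ry.lo == 0 && rx.hi + ry.hi == 20);
  // If x is the constant 7 (tx = [7, 7]), the Add's range stays within tz.
  Interval cx7{7, 7};
  Interval rx7 = push_thru_add(tz, cx7, ty);  // [7, 7]
  Interval ry7 = push_thru_add(tz, ty, cx7);  // [0, 3]
  assert(rx7.lo + ry7.lo >= tz.lo && rx7.hi + ry7.hi <= tz.hi);
  return 0;
}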
@@ -599,11 +622,26 @@ Node* ConstraintCastNode::optimize_integer_cast(PhaseGVN* phase, BasicType bt) {
return nullptr;
}

const Type* ConstraintCastNode::widen_type(const PhaseGVN* phase, const Type* res, BasicType bt) const {
if (!phase->C->post_loop_opts_phase()) {
Node* ConstraintCastNode::optimize_integer_cast(PhaseGVN* phase, BasicType bt) {
Node* res = optimize_integer_cast_of_add(phase, bt);
if (res != nullptr) {
return res;
}
const Type* t = Value(phase);
if (t != Type::TOP && phase->C->post_loop_opts_phase()) {
const Type* bottom_t = bottom_type();
const TypeInteger* wide_t = widen_type(phase, bottom_t, bt);
if (wide_t != bottom_t) {
// Widening the type of the Cast (to allow some commoning) changes how the Cast can be optimized: if the
// type of its input is narrower than the Cast's type, we must not remove it, or we would lose the control dependency.
return make_with(in(1), wide_t, _dependency.with_non_narrowing());
}
}
return nullptr;
}

const TypeInteger* ConstraintCastNode::widen_type(const PhaseGVN* phase, const Type* res, BasicType bt) const {
const TypeInteger* this_type = res->is_integer(bt);
// At VerifyConstraintCasts == 1, we verify the ConstraintCastNodes that are present during code
// emission. This allows us to detect possible mis-scheduling due to these nodes being pinned at
// the wrong control nodes.
@@ -612,10 +650,9 @@ const Type* ConstraintCastNode::widen_type(const PhaseGVN* phase, const Type* re
// mis-transformations that may happen due to these nodes being pinned at the wrong control
// nodes.
if (VerifyConstraintCasts > 1) {
return res;
return this_type;
}

const TypeInteger* this_type = res->is_integer(bt);
const TypeInteger* in_type = phase->type(in(1))->isa_integer(bt);
if (in_type != nullptr &&
(in_type->lo_as_long() != this_type->lo_as_long() ||
@@ -636,5 +673,5 @@ const Type* ConstraintCastNode::widen_type(const PhaseGVN* phase, const Type* re
MIN2(in_type->hi_as_long(), hi1),
MAX2((int)in_type->_widen, w1), bt);
}
return res;
return this_type;
}

@@ -33,21 +33,119 @@
// cast to a different range
class ConstraintCastNode: public TypeNode {
public:
enum DependencyType {
RegularDependency, // if cast doesn't improve input type, cast can be removed
StrongDependency, // leave cast in even if _type doesn't improve input type, can be replaced by stricter dominating cast if one exist
UnconditionalDependency // leave cast in unconditionally
// Cast nodes are subject to a few optimizations:
//
// 1- if the type carried by the Cast doesn't narrow the type of its input, the cast can be replaced by its input.
// Similarly, if a dominating Cast with the same input and a narrower type constraint is found, it can replace the
// current cast.
//
// 2- if the condition that the Cast is control dependent on is hoisted, the Cast is hoisted as well
//
// 1- and 2- are not always applied, depending on what constraints are applied to the Cast: there are cases where 1-
// and 2- apply, where neither 1- nor 2- apply, and where one or the other applies. This class abstracts away these
// details.
//
// If _narrows_type is true, the cast carries a type dependency: "after" the control the cast is dependent on, its data
// input is known to have a narrower type (stored in the cast node itself). Optimization 1- above only applies to cast
// nodes for which _narrows_type is true.
// If _floating is true, the cast only depends on a single control: its control input. Otherwise, it is pinned at its
// current location. Optimization 2- only applies to cast nodes for which _floating is true.
// _floating here is similar to Node::depends_only_on_test().
// All 4 combinations of _narrows_type/_floating true/false have uses. See below, at the end of this class
// definition, for examples.
class DependencyType {
private:
const bool _floating; // Does this Cast depend on its control input, or is it pinned?
const bool _narrows_type; // Does this Cast narrow the type, i.e. if the input type is narrower, can it be removed?
const char* _desc;
DependencyType(bool depends_on_test, bool narrows_type, const char* desc)
: _floating(depends_on_test),
_narrows_type(narrows_type),
_desc(desc) {
}
NONCOPYABLE(DependencyType);

public:

bool is_floating() const {
return _floating;
}

bool narrows_type() const {
return _narrows_type;
}

void dump_on(outputStream *st) const {
st->print("%s", _desc);
}

uint hash() const {
return (_floating ? 1 : 0) + (_narrows_type ? 2 : 0);
}

bool cmp(const DependencyType& other) const {
return _floating == other._floating && _narrows_type == other._narrows_type;
}

const DependencyType& with_non_narrowing() const {
if (_floating) {
return FloatingNonNarrowing;
}
return NonFloatingNonNarrowing;
}

const DependencyType& with_pinned_dependency() const {
if (_narrows_type) {
return NonFloatingNarrowing;
}
return NonFloatingNonNarrowing;
}

// All the possible combinations of floating/narrowing with example use cases:

// Use case example: Range Check CastII
// Floating: The Cast is only dependent on the single range check. If the range check was ever to be hoisted, it
// would be safe to let the Cast float to where the range check is hoisted up to.
// Narrowing: The Cast narrows the type to a positive index. If the input to the Cast is narrower, we can safely
// remove the cast because the array access will be safe.
static const DependencyType FloatingNarrowing;
// Use case example: Widening Cast nodes' types after loop opts: We want to common Casts with slightly different types.
// Floating: These Casts only depend on the single control.
// NonNarrowing: Even when the input type is narrower, we are not removing the Cast. Otherwise, the dependency
// on the single control is lost, and an array access could float above its range check because we
// just removed the dependency on the range check by removing the Cast. This could lead to an
// out-of-bounds access.
static const DependencyType FloatingNonNarrowing;
// Use case example: An array access that is no longer dependent on a single range check (e.g. range check smearing).
// NonFloating: The array access must be pinned below all the checks it depends on. If the check it directly depends
// on with a control input is hoisted, we do not hoist the Cast as well. If we allowed the Cast to float,
// we risk that the array access ends up above another check it depends on (we cannot model two control
// dependencies for a node in the IR). This could lead to an out-of-bounds access.
// Narrowing: If the Cast does not narrow the input type, then it's safe to remove the cast because the array access
// will be safe.
static const DependencyType NonFloatingNarrowing;
// Use case example: Sinking nodes out of a loop
// Non-Floating & Non-Narrowing: We don't want the Cast that forces the node to be out of the loop to be removed in any
// case. Otherwise, the sunk node could float back into the loop, undoing the sinking.
// This Cast is only used for pinning, without caring about narrowing types.
static const DependencyType NonFloatingNonNarrowing;

};

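// Illustrative only (hypothetical variable names, not part of the patch): how call sites in this
// change pick one of the four dependency kinds:
//
//   // Range-check CastII: may float with its check, and may be removed if the index type is already narrow enough.
//   Node* idx_cast = new CastIINode(rc_ctrl, index, idx_type, ConstraintCastNode::DependencyType::FloatingNarrowing);
//
//   // Cast used only to pin a node sunk out of a loop: must neither float nor be removed.
//   Node* pin_cast = ConstraintCastNode::make_cast_for_type(x_ctrl, in, in_t,
//                      ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, nullptr);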
protected:
|
||||
const DependencyType _dependency;
|
||||
protected:
|
||||
const DependencyType& _dependency;
|
||||
virtual bool cmp( const Node &n ) const;
|
||||
virtual uint size_of() const;
|
||||
virtual uint hash() const; // Check the type
|
||||
const Type* widen_type(const PhaseGVN* phase, const Type* res, BasicType bt) const;
|
||||
Node* find_or_make_integer_cast(PhaseIterGVN* igvn, Node* parent, const TypeInteger* type) const;
|
||||
const TypeInteger* widen_type(const PhaseGVN* phase, const Type* res, BasicType bt) const;
|
||||
|
||||
virtual ConstraintCastNode* make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const {
|
||||
ShouldNotReachHere(); // Only implemented for CastII and CastLL
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* find_or_make_integer_cast(PhaseIterGVN* igvn, Node* parent, const TypeInteger* type, const DependencyType& dependency) const;
|
||||
|
||||
private:
|
||||
// PhiNode::Ideal() transforms a Phi that merges a single uncasted value into a single cast pinned at the region.
|
||||
// The types of cast nodes eliminated as a consequence of this transformation are collected and stored here so the
|
||||
// type dependencies carried by the cast are known. The cast can then be eliminated if the type of its input is
|
||||
@@ -55,7 +153,7 @@ public:
|
||||
const TypeTuple* _extra_types;
|
||||
|
||||
public:
|
||||
ConstraintCastNode(Node* ctrl, Node* n, const Type* t, ConstraintCastNode::DependencyType dependency,
|
||||
ConstraintCastNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency,
|
||||
const TypeTuple* extra_types)
|
||||
: TypeNode(t,2), _dependency(dependency), _extra_types(extra_types) {
|
||||
init_class_id(Class_ConstraintCast);
|
||||
@@ -67,18 +165,21 @@ public:
|
||||
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const = 0;
|
||||
virtual bool depends_only_on_test() const { return _dependency == RegularDependency; }
|
||||
bool carry_dependency() const { return _dependency != RegularDependency; }
|
||||
bool carry_dependency() const { return !_dependency.cmp(DependencyType::FloatingNarrowing); }
|
||||
// A cast node depends_only_on_test if and only if it is floating
|
||||
virtual bool depends_only_on_test() const { return _dependency.is_floating(); }
|
||||
const DependencyType& dependency() const { return _dependency; }
|
||||
TypeNode* dominating_cast(PhaseGVN* gvn, PhaseTransform* pt) const;
|
||||
static Node* make_cast_for_basic_type(Node* c, Node* n, const Type* t, DependencyType dependency, BasicType bt);
|
||||
static Node* make_cast_for_basic_type(Node* c, Node* n, const Type* t, const DependencyType& dependency, BasicType bt);
|
||||
|
||||
#ifndef PRODUCT
|
||||
virtual void dump_spec(outputStream *st) const;
|
||||
#endif
|
||||
|
||||
static Node* make_cast_for_type(Node* c, Node* in, const Type* type, DependencyType dependency,
|
||||
static Node* make_cast_for_type(Node* c, Node* in, const Type* type, const DependencyType& dependency,
|
||||
const TypeTuple* types);
|
||||
|
||||
Node* optimize_integer_cast_of_add(PhaseGVN* phase, BasicType bt);
|
||||
Node* optimize_integer_cast(PhaseGVN* phase, BasicType bt);
|
||||
|
||||
bool higher_equal_types(PhaseGVN* phase, const Node* other) const;
|
||||
@@ -102,7 +203,7 @@ class CastIINode: public ConstraintCastNode {
|
||||
virtual uint size_of() const;
|
||||
|
||||
public:
|
||||
CastIINode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, bool range_check_dependency = false, const TypeTuple* types = nullptr)
|
||||
CastIINode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, bool range_check_dependency = false, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types), _range_check_dependency(range_check_dependency) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastII);
|
||||
@@ -110,7 +211,7 @@ class CastIINode: public ConstraintCastNode {
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const { return Op_RegI; }
|
||||
virtual Node* Identity(PhaseGVN* phase);
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
|
||||
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
|
||||
bool has_range_check() const {
|
||||
#ifdef _LP64
|
||||
@@ -122,6 +223,7 @@ class CastIINode: public ConstraintCastNode {
|
||||
}
|
||||
|
||||
CastIINode* pin_array_access_node() const;
|
||||
CastIINode* make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const;
|
||||
void remove_range_check_cast(Compile* C);
|
||||
|
||||
#ifndef PRODUCT
|
||||
@@ -131,14 +233,12 @@ class CastIINode: public ConstraintCastNode {
|
||||
|
||||
class CastLLNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastLLNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastLLNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastLL);
|
||||
}
|
||||
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
|
||||
static bool is_inner_loop_backedge(ProjNode* proj);
|
||||
|
||||
static bool cmp_used_at_inner_loop_exit_test(CmpNode* cmp);
|
||||
@@ -147,11 +247,12 @@ public:
|
||||
virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const { return Op_RegL; }
|
||||
CastLLNode* make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const;
|
||||
};
|
||||
|
||||
class CastHHNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastHHNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastHHNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastHH);
|
||||
@@ -162,7 +263,7 @@ public:
|
||||
|
||||
class CastFFNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastFFNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastFFNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastFF);
|
||||
@@ -173,7 +274,7 @@ public:
|
||||
|
||||
class CastDDNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastDDNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastDDNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastDD);
|
||||
@@ -184,7 +285,7 @@ public:
|
||||
|
||||
class CastVVNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastVVNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastVVNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastVV);
|
||||
@@ -198,7 +299,7 @@ public:
|
||||
// cast pointer to pointer (different type)
|
||||
class CastPPNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastPPNode (Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastPPNode (Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
init_class_id(Class_CastPP);
|
||||
}
|
||||
@@ -210,7 +311,7 @@ class CastPPNode: public ConstraintCastNode {
|
||||
// for _checkcast, cast pointer to pointer (different type), without JOIN,
|
||||
class CheckCastPPNode: public ConstraintCastNode {
|
||||
public:
|
||||
CheckCastPPNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CheckCastPPNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CheckCastPP);
|
||||
|
||||
@@ -2192,7 +2192,7 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
if (phi_type->isa_ptr()) {
|
||||
const Type* uin_type = phase->type(uin);
|
||||
if (!phi_type->isa_oopptr() && !uin_type->isa_oopptr()) {
|
||||
cast = new CastPPNode(r, uin, phi_type, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = new CastPPNode(r, uin, phi_type, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
} else {
|
||||
// Use a CastPP for a cast to not null and a CheckCastPP for
|
||||
// a cast to a new klass (and both if both null-ness and
|
||||
@@ -2202,7 +2202,7 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
// null, uin's type must be casted to not null
|
||||
if (phi_type->join(TypePtr::NOTNULL) == phi_type->remove_speculative() &&
|
||||
uin_type->join(TypePtr::NOTNULL) != uin_type->remove_speculative()) {
|
||||
cast = new CastPPNode(r, uin, TypePtr::NOTNULL, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = new CastPPNode(r, uin, TypePtr::NOTNULL, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
}
|
||||
|
||||
// If the type of phi and uin, both casted to not null,
|
||||
@@ -2214,14 +2214,14 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
cast = phase->transform(cast);
|
||||
n = cast;
|
||||
}
|
||||
cast = new CheckCastPPNode(r, n, phi_type, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = new CheckCastPPNode(r, n, phi_type, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
}
|
||||
if (cast == nullptr) {
|
||||
cast = new CastPPNode(r, uin, phi_type, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = new CastPPNode(r, uin, phi_type, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cast = ConstraintCastNode::make_cast_for_type(r, uin, phi_type, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = ConstraintCastNode::make_cast_for_type(r, uin, phi_type, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
}
|
||||
assert(cast != nullptr, "cast should be set");
|
||||
cast = phase->transform(cast);
|
||||
|
||||
@@ -1726,8 +1726,6 @@ Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_cr
|
||||
}
|
||||
if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
if (flat->offset() == in_bytes(Klass::access_flags_offset()))
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
if (flat->offset() == in_bytes(Klass::misc_flags_offset()))
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
if (flat->offset() == in_bytes(Klass::java_mirror_offset()))
|
||||
@@ -1735,6 +1733,12 @@ Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_cr
|
||||
if (flat->offset() == in_bytes(Klass::secondary_super_cache_offset()))
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
}
|
||||
|
||||
if (flat->isa_instklassptr()) {
|
||||
if (flat->offset() == in_bytes(InstanceKlass::access_flags_offset())) {
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
}
|
||||
}
|
||||
// %%% (We would like to finalize JavaThread::threadObj_offset(),
|
||||
// but the base pointer type is not distinctive enough to identify
|
||||
// references into JavaThread.)
|
||||
@@ -4578,7 +4582,7 @@ Node* Compile::constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt*
|
||||
// node from floating above the range check during loop optimizations. Otherwise, the
|
||||
// ConvI2L node may be eliminated independently of the range check, causing the data path
|
||||
// to become TOP while the control path is still there (although it's unreachable).
|
||||
value = new CastIINode(ctrl, value, itype, carry_dependency ? ConstraintCastNode::StrongDependency : ConstraintCastNode::RegularDependency, true /* range check dependency */);
|
||||
value = new CastIINode(ctrl, value, itype, carry_dependency ? ConstraintCastNode::DependencyType::NonFloatingNarrowing : ConstraintCastNode::DependencyType::FloatingNarrowing, true /* range check dependency */);
|
||||
value = phase->transform(value);
|
||||
}
|
||||
const TypeLong* ltype = TypeLong::make(itype->_lo, itype->_hi, itype->_widen);
|
||||
|
||||
@@ -984,7 +984,8 @@ public:
|
||||
JVMState* jvms, bool allow_inline, float profile_factor, ciKlass* speculative_receiver_type = nullptr,
|
||||
bool allow_intrinsics = true);
|
||||
bool should_delay_inlining(ciMethod* call_method, JVMState* jvms) {
|
||||
return should_delay_string_inlining(call_method, jvms) ||
|
||||
return C->directive()->should_delay_inline(call_method) ||
|
||||
should_delay_string_inlining(call_method, jvms) ||
|
||||
should_delay_boxing_inlining(call_method, jvms) ||
|
||||
should_delay_vector_inlining(call_method, jvms);
|
||||
}
|
||||
|
||||
@@ -26,97 +26,114 @@
|
||||
#include "opto/opcodes.hpp"
|
||||
#include "opto/phaseX.hpp"
|
||||
#include "opto/type.hpp"
|
||||
#include "utilities/count_leading_zeros.hpp"
|
||||
#include "utilities/count_trailing_zeros.hpp"
|
||||
#include "utilities/population_count.hpp"
|
||||
|
||||
static int count_leading_zeros_int(jint i) {
|
||||
return i == 0 ? BitsPerInt : count_leading_zeros(i);
|
||||
}
|
||||
|
||||
static int count_leading_zeros_long(jlong l) {
|
||||
return l == 0 ? BitsPerLong : count_leading_zeros(l);
|
||||
}
|
||||
|
||||
static int count_trailing_zeros_int(jint i) {
|
||||
return i == 0 ? BitsPerInt : count_trailing_zeros(i);
|
||||
}
|
||||
|
||||
static int count_trailing_zeros_long(jlong l) {
|
||||
return l == 0 ? BitsPerLong : count_trailing_zeros(l);
|
||||
}
|
||||
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* CountLeadingZerosINode::Value(PhaseGVN* phase) const {
|
||||
const Type* t = phase->type(in(1));
|
||||
if (t == Type::TOP) return Type::TOP;
|
||||
const TypeInt* ti = t->isa_int();
|
||||
if (ti && ti->is_con()) {
|
||||
jint i = ti->get_con();
|
||||
// HD, Figure 5-6
|
||||
if (i == 0)
|
||||
return TypeInt::make(BitsPerInt);
|
||||
int n = 1;
|
||||
unsigned int x = i;
|
||||
if (x >> 16 == 0) { n += 16; x <<= 16; }
|
||||
if (x >> 24 == 0) { n += 8; x <<= 8; }
|
||||
if (x >> 28 == 0) { n += 4; x <<= 4; }
|
||||
if (x >> 30 == 0) { n += 2; x <<= 2; }
|
||||
n -= x >> 31;
|
||||
return TypeInt::make(n);
|
||||
if (t == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
return TypeInt::INT;
|
||||
|
||||
// To minimize `count_leading_zeros(x)`, we should make the highest 1 bit in x
// as far to the left as possible. A bit in x can be 1 iff this bit is not
// forced to be 0, i.e. the corresponding bit in `x._bits._zeros` is 0. Thus:
// min(clz(x)) = number of bits to the left of the highest 0 bit in x._bits._zeros
// = count_leading_ones(x._bits._zeros) = clz(~x._bits._zeros)
//
// To maximize `count_leading_zeros(x)`, we should make the leading zeros as
// many as possible. A bit in x can be 0 iff this bit is not forced to be 1,
// i.e. the corresponding bit in `x._bits._ones` is 0. Thus:
// max(clz(x)) = clz(x._bits._ones)
//
// Therefore, the range of `count_leading_zeros(x)` is:
// [clz(~x._bits._zeros), clz(x._bits._ones)]
//
// A more detailed proof using Z3 can be found at:
// https://github.com/openjdk/jdk/pull/25928#discussion_r2256750507
const TypeInt* ti = t->is_int();
return TypeInt::make(count_leading_zeros_int(~ti->_bits._zeros),
count_leading_zeros_int(ti->_bits._ones),
ti->_widen);
}
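A short standalone C++20 check of this derivation (KnownBits32 is an illustrative stand-in for the C2 known-bits representation, not the real type):

#include <bit>
#include <cassert>
#include <cstdint>

struct KnownBits32 {
  uint32_t zeros;  // bit set => that bit of the value is known to be 0
  uint32_t ones;   // bit set => that bit of the value is known to be 1
};

// Range of count_leading_zeros over all values compatible with the known bits.
int clz_min(KnownBits32 k) { return std::countl_one(k.zeros); }   // = clz(~zeros)
int clz_max(KnownBits32 k) { return std::countl_zero(k.ones); }

int main() {
  // Bits 31..28 known to be 0, bit 3 known to be 1, everything else unknown.
  KnownBits32 k{0xF0000000u, 0x00000008u};
  assert(clz_min(k) == 4);   // best case: bit 27 may be 1
  assert(clz_max(k) == 28);  // worst case: only bit 3 is set
  return 0;
}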
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* CountLeadingZerosLNode::Value(PhaseGVN* phase) const {
|
||||
const Type* t = phase->type(in(1));
|
||||
if (t == Type::TOP) return Type::TOP;
|
||||
const TypeLong* tl = t->isa_long();
|
||||
if (tl && tl->is_con()) {
|
||||
jlong l = tl->get_con();
|
||||
// HD, Figure 5-6
|
||||
if (l == 0)
|
||||
return TypeInt::make(BitsPerLong);
|
||||
int n = 1;
|
||||
unsigned int x = (((julong) l) >> 32);
|
||||
if (x == 0) { n += 32; x = (int) l; }
|
||||
if (x >> 16 == 0) { n += 16; x <<= 16; }
|
||||
if (x >> 24 == 0) { n += 8; x <<= 8; }
|
||||
if (x >> 28 == 0) { n += 4; x <<= 4; }
|
||||
if (x >> 30 == 0) { n += 2; x <<= 2; }
|
||||
n -= x >> 31;
|
||||
return TypeInt::make(n);
|
||||
if (t == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
return TypeInt::INT;
|
||||
|
||||
// The proof of correctness is the same as in the comments
// in `CountLeadingZerosINode::Value` above.
const TypeLong* tl = t->is_long();
|
||||
return TypeInt::make(count_leading_zeros_long(~tl->_bits._zeros),
|
||||
count_leading_zeros_long(tl->_bits._ones),
|
||||
tl->_widen);
|
||||
}
|
||||
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* CountTrailingZerosINode::Value(PhaseGVN* phase) const {
|
||||
const Type* t = phase->type(in(1));
|
||||
if (t == Type::TOP) return Type::TOP;
|
||||
const TypeInt* ti = t->isa_int();
|
||||
if (ti && ti->is_con()) {
|
||||
jint i = ti->get_con();
|
||||
// HD, Figure 5-14
|
||||
int y;
|
||||
if (i == 0)
|
||||
return TypeInt::make(BitsPerInt);
|
||||
int n = 31;
|
||||
y = i << 16; if (y != 0) { n = n - 16; i = y; }
|
||||
y = i << 8; if (y != 0) { n = n - 8; i = y; }
|
||||
y = i << 4; if (y != 0) { n = n - 4; i = y; }
|
||||
y = i << 2; if (y != 0) { n = n - 2; i = y; }
|
||||
y = i << 1; if (y != 0) { n = n - 1; }
|
||||
return TypeInt::make(n);
|
||||
if (t == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
return TypeInt::INT;
|
||||
|
||||
// To minimize `count_trailing_zeros(x)`, we should make the lowest 1 bit in x
// as far to the right as possible. A bit in x can be 1 iff this bit is not
// forced to be 0, i.e. the corresponding bit in `x._bits._zeros` is 0. Thus:
// min(ctz(x)) = number of bits to the right of the lowest 0 bit in x._bits._zeros
// = count_trailing_ones(x._bits._zeros) = ctz(~x._bits._zeros)
//
// To maximize `count_trailing_zeros(x)`, we should make the trailing zeros as
// many as possible. A bit in x can be 0 iff this bit is not forced to be 1,
// i.e. the corresponding bit in `x._bits._ones` is 0. Thus:
// max(ctz(x)) = ctz(x._bits._ones)
//
// Therefore, the range of `count_trailing_zeros(x)` is:
// [ctz(~x._bits._zeros), ctz(x._bits._ones)]
//
// A more detailed proof using Z3 can be found at:
// https://github.com/openjdk/jdk/pull/25928#discussion_r2256750507
const TypeInt* ti = t->is_int();
return TypeInt::make(count_trailing_zeros_int(~ti->_bits._zeros),
count_trailing_zeros_int(ti->_bits._ones),
ti->_widen);
}
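The trailing-zero analogue, again as a standalone check rather than HotSpot code:

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  // Illustrative known bits: low 4 bits known to be 0, bit 8 known to be 1, rest unknown.
  uint32_t zeros = 0x0000000Fu, ones = 0x00000100u;
  assert(std::countr_one(zeros) == 4);   // min(ctz): bit 4 may be 1
  assert(std::countr_zero(ones) == 8);   // max(ctz): bit 8 is the lowest guaranteed 1
  return 0;
}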
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* CountTrailingZerosLNode::Value(PhaseGVN* phase) const {
|
||||
const Type* t = phase->type(in(1));
|
||||
if (t == Type::TOP) return Type::TOP;
|
||||
const TypeLong* tl = t->isa_long();
|
||||
if (tl && tl->is_con()) {
|
||||
jlong l = tl->get_con();
|
||||
// HD, Figure 5-14
|
||||
int x, y;
|
||||
if (l == 0)
|
||||
return TypeInt::make(BitsPerLong);
|
||||
int n = 63;
|
||||
y = (int) l; if (y != 0) { n = n - 32; x = y; } else x = (((julong) l) >> 32);
|
||||
y = x << 16; if (y != 0) { n = n - 16; x = y; }
|
||||
y = x << 8; if (y != 0) { n = n - 8; x = y; }
|
||||
y = x << 4; if (y != 0) { n = n - 4; x = y; }
|
||||
y = x << 2; if (y != 0) { n = n - 2; x = y; }
|
||||
y = x << 1; if (y != 0) { n = n - 1; }
|
||||
return TypeInt::make(n);
|
||||
if (t == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
return TypeInt::INT;
|
||||
|
||||
// The proof of correctness is the same as in the comments
// in `CountTrailingZerosINode::Value` above.
const TypeLong* tl = t->is_long();
|
||||
return TypeInt::make(count_trailing_zeros_long(~tl->_bits._zeros),
|
||||
count_trailing_zeros_long(tl->_bits._ones),
|
||||
tl->_widen);
|
||||
}
|
||||
|
||||
// We use the KnownBits information from the integer types to derive how many one bits
|
||||
// we have at least and at most.
|
||||
// From the definition of KnownBits, we know:
|
||||
|
||||
@@ -192,7 +192,7 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
|
||||
// Try inlining a bytecoded method:
|
||||
if (!call_does_dispatch) {
|
||||
InlineTree* ilt = InlineTree::find_subtree_from_root(this->ilt(), jvms->caller(), jvms->method());
|
||||
bool should_delay = C->should_delay_inlining();
|
||||
bool should_delay = C->should_delay_inlining() || C->directive()->should_delay_inline(callee);
|
||||
if (ilt->ok_to_inline(callee, jvms, profile, should_delay)) {
|
||||
CallGenerator* cg = CallGenerator::for_inline(callee, expected_uses);
|
||||
// For optimized virtual calls assert at runtime that receiver object
|
||||
|
||||
@@ -748,7 +748,7 @@ Node* ConnectionGraph::specialize_castpp(Node* castpp, Node* base, Node* current
|
||||
_igvn->_worklist.push(current_control);
|
||||
_igvn->_worklist.push(control_successor);
|
||||
|
||||
return _igvn->transform(ConstraintCastNode::make_cast_for_type(not_eq_control, base, _igvn->type(castpp), ConstraintCastNode::UnconditionalDependency, nullptr));
|
||||
return _igvn->transform(ConstraintCastNode::make_cast_for_type(not_eq_control, base, _igvn->type(castpp), ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, nullptr));
|
||||
}
|
||||
|
||||
Node* ConnectionGraph::split_castpp_load_through_phi(Node* curr_addp, Node* curr_load, Node* region, GrowableArray<Node*>* bases_for_loads, GrowableArray<Node *> &alloc_worklist) {
|
||||
@@ -1235,7 +1235,7 @@ bool ConnectionGraph::reduce_phi_on_safepoints_helper(Node* ophi, Node* cast, No
|
||||
Node* nsr_merge_pointer = ophi;
|
||||
if (cast != nullptr) {
|
||||
const Type* new_t = merge_t->meet(TypePtr::NULL_PTR);
|
||||
nsr_merge_pointer = _igvn->transform(ConstraintCastNode::make_cast_for_type(cast->in(0), cast->in(1), new_t, ConstraintCastNode::RegularDependency, nullptr));
|
||||
nsr_merge_pointer = _igvn->transform(ConstraintCastNode::make_cast_for_type(cast->in(0), cast->in(1), new_t, ConstraintCastNode::DependencyType::FloatingNarrowing, nullptr));
|
||||
}
|
||||
|
||||
for (uint spi = 0; spi < safepoints.size(); spi++) {
|
||||
@@ -1376,7 +1376,7 @@ void ConnectionGraph::reset_scalar_replaceable_entries(PhiNode* ophi) {
|
||||
}
|
||||
|
||||
if (change) {
|
||||
Node* new_cast = ConstraintCastNode::make_cast_for_type(out->in(0), out->in(1), out_new_t, ConstraintCastNode::StrongDependency, nullptr);
|
||||
Node* new_cast = ConstraintCastNode::make_cast_for_type(out->in(0), out->in(1), out_new_t, ConstraintCastNode::DependencyType::NonFloatingNarrowing, nullptr);
|
||||
_igvn->replace_node(out, new_cast);
|
||||
_igvn->register_new_node_with_optimizer(new_cast);
|
||||
}
|
||||
|
||||
@@ -1183,7 +1183,7 @@ bool LibraryCallKit::inline_preconditions_checkIndex(BasicType bt) {
|
||||
jlong upper_bound = _gvn.type(length)->is_integer(bt)->hi_as_long();
|
||||
Node* casted_length = ConstraintCastNode::make_cast_for_basic_type(
|
||||
control(), length, TypeInteger::make(0, upper_bound, Type::WidenMax, bt),
|
||||
ConstraintCastNode::RegularDependency, bt);
|
||||
ConstraintCastNode::DependencyType::FloatingNarrowing, bt);
|
||||
casted_length = _gvn.transform(casted_length);
|
||||
replace_in_map(length, casted_length);
|
||||
length = casted_length;
|
||||
@@ -1213,7 +1213,7 @@ bool LibraryCallKit::inline_preconditions_checkIndex(BasicType bt) {
|
||||
// index is now known to be >= 0 and < length, cast it
|
||||
Node* result = ConstraintCastNode::make_cast_for_basic_type(
|
||||
control(), index, TypeInteger::make(0, upper_bound, Type::WidenMax, bt),
|
||||
ConstraintCastNode::RegularDependency, bt);
|
||||
ConstraintCastNode::DependencyType::FloatingNarrowing, bt);
|
||||
result = _gvn.transform(result);
|
||||
set_result(result);
|
||||
replace_in_map(index, result);
|
||||
@@ -4020,7 +4020,7 @@ Node* LibraryCallKit::generate_klass_flags_guard(Node* kls, int modifier_mask, i
|
||||
}
|
||||
Node* LibraryCallKit::generate_interface_guard(Node* kls, RegionNode* region) {
|
||||
return generate_klass_flags_guard(kls, JVM_ACC_INTERFACE, 0, region,
|
||||
Klass::access_flags_offset(), TypeInt::CHAR, T_CHAR);
|
||||
InstanceKlass::access_flags_offset(), TypeInt::CHAR, T_CHAR);
|
||||
}
|
||||
|
||||
// Use this for testing if Klass is_hidden, has_finalizer, and is_cloneable_fast.
|
||||
@@ -4132,12 +4132,16 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
|
||||
// Arrays store an intermediate super as _super, but must report Object.
|
||||
// Other types can report the actual _super.
|
||||
// (To verify this code sequence, check the asserts in JVM_IsInterface.)
|
||||
if (generate_interface_guard(kls, region) != nullptr)
|
||||
// A guard was added. If the guard is taken, it was an interface.
|
||||
phi->add_req(null());
|
||||
if (generate_array_guard(kls, region) != nullptr)
|
||||
if (generate_array_guard(kls, region) != nullptr) {
|
||||
// A guard was added. If the guard is taken, it was an array.
|
||||
phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
|
||||
}
|
||||
// Check for interface after array since this checks AccessFlags offset into InstanceKlass.
|
||||
// In other words, we are accessing subtype-specific information, so we need to determine the subtype first.
|
||||
if (generate_interface_guard(kls, region) != nullptr) {
|
||||
// A guard was added. If the guard is taken, it was an interface.
|
||||
phi->add_req(null());
|
||||
}
|
||||
// If we fall through, it's a plain class. Get its _super.
|
||||
p = basic_plus_adr(kls, in_bytes(Klass::super_offset()));
|
||||
kls = _gvn.transform(LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeInstKlassPtr::OBJECT_OR_NULL));
|
||||
|
||||
@@ -1366,7 +1366,7 @@ Node *PhaseIdealLoop::clone_up_backedge_goo(Node *back_ctrl, Node *preheader_ctr
|
||||
// the backedge of the main or post loop is removed, a Div node won't be able to float above the zero trip guard of the
|
||||
// loop and can't execute even if the loop is not reached.
|
||||
void PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, CountedLoopNode* loop) {
|
||||
Node* castii = new CastIINode(ctrl, incr, TypeInt::INT, ConstraintCastNode::UnconditionalDependency);
|
||||
Node* castii = new CastIINode(ctrl, incr, TypeInt::INT, ConstraintCastNode::DependencyType::NonFloatingNonNarrowing);
|
||||
register_new_node(castii, ctrl);
|
||||
Node* phi = loop->phi();
|
||||
assert(phi->in(LoopNode::EntryControl) == incr, "replacing wrong input?");
|
||||
@@ -3262,7 +3262,7 @@ bool IdealLoopTree::do_remove_empty_loop(PhaseIdealLoop *phase) {
|
||||
Node* cast_ii = ConstraintCastNode::make_cast_for_basic_type(
|
||||
cl->in(LoopNode::EntryControl), exact_limit,
|
||||
phase->_igvn.type(exact_limit),
|
||||
ConstraintCastNode::UnconditionalDependency, T_INT);
|
||||
ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, T_INT);
|
||||
phase->register_new_node(cast_ii, cl->in(LoopNode::EntryControl));
|
||||
|
||||
Node* final_iv = new SubINode(cast_ii, cl->stride());
|
||||
|
||||
@@ -1001,7 +1001,7 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
|
||||
// a negative stride). We add a CastII here to guarantee that, when the counted loop is created in a subsequent loop
|
||||
// opts pass, an accurate range of values for the limits is found.
|
||||
const TypeInt* inner_iters_actual_int_range = TypeInt::make(0, iters_limit, Type::WidenMin);
|
||||
inner_iters_actual_int = new CastIINode(outer_head, inner_iters_actual_int, inner_iters_actual_int_range, ConstraintCastNode::UnconditionalDependency);
|
||||
inner_iters_actual_int = new CastIINode(outer_head, inner_iters_actual_int, inner_iters_actual_int_range, ConstraintCastNode::DependencyType::NonFloatingNonNarrowing);
|
||||
_igvn.register_new_node_with_optimizer(inner_iters_actual_int);
|
||||
} else {
|
||||
inner_iters_actual_int = inner_iters_actual;
|
||||
@@ -1315,7 +1315,7 @@ bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint strid
|
||||
register_new_node(bol, iff->in(0));
|
||||
new_limit = ConstraintCastNode::make_cast_for_basic_type(new_predicate_proj, new_limit,
|
||||
TypeInteger::make(1, iters_limit_long, Type::WidenMin, bt),
|
||||
ConstraintCastNode::UnconditionalDependency, bt);
|
||||
ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, bt);
|
||||
register_new_node(new_limit, new_predicate_proj);
|
||||
|
||||
#ifndef PRODUCT
|
||||
@@ -1334,7 +1334,7 @@ bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint strid
|
||||
const TypeLong* new_limit_t = new_limit->Value(&_igvn)->is_long();
|
||||
new_limit = ConstraintCastNode::make_cast_for_basic_type(predicates.entry(), new_limit,
|
||||
TypeLong::make(0, new_limit_t->_hi, new_limit_t->_widen),
|
||||
ConstraintCastNode::UnconditionalDependency, bt);
|
||||
ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, bt);
|
||||
register_new_node(new_limit, predicates.entry());
|
||||
} else {
|
||||
assert(bt == T_INT && known_short_running_loop, "only CountedLoop statically known to be short running");
|
||||
|
||||
@@ -1174,7 +1174,7 @@ Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
|
||||
if ( nn ) return nn;
|
||||
}
|
||||
|
||||
if (n->is_ConstraintCast()) {
|
||||
if (n->is_ConstraintCast() && n->as_ConstraintCast()->dependency().narrows_type()) {
|
||||
Node* dom_cast = n->as_ConstraintCast()->dominating_cast(&_igvn, this);
|
||||
// ConstraintCastNode::dominating_cast() uses node control input to determine domination.
|
||||
// Node control inputs don't necessarily agree with loop control info (due to
|
||||
@@ -1837,7 +1837,7 @@ void PhaseIdealLoop::try_sink_out_of_loop(Node* n) {
|
||||
if (in != nullptr && ctrl_is_member(n_loop, in)) {
|
||||
const Type* in_t = _igvn.type(in);
|
||||
cast = ConstraintCastNode::make_cast_for_type(x_ctrl, in, in_t,
|
||||
ConstraintCastNode::UnconditionalDependency, nullptr);
|
||||
ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, nullptr);
|
||||
}
|
||||
if (cast != nullptr) {
|
||||
Node* prev = _igvn.hash_find_insert(cast);
|
||||
|
||||
@@ -1914,7 +1914,8 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
|
||||
transform_later(cache_adr);
|
||||
cache_adr = new CastP2XNode(needgc_false, cache_adr);
|
||||
transform_later(cache_adr);
|
||||
// Address is aligned to execute prefetch to the beginning of cache line size.
|
||||
// Address is aligned to execute prefetch to the beginning of cache line size
|
||||
// (it is important when BIS instruction is used on SPARC as prefetch).
|
||||
Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
|
||||
cache_adr = new AndXNode(cache_adr, mask);
|
||||
transform_later(cache_adr);
|
||||
|
||||
@@ -233,7 +233,7 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode
|
||||
Node* inline_block = generate_guard(ctrl, bol_le, nullptr, PROB_FAIR);
|
||||
Node* stub_block = *ctrl;
|
||||
|
||||
Node* casted_length = new CastLLNode(inline_block, length, inline_range, ConstraintCastNode::RegularDependency);
|
||||
Node* casted_length = new CastLLNode(inline_block, length, inline_range, ConstraintCastNode::DependencyType::FloatingNarrowing);
|
||||
transform_later(casted_length);
|
||||
Node* mask_gen = VectorMaskGenNode::make(casted_length, type);
|
||||
transform_later(mask_gen);
|
||||
|
||||
@@ -329,6 +329,10 @@ public:
|
||||
|
||||
static bool match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt);
|
||||
|
||||
// Determines if a vector operation needs to be partially implemented with a mask
// ensuring that only the lanes in the range [0, vector_length) are processed. This applies
// to operations whose vector length is less than the hardware-supported maximum
// vector length. Returns true if the operation requires masking, false otherwise.
static bool vector_needs_partial_operations(Node* node, const TypeVect* vt);

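A minimal sketch of the lane-masking idea (partial_lane_mask is a made-up helper, not the Matcher API):

#include <cassert>
#include <cstdint>

// Build a lane mask enabling lanes [0, vlen) out of at most 64 lanes.
uint64_t partial_lane_mask(int vlen) {
  assert(vlen >= 0 && vlen <= 64);
  return vlen == 64 ? ~uint64_t{0} : ((uint64_t{1} << vlen) - 1);
}
// Example: an 8-lane operation on hardware whose widest vector has 16 lanes
// runs under partial_lane_mask(8) == 0xFF; the upper 8 lanes are left untouched.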
static bool vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen);
Some files were not shown because too many files have changed in this diff.