mirror of
https://github.com/JetBrains/JetBrainsRuntime.git
synced 2025-12-13 21:09:41 +01:00
Compare commits
101 Commits
Commit SHAs:

23c39757ec, f2e56e4c18, 4e9525ef36, b6319f5b42, 6e2ab84154, 9b12c0bb19, e65e06867e, 0eb2bcd260,
6ec36d348b, a99f340e1b, d854a04231, 410014377c, a05d5d2514, 180d8c1b57, dc6255261f, 650de99fc6,
325cdb7fc5, c46bed7292, ae85d899d0, 66d7b0ce8f, 431dcf84e9, 692edc4879, 2a1c676e0a, b0bd0c398e,
e1d1d53cd1, aa986be752, 6a6ff876c5, 4b774cb46d, b46aef88b3, 920a99faeb, 74dca863c2, 52aa7fe1c9,
413f852bdb, 11aa6e10c0, 54430a8722, 655e9cda3f, b58e3b600b, 8eaeb6990b, b60ac710be, 00068a8030,
1bbbce75c5, a5968f9364, d36a234c12, b6732d6048, a26221299e, eef9813ad4, 7f9951a934, 1ae4a6c43e,
b2daf9de30, b99be505a5, 831fe94c75, 8c8d21db6f, a4eb57c5ec, 830c4d3b19, 0a557890a5, 1f49edd978,
786833cd1b, 9c91c68d1d, 24244e4121, 3a8a6e07f2, cba09cd10d, 020e3f9591, 35fe0b1101, c9ab330b7b,
3ea82b9ff9, b1c9550182, c03d445a8c, b86b2cbc7d, 8df3f3d341, b118caf677, d34ef196c2, 811591c5c3,
355755d35d, ac81ce51fa, ed5fc9ad2d, 6700baa505, b83bf0717e, a659479483, 3500150882, 7da91533aa,
5f083abafc, b0f59f6021, 2596608ba1, be8cbfa612, f3dd8daaa9, 4378789029, a20b7eb943, 520c092a65,
4d696d0d0e, ee0b8a72c6, c09167df60, 674cc3eeca, 7e91d34f3e, 15f2538943, 13e32bf166, 6db1c4f5b9,
c8b30da7ef, 5ec5a6ea6c, 8e653d394e, ef7532e7e6, b19163b107
@@ -1,7 +1,7 @@
# Welcome to the JDK!

For build instructions please see the
[online documentation](https://openjdk.org/groups/build/doc/building.html),
[online documentation](https://git.openjdk.org/jdk/blob/master/doc/building.md),
or either of these files:

- [doc/building.html](doc/building.html) (html version)
@@ -873,7 +873,7 @@ define SetupRunJtregTestBody
$1_JTREG_BASIC_OPTIONS += -testThreadFactoryPath:$$(JTREG_TEST_THREAD_FACTORY_JAR)
$1_JTREG_BASIC_OPTIONS += -testThreadFactory:$$(JTREG_TEST_THREAD_FACTORY)
$1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-$$(JTREG_TEST_THREAD_FACTORY).txt) \
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-$$(JTREG_TEST_THREAD_FACTORY).txt) \
))
endif

@@ -881,8 +881,8 @@ define SetupRunJtregTestBody
AGENT := $$(LIBRARY_PREFIX)JvmtiStressAgent$$(SHARED_LIBRARY_SUFFIX)=$$(JTREG_JVMTI_STRESS_AGENT)
$1_JTREG_BASIC_OPTIONS += -javaoption:'-agentpath:$(TEST_IMAGE_DIR)/hotspot/jtreg/native/$$(AGENT)'
$1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-jvmti-stress-agent.txt) \
))
$$(addprefix $$($1_TEST_ROOT)/, ProblemList-jvmti-stress-agent.txt) \
))
endif

@@ -1092,7 +1092,7 @@ define SetupRunJtregTestBody
$$(call MakeDir, $$($1_TEST_RESULTS_DIR) $$($1_TEST_SUPPORT_DIR) \
$$($1_TEST_TMP_DIR))
$$(call ExecuteWithLog, $$($1_TEST_SUPPORT_DIR)/jtreg, \
$$(COV_ENVIRONMENT) $$($1_COMMAND_LINE) \
$$(COV_ENVIRONMENT) $$($1_COMMAND_LINE) \
)

$1_RESULT_FILE := $$($1_TEST_RESULTS_DIR)/text/stats.txt

@@ -1102,11 +1102,11 @@ define SetupRunJtregTestBody
$$(call LogWarn, Test report is stored in $$(strip \
$$(subst $$(TOPDIR)/, , $$($1_TEST_RESULTS_DIR))))

# Read jtreg documentation to learn on the test stats categories:
# https://github.com/openjdk/jtreg/blob/master/src/share/doc/javatest/regtest/faq.md#what-do-all-those-numbers-in-the-test-results-line-mean
# In jtreg, "skipped:" category accounts for tests that threw jtreg.SkippedException at runtime.
# At the same time these tests contribute to "passed:" tests.
# In here we don't want that and so we subtract number of "skipped:" from "passed:".
# Read jtreg documentation to learn on the test stats categories:
# https://github.com/openjdk/jtreg/blob/master/src/share/doc/javatest/regtest/faq.md#what-do-all-those-numbers-in-the-test-results-line-mean
# In jtreg, "skipped:" category accounts for tests that threw jtreg.SkippedException at runtime.
# At the same time these tests contribute to "passed:" tests.
# In here we don't want that and so we subtract number of "skipped:" from "passed:".

$$(if $$(wildcard $$($1_RESULT_FILE)), \
$$(eval $1_PASSED_AND_RUNTIME_SKIPPED := $$(shell $$(AWK) '{ gsub(/[,;]/, ""); \
@@ -63,7 +63,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
fi

BASIC_LDFLAGS_JVM_ONLY=""
LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing"
LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing $DEBUG_PREFIX_CFLAGS"

LDFLAGS_CXX_PARTIAL_LINKING="$MACHINE_FLAG -r"

@@ -71,7 +71,7 @@ AC_DEFUN([FLAGS_SETUP_LDFLAGS_HELPER],
BASIC_LDFLAGS_JVM_ONLY="-mno-omit-leaf-frame-pointer -mstack-alignment=16 \
-fPIC"

LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing"
LDFLAGS_LTO="-flto=auto -fuse-linker-plugin -fno-strict-aliasing $DEBUG_PREFIX_CFLAGS"
LDFLAGS_CXX_PARTIAL_LINKING="$MACHINE_FLAG -r"

if test "x$OPENJDK_TARGET_OS" = xlinux; then

@@ -234,6 +234,9 @@ define SetupLinkerFlags
ifeq ($(call isTargetOs, macosx), true)
$1_EXTRA_LDFLAGS += -Wl,-object_path_lto,$$($1_OBJECT_DIR)/$$($1_NAME)_lto_helper.o
endif
ifeq ($(TOOLCHAIN_TYPE), microsoft)
$1_EXTRA_LDFLAGS += -LTCGOUT:$$($1_OBJECT_DIR)/$$($1_NAME).iobj
endif
endif

$1_EXTRA_LDFLAGS += $$($1_LDFLAGS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LDFLAGS_$(OPENJDK_TARGET_OS)) \
@@ -1192,8 +1192,8 @@ var getJibProfilesDependencies = function (input, common) {
server: "jpg",
product: "jcov",
version: "3.0",
build_number: "3",
file: "bundles/jcov-3.0+3.zip",
build_number: "5",
file: "bundles/jcov-3.0+5.zip",
environment_name: "JCOV_HOME",
},

@@ -346,8 +346,14 @@ source %{
}

bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
// Only SVE has partial vector operations
if (UseSVE == 0) {
// 1. Only SVE requires partial vector operations.
// 2. The vector size in bytes must be smaller than MaxVectorSize.
// 3. Predicated vectors have a mask input, which guarantees that
// out-of-bounds lanes remain inactive.
int length_in_bytes = vt->length_in_bytes();
if (UseSVE == 0 ||
length_in_bytes == MaxVectorSize ||
node->is_predicated_vector()) {
return false;
}

@@ -370,21 +376,22 @@ source %{
return !node->in(1)->is_Con();
case Op_LoadVector:
case Op_StoreVector:
// We use NEON load/store instructions if the vector length is <= 128 bits.
return vt->length_in_bytes() > 16;
case Op_AddReductionVI:
case Op_AddReductionVL:
// We may prefer using NEON instructions rather than SVE partial operations.
return !VM_Version::use_neon_for_vector(vt->length_in_bytes());
// For these ops, we prefer using NEON instructions rather than SVE
// predicated instructions for better performance.
return !VM_Version::use_neon_for_vector(length_in_bytes);
case Op_MinReductionV:
case Op_MaxReductionV:
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON
// instructions rather than SVE partial operations.
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
// instructions rather than SVE predicated instructions for
// better performance.
return vt->element_basic_type() == T_LONG ||
!VM_Version::use_neon_for_vector(vt->length_in_bytes());
!VM_Version::use_neon_for_vector(length_in_bytes);
default:
// For other ops whose vector size is smaller than the max vector size, a
// full-sized unpredicated operation does not impact the final vector result.
// For other ops whose vector size is smaller than the max vector
// size, a full-sized unpredicated operation does not impact the
// vector result.
return false;
}
}

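The comments in this hunk explain when a vector node needs an SVE partial (predicated) operation: only when SVE is in use, the node's vector is narrower than MaxVectorSize, the node does not already carry a predicate, and even then NEON is preferred for vectors of 128 bits or less. The sketch below restates that decision for the load/store and reduction cases as plain C++; the names and the 64-byte MaxVectorSize are illustrative assumptions, not HotSpot APIs.

```cpp
#include <iostream>

constexpr int kMaxVectorSize = 64;   // assumed SVE register width in bytes (512-bit)

bool use_neon_for_vector(int length_in_bytes) {
  return length_in_bytes <= 16;      // NEON covers vectors of <= 128 bits
}

bool needs_partial_operations(bool use_sve, int length_in_bytes, bool predicated) {
  // 1. Only SVE requires partial vector operations.
  // 2. The vector must be narrower than the full SVE register.
  // 3. Predicated vectors already carry a mask, so nothing extra is needed.
  if (!use_sve || length_in_bytes == kMaxVectorSize || predicated) {
    return false;
  }
  // Loads/stores and most reductions of <= 128 bits go to NEON instead.
  return !use_neon_for_vector(length_in_bytes);
}

int main() {
  std::cout << needs_partial_operations(true, 16, false) << "\n";  // 0: NEON path
  std::cout << needs_partial_operations(true, 32, false) << "\n";  // 1: SVE predicated op
  std::cout << needs_partial_operations(true, 64, false) << "\n";  // 0: full-width SVE op
}
```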
@@ -336,8 +336,14 @@ source %{
|
||||
}
|
||||
|
||||
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
|
||||
// Only SVE has partial vector operations
|
||||
if (UseSVE == 0) {
|
||||
// 1. Only SVE requires partial vector operations.
|
||||
// 2. The vector size in bytes must be smaller than MaxVectorSize.
|
||||
// 3. Predicated vectors have a mask input, which guarantees that
|
||||
// out-of-bounds lanes remain inactive.
|
||||
int length_in_bytes = vt->length_in_bytes();
|
||||
if (UseSVE == 0 ||
|
||||
length_in_bytes == MaxVectorSize ||
|
||||
node->is_predicated_vector()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -360,21 +366,22 @@ source %{
|
||||
return !node->in(1)->is_Con();
|
||||
case Op_LoadVector:
|
||||
case Op_StoreVector:
|
||||
// We use NEON load/store instructions if the vector length is <= 128 bits.
|
||||
return vt->length_in_bytes() > 16;
|
||||
case Op_AddReductionVI:
|
||||
case Op_AddReductionVL:
|
||||
// We may prefer using NEON instructions rather than SVE partial operations.
|
||||
return !VM_Version::use_neon_for_vector(vt->length_in_bytes());
|
||||
// For these ops, we prefer using NEON instructions rather than SVE
|
||||
// predicated instructions for better performance.
|
||||
return !VM_Version::use_neon_for_vector(length_in_bytes);
|
||||
case Op_MinReductionV:
|
||||
case Op_MaxReductionV:
|
||||
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON
|
||||
// instructions rather than SVE partial operations.
|
||||
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
|
||||
// instructions rather than SVE predicated instructions for
|
||||
// better performance.
|
||||
return vt->element_basic_type() == T_LONG ||
|
||||
!VM_Version::use_neon_for_vector(vt->length_in_bytes());
|
||||
!VM_Version::use_neon_for_vector(length_in_bytes);
|
||||
default:
|
||||
// For other ops whose vector size is smaller than the max vector size, a
|
||||
// full-sized unpredicated operation does not impact the final vector result.
|
||||
// For other ops whose vector size is smaller than the max vector
|
||||
// size, a full-sized unpredicated operation does not impact the
|
||||
// vector result.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5379,7 +5379,6 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int index = oop_recorder()->find_index(k);
assert(! Universe::heap()->is_in(k), "should not be an oop");

InstructionMark im(this);
RelocationHolder rspec = metadata_Relocation::spec(index);

@@ -6335,8 +6335,36 @@ instruct loadConD_Ex(regD dst, immD src) %{
|
||||
// Prefetch instructions.
|
||||
// Must be safe to execute with invalid address (cannot fault).
|
||||
|
||||
// Special prefetch versions which use the dcbz instruction.
|
||||
instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
|
||||
match(PrefetchAllocation (AddP mem src));
|
||||
predicate(AllocatePrefetchStyle == 3);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ dcbz($src$$Register, $mem$$base$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
|
||||
match(PrefetchAllocation mem);
|
||||
predicate(AllocatePrefetchStyle == 3);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ dcbz($mem$$base$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
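The two instructs above are selected when AllocatePrefetchStyle == 3 and use dcbz, which establishes the addressed cache block in the cache and fills it with zeros instead of merely prefetching it. A rough C++ picture of the effect follows; the 128-byte block size is an assumption, and this is an illustration rather than the generated code.

```cpp
#include <cstdint>
#include <cstring>

// Conceptual effect of the dcbz-based allocation prefetch: the cache block
// containing the effective address ends up owned by this core and zeroed,
// without first being read from memory.
constexpr std::size_t kCacheBlock = 128;   // assumed cache block size

void prefetch_alloc_zero(void* base, std::ptrdiff_t offset) {
  auto addr  = reinterpret_cast<std::uintptr_t>(base) + offset;
  auto block = addr & ~(kCacheBlock - 1);                  // align down to the block
  std::memset(reinterpret_cast<void*>(block), 0, kCacheBlock);
}
```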
|
||||
|
||||
instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
|
||||
match(PrefetchAllocation (AddP mem src));
|
||||
predicate(AllocatePrefetchStyle != 3);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
|
||||
@@ -6349,6 +6377,7 @@ instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
|
||||
|
||||
instruct prefetch_alloc_no_offset(indirectMemory mem) %{
|
||||
match(PrefetchAllocation mem);
|
||||
predicate(AllocatePrefetchStyle != 3);
|
||||
ins_cost(MEMORY_REF_COST);
|
||||
|
||||
format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
|
||||
|
||||
@@ -2067,6 +2067,83 @@ void C2_MacroAssembler::enc_cmove_cmp_fp(int cmpFlag, FloatRegister op1, FloatRe
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::enc_cmove_fp_cmp(int cmpFlag, Register op1, Register op2,
|
||||
FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
bool is_unsigned = (cmpFlag & unsigned_branch_mask) == unsigned_branch_mask;
|
||||
int op_select = cmpFlag & (~unsigned_branch_mask);
|
||||
|
||||
switch (op_select) {
|
||||
case BoolTest::eq:
|
||||
cmov_fp_eq(op1, op2, dst, src, is_single);
|
||||
break;
|
||||
case BoolTest::ne:
|
||||
cmov_fp_ne(op1, op2, dst, src, is_single);
|
||||
break;
|
||||
case BoolTest::le:
|
||||
if (is_unsigned) {
|
||||
cmov_fp_leu(op1, op2, dst, src, is_single);
|
||||
} else {
|
||||
cmov_fp_le(op1, op2, dst, src, is_single);
|
||||
}
|
||||
break;
|
||||
case BoolTest::ge:
|
||||
if (is_unsigned) {
|
||||
cmov_fp_geu(op1, op2, dst, src, is_single);
|
||||
} else {
|
||||
cmov_fp_ge(op1, op2, dst, src, is_single);
|
||||
}
|
||||
break;
|
||||
case BoolTest::lt:
|
||||
if (is_unsigned) {
|
||||
cmov_fp_ltu(op1, op2, dst, src, is_single);
|
||||
} else {
|
||||
cmov_fp_lt(op1, op2, dst, src, is_single);
|
||||
}
|
||||
break;
|
||||
case BoolTest::gt:
|
||||
if (is_unsigned) {
|
||||
cmov_fp_gtu(op1, op2, dst, src, is_single);
|
||||
} else {
|
||||
cmov_fp_gt(op1, op2, dst, src, is_single);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(false, "unsupported compare condition");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::enc_cmove_fp_cmp_fp(int cmpFlag,
|
||||
FloatRegister op1, FloatRegister op2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
int op_select = cmpFlag & (~unsigned_branch_mask);
|
||||
|
||||
switch (op_select) {
|
||||
case BoolTest::eq:
|
||||
cmov_fp_cmp_fp_eq(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::ne:
|
||||
cmov_fp_cmp_fp_ne(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::le:
|
||||
cmov_fp_cmp_fp_le(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::ge:
|
||||
cmov_fp_cmp_fp_ge(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::lt:
|
||||
cmov_fp_cmp_fp_lt(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
case BoolTest::gt:
|
||||
cmov_fp_cmp_fp_gt(op1, op2, dst, src, cmp_single, cmov_single);
|
||||
break;
|
||||
default:
|
||||
assert(false, "unsupported compare condition");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
// Set dst to NaN if any NaN input.
|
||||
void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2,
|
||||
FLOAT_TYPE ft, bool is_min) {
|
||||
|
||||
@@ -132,6 +132,13 @@
FloatRegister op1, FloatRegister op2,
Register dst, Register src, bool is_single);

void enc_cmove_fp_cmp(int cmpFlag, Register op1, Register op2,
FloatRegister dst, FloatRegister src, bool is_single);

void enc_cmove_fp_cmp_fp(int cmpFlag, FloatRegister op1, FloatRegister op2,
FloatRegister dst, FloatRegister src,
bool cmp_single, bool cmov_single);

void spill(Register r, bool is64, int offset) {
is64 ? sd(r, Address(sp, offset))
: sw(r, Address(sp, offset));

@@ -1233,7 +1233,119 @@ void MacroAssembler::cmov_gtu(Register cmp1, Register cmp2, Register dst, Regist
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// ----------- cmove, compare float -----------
|
||||
// ----------- cmove float/double -----------
|
||||
|
||||
void MacroAssembler::cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bne(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
beq(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bgt(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bgtu(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
blt(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bltu(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bge(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bgeu(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
ble(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single) {
|
||||
Label no_set;
|
||||
bleu(cmp1, cmp2, no_set);
|
||||
if (is_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// ----------- cmove, compare float/double -----------
|
||||
//
|
||||
// For CmpF/D + CMoveI/L, the ordered cases are straightforward,
// so only the behaviour of the unordered cases is listed below.
|
||||
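Each cmov_fp_* helper above emits the conditional move as a short branch around a single fmv: the branch skips the move when the condition fails, otherwise src is copied into dst. A hedged C++ sketch of the pattern for the "le" flavour (illustrative only, not HotSpot code):

```cpp
// Branch-around pattern used by the cmov_fp_* helpers, shown for "le":
// move src into dst unless cmp1 > cmp2 (the bgt branch skips the fmv).
void cmov_fp_le_sketch(long cmp1, long cmp2, double& dst, double src) {
  if (!(cmp1 > cmp2)) {   // bgt cmp1, cmp2, no_set
    dst = src;            // fmv.d dst, src
  }
}                         // bind(no_set)
```

The unsigned variants (cmov_fp_leu, cmov_fp_gtu, and so on) differ only in the branch instruction that guards the move.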
@@ -1391,6 +1503,148 @@ void MacroAssembler::cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Regi
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// ----------- cmove float/double, compare float/double -----------
|
||||
|
||||
// Move src to dst only if cmp1 == cmp2,
|
||||
// otherwise leave dst unchanged, including the case where one of them is NaN.
|
||||
// Clarification:
|
||||
// java code : cmp1 != cmp2 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 eq cmp2), dst, src
|
||||
void MacroAssembler::cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 != cmp2, including the case of NaN
|
||||
// not jump (i.e. move src to dst) if cmp1 == cmp2
|
||||
float_bne(cmp1, cmp2, no_set);
|
||||
} else {
|
||||
double_bne(cmp1, cmp2, no_set);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// Keep dst unchanged only if cmp1 == cmp2,
|
||||
// otherwise move src to dst, including the case where one of them is NaN.
|
||||
// Clarification:
|
||||
// java code : cmp1 == cmp2 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 ne cmp2), dst, src
|
||||
void MacroAssembler::cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 == cmp2
|
||||
// not jump (i.e. move src to dst) if cmp1 != cmp2, including the case of NaN
|
||||
float_beq(cmp1, cmp2, no_set);
|
||||
} else {
|
||||
double_beq(cmp1, cmp2, no_set);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// When cmp1 <= cmp2 or any of them is NaN then dst = src, otherwise, dst = dst
|
||||
// Clarification
|
||||
// scenario 1:
|
||||
// java code : cmp2 < cmp1 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 le cmp2), dst, src
|
||||
// scenario 2:
|
||||
// java code : cmp1 > cmp2 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 le cmp2), dst, src
|
||||
void MacroAssembler::cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 > cmp2
|
||||
// not jump (i.e. move src to dst) if cmp1 <= cmp2 or either is NaN
|
||||
float_bgt(cmp1, cmp2, no_set);
|
||||
} else {
|
||||
double_bgt(cmp1, cmp2, no_set);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 < cmp2 or either is NaN
|
||||
// not jump (i.e. move src to dst) if cmp1 >= cmp2
|
||||
float_blt(cmp1, cmp2, no_set, false, true);
|
||||
} else {
|
||||
double_blt(cmp1, cmp2, no_set, false, true);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// When cmp1 < cmp2 or any of them is NaN then dst = src, otherwise, dst = dst
|
||||
// Clarification
|
||||
// scenario 1:
|
||||
// java code : cmp2 <= cmp1 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 lt cmp2), dst, src
|
||||
// scenario 2:
|
||||
// java code : cmp1 >= cmp2 ? dst : src
|
||||
// transformed to : CMove dst, (cmp1 lt cmp2), dst, src
|
||||
void MacroAssembler::cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 >= cmp2
|
||||
// not jump (i.e. move src to dst) if cmp1 < cmp2 or either is NaN
|
||||
float_bge(cmp1, cmp2, no_set);
|
||||
} else {
|
||||
double_bge(cmp1, cmp2, no_set);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
void MacroAssembler::cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2,
|
||||
FloatRegister dst, FloatRegister src,
|
||||
bool cmp_single, bool cmov_single) {
|
||||
Label no_set;
|
||||
if (cmp_single) {
|
||||
// jump if cmp1 <= cmp2 or either is NaN
|
||||
// not jump (i.e. move src to dst) if cmp1 > cmp2
|
||||
float_ble(cmp1, cmp2, no_set, false, true);
|
||||
} else {
|
||||
double_ble(cmp1, cmp2, no_set, false, true);
|
||||
}
|
||||
if (cmov_single) {
|
||||
fmv_s(dst, src);
|
||||
} else {
|
||||
fmv_d(dst, src);
|
||||
}
|
||||
bind(no_set);
|
||||
}
|
||||
|
||||
// Float compare branch instructions
|
||||
|
||||
#define INSN(NAME, FLOATCMP, BRANCH) \
|
||||
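The clarification comments above pin down the unordered (NaN) behaviour of CMove with a floating-point compare: an eq compare never moves when either input is NaN, while le/lt style compares do move, because the guarding branch only fires on a definite opposite result. A small C++ sketch of those two documented cases; the function names are illustrative, not HotSpot APIs.

```cpp
// cmov_fp_cmp_fp_eq: dst = src only when cmp1 == cmp2; a NaN on either side
// compares unequal, so dst is left unchanged.
void cmove_eq(double cmp1, double cmp2, double& dst, double src) {
  if (cmp1 == cmp2) {      // false whenever either input is NaN
    dst = src;
  }
}

// cmov_fp_cmp_fp_le: dst = src when cmp1 <= cmp2 or either input is NaN,
// i.e. the move is skipped only on a definite cmp1 > cmp2.
void cmove_le(double cmp1, double cmp2, double& dst, double src) {
  if (!(cmp1 > cmp2)) {    // NaN makes the comparison false, so the move happens
    dst = src;
  }
}
```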
@@ -4933,7 +5187,6 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != nullptr, "this assembler needs an OopRecorder");
int index = oop_recorder()->find_index(k);
assert(!Universe::heap()->is_in(k), "should not be an oop");

narrowKlass nk = CompressedKlassPointers::encode(k);
relocate(metadata_Relocation::spec(index), [&] {

@@ -665,6 +665,24 @@ class MacroAssembler: public Assembler {
void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
void cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);

void cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
void cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);

void cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
void cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);

public:
// We try to follow risc-v asm mnemonics.
// But as we don't layout a reachable GOT,

@@ -1924,8 +1924,6 @@ bool Matcher::match_rule_supported(int opcode) {
|
||||
case Op_SubHF:
|
||||
return UseZfh;
|
||||
|
||||
case Op_CMoveF:
|
||||
case Op_CMoveD:
|
||||
case Op_CMoveP:
|
||||
case Op_CMoveN:
|
||||
return false;
|
||||
@@ -10466,6 +10464,286 @@ instruct cmovL_cmpP(iRegLNoSp dst, iRegL src, iRegP op1, iRegP op2, cmpOpU cop)
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
// --------- CMoveF ---------
|
||||
|
||||
instruct cmovF_cmpI(fRegF dst, fRegF src, iRegI op1, iRegI op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpI op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpI\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpU(fRegF dst, fRegF src, iRegI op1, iRegI op2, cmpOpU cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpU op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpU\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpL(fRegF dst, fRegF src, iRegL op1, iRegL op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpL op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpL\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpUL(fRegF dst, fRegF src, iRegL op1, iRegL op2, cmpOpU cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpUL op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpUL\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpF(fRegF dst, fRegF src, fRegF op1, fRegF op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpF op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpF\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp_fp($cop$$cmpcode,
|
||||
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
true /* cmp_single */, true /* cmov_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpD(fRegF dst, fRegF src, fRegD op1, fRegD op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpD op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpD\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp_fp($cop$$cmpcode | C2_MacroAssembler::double_branch_mask,
|
||||
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
false /* cmp_single */, true /* cmov_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpN(fRegF dst, fRegF src, iRegN op1, iRegN op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpN op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpN\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovF_cmpP(fRegF dst, fRegF src, iRegP op1, iRegP op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveF (Binary cop (CmpP op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveF $dst, ($op1 $cop $op2), $dst, $src\t#@cmovF_cmpP\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), true /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
// --------- CMoveD ---------
|
||||
|
||||
instruct cmovD_cmpI(fRegD dst, fRegD src, iRegI op1, iRegI op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpI op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpI\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpU(fRegD dst, fRegD src, iRegI op1, iRegI op2, cmpOpU cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpU op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpU\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpL(fRegD dst, fRegD src, iRegL op1, iRegL op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpL op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpL\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpUL(fRegD dst, fRegD src, iRegL op1, iRegL op2, cmpOpU cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpUL op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpUL\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpF(fRegD dst, fRegD src, fRegF op1, fRegF op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpF op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpF\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp_fp($cop$$cmpcode,
|
||||
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
true /* cmp_single */, false /* cmov_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpD(fRegD dst, fRegD src, fRegD op1, fRegD op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpD op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpD\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp_fp($cop$$cmpcode | C2_MacroAssembler::double_branch_mask,
|
||||
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
false /* cmp_single */, false /* cmov_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpN(fRegD dst, fRegD src, iRegN op1, iRegN op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpN op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpN\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
instruct cmovD_cmpP(fRegD dst, fRegD src, iRegP op1, iRegP op2, cmpOp cop) %{
|
||||
match(Set dst (CMoveD (Binary cop (CmpP op1 op2)) (Binary dst src)));
|
||||
ins_cost(ALU_COST + BRANCH_COST);
|
||||
|
||||
format %{
|
||||
"CMoveD $dst, ($op1 $cop $op2), $dst, $src\t#@cmovD_cmpP\n\t"
|
||||
%}
|
||||
|
||||
ins_encode %{
|
||||
__ enc_cmove_fp_cmp($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask,
|
||||
as_Register($op1$$reg), as_Register($op2$$reg),
|
||||
as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), false /* is_single */);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_compare);
|
||||
%}
|
||||
|
||||
// ============================================================================
|
||||
// Procedure Call/Return Instructions
|
||||
|
||||
|
||||
@@ -2493,8 +2493,8 @@ class StubGenerator: public StubCodeGenerator {
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(res, from);

__ mv(t2, 52);
__ blt(keylen, t2, L_aes128);
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t2, L_aes128);
__ beq(keylen, t2, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)

@@ -2572,8 +2572,8 @@ class StubGenerator: public StubCodeGenerator {
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
__ vle32_v(res, from);

__ mv(t2, 52);
__ blt(keylen, t2, L_aes128);
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ bltu(keylen, t2, L_aes128);
__ beq(keylen, t2, L_aes192);
// Else we fallthrough to the biggest case (256-bit key size)

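The key-length dispatch in these hunks relies on the expanded AES key being 4 * (rounds + 1) 32-bit words, so 10, 12, and 14 rounds give 44, 52, and 60 ints, which is what the {11, 13, 15} * 4 comment refers to; the switch from blt to bltu presumably matches the unsigned lwu load of keylen. A quick check of that arithmetic:

```cpp
#include <cstdio>

// Expanded-key length in 32-bit words for AES: one 16-byte round key per
// round plus the initial whitening key, i.e. 4 * (rounds + 1) words.
int expanded_key_words(int rounds) { return 4 * (rounds + 1); }

int main() {
  printf("AES-128: %d words\n", expanded_key_words(10));  // 44
  printf("AES-192: %d words\n", expanded_key_words(12));  // 52
  printf("AES-256: %d words\n", expanded_key_words(14));  // 60
}
```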
@@ -2606,6 +2606,401 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
void cipherBlockChaining_encryptAESCrypt(int round, Register from, Register to, Register key,
|
||||
Register rvec, Register input_len) {
|
||||
const Register len = x29;
|
||||
|
||||
VectorRegister working_vregs[] = {
|
||||
v1, v2, v3, v4, v5, v6, v7, v8,
|
||||
v9, v10, v11, v12, v13, v14, v15
|
||||
};
|
||||
|
||||
const unsigned int BLOCK_SIZE = 16;
|
||||
|
||||
__ mv(len, input_len);
|
||||
// load init rvec
|
||||
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
|
||||
__ vle32_v(v16, rvec);
|
||||
|
||||
generate_aes_loadkeys(key, working_vregs, round);
|
||||
Label L_enc_loop;
|
||||
__ bind(L_enc_loop);
|
||||
// Encrypt from source by block size
|
||||
__ vle32_v(v17, from);
|
||||
__ addi(from, from, BLOCK_SIZE);
|
||||
__ vxor_vv(v16, v16, v17);
|
||||
generate_aes_encrypt(v16, working_vregs, round);
|
||||
__ vse32_v(v16, to);
|
||||
__ addi(to, to, BLOCK_SIZE);
|
||||
__ subi(len, len, BLOCK_SIZE);
|
||||
__ bnez(len, L_enc_loop);
|
||||
|
||||
// save current rvec and return
|
||||
__ vse32_v(v16, rvec);
|
||||
__ mv(x10, input_len);
|
||||
__ leave();
|
||||
__ ret();
|
||||
}
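The helper above is standard CBC encryption: each plaintext block is XORed with the previous ciphertext block (initially the IV held in rvec), encrypted, written out, and kept as the new chaining value, which is stored back at the end. A minimal C++ sketch of that loop, with the block cipher left as an assumed callback; this is an illustration of the chaining, not the stub itself.

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>

constexpr std::size_t kBlock = 16;  // AES block size in bytes

// CBC encryption over whole blocks: iv is updated in place so a caller can
// continue the chain across calls, mirroring how the stub stores rvec back.
void cbc_encrypt(const uint8_t* in, uint8_t* out, std::size_t len, uint8_t iv[kBlock],
                 const std::function<void(uint8_t block[kBlock])>& encrypt_block) {
  for (std::size_t off = 0; off < len; off += kBlock) {
    for (std::size_t i = 0; i < kBlock; i++) {
      iv[i] ^= in[off + i];            // chain: plaintext XOR previous ciphertext
    }
    encrypt_block(iv);                 // iv now holds the new ciphertext block
    for (std::size_t i = 0; i < kBlock; i++) {
      out[off + i] = iv[i];
    }
  }
}
```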
|
||||
|
||||
// Arguments:
|
||||
//
|
||||
// Inputs:
|
||||
// c_rarg0 - source byte array address
|
||||
// c_rarg1 - destination byte array address
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
// c_rarg3 - r vector byte array address
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
// Output:
|
||||
// x10 - input length
|
||||
//
|
||||
address generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
assert(UseAESIntrinsics, "Must be");
|
||||
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubId stub_id = StubId::stubgen_cipherBlockChaining_encryptAESCrypt_id;
|
||||
StubCodeMark mark(this, stub_id);
|
||||
|
||||
const Register from = c_rarg0;
|
||||
const Register to = c_rarg1;
|
||||
const Register key = c_rarg2;
|
||||
const Register rvec = c_rarg3;
|
||||
const Register input_len = c_rarg4;
|
||||
|
||||
const Register keylen = x28;
|
||||
|
||||
address start = __ pc();
|
||||
__ enter();
|
||||
|
||||
Label L_aes128, L_aes192;
|
||||
// Compute #rounds for AES based on the length of the key array
|
||||
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
__ mv(t0, 52);
|
||||
__ bltu(keylen, t0, L_aes128);
|
||||
__ beq(keylen, t0, L_aes192);
|
||||
// Else we fallthrough to the biggest case (256-bit key size)
|
||||
|
||||
// Note: the following function performs key += 15*16
|
||||
cipherBlockChaining_encryptAESCrypt(15, from, to, key, rvec, input_len);
|
||||
|
||||
// Note: the following function performs key += 11*16
|
||||
__ bind(L_aes128);
|
||||
cipherBlockChaining_encryptAESCrypt(11, from, to, key, rvec, input_len);
|
||||
|
||||
// Note: the following function performs key += 13*16
|
||||
__ bind(L_aes192);
|
||||
cipherBlockChaining_encryptAESCrypt(13, from, to, key, rvec, input_len);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
void cipherBlockChaining_decryptAESCrypt(int round, Register from, Register to, Register key,
|
||||
Register rvec, Register input_len) {
|
||||
const Register len = x29;
|
||||
|
||||
VectorRegister working_vregs[] = {
|
||||
v1, v2, v3, v4, v5, v6, v7, v8,
|
||||
v9, v10, v11, v12, v13, v14, v15
|
||||
};
|
||||
|
||||
const unsigned int BLOCK_SIZE = 16;
|
||||
|
||||
__ mv(len, input_len);
|
||||
// load init rvec
|
||||
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
|
||||
__ vle32_v(v16, rvec);
|
||||
|
||||
generate_aes_loadkeys(key, working_vregs, round);
|
||||
Label L_dec_loop;
|
||||
// Decrypt from source by block size
|
||||
__ bind(L_dec_loop);
|
||||
__ vle32_v(v17, from);
|
||||
__ addi(from, from, BLOCK_SIZE);
|
||||
__ vmv_v_v(v18, v17);
|
||||
generate_aes_decrypt(v17, working_vregs, round);
|
||||
__ vxor_vv(v17, v17, v16);
|
||||
__ vse32_v(v17, to);
|
||||
__ vmv_v_v(v16, v18);
|
||||
__ addi(to, to, BLOCK_SIZE);
|
||||
__ subi(len, len, BLOCK_SIZE);
|
||||
__ bnez(len, L_dec_loop);
|
||||
|
||||
// save current rvec and return
|
||||
__ vse32_v(v16, rvec);
|
||||
__ mv(x10, input_len);
|
||||
__ leave();
|
||||
__ ret();
|
||||
}
|
||||
|
||||
// Arguments:
|
||||
//
|
||||
// Inputs:
|
||||
// c_rarg0 - source byte array address
|
||||
// c_rarg1 - destination byte array address
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
// c_rarg3 - r vector byte array address
|
||||
// c_rarg4 - input length
|
||||
//
|
||||
// Output:
|
||||
// x10 - input length
|
||||
//
|
||||
address generate_cipherBlockChaining_decryptAESCrypt() {
|
||||
assert(UseAESIntrinsics, "Must be");
|
||||
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
|
||||
__ align(CodeEntryAlignment);
|
||||
StubId stub_id = StubId::stubgen_cipherBlockChaining_decryptAESCrypt_id;
|
||||
StubCodeMark mark(this, stub_id);
|
||||
|
||||
const Register from = c_rarg0;
|
||||
const Register to = c_rarg1;
|
||||
const Register key = c_rarg2;
|
||||
const Register rvec = c_rarg3;
|
||||
const Register input_len = c_rarg4;
|
||||
|
||||
const Register keylen = x28;
|
||||
|
||||
address start = __ pc();
|
||||
__ enter();
|
||||
|
||||
Label L_aes128, L_aes192, L_aes128_loop, L_aes192_loop, L_aes256_loop;
|
||||
// Compute #rounds for AES based on the length of the key array
|
||||
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
__ mv(t0, 52);
|
||||
__ bltu(keylen, t0, L_aes128);
|
||||
__ beq(keylen, t0, L_aes192);
|
||||
// Else we fallthrough to the biggest case (256-bit key size)
|
||||
|
||||
// Note: the following function performs key += 15*16
|
||||
cipherBlockChaining_decryptAESCrypt(15, from, to, key, rvec, input_len);
|
||||
|
||||
// Note: the following function performs key += 11*16
|
||||
__ bind(L_aes128);
|
||||
cipherBlockChaining_decryptAESCrypt(11, from, to, key, rvec, input_len);
|
||||
|
||||
// Note: the following function performs key += 13*16
|
||||
__ bind(L_aes192);
|
||||
cipherBlockChaining_decryptAESCrypt(13, from, to, key, rvec, input_len);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// Load big-endian 128-bit from memory.
|
||||
void be_load_counter_128(Register counter_hi, Register counter_lo, Register counter) {
|
||||
__ ld(counter_lo, Address(counter, 8)); // Load 128-bits from counter
|
||||
__ ld(counter_hi, Address(counter));
|
||||
__ rev8(counter_lo, counter_lo); // Convert big-endian to little-endian
|
||||
__ rev8(counter_hi, counter_hi);
|
||||
}
|
||||
|
||||
// Little-endian 128-bit + 64-bit -> 128-bit addition.
|
||||
void add_counter_128(Register counter_hi, Register counter_lo) {
|
||||
assert_different_registers(counter_hi, counter_lo, t0);
|
||||
__ addi(counter_lo, counter_lo, 1);
|
||||
__ seqz(t0, counter_lo); // Check for result overflow
|
||||
__ add(counter_hi, counter_hi, t0); // Add 1 if overflow otherwise 0
|
||||
}
|
||||
|
||||
// Store big-endian 128-bit to memory.
|
||||
void be_store_counter_128(Register counter_hi, Register counter_lo, Register counter) {
|
||||
assert_different_registers(counter_hi, counter_lo, t0, t1);
|
||||
__ rev8(t0, counter_lo); // Convert little-endian to big-endian
|
||||
__ rev8(t1, counter_hi);
|
||||
__ sd(t0, Address(counter, 8)); // Store 128-bits to counter
|
||||
__ sd(t1, Address(counter));
|
||||
}
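These helpers treat the 16-byte counter as a big-endian 128-bit integer: load and byte-swap into two 64-bit halves, increment the low half and carry into the high half only when it wraps to zero (the seqz trick), then byte-swap and store back. The same carry logic in C++, with a wrap-around check:

```cpp
#include <cassert>
#include <cstdint>

// 128-bit increment on (hi, lo): after lo wraps to 0, add the carry into hi.
// Mirrors add_counter_128 above (addi; seqz t0, lo; add hi, hi, t0).
void add_counter_128(uint64_t& hi, uint64_t& lo) {
  lo += 1;
  uint64_t carry = (lo == 0) ? 1 : 0;   // seqz
  hi += carry;
}

int main() {
  uint64_t hi = 0, lo = UINT64_MAX;     // low half about to overflow
  add_counter_128(hi, lo);
  assert(hi == 1 && lo == 0);           // carry propagated into the high half
}
```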
|
||||
|
||||
void counterMode_AESCrypt(int round, Register in, Register out, Register key, Register counter,
|
||||
Register input_len, Register saved_encrypted_ctr, Register used_ptr) {
|
||||
// Algorithm:
|
||||
//
|
||||
// generate_aes_loadkeys();
|
||||
// load_counter_128(counter_hi, counter_lo, counter);
|
||||
//
|
||||
// L_next:
|
||||
// if (used >= BLOCK_SIZE) goto L_main_loop;
|
||||
//
|
||||
// L_encrypt_next:
|
||||
// *out = *in ^ saved_encrypted_ctr[used]);
|
||||
// out++; in++; used++; len--;
|
||||
// if (len == 0) goto L_exit;
|
||||
// goto L_next;
|
||||
//
|
||||
// L_main_loop:
|
||||
// if (len == 0) goto L_exit;
|
||||
// saved_encrypted_ctr = generate_aes_encrypt(counter);
|
||||
//
|
||||
// add_counter_128(counter_hi, counter_lo);
|
||||
// be_store_counter_128(counter_hi, counter_lo, counter);
|
||||
// used = 0;
|
||||
//
|
||||
// if(len < BLOCK_SIZE) goto L_encrypt_next;
|
||||
//
|
||||
// v_in = load_16Byte(in);
|
||||
// v_out = load_16Byte(out);
|
||||
// v_saved_encrypted_ctr = load_16Byte(saved_encrypted_ctr);
|
||||
// v_out = v_in ^ v_saved_encrypted_ctr;
|
||||
// out += BLOCK_SIZE;
|
||||
// in += BLOCK_SIZE;
|
||||
// len -= BLOCK_SIZE;
|
||||
// used = BLOCK_SIZE;
|
||||
// goto L_main_loop;
|
||||
//
|
||||
//
|
||||
// L_exit:
|
||||
// store(used);
|
||||
// result = input_len
|
||||
// return result;
|
||||
|
||||
const Register used = x28;
|
||||
const Register len = x29;
|
||||
const Register counter_hi = x30;
|
||||
const Register counter_lo = x31;
|
||||
const Register block_size = t2;
|
||||
|
||||
const unsigned int BLOCK_SIZE = 16;
|
||||
|
||||
VectorRegister working_vregs[] = {
|
||||
v1, v2, v3, v4, v5, v6, v7, v8,
|
||||
v9, v10, v11, v12, v13, v14, v15
|
||||
};
|
||||
|
||||
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
|
||||
|
||||
__ lwu(used, Address(used_ptr));
|
||||
__ mv(len, input_len);
|
||||
__ mv(block_size, BLOCK_SIZE);
|
||||
|
||||
// load keys to working_vregs according to round
|
||||
generate_aes_loadkeys(key, working_vregs, round);
|
||||
|
||||
// 128-bit big-endian load
|
||||
be_load_counter_128(counter_hi, counter_lo, counter);
|
||||
|
||||
Label L_next, L_encrypt_next, L_main_loop, L_exit;
|
||||
// Check how much of the last saved_encrypted_ctr block was used; we fall through
// to L_encrypt_next when the used value is lower than block_size.
|
||||
__ bind(L_next);
|
||||
__ bgeu(used, block_size, L_main_loop);
|
||||
|
||||
// There may still be less than block_size of data left after L_main_loop,
// or after the last partially used block; encrypt it one byte at a time.
|
||||
__ bind(L_encrypt_next);
|
||||
__ add(t0, saved_encrypted_ctr, used);
|
||||
__ lbu(t1, Address(t0));
|
||||
__ lbu(t0, Address(in));
|
||||
__ xorr(t1, t1, t0);
|
||||
__ sb(t1, Address(out));
|
||||
__ addi(in, in, 1);
|
||||
__ addi(out, out, 1);
|
||||
__ addi(used, used, 1);
|
||||
__ subi(len, len, 1);
|
||||
__ beqz(len, L_exit);
|
||||
__ j(L_next);
|
||||
|
||||
// Calculate the next saved_encrypted_ctr and encrypt the data block by block
// until less than a full block remains (as long as len is not zero).
|
||||
__ bind(L_main_loop);
|
||||
__ beqz(len, L_exit);
|
||||
__ vle32_v(v16, counter);
|
||||
|
||||
// encrypt counter according to round
|
||||
generate_aes_encrypt(v16, working_vregs, round);
|
||||
|
||||
__ vse32_v(v16, saved_encrypted_ctr);
|
||||
|
||||
// 128-bit little-endian increment
|
||||
add_counter_128(counter_hi, counter_lo);
|
||||
// 128-bit big-endian store
|
||||
be_store_counter_128(counter_hi, counter_lo, counter);
|
||||
|
||||
__ mv(used, 0);
|
||||
// Check if we have a full block_size
|
||||
__ bltu(len, block_size, L_encrypt_next);
|
||||
|
||||
// We have one full block to encrypt at least
|
||||
__ vle32_v(v17, in);
|
||||
__ vxor_vv(v16, v16, v17);
|
||||
__ vse32_v(v16, out);
|
||||
__ add(out, out, block_size);
|
||||
__ add(in, in, block_size);
|
||||
__ sub(len, len, block_size);
|
||||
__ mv(used, block_size);
|
||||
__ j(L_main_loop);
|
||||
|
||||
__ bind(L_exit);
|
||||
__ sw(used, Address(used_ptr));
|
||||
__ mv(x10, input_len);
|
||||
__ leave();
|
||||
__ ret();
|
||||
};
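The Algorithm comment inside this helper describes CTR mode with a partially consumed keystream block: leftover bytes of the last encrypted counter are used first, and a fresh keystream block is produced by encrypting and incrementing the counter whenever they run out. Below is a compact C++ rendering of that pseudocode, byte at a time for clarity where the stub processes whole 16-byte blocks with vector instructions; the callbacks are assumptions, not the stub's interface.

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>

constexpr std::size_t kBlock = 16;

// CTR crypt: 'used' says how many bytes of saved_ctr (the last encrypted
// counter) were already consumed; it is returned updated, just as the stub
// writes it back through used_ptr.
std::size_t ctr_crypt(const uint8_t* in, uint8_t* out, std::size_t len,
                      uint8_t counter[kBlock], uint8_t saved_ctr[kBlock], std::size_t used,
                      const std::function<void(const uint8_t ctr[kBlock], uint8_t ks[kBlock])>& encrypt_block,
                      const std::function<void(uint8_t ctr[kBlock])>& increment_be128) {
  for (std::size_t i = 0; i < len; i++) {
    if (used == kBlock) {                // keystream exhausted: make a new block
      encrypt_block(counter, saved_ctr);
      increment_be128(counter);
      used = 0;
    }
    out[i] = in[i] ^ saved_ctr[used++];  // XOR each byte with the keystream
  }
  return used;
}
```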
|
||||
|
||||
// CTR AES crypt.
|
||||
// Arguments:
|
||||
//
|
||||
// Inputs:
|
||||
// c_rarg0 - source byte array address
|
||||
// c_rarg1 - destination byte array address
|
||||
// c_rarg2 - K (key) in little endian int array
|
||||
// c_rarg3 - counter vector byte array address
|
||||
// c_rarg4 - input length
|
||||
// c_rarg5 - saved encryptedCounter start
|
||||
// c_rarg6 - saved used length
|
||||
//
|
||||
// Output:
|
||||
// x10 - input length
|
||||
//
|
||||
address generate_counterMode_AESCrypt() {
|
||||
assert(UseAESCTRIntrinsics, "Must be");
|
||||
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
|
||||
assert(UseZbb, "need basic bit manipulation (Zbb extension) support");
|
||||
|
||||
__ align(CodeEntryAlignment);
|
||||
StubId stub_id = StubId::stubgen_counterMode_AESCrypt_id;
|
||||
StubCodeMark mark(this, stub_id);
|
||||
|
||||
const Register in = c_rarg0;
|
||||
const Register out = c_rarg1;
|
||||
const Register key = c_rarg2;
|
||||
const Register counter = c_rarg3;
|
||||
const Register input_len = c_rarg4;
|
||||
const Register saved_encrypted_ctr = c_rarg5;
|
||||
const Register used_len_ptr = c_rarg6;
|
||||
|
||||
const Register keylen = c_rarg7; // temporary register
|
||||
|
||||
const address start = __ pc();
|
||||
__ enter();
|
||||
|
||||
Label L_exit;
|
||||
__ beqz(input_len, L_exit);
|
||||
|
||||
Label L_aes128, L_aes192;
|
||||
// Compute #rounds for AES based on the length of the key array
|
||||
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
__ mv(t0, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ bltu(keylen, t0, L_aes128);
|
||||
__ beq(keylen, t0, L_aes192);
|
||||
// Else we fallthrough to the biggest case (256-bit key size)
|
||||
|
||||
// Note: the following function performs crypt with key += 15*16
|
||||
counterMode_AESCrypt(15, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
|
||||
|
||||
// Note: the following function performs crypt with key += 13*16
|
||||
__ bind(L_aes192);
|
||||
counterMode_AESCrypt(13, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
|
||||
|
||||
// Note: the following function performs crypt with key += 11*16
|
||||
__ bind(L_aes128);
|
||||
counterMode_AESCrypt(11, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
|
||||
|
||||
__ bind(L_exit);
|
||||
__ mv(x10, input_len);
|
||||
__ leave();
|
||||
__ ret();
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
|
||||
void compare_string_8_x_LU(Register tmpL, Register tmpU,
|
||||
Register strL, Register strU, Label& DIFF) {
|
||||
@@ -6824,6 +7219,12 @@ static const int64_t right_3_bits = right_n_bits(3);
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
}

if (UseAESCTRIntrinsics) {
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
}

if (UsePoly1305Intrinsics) {

@@ -434,6 +434,15 @@ void VM_Version::c2_initialize() {
warning("UseAESIntrinsics enabled, but UseAES not, enabling");
UseAES = true;
}

if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics) && UseZbb) {
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
}

if (UseAESCTRIntrinsics && !UseZbb) {
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZbb support.");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
} else {
if (UseAES) {
warning("AES instructions are not available on this CPU");

@@ -443,11 +452,10 @@ void VM_Version::c2_initialize() {
warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
}

if (UseAESCTRIntrinsics) {
warning("AES/CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
if (UseAESCTRIntrinsics) {
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZvkn support.");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
}
}

@@ -1715,6 +1715,8 @@ bool Matcher::match_rule_supported(int opcode) {
switch (opcode) {
case Op_ReverseBytesI:
case Op_ReverseBytesL:
case Op_ReverseBytesS:
case Op_ReverseBytesUS:
return UseByteReverseInstruction;
case Op_PopCountI:
case Op_PopCountL:
@@ -11615,6 +11617,38 @@ instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{

// Byte reverse

instruct bytes_reverse_short(iRegI dst, iRegI src) %{
match(Set dst (ReverseBytesS src));
predicate(UseByteReverseInstruction);
ins_cost(2 * DEFAULT_COST);
size(8);

format %{ "LRVR $dst, $src\n\t # byte reverse int"
"SRA $dst, 0x0010\t # right shift by 16, sign extended" %}

ins_encode %{
__ z_lrvr($dst$$Register, $src$$Register);
__ z_sra($dst$$Register, 0x0010);
%}
ins_pipe(pipe_class_dummy);
%}

instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{
match(Set dst (ReverseBytesUS src));
predicate(UseByteReverseInstruction);
ins_cost(2 * DEFAULT_COST);
size(8);

format %{ "LRVR $dst, $src\n\t # byte reverse int"
"SRL $dst, 0x0010\t # right shift by 16, zero extended" %}

ins_encode %{
__ z_lrvr($dst$$Register, $src$$Register);
__ z_srl($dst$$Register, 0x0010);
%}
ins_pipe(pipe_class_dummy);
%}

instruct bytes_reverse_int(iRegI dst, iRegI src) %{
match(Set dst (ReverseBytesI src));
predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported

@@ -2333,8 +2333,8 @@ int os::open(const char *path, int oflag, int mode) {

if (ret != -1) {
if ((st_mode & S_IFMT) == S_IFDIR) {
errno = EISDIR;
::close(fd);
errno = EISDIR;
return -1;
}
} else {

@@ -2277,8 +2277,8 @@ int os::open(const char *path, int oflag, int mode) {

if (ret != -1) {
if ((st_mode & S_IFMT) == S_IFDIR) {
errno = EISDIR;
::close(fd);
errno = EISDIR;
return -1;
}
} else {

@@ -4932,8 +4932,8 @@ int os::open(const char *path, int oflag, int mode) {

if (ret != -1) {
if ((st_mode & S_IFMT) == S_IFDIR) {
errno = EISDIR;
::close(fd);
errno = EISDIR;
return -1;
}
} else {

@@ -1028,6 +1028,7 @@ char* os::realpath(const char* filename, char* outbuf, size_t outbuflen) {
} else {
errno = ENAMETOOLONG;
}
ErrnoPreserver ep;
permit_forbidden_function::free(p); // *not* os::free
} else {
// Fallback for platforms struggling with modern Posix standards (AIX 5.3, 6.1). If realpath

@@ -1645,7 +1645,7 @@ static void SR_handler(int sig, siginfo_t* siginfo, void* context) {

// Save and restore errno to avoid confusing native code with EINTR
// after sigsuspend.
int old_errno = errno;
ErrnoPreserver ep;

PosixSignals::unblock_error_signals();

@@ -1727,7 +1727,6 @@ static void SR_handler(int sig, siginfo_t* siginfo, void* context) {
// ignore
}

errno = old_errno;
}

static int SR_initialize() {

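The hunks above all follow one rule: the errno a caller observes must be decided after, or kept alive across, the cleanup call that could overwrite it (::close, free, sigsuspend). A minimal standalone sketch of the RAII form of that rule (ErrnoGuard is illustrative; the JDK code uses its own ErrnoPreserver helper):

    #include <cerrno>
    #include <unistd.h>

    // Saves errno on construction and restores it at scope exit, so intervening
    // cleanup calls cannot clobber the value the caller will read.
    class ErrnoGuard {
      int _saved;
     public:
      ErrnoGuard() : _saved(errno) {}
      ~ErrnoGuard() { errno = _saved; }
    };

    int reject_directory(int fd) {
      errno = EISDIR;    // decide the error first ...
      ErrnoGuard guard;  // ... then keep it alive across the cleanup
      ::close(fd);
      return -1;
    }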
@@ -4782,8 +4782,8 @@ int os::stat(const char *path, struct stat *sbuf) {
|
||||
path_to_target = get_path_to_target(wide_path);
|
||||
if (path_to_target == nullptr) {
|
||||
// it is a symbolic link, but we failed to resolve it
|
||||
errno = ENOENT;
|
||||
os::free(wide_path);
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -4794,14 +4794,14 @@ int os::stat(const char *path, struct stat *sbuf) {
|
||||
// if getting attributes failed, GetLastError should be called immediately after that
|
||||
if (!bret) {
|
||||
DWORD errcode = ::GetLastError();
|
||||
log_debug(os)("os::stat() failed to GetFileAttributesExW: GetLastError->%lu.", errcode);
|
||||
os::free(wide_path);
|
||||
os::free(path_to_target);
|
||||
if (errcode == ERROR_FILE_NOT_FOUND || errcode == ERROR_PATH_NOT_FOUND) {
|
||||
errno = ENOENT;
|
||||
} else {
|
||||
errno = 0;
|
||||
}
|
||||
log_debug(os)("os::stat() failed to GetFileAttributesExW: GetLastError->%lu.", errcode);
|
||||
os::free(wide_path);
|
||||
os::free(path_to_target);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -5000,8 +5000,8 @@ int os::open(const char *path, int oflag, int mode) {
|
||||
path_to_target = get_path_to_target(wide_path);
|
||||
if (path_to_target == nullptr) {
|
||||
// it is a symbolic link, but we failed to resolve it
|
||||
errno = ENOENT;
|
||||
os::free(wide_path);
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -5275,6 +5275,7 @@ char* os::realpath(const char* filename, char* outbuf, size_t outbuflen) {
|
||||
} else {
|
||||
errno = ENAMETOOLONG;
|
||||
}
|
||||
ErrnoPreserver ep;
|
||||
permit_forbidden_function::free(p); // *not* os::free
|
||||
}
|
||||
return result;
|
||||
|
||||
@@ -96,6 +96,7 @@
|
||||
#include "runtime/vmOperations.hpp"
|
||||
#include "runtime/vmThread.hpp"
|
||||
#include "sanitizers/leak.hpp"
|
||||
#include "services/management.hpp"
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/bitMap.inline.hpp"
|
||||
#include "utilities/defaultStream.hpp"
|
||||
@@ -2204,7 +2205,7 @@ void AOTMetaspace::initialize_shared_spaces() {
|
||||
CountSharedSymbols cl;
|
||||
SymbolTable::shared_symbols_do(&cl);
|
||||
tty->print_cr("Number of shared symbols: %zu", cl.total());
|
||||
if (HeapShared::is_loading_mapping_mode()) {
|
||||
if (HeapShared::is_loading() && HeapShared::is_loading_mapping_mode()) {
|
||||
tty->print_cr("Number of shared strings: %zu", StringTable::shared_entry_count());
|
||||
}
|
||||
tty->print_cr("VM version: %s\r\n", static_mapinfo->vm_version());
|
||||
|
||||
@@ -149,6 +149,10 @@ public:
|
||||
assert(is_loaded(), "must be loaded");
|
||||
return _flags;
|
||||
}
|
||||
|
||||
// Fetch Klass::access_flags.
|
||||
jint access_flags() { return flags().as_int(); }
|
||||
|
||||
bool has_finalizer() {
|
||||
assert(is_loaded(), "must be loaded");
|
||||
return _has_finalizer; }
|
||||
|
||||
@@ -216,15 +216,6 @@ jint ciKlass::modifier_flags() {
|
||||
)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciKlass::access_flags
|
||||
jint ciKlass::access_flags() {
|
||||
assert(is_loaded(), "not loaded");
|
||||
GUARDED_VM_ENTRY(
|
||||
return get_Klass()->access_flags().as_unsigned_short();
|
||||
)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciKlass::misc_flags
|
||||
klass_flags_t ciKlass::misc_flags() {
|
||||
|
||||
@@ -122,9 +122,6 @@ public:
|
||||
// Fetch modifier flags.
|
||||
jint modifier_flags();
|
||||
|
||||
// Fetch Klass::access_flags.
|
||||
jint access_flags();
|
||||
|
||||
// Fetch Klass::misc_flags.
|
||||
klass_flags_t misc_flags();
|
||||
|
||||
|
||||
@@ -439,7 +439,7 @@ class MethodFamily : public ResourceObj {
|
||||
StreamIndentor si(str, indent * 2);
|
||||
str->print("Selected method: ");
|
||||
print_method(str, _selected_target);
|
||||
Klass* method_holder = _selected_target->method_holder();
|
||||
InstanceKlass* method_holder = _selected_target->method_holder();
|
||||
if (!method_holder->is_interface()) {
|
||||
str->print(" : in superclass");
|
||||
}
|
||||
|
||||
@@ -1091,10 +1091,6 @@ void java_lang_Class::allocate_mirror(Klass* k, bool is_scratch, Handle protecti
|
||||
// Set the modifiers flag.
|
||||
u2 computed_modifiers = k->compute_modifier_flags();
|
||||
set_modifiers(mirror(), computed_modifiers);
|
||||
// Set the raw access_flags, this is used by reflection instead of modifier flags.
|
||||
// The Java code for array classes gets the access flags from the element type.
|
||||
assert(!k->is_array_klass() || k->access_flags().as_unsigned_short() == 0, "access flags are not set for arrays");
|
||||
set_raw_access_flags(mirror(), k->access_flags().as_unsigned_short());
|
||||
|
||||
InstanceMirrorKlass* mk = InstanceMirrorKlass::cast(mirror->klass());
|
||||
assert(oop_size(mirror()) == mk->instance_size(k), "should have been set");
|
||||
@@ -1103,6 +1099,8 @@ void java_lang_Class::allocate_mirror(Klass* k, bool is_scratch, Handle protecti
|
||||
|
||||
// It might also have a component mirror. This mirror must already exist.
|
||||
if (k->is_array_klass()) {
|
||||
// The Java code for array classes gets the access flags from the element type.
|
||||
set_raw_access_flags(mirror(), 0);
|
||||
if (k->is_typeArray_klass()) {
|
||||
BasicType type = TypeArrayKlass::cast(k)->element_type();
|
||||
if (is_scratch) {
|
||||
@@ -1129,6 +1127,8 @@ void java_lang_Class::allocate_mirror(Klass* k, bool is_scratch, Handle protecti
|
||||
// and java_mirror in this klass.
|
||||
} else {
|
||||
assert(k->is_instance_klass(), "Must be");
|
||||
// Set the raw access_flags, this is used by reflection instead of modifier flags.
|
||||
set_raw_access_flags(mirror(), InstanceKlass::cast(k)->access_flags().as_unsigned_short());
|
||||
initialize_mirror_fields(InstanceKlass::cast(k), mirror, protection_domain, classData, THREAD);
|
||||
if (HAS_PENDING_EXCEPTION) {
|
||||
// If any of the fields throws an exception like OOM remove the klass field
|
||||
|
||||
@@ -2172,9 +2172,10 @@ static bool is_always_visible_class(oop mirror) {
|
||||
return true; // primitive array
|
||||
}
|
||||
assert(klass->is_instance_klass(), "%s", klass->external_name());
|
||||
return klass->is_public() &&
|
||||
(InstanceKlass::cast(klass)->is_same_class_package(vmClasses::Object_klass()) || // java.lang
|
||||
InstanceKlass::cast(klass)->is_same_class_package(vmClasses::MethodHandle_klass())); // java.lang.invoke
|
||||
InstanceKlass* ik = InstanceKlass::cast(klass);
|
||||
return ik->is_public() &&
|
||||
(ik->is_same_class_package(vmClasses::Object_klass()) || // java.lang
|
||||
ik->is_same_class_package(vmClasses::MethodHandle_klass())); // java.lang.invoke
|
||||
}
|
||||
|
||||
// Find or construct the Java mirror (java.lang.Class instance) for
|
||||
|
||||
@@ -227,11 +227,6 @@ void CodeCache::initialize_heaps() {

if (!non_nmethod.set) {
non_nmethod.size += compiler_buffer_size;
// Further down, just before FLAG_SET_ERGO(), all segment sizes are
// aligned down to the next lower multiple of min_size. For large page
// sizes, this may result in (non_nmethod.size == 0) which is not acceptable.
// Therefore, force non_nmethod.size to at least min_size.
non_nmethod.size = MAX2(non_nmethod.size, min_size);
}

if (!profiled.set && !non_profiled.set) {
@@ -307,11 +302,10 @@ void CodeCache::initialize_heaps() {

// Note: if large page support is enabled, min_size is at least the large
// page size. This ensures that the code cache is covered by large pages.
non_profiled.size += non_nmethod.size & alignment_mask(min_size);
non_profiled.size += profiled.size & alignment_mask(min_size);
non_nmethod.size = align_down(non_nmethod.size, min_size);
profiled.size = align_down(profiled.size, min_size);
non_profiled.size = align_down(non_profiled.size, min_size);
non_nmethod.size = align_up(non_nmethod.size, min_size);
profiled.size = align_up(profiled.size, min_size);
non_profiled.size = align_up(non_profiled.size, min_size);
cache_size = non_nmethod.size + profiled.size + non_profiled.size;

FLAG_SET_ERGO(NonNMethodCodeHeapSize, non_nmethod.size);
FLAG_SET_ERGO(ProfiledCodeHeapSize, profiled.size);

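For reference, the rounding these lines rely on is plain power-of-two arithmetic; a standalone sketch (min_size stands in for the large-page-derived minimum, assumed to be a power of two):

    #include <cassert>
    #include <cstddef>

    static size_t align_down(size_t x, size_t a) { assert((a & (a - 1)) == 0); return x & ~(a - 1); }
    static size_t align_up  (size_t x, size_t a) { assert((a & (a - 1)) == 0); return (x + a - 1) & ~(a - 1); }

    int main() {
      const size_t min_size = 2u * 1024 * 1024;       // e.g. a 2M large page
      const size_t segment  = 5u * 1024 * 1024 + 123;
      assert(align_down(segment, min_size) == 4u * 1024 * 1024);  // can shave off almost min_size
      assert(align_up(segment, min_size)   == 6u * 1024 * 1024);  // never rounds a segment to zero
    }

Rounding each segment up rather than down means nothing is shaved off, so the remainder no longer has to be folded back into the non-profiled segment; the trade-off is that the summed cache_size can grow by just under min_size per segment.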
@@ -561,6 +561,20 @@ bool DirectiveSet::should_not_inline(ciMethod* inlinee) {
return false;
}

bool DirectiveSet::should_delay_inline(ciMethod* inlinee) {
inlinee->check_is_loaded();
VM_ENTRY_MARK;
methodHandle mh(THREAD, inlinee->get_Method());

if (_inlinematchers != nullptr) {
return matches_inline(mh, InlineMatcher::delay_inline);
}
if (!CompilerDirectivesIgnoreCompileCommandsOption) {
return CompilerOracle::should_delay_inline(mh);
}
return false;
}

bool DirectiveSet::parse_and_add_inline(char* str, const char*& error_msg) {
InlineMatcher* m = InlineMatcher::parse_inline_pattern(str, error_msg);
if (m != nullptr) {

@@ -142,6 +142,7 @@
void append_inline(InlineMatcher* m);
bool should_inline(ciMethod* inlinee);
bool should_not_inline(ciMethod* inlinee);
bool should_delay_inline(ciMethod* inlinee);
void print_inline(outputStream* st);
DirectiveSet* compilecommand_compatibility_init(const methodHandle& method);
bool is_exclusive_copy() { return _directive == nullptr; }
|
||||
@@ -480,6 +480,10 @@ bool CompilerOracle::should_not_inline(const methodHandle& method) {
return check_predicate(CompileCommandEnum::DontInline, method) || check_predicate(CompileCommandEnum::Exclude, method);
}

bool CompilerOracle::should_delay_inline(const methodHandle& method) {
return (check_predicate(CompileCommandEnum::DelayInline, method));
}

bool CompilerOracle::should_print(const methodHandle& method) {
return check_predicate(CompileCommandEnum::Print, method);
}

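Assuming the new DelayInline predicate is reachable through the normal CompileCommand front end (the "delayinline" option name added in the next hunk points that way), usage would look like the following; the class and method names are hypothetical and shown only for the syntax:

    java -XX:CompileCommand=delayinline,com.example.Foo::bar -jar app.jar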
@@ -51,6 +51,7 @@ class methodHandle;
option(Log, "log", Bool) \
option(Print, "print", Bool) \
option(Inline, "inline", Bool) \
option(DelayInline, "delayinline", Bool) \
option(DontInline, "dontinline", Bool) \
option(Blackhole, "blackhole", Bool) \
option(CompileOnly, "compileonly", Bool)\
@@ -150,6 +151,9 @@ class CompilerOracle : AllStatic {
// Tells whether we want to disallow inlining of this method
static bool should_not_inline(const methodHandle& method);

// Tells whether we want to delay inlining of this method
static bool should_delay_inline(const methodHandle& method);

// Tells whether this method changes Thread.currentThread()
static bool changes_current_thread(const methodHandle& method);
|
||||
|
||||
@@ -100,6 +100,7 @@ public:
|
||||
enum InlineType {
|
||||
unknown_inline,
|
||||
dont_inline,
|
||||
delay_inline,
|
||||
force_inline
|
||||
};
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#include "gc/g1/g1HeapSizingPolicy.hpp"
|
||||
#include "gc/g1/jvmFlagConstraintsG1.hpp"
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "gc/shared/ptrQueue.hpp"
|
||||
#include "gc/shared/satbMarkQueue.hpp"
|
||||
#include "runtime/globals_extension.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
|
||||
|
||||
@@ -70,8 +70,8 @@
|
||||
nonstatic_field(G1HeapRegionSetBase, _length, uint) \
|
||||
\
|
||||
nonstatic_field(SATBMarkQueue, _active, bool) \
|
||||
nonstatic_field(PtrQueue, _buf, void**) \
|
||||
nonstatic_field(PtrQueue, _index, size_t)
|
||||
nonstatic_field(SATBMarkQueue, _buf, void**) \
|
||||
nonstatic_field(SATBMarkQueue, _index, size_t)
|
||||
|
||||
#define VM_INT_CONSTANTS_G1GC(declare_constant, declare_constant_with_value) \
|
||||
declare_constant(G1HeapRegionType::FreeTag) \
|
||||
@@ -96,7 +96,6 @@
|
||||
declare_toplevel_type(G1HeapRegionManager) \
|
||||
declare_toplevel_type(G1HeapRegionSetBase) \
|
||||
declare_toplevel_type(G1MonitoringSupport) \
|
||||
declare_toplevel_type(PtrQueue) \
|
||||
declare_toplevel_type(G1HeapRegionType) \
|
||||
declare_toplevel_type(SATBMarkQueue) \
|
||||
\
|
||||
|
||||
@@ -58,8 +58,6 @@
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/vmError.hpp"
|
||||
|
||||
PSYoungGen* ParallelScavengeHeap::_young_gen = nullptr;
|
||||
PSOldGen* ParallelScavengeHeap::_old_gen = nullptr;
|
||||
PSAdaptiveSizePolicy* ParallelScavengeHeap::_size_policy = nullptr;
|
||||
GCPolicyCounters* ParallelScavengeHeap::_gc_policy_counters = nullptr;
|
||||
size_t ParallelScavengeHeap::_desired_page_size = 0;
|
||||
@@ -134,18 +132,18 @@ jint ParallelScavengeHeap::initialize() {
|
||||
|
||||
void ParallelScavengeHeap::initialize_serviceability() {
|
||||
|
||||
_eden_pool = new EdenMutableSpacePool(_young_gen,
|
||||
_young_gen->eden_space(),
|
||||
"PS Eden Space",
|
||||
false /* support_usage_threshold */);
|
||||
_eden_pool = new PSEdenSpacePool(_young_gen,
|
||||
_young_gen->eden_space(),
|
||||
"PS Eden Space",
|
||||
false /* support_usage_threshold */);
|
||||
|
||||
_survivor_pool = new SurvivorMutableSpacePool(_young_gen,
|
||||
"PS Survivor Space",
|
||||
false /* support_usage_threshold */);
|
||||
_survivor_pool = new PSSurvivorSpacePool(_young_gen,
|
||||
"PS Survivor Space",
|
||||
false /* support_usage_threshold */);
|
||||
|
||||
_old_pool = new PSGenerationPool(_old_gen,
|
||||
"PS Old Gen",
|
||||
true /* support_usage_threshold */);
|
||||
_old_pool = new PSOldGenerationPool(_old_gen,
|
||||
"PS Old Gen",
|
||||
true /* support_usage_threshold */);
|
||||
|
||||
_young_manager = new GCMemoryManager("PS Scavenge");
|
||||
_old_manager = new GCMemoryManager("PS MarkSweep");
|
||||
|
||||
@@ -69,8 +69,8 @@ class ReservedSpace;
|
||||
class ParallelScavengeHeap : public CollectedHeap {
|
||||
friend class VMStructs;
|
||||
private:
|
||||
static PSYoungGen* _young_gen;
|
||||
static PSOldGen* _old_gen;
|
||||
PSYoungGen* _young_gen;
|
||||
PSOldGen* _old_gen;
|
||||
|
||||
// Sizing policy for entire heap
|
||||
static PSAdaptiveSizePolicy* _size_policy;
|
||||
@@ -160,8 +160,8 @@ public:
|
||||
GrowableArray<GCMemoryManager*> memory_managers() override;
|
||||
GrowableArray<MemoryPool*> memory_pools() override;
|
||||
|
||||
static PSYoungGen* young_gen() { return _young_gen; }
|
||||
static PSOldGen* old_gen() { return _old_gen; }
|
||||
PSYoungGen* young_gen() const { return _young_gen; }
|
||||
PSOldGen* old_gen() const { return _old_gen; }
|
||||
|
||||
PSAdaptiveSizePolicy* size_policy() { return _size_policy; }
|
||||
|
||||
|
||||
@@ -24,14 +24,14 @@
|
||||
|
||||
#include "gc/parallel/psMemoryPool.hpp"
|
||||
|
||||
PSGenerationPool::PSGenerationPool(PSOldGen* old_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
PSOldGenerationPool::PSOldGenerationPool(PSOldGen* old_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
CollectedMemoryPool(name, old_gen->capacity_in_bytes(),
|
||||
old_gen->reserved().byte_size(), support_usage_threshold), _old_gen(old_gen) {
|
||||
}
|
||||
|
||||
MemoryUsage PSGenerationPool::get_memory_usage() {
|
||||
MemoryUsage PSOldGenerationPool::get_memory_usage() {
|
||||
size_t maxSize = (available_for_allocation() ? max_size() : 0);
|
||||
size_t used = used_in_bytes();
|
||||
size_t committed = _old_gen->capacity_in_bytes();
|
||||
@@ -39,16 +39,16 @@ MemoryUsage PSGenerationPool::get_memory_usage() {
|
||||
return MemoryUsage(initial_size(), used, committed, maxSize);
|
||||
}
|
||||
|
||||
// The max size of EdenMutableSpacePool =
|
||||
// The max size of PSEdenSpacePool =
|
||||
// max size of the PSYoungGen - capacity of two survivor spaces
|
||||
//
|
||||
// Max size of PS eden space is changing due to ergonomic.
|
||||
// PSYoungGen, PSOldGen, Eden, Survivor spaces are all resizable.
|
||||
//
|
||||
EdenMutableSpacePool::EdenMutableSpacePool(PSYoungGen* young_gen,
|
||||
MutableSpace* space,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
PSEdenSpacePool::PSEdenSpacePool(PSYoungGen* young_gen,
|
||||
MutableSpace* space,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
CollectedMemoryPool(name, space->capacity_in_bytes(),
|
||||
(young_gen->max_gen_size() -
|
||||
young_gen->from_space()->capacity_in_bytes() -
|
||||
@@ -58,7 +58,7 @@ EdenMutableSpacePool::EdenMutableSpacePool(PSYoungGen* young_gen,
|
||||
_space(space) {
|
||||
}
|
||||
|
||||
MemoryUsage EdenMutableSpacePool::get_memory_usage() {
|
||||
MemoryUsage PSEdenSpacePool::get_memory_usage() {
|
||||
size_t maxSize = (available_for_allocation() ? max_size() : 0);
|
||||
size_t used = used_in_bytes();
|
||||
size_t committed = _space->capacity_in_bytes();
|
||||
@@ -66,20 +66,20 @@ MemoryUsage EdenMutableSpacePool::get_memory_usage() {
|
||||
return MemoryUsage(initial_size(), used, committed, maxSize);
|
||||
}
|
||||
|
||||
// The max size of SurvivorMutableSpacePool =
|
||||
// The max size of PSSurvivorSpacePool =
|
||||
// current capacity of the from-space
|
||||
//
|
||||
// PS from and to survivor spaces could have different sizes.
|
||||
//
|
||||
SurvivorMutableSpacePool::SurvivorMutableSpacePool(PSYoungGen* young_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
PSSurvivorSpacePool::PSSurvivorSpacePool(PSYoungGen* young_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold) :
|
||||
CollectedMemoryPool(name, young_gen->from_space()->capacity_in_bytes(),
|
||||
young_gen->from_space()->capacity_in_bytes(),
|
||||
support_usage_threshold), _young_gen(young_gen) {
|
||||
}
|
||||
|
||||
MemoryUsage SurvivorMutableSpacePool::get_memory_usage() {
|
||||
MemoryUsage PSSurvivorSpacePool::get_memory_usage() {
|
||||
size_t maxSize = (available_for_allocation() ? max_size() : 0);
|
||||
size_t used = used_in_bytes();
|
||||
size_t committed = committed_in_bytes();
|
||||
|
||||
@@ -31,28 +31,28 @@
|
||||
#include "services/memoryPool.hpp"
|
||||
#include "services/memoryUsage.hpp"
|
||||
|
||||
class PSGenerationPool : public CollectedMemoryPool {
|
||||
class PSOldGenerationPool : public CollectedMemoryPool {
|
||||
private:
|
||||
PSOldGen* _old_gen;
|
||||
|
||||
public:
|
||||
PSGenerationPool(PSOldGen* pool, const char* name, bool support_usage_threshold);
|
||||
PSOldGenerationPool(PSOldGen* pool, const char* name, bool support_usage_threshold);
|
||||
|
||||
MemoryUsage get_memory_usage();
|
||||
size_t used_in_bytes() { return _old_gen->used_in_bytes(); }
|
||||
size_t max_size() const { return _old_gen->reserved().byte_size(); }
|
||||
};
|
||||
|
||||
class EdenMutableSpacePool : public CollectedMemoryPool {
|
||||
class PSEdenSpacePool : public CollectedMemoryPool {
|
||||
private:
|
||||
PSYoungGen* _young_gen;
|
||||
MutableSpace* _space;
|
||||
|
||||
public:
|
||||
EdenMutableSpacePool(PSYoungGen* young_gen,
|
||||
MutableSpace* space,
|
||||
const char* name,
|
||||
bool support_usage_threshold);
|
||||
PSEdenSpacePool(PSYoungGen* young_gen,
|
||||
MutableSpace* space,
|
||||
const char* name,
|
||||
bool support_usage_threshold);
|
||||
|
||||
MutableSpace* space() { return _space; }
|
||||
MemoryUsage get_memory_usage();
|
||||
@@ -65,14 +65,14 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class SurvivorMutableSpacePool : public CollectedMemoryPool {
|
||||
class PSSurvivorSpacePool : public CollectedMemoryPool {
|
||||
private:
|
||||
PSYoungGen* _young_gen;
|
||||
|
||||
public:
|
||||
SurvivorMutableSpacePool(PSYoungGen* young_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold);
|
||||
PSSurvivorSpacePool(PSYoungGen* young_gen,
|
||||
const char* name,
|
||||
bool support_usage_threshold);
|
||||
|
||||
MemoryUsage get_memory_usage();
|
||||
|
||||
|
||||
@@ -115,7 +115,7 @@ class PSScavenge: AllStatic {
|
||||
}
|
||||
|
||||
static bool is_obj_in_to_space(oop o) {
|
||||
return ParallelScavengeHeap::young_gen()->to_space()->contains(o);
|
||||
return ParallelScavengeHeap::heap()->young_gen()->to_space()->contains(o);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -64,8 +64,8 @@
|
||||
nonstatic_field(PSOldGen, _max_gen_size, const size_t) \
|
||||
\
|
||||
\
|
||||
static_field(ParallelScavengeHeap, _young_gen, PSYoungGen*) \
|
||||
static_field(ParallelScavengeHeap, _old_gen, PSOldGen*) \
|
||||
nonstatic_field(ParallelScavengeHeap, _young_gen, PSYoungGen*) \
|
||||
nonstatic_field(ParallelScavengeHeap, _old_gen, PSOldGen*) \
|
||||
\
|
||||
|
||||
#define VM_TYPES_PARALLELGC(declare_type, \
|
||||
|
||||
@@ -91,14 +91,16 @@ SerialHeap::SerialHeap() :
|
||||
CollectedHeap(),
|
||||
_young_gen(nullptr),
|
||||
_old_gen(nullptr),
|
||||
_young_gen_saved_top(nullptr),
|
||||
_old_gen_saved_top(nullptr),
|
||||
_rem_set(nullptr),
|
||||
_gc_policy_counters(new GCPolicyCounters("Copy:MSC", 2, 2)),
|
||||
_young_manager(nullptr),
|
||||
_old_manager(nullptr),
|
||||
_is_heap_almost_full(false),
|
||||
_eden_pool(nullptr),
|
||||
_survivor_pool(nullptr),
|
||||
_old_pool(nullptr) {
|
||||
_old_pool(nullptr),
|
||||
_is_heap_almost_full(false) {
|
||||
_young_manager = new GCMemoryManager("Copy");
|
||||
_old_manager = new GCMemoryManager("MarkSweepCompact");
|
||||
GCLocker::initialize();
|
||||
@@ -630,6 +632,14 @@ bool SerialHeap::requires_barriers(stackChunkOop obj) const {
|
||||
|
||||
// Returns "TRUE" iff "p" points into the committed areas of the heap.
|
||||
bool SerialHeap::is_in(const void* p) const {
|
||||
// precondition
|
||||
verify_not_in_native_if_java_thread();
|
||||
|
||||
if (!is_in_reserved(p)) {
|
||||
// If it's not even in reserved.
|
||||
return false;
|
||||
}
|
||||
|
||||
return _young_gen->is_in(p) || _old_gen->is_in(p);
|
||||
}
|
||||
|
||||
@@ -797,3 +807,12 @@ void SerialHeap::gc_epilogue(bool full) {
|
||||
|
||||
MetaspaceCounters::update_performance_counters();
|
||||
};
|
||||
|
||||
#ifdef ASSERT
|
||||
void SerialHeap::verify_not_in_native_if_java_thread() {
|
||||
if (Thread::current()->is_Java_thread()) {
|
||||
JavaThread* thread = JavaThread::current();
|
||||
assert(thread->thread_state() != _thread_in_native, "precondition");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -76,6 +76,8 @@ class SerialHeap : public CollectedHeap {
|
||||
private:
|
||||
DefNewGeneration* _young_gen;
|
||||
TenuredGeneration* _old_gen;
|
||||
|
||||
// Used during young-gc
|
||||
HeapWord* _young_gen_saved_top;
|
||||
HeapWord* _old_gen_saved_top;
|
||||
|
||||
@@ -94,6 +96,10 @@ private:
|
||||
GCMemoryManager* _young_manager;
|
||||
GCMemoryManager* _old_manager;
|
||||
|
||||
MemoryPool* _eden_pool;
|
||||
MemoryPool* _survivor_pool;
|
||||
MemoryPool* _old_pool;
|
||||
|
||||
// Indicate whether heap is almost or approaching full.
|
||||
// Usually, there is some memory headroom for application/gc to run properly.
|
||||
// However, in extreme cases, e.g. young-gen is non-empty after a full gc, we
|
||||
@@ -111,6 +117,21 @@ private:
|
||||
void print_tracing_info() const override;
|
||||
void stop() override {};
|
||||
|
||||
static void verify_not_in_native_if_java_thread() NOT_DEBUG_RETURN;
|
||||
|
||||
// Try to allocate space by expanding the heap.
|
||||
HeapWord* expand_heap_and_allocate(size_t size, bool is_tlab);
|
||||
|
||||
HeapWord* mem_allocate_cas_noexpand(size_t size, bool is_tlab);
|
||||
HeapWord* mem_allocate_work(size_t size, bool is_tlab);
|
||||
|
||||
void initialize_serviceability() override;
|
||||
|
||||
// Set the saved marks of generations, if that makes sense.
|
||||
// In particular, if any generation might iterate over the oops
|
||||
// in other generations, it should call this method.
|
||||
void save_marks();
|
||||
|
||||
public:
|
||||
// Returns JNI_OK on success
|
||||
jint initialize() override;
|
||||
@@ -209,26 +230,6 @@ public:
|
||||
// generations in a fully generational heap.
|
||||
CardTableRS* rem_set() { return _rem_set; }
|
||||
|
||||
public:
|
||||
// Set the saved marks of generations, if that makes sense.
|
||||
// In particular, if any generation might iterate over the oops
|
||||
// in other generations, it should call this method.
|
||||
void save_marks();
|
||||
|
||||
private:
|
||||
// Try to allocate space by expanding the heap.
|
||||
HeapWord* expand_heap_and_allocate(size_t size, bool is_tlab);
|
||||
|
||||
HeapWord* mem_allocate_cas_noexpand(size_t size, bool is_tlab);
|
||||
HeapWord* mem_allocate_work(size_t size, bool is_tlab);
|
||||
|
||||
MemoryPool* _eden_pool;
|
||||
MemoryPool* _survivor_pool;
|
||||
MemoryPool* _old_pool;
|
||||
|
||||
void initialize_serviceability() override;
|
||||
|
||||
public:
|
||||
static SerialHeap* heap();
|
||||
|
||||
SerialHeap();
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "gc/shared/ptrQueue.hpp"
|
||||
|
||||
PtrQueue::PtrQueue(PtrQueueSet* qset) :
|
||||
_index(0),
|
||||
_buf(nullptr)
|
||||
{}
|
||||
|
||||
PtrQueue::~PtrQueue() {
|
||||
assert(_buf == nullptr, "queue must be flushed before delete");
|
||||
}
|
||||
|
||||
size_t PtrQueue::current_capacity() const {
|
||||
if (_buf == nullptr) {
|
||||
return 0;
|
||||
} else {
|
||||
return BufferNode::make_node_from_buffer(_buf)->capacity();
|
||||
}
|
||||
}
|
||||
|
||||
PtrQueueSet::PtrQueueSet(BufferNode::Allocator* allocator) :
|
||||
_allocator(allocator)
|
||||
{}
|
||||
|
||||
PtrQueueSet::~PtrQueueSet() {}
|
||||
|
||||
void PtrQueueSet::reset_queue(PtrQueue& queue) {
|
||||
queue.set_index(queue.current_capacity());
|
||||
}
|
||||
|
||||
void PtrQueueSet::flush_queue(PtrQueue& queue) {
|
||||
void** buffer = queue.buffer();
|
||||
if (buffer != nullptr) {
|
||||
size_t index = queue.index();
|
||||
queue.set_buffer(nullptr);
|
||||
queue.set_index(0);
|
||||
BufferNode* node = BufferNode::make_node_from_buffer(buffer, index);
|
||||
if (index == node->capacity()) {
|
||||
deallocate_buffer(node);
|
||||
} else {
|
||||
enqueue_completed_buffer(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool PtrQueueSet::try_enqueue(PtrQueue& queue, void* value) {
|
||||
size_t index = queue.index();
|
||||
if (index == 0) return false;
|
||||
void** buffer = queue.buffer();
|
||||
assert(buffer != nullptr, "no buffer but non-zero index");
|
||||
buffer[--index] = value;
|
||||
queue.set_index(index);
|
||||
return true;
|
||||
}
|
||||
|
||||
void PtrQueueSet::retry_enqueue(PtrQueue& queue, void* value) {
|
||||
assert(queue.index() != 0, "precondition");
|
||||
assert(queue.buffer() != nullptr, "precondition");
|
||||
size_t index = queue.index();
|
||||
queue.buffer()[--index] = value;
|
||||
queue.set_index(index);
|
||||
}
|
||||
|
||||
BufferNode* PtrQueueSet::exchange_buffer_with_new(PtrQueue& queue) {
|
||||
BufferNode* node = nullptr;
|
||||
void** buffer = queue.buffer();
|
||||
if (buffer != nullptr) {
|
||||
node = BufferNode::make_node_from_buffer(buffer, queue.index());
|
||||
}
|
||||
install_new_buffer(queue);
|
||||
return node;
|
||||
}
|
||||
|
||||
void PtrQueueSet::install_new_buffer(PtrQueue& queue) {
|
||||
BufferNode* node = _allocator->allocate();
|
||||
queue.set_buffer(BufferNode::make_buffer_from_node(node));
|
||||
queue.set_index(node->capacity());
|
||||
}
|
||||
|
||||
void** PtrQueueSet::allocate_buffer() {
|
||||
BufferNode* node = _allocator->allocate();
|
||||
return BufferNode::make_buffer_from_node(node);
|
||||
}
|
||||
|
||||
void PtrQueueSet::deallocate_buffer(BufferNode* node) {
|
||||
_allocator->release(node);
|
||||
}
|
||||
@@ -1,168 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_GC_SHARED_PTRQUEUE_HPP
|
||||
#define SHARE_GC_SHARED_PTRQUEUE_HPP
|
||||
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/sizes.hpp"
|
||||
|
||||
// There are various techniques that require threads to be able to log
|
||||
// addresses. For example, a generational write barrier might log
|
||||
// the addresses of modified old-generation objects. This type supports
|
||||
// this operation.
|
||||
|
||||
class PtrQueueSet;
|
||||
class PtrQueue {
|
||||
friend class VMStructs;
|
||||
|
||||
NONCOPYABLE(PtrQueue);
|
||||
|
||||
// The (byte) index at which an object was last enqueued. Starts at
|
||||
// capacity (in bytes) (indicating an empty buffer) and goes towards zero.
|
||||
// Value is always pointer-size aligned.
|
||||
size_t _index;
|
||||
|
||||
static const size_t _element_size = sizeof(void*);
|
||||
|
||||
static size_t byte_index_to_index(size_t ind) {
|
||||
assert(is_aligned(ind, _element_size), "precondition");
|
||||
return ind / _element_size;
|
||||
}
|
||||
|
||||
static size_t index_to_byte_index(size_t ind) {
|
||||
return ind * _element_size;
|
||||
}
|
||||
|
||||
protected:
|
||||
// The buffer.
|
||||
void** _buf;
|
||||
|
||||
// Initialize this queue to contain a null buffer, and be part of the
|
||||
// given PtrQueueSet.
|
||||
PtrQueue(PtrQueueSet* qset);
|
||||
|
||||
// Requires queue flushed.
|
||||
~PtrQueue();
|
||||
|
||||
public:
|
||||
|
||||
void** buffer() const { return _buf; }
|
||||
void set_buffer(void** buffer) { _buf = buffer; }
|
||||
|
||||
size_t index() const {
|
||||
return byte_index_to_index(_index);
|
||||
}
|
||||
|
||||
void set_index(size_t new_index) {
|
||||
assert(new_index <= current_capacity(), "precondition");
|
||||
_index = index_to_byte_index(new_index);
|
||||
}
|
||||
|
||||
// Returns the capacity of the buffer, or 0 if the queue doesn't currently
|
||||
// have a buffer.
|
||||
size_t current_capacity() const;
|
||||
|
||||
bool is_empty() const { return index() == current_capacity(); }
|
||||
size_t size() const { return current_capacity() - index(); }
|
||||
|
||||
protected:
|
||||
// To support compiler.
|
||||
template<typename Derived>
|
||||
static ByteSize byte_offset_of_index() {
|
||||
return byte_offset_of(Derived, _index);
|
||||
}
|
||||
|
||||
static constexpr ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); }
|
||||
|
||||
template<typename Derived>
|
||||
static ByteSize byte_offset_of_buf() {
|
||||
return byte_offset_of(Derived, _buf);
|
||||
}
|
||||
|
||||
static ByteSize byte_width_of_buf() { return in_ByteSize(_element_size); }
|
||||
};
|
||||
|
||||
// A PtrQueueSet represents resources common to a set of pointer queues.
|
||||
// In particular, the individual queues allocate buffers from this shared
|
||||
// set, and return completed buffers to the set.
|
||||
class PtrQueueSet {
|
||||
BufferNode::Allocator* _allocator;
|
||||
|
||||
NONCOPYABLE(PtrQueueSet);
|
||||
|
||||
protected:
|
||||
// Create an empty ptr queue set.
|
||||
PtrQueueSet(BufferNode::Allocator* allocator);
|
||||
~PtrQueueSet();
|
||||
|
||||
// Discard any buffered enqueued data.
|
||||
void reset_queue(PtrQueue& queue);
|
||||
|
||||
// If queue has any buffered enqueued data, transfer it to this qset.
|
||||
// Otherwise, deallocate queue's buffer.
|
||||
void flush_queue(PtrQueue& queue);
|
||||
|
||||
// Add value to queue's buffer, returning true. If buffer is full
|
||||
// or if queue doesn't have a buffer, does nothing and returns false.
|
||||
bool try_enqueue(PtrQueue& queue, void* value);
|
||||
|
||||
// Add value to queue's buffer. The queue must have a non-full buffer.
|
||||
// Used after an initial try_enqueue has failed and the situation resolved.
|
||||
void retry_enqueue(PtrQueue& queue, void* value);
|
||||
|
||||
// Installs a new buffer into queue.
|
||||
// Returns the old buffer, or null if queue didn't have a buffer.
|
||||
BufferNode* exchange_buffer_with_new(PtrQueue& queue);
|
||||
|
||||
// Installs a new buffer into queue.
|
||||
void install_new_buffer(PtrQueue& queue);
|
||||
|
||||
public:
|
||||
|
||||
// Return the associated BufferNode allocator.
|
||||
BufferNode::Allocator* allocator() const { return _allocator; }
|
||||
|
||||
// Return the buffer for a BufferNode of size buffer_capacity().
|
||||
void** allocate_buffer();
|
||||
|
||||
// Return an empty buffer to the free list. The node is required
|
||||
// to have been allocated with a size of buffer_capacity().
|
||||
void deallocate_buffer(BufferNode* node);
|
||||
|
||||
// A completed buffer is a buffer the mutator is finished with, and
|
||||
// is ready to be processed by the collector. It need not be full.
|
||||
|
||||
// Adds node to the completed buffer list.
|
||||
virtual void enqueue_completed_buffer(BufferNode* node) = 0;
|
||||
|
||||
size_t buffer_capacity() const {
|
||||
return _allocator->buffer_capacity();
|
||||
}
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_SHARED_PTRQUEUE_HPP
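Both the deleted PtrQueue above and the SATBMarkQueue it is folded into keep an index that starts at the buffer capacity and counts down, so "full" is simply index == 0 and the offset can be handed straight to compiled-code barriers. A standalone sketch of that enqueue discipline (illustrative type, element-indexed where the real queue stores a byte index):

    #include <cstddef>
    #include <vector>

    struct DownwardQueue {
      std::vector<void*> buf;
      size_t index;                      // free slots remaining, counted down from capacity
      explicit DownwardQueue(size_t cap) : buf(cap), index(cap) {}
      bool try_enqueue(void* v) {
        if (index == 0) return false;    // full: caller must flush or install a new buffer
        buf[--index] = v;
        return true;
      }
      bool is_empty() const { return index == buf.size(); }
      size_t size() const { return buf.size() - index; }
    };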
|
||||
@@ -36,14 +36,19 @@
|
||||
#include "utilities/globalCounter.inline.hpp"
|
||||
|
||||
SATBMarkQueue::SATBMarkQueue(SATBMarkQueueSet* qset) :
|
||||
PtrQueue(qset),
|
||||
_buf(nullptr),
|
||||
_index(0),
|
||||
// SATB queues are only active during marking cycles. We create them
|
||||
// with their active field set to false. If a thread is created
|
||||
// during a cycle, its SATB queue needs to be activated before the
|
||||
// thread starts running. This is handled by the collector-specific
|
||||
// BarrierSet thread attachment protocol.
|
||||
_active(false)
|
||||
{ }
|
||||
{}
|
||||
|
||||
SATBMarkQueue::~SATBMarkQueue() {
|
||||
assert(_buf == nullptr, "queue must be flushed before delete");
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
// Helpful for debugging
|
||||
@@ -64,7 +69,7 @@ void SATBMarkQueue::print(const char* name) {
|
||||
#endif // PRODUCT
|
||||
|
||||
SATBMarkQueueSet::SATBMarkQueueSet(BufferNode::Allocator* allocator) :
|
||||
PtrQueueSet(allocator),
|
||||
_allocator(allocator),
|
||||
_list(),
|
||||
_count_and_process_flag(0),
|
||||
_process_completed_buffers_threshold(SIZE_MAX),
|
||||
@@ -214,13 +219,6 @@ bool SATBMarkQueueSet::apply_closure_to_completed_buffer(SATBBufferClosure* cl)
|
||||
}
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::flush_queue(SATBMarkQueue& queue) {
|
||||
// Filter now to possibly save work later. If filtering empties the
|
||||
// buffer then flush_queue can deallocate the buffer.
|
||||
filter(queue);
|
||||
PtrQueueSet::flush_queue(queue);
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::enqueue_known_active(SATBMarkQueue& queue, oop obj) {
|
||||
assert(queue.is_active(), "precondition");
|
||||
void* value = cast_from_oop<void*>(obj);
|
||||
@@ -355,3 +353,76 @@ void SATBMarkQueueSet::abandon_partial_marking() {
|
||||
} closure(*this);
|
||||
Threads::threads_do(&closure);
|
||||
}
|
||||
|
||||
size_t SATBMarkQueue::current_capacity() const {
|
||||
if (_buf == nullptr) {
|
||||
return 0;
|
||||
} else {
|
||||
return BufferNode::make_node_from_buffer(_buf)->capacity();
|
||||
}
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::reset_queue(SATBMarkQueue& queue) {
|
||||
queue.set_index(queue.current_capacity());
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::flush_queue(SATBMarkQueue& queue) {
|
||||
// Filter now to possibly save work later. If filtering empties the
|
||||
// buffer then flush_queue can deallocate the buffer.
|
||||
filter(queue);
|
||||
void** buffer = queue.buffer();
|
||||
if (buffer != nullptr) {
|
||||
size_t index = queue.index();
|
||||
queue.set_buffer(nullptr);
|
||||
queue.set_index(0);
|
||||
BufferNode* node = BufferNode::make_node_from_buffer(buffer, index);
|
||||
if (index == node->capacity()) {
|
||||
deallocate_buffer(node);
|
||||
} else {
|
||||
enqueue_completed_buffer(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SATBMarkQueueSet::try_enqueue(SATBMarkQueue& queue, void* value) {
|
||||
size_t index = queue.index();
|
||||
if (index == 0) return false;
|
||||
void** buffer = queue.buffer();
|
||||
assert(buffer != nullptr, "no buffer but non-zero index");
|
||||
buffer[--index] = value;
|
||||
queue.set_index(index);
|
||||
return true;
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::retry_enqueue(SATBMarkQueue& queue, void* value) {
|
||||
assert(queue.index() != 0, "precondition");
|
||||
assert(queue.buffer() != nullptr, "precondition");
|
||||
size_t index = queue.index();
|
||||
queue.buffer()[--index] = value;
|
||||
queue.set_index(index);
|
||||
}
|
||||
|
||||
BufferNode* SATBMarkQueueSet::exchange_buffer_with_new(SATBMarkQueue& queue) {
|
||||
BufferNode* node = nullptr;
|
||||
void** buffer = queue.buffer();
|
||||
if (buffer != nullptr) {
|
||||
node = BufferNode::make_node_from_buffer(buffer, queue.index());
|
||||
}
|
||||
install_new_buffer(queue);
|
||||
return node;
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::install_new_buffer(SATBMarkQueue& queue) {
|
||||
BufferNode* node = _allocator->allocate();
|
||||
queue.set_buffer(BufferNode::make_buffer_from_node(node));
|
||||
queue.set_index(node->capacity());
|
||||
}
|
||||
|
||||
void** SATBMarkQueueSet::allocate_buffer() {
|
||||
BufferNode* node = _allocator->allocate();
|
||||
return BufferNode::make_buffer_from_node(node);
|
||||
}
|
||||
|
||||
void SATBMarkQueueSet::deallocate_buffer(BufferNode* node) {
|
||||
_allocator->release(node);
|
||||
}
|
||||
|
||||
@@ -25,11 +25,15 @@
|
||||
#ifndef SHARE_GC_SHARED_SATBMARKQUEUE_HPP
|
||||
#define SHARE_GC_SHARED_SATBMARKQUEUE_HPP
|
||||
|
||||
#include "gc/shared/ptrQueue.hpp"
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "memory/allocation.hpp"
|
||||
#include "memory/padded.hpp"
|
||||
#include "oops/oopsHierarchy.hpp"
|
||||
#include "runtime/atomic.hpp"
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/sizes.hpp"
|
||||
|
||||
class Thread;
|
||||
class Monitor;
|
||||
@@ -45,12 +49,33 @@ public:
|
||||
virtual void do_buffer(void** buffer, size_t size) = 0;
|
||||
};
|
||||
|
||||
// A PtrQueue whose elements are (possibly stale) pointers to object heads.
|
||||
class SATBMarkQueue: public PtrQueue {
|
||||
// A queue whose elements are (possibly stale) pointers to object heads.
|
||||
class SATBMarkQueue {
|
||||
friend class VMStructs;
|
||||
friend class SATBMarkQueueSet;
|
||||
|
||||
private:
|
||||
NONCOPYABLE(SATBMarkQueue);
|
||||
|
||||
// The buffer.
|
||||
void** _buf;
|
||||
|
||||
// The (byte) index at which an object was last enqueued. Starts at
|
||||
// capacity (in bytes) (indicating an empty buffer) and goes towards zero.
|
||||
// Value is always pointer-size aligned.
|
||||
size_t _index;
|
||||
|
||||
static const size_t _element_size = sizeof(void*);
|
||||
|
||||
static size_t byte_index_to_index(size_t ind) {
|
||||
assert(is_aligned(ind, _element_size), "precondition");
|
||||
return ind / _element_size;
|
||||
}
|
||||
|
||||
static size_t index_to_byte_index(size_t ind) {
|
||||
return ind * _element_size;
|
||||
}
|
||||
|
||||
// Per-queue (so thread-local) cache of the SATBMarkQueueSet's
|
||||
// active state, to support inline barriers in compiled code.
|
||||
bool _active;
|
||||
@@ -58,6 +83,29 @@ private:
|
||||
public:
|
||||
SATBMarkQueue(SATBMarkQueueSet* qset);
|
||||
|
||||
// Queue must be flushed
|
||||
~SATBMarkQueue();
|
||||
|
||||
void** buffer() const { return _buf; }
|
||||
|
||||
void set_buffer(void** buffer) { _buf = buffer; }
|
||||
|
||||
size_t index() const {
|
||||
return byte_index_to_index(_index);
|
||||
}
|
||||
|
||||
void set_index(size_t new_index) {
|
||||
assert(new_index <= current_capacity(), "precondition");
|
||||
_index = index_to_byte_index(new_index);
|
||||
}
|
||||
|
||||
// Returns the capacity of the buffer, or 0 if the queue doesn't currently
|
||||
// have a buffer.
|
||||
size_t current_capacity() const;
|
||||
|
||||
bool is_empty() const { return index() == current_capacity(); }
|
||||
size_t size() const { return current_capacity() - index(); }
|
||||
|
||||
bool is_active() const { return _active; }
|
||||
void set_active(bool value) { _active = value; }
|
||||
|
||||
@@ -68,14 +116,16 @@ public:
|
||||
|
||||
// Compiler support.
|
||||
static ByteSize byte_offset_of_index() {
|
||||
return PtrQueue::byte_offset_of_index<SATBMarkQueue>();
|
||||
return byte_offset_of(SATBMarkQueue, _index);
|
||||
}
|
||||
using PtrQueue::byte_width_of_index;
|
||||
|
||||
static constexpr ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); }
|
||||
|
||||
static ByteSize byte_offset_of_buf() {
|
||||
return PtrQueue::byte_offset_of_buf<SATBMarkQueue>();
|
||||
return byte_offset_of(SATBMarkQueue, _buf);
|
||||
}
|
||||
using PtrQueue::byte_width_of_buf;
|
||||
|
||||
static ByteSize byte_width_of_buf() { return in_ByteSize(_element_size); }
|
||||
|
||||
static ByteSize byte_offset_of_active() {
|
||||
return byte_offset_of(SATBMarkQueue, _active);
|
||||
@@ -84,7 +134,18 @@ public:
|
||||
static ByteSize byte_width_of_active() { return in_ByteSize(sizeof(bool)); }
|
||||
};
|
||||
|
||||
class SATBMarkQueueSet: public PtrQueueSet {
|
||||
|
||||
// A SATBMarkQueueSet represents resources common to a set of SATBMarkQueues.
|
||||
// In particular, the individual queues allocate buffers from this shared
|
||||
// set, and return completed buffers to the set.
|
||||
// A completed buffer is a buffer the mutator is finished with, and
|
||||
// is ready to be processed by the collector. It need not be full.
|
||||
|
||||
class SATBMarkQueueSet {
|
||||
|
||||
BufferNode::Allocator* _allocator;
|
||||
|
||||
NONCOPYABLE(SATBMarkQueueSet);
|
||||
|
||||
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, 0);
|
||||
PaddedEnd<BufferNode::Stack> _list;
|
||||
@@ -99,6 +160,24 @@ class SATBMarkQueueSet: public PtrQueueSet {
|
||||
BufferNode* get_completed_buffer();
|
||||
void abandon_completed_buffers();
|
||||
|
||||
// Discard any buffered enqueued data.
|
||||
void reset_queue(SATBMarkQueue& queue);
|
||||
|
||||
// Add value to queue's buffer, returning true. If buffer is full
|
||||
// or if queue doesn't have a buffer, does nothing and returns false.
|
||||
bool try_enqueue(SATBMarkQueue& queue, void* value);
|
||||
|
||||
// Add value to queue's buffer. The queue must have a non-full buffer.
|
||||
// Used after an initial try_enqueue has failed and the situation resolved.
|
||||
void retry_enqueue(SATBMarkQueue& queue, void* value);
|
||||
|
||||
// Installs a new buffer into queue.
|
||||
// Returns the old buffer, or null if queue didn't have a buffer.
|
||||
BufferNode* exchange_buffer_with_new(SATBMarkQueue& queue);
|
||||
|
||||
// Installs a new buffer into queue.
|
||||
void install_new_buffer(SATBMarkQueue& queue);
|
||||
|
||||
#ifdef ASSERT
|
||||
void dump_active_states(bool expected_active);
|
||||
void verify_active_states(bool expected_active);
|
||||
@@ -106,6 +185,7 @@ class SATBMarkQueueSet: public PtrQueueSet {
|
||||
|
||||
protected:
|
||||
SATBMarkQueueSet(BufferNode::Allocator* allocator);
|
||||
|
||||
~SATBMarkQueueSet();
|
||||
|
||||
void handle_zero_index(SATBMarkQueue& queue);
|
||||
@@ -131,6 +211,7 @@ public:
|
||||
void set_process_completed_buffers_threshold(size_t value);
|
||||
|
||||
size_t buffer_enqueue_threshold() const { return _buffer_enqueue_threshold; }
|
||||
|
||||
void set_buffer_enqueue_threshold_percentage(uint value);
|
||||
|
||||
// If there exists some completed buffer, pop and process it, and
|
||||
@@ -144,7 +225,7 @@ public:
|
||||
// Add obj to queue. This qset and the queue must be active.
|
||||
void enqueue_known_active(SATBMarkQueue& queue, oop obj);
|
||||
virtual void filter(SATBMarkQueue& queue) = 0;
|
||||
virtual void enqueue_completed_buffer(BufferNode* node);
|
||||
void enqueue_completed_buffer(BufferNode* node);
|
||||
|
||||
// The number of buffers in the list. Racy and not updated atomically
|
||||
// with the set of completed buffers.
|
||||
@@ -157,6 +238,20 @@ public:
|
||||
return (_count_and_process_flag.load_relaxed() & 1) != 0;
|
||||
}
|
||||
|
||||
// Return the associated BufferNode allocator.
|
||||
BufferNode::Allocator* allocator() const { return _allocator; }
|
||||
|
||||
// Return the buffer for a BufferNode of size buffer_capacity().
|
||||
void** allocate_buffer();
|
||||
|
||||
// Return an empty buffer to the free list. The node is required
|
||||
// to have been allocated with a size of buffer_capacity().
|
||||
void deallocate_buffer(BufferNode* node);
|
||||
|
||||
size_t buffer_capacity() const {
|
||||
return _allocator->buffer_capacity();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
// Helpful for debugging
|
||||
void print_all(const char* msg);
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "utilities/copy.hpp"
|
||||
|
||||
size_t ThreadLocalAllocBuffer::_max_size = 0;
|
||||
int ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0;
|
||||
unsigned int ThreadLocalAllocBuffer::_target_refills = 0;
|
||||
|
||||
ThreadLocalAllocBuffer::ThreadLocalAllocBuffer() :
|
||||
@@ -224,6 +225,30 @@ void ThreadLocalAllocBuffer::startup_initialization() {
|
||||
// abort during VM initialization.
|
||||
_target_refills = MAX2(_target_refills, 2U);
|
||||
|
||||
#ifdef COMPILER2
|
||||
// If the C2 compiler is present, extra space is needed at the end of
|
||||
// TLABs, otherwise prefetching instructions generated by the C2
|
||||
// compiler will fault (due to accessing memory outside of heap).
|
||||
// The amount of space is the max of the number of lines to
|
||||
// prefetch for array and for instance allocations. (Extra space must be
|
||||
// reserved to accommodate both types of allocations.)
|
||||
//
|
||||
// Only SPARC-specific BIS instructions are known to fault. (Those
|
||||
// instructions are generated if AllocatePrefetchStyle==3 and
|
||||
// AllocatePrefetchInstr==1). To be on the safe side, however,
|
||||
// extra space is reserved for all combinations of
|
||||
// AllocatePrefetchStyle and AllocatePrefetchInstr.
|
||||
//
|
||||
// If the C2 compiler is not present, no space is reserved.
|
||||
|
||||
// +1 for rounding up to next cache line, +1 to be safe
|
||||
if (CompilerConfig::is_c2_or_jvmci_compiler_enabled()) {
|
||||
int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
|
||||
_reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) /
|
||||
(int)HeapWordSize;
|
||||
}
|
||||
#endif
|
||||
|
||||
// During jvm startup, the main thread is initialized
|
||||
// before the heap is initialized. So reinitialize it now.
|
||||
guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
|
||||
@@ -429,7 +454,8 @@ void ThreadLocalAllocStats::publish() {
|
||||
}
|
||||
|
||||
size_t ThreadLocalAllocBuffer::end_reserve() {
|
||||
return CollectedHeap::lab_alignment_reserve();
|
||||
size_t reserve_size = CollectedHeap::lab_alignment_reserve();
|
||||
return MAX2(reserve_size, (size_t)_reserve_for_allocation_prefetch);
|
||||
}
|
||||
|
||||
const HeapWord* ThreadLocalAllocBuffer::start_relaxed() const {
|
||||
|
||||
@@ -58,6 +58,7 @@ private:
|
||||
size_t _allocated_before_last_gc; // total bytes allocated up until the last gc
|
||||
|
||||
static size_t _max_size; // maximum size of any TLAB
|
||||
static int _reserve_for_allocation_prefetch; // Reserve at the end of the TLAB
|
||||
static unsigned _target_refills; // expected number of refills between GCs
|
||||
|
||||
unsigned _number_of_refills;
|
||||
|
||||
@@ -1394,7 +1394,7 @@ void ShenandoahBarrierC2Support::pin_and_expand(PhaseIdealLoop* phase) {
|
||||
}
|
||||
if (addr->Opcode() == Op_AddP) {
|
||||
Node* orig_base = addr->in(AddPNode::Base);
|
||||
Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), ConstraintCastNode::StrongDependency);
|
||||
Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), ConstraintCastNode::DependencyType::NonFloatingNarrowing);
|
||||
phase->register_new_node(base, ctrl);
|
||||
if (addr->in(AddPNode::Base) == addr->in((AddPNode::Address))) {
|
||||
// Field access
|
||||
|
||||
@@ -104,7 +104,7 @@ void ShenandoahGenerationalHeuristics::choose_collection_set(ShenandoahCollectio
|
||||
// Note that for GLOBAL GC, region may be OLD, and OLD regions do not qualify for pre-selection
|
||||
|
||||
// This region is old enough to be promoted but it was not preselected, either because its garbage is below
|
||||
// ShenandoahOldGarbageThreshold so it will be promoted in place, or because there is not sufficient room
|
||||
// old garbage threshold so it will be promoted in place, or because there is not sufficient room
|
||||
// in old gen to hold the evacuated copies of this region's live data. In both cases, we choose not to
|
||||
// place this region into the collection set.
|
||||
if (region->get_top_before_promote() != nullptr) {
|
||||
|
||||
@@ -71,7 +71,8 @@ ShenandoahOldHeuristics::ShenandoahOldHeuristics(ShenandoahOldGeneration* genera
|
||||
_growth_trigger(false),
|
||||
_fragmentation_density(0.0),
|
||||
_fragmentation_first_old_region(0),
|
||||
_fragmentation_last_old_region(0)
|
||||
_fragmentation_last_old_region(0),
|
||||
_old_garbage_threshold(ShenandoahOldGarbageThreshold)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -373,7 +374,8 @@ void ShenandoahOldHeuristics::prepare_for_old_collections() {
|
||||
}
|
||||
}
|
||||
|
||||
_old_generation->set_live_bytes_after_last_mark(live_data);
|
||||
// TODO: subtract from live_data bytes promoted during concurrent GC.
|
||||
_old_generation->set_live_bytes_at_last_mark(live_data);
|
||||
|
||||
// Unlike young, we are more interested in efficiently packing OLD-gen than in reclaiming garbage first. We sort by live-data.
|
||||
// Some regular regions may have been promoted in place with no garbage but also with very little live data. When we "compact"
|
||||
@@ -385,7 +387,7 @@ void ShenandoahOldHeuristics::prepare_for_old_collections() {
|
||||
const size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes();
|
||||
|
||||
// The convention is to collect regions that have more than this amount of garbage.
|
||||
const size_t garbage_threshold = region_size_bytes * ShenandoahOldGarbageThreshold / 100;
|
||||
const size_t garbage_threshold = region_size_bytes * get_old_garbage_threshold() / 100;
|
||||
|
||||
// Enlightened interpretation: collect regions that have less than this amount of live.
|
||||
const size_t live_threshold = region_size_bytes - garbage_threshold;
|
||||
@@ -655,6 +657,7 @@ bool ShenandoahOldHeuristics::should_start_gc() {
|
||||
const double percent = percent_of(old_gen_capacity, heap_capacity);
|
||||
log_trigger("Expansion failure, current size: %zu%s which is %.1f%% of total heap size",
|
||||
byte_size_in_proper_unit(old_gen_capacity), proper_unit_for_byte_size(old_gen_capacity), percent);
|
||||
adjust_old_garbage_threshold();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -677,6 +680,7 @@ bool ShenandoahOldHeuristics::should_start_gc() {
|
||||
"%zu to %zu (%zu), density: %.1f%%",
|
||||
byte_size_in_proper_unit(fragmented_free), proper_unit_for_byte_size(fragmented_free),
|
||||
first_old_region, last_old_region, span_of_old_regions, density * 100);
|
||||
adjust_old_garbage_threshold();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -699,12 +703,13 @@ bool ShenandoahOldHeuristics::should_start_gc() {
|
||||
consecutive_young_cycles);
|
||||
_growth_trigger = false;
|
||||
} else if (current_usage > trigger_threshold) {
|
||||
const size_t live_at_previous_old = _old_generation->get_live_bytes_after_last_mark();
|
||||
const size_t live_at_previous_old = _old_generation->get_live_bytes_at_last_mark();
|
||||
const double percent_growth = percent_of(current_usage - live_at_previous_old, live_at_previous_old);
|
||||
log_trigger("Old has overgrown, live at end of previous OLD marking: "
|
||||
"%zu%s, current usage: %zu%s, percent growth: %.1f%%",
|
||||
byte_size_in_proper_unit(live_at_previous_old), proper_unit_for_byte_size(live_at_previous_old),
|
||||
byte_size_in_proper_unit(current_usage), proper_unit_for_byte_size(current_usage), percent_growth);
|
||||
adjust_old_garbage_threshold();
|
||||
return true;
|
||||
} else {
|
||||
// Mixed evacuations have decreased current_usage such that old-growth trigger is no longer relevant.
|
||||
@@ -713,7 +718,41 @@ bool ShenandoahOldHeuristics::should_start_gc() {
|
||||
}
|
||||
|
||||
// Otherwise, defer to inherited heuristic for gc trigger.
|
||||
return this->ShenandoahHeuristics::should_start_gc();
|
||||
bool result = this->ShenandoahHeuristics::should_start_gc();
|
||||
if (result) {
|
||||
adjust_old_garbage_threshold();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void ShenandoahOldHeuristics::adjust_old_garbage_threshold() {
|
||||
const uintx MinimumOldGarbageThreshold = 10;
|
||||
const uintx InterventionPercentage = 50;
|
||||
|
||||
const ShenandoahHeap* heap = ShenandoahHeap::heap();
|
||||
size_t old_regions_size = _old_generation->used_regions_size();
|
||||
size_t soft_max_size = heap->soft_max_capacity();
|
||||
uintx percent_used = (uintx) ((old_regions_size * 100) / soft_max_size);
|
||||
_old_garbage_threshold = ShenandoahOldGarbageThreshold;
|
||||
if (percent_used > InterventionPercentage) {
|
||||
uintx severity = percent_used - InterventionPercentage; // ranges from 0 to 50
|
||||
if (MinimumOldGarbageThreshold < ShenandoahOldGarbageThreshold) {
|
||||
uintx adjustment_potential = ShenandoahOldGarbageThreshold - MinimumOldGarbageThreshold;
|
||||
// With default values:
|
||||
// if percent_used > 80, garbage_threshold is 10
|
||||
// else if percent_used > 65, garbage_threshold is 15
|
||||
// else if percent_used > 50, garbage_threshold is 20
|
||||
if (severity > 30) {
|
||||
_old_garbage_threshold = ShenandoahOldGarbageThreshold - adjustment_potential;
|
||||
} else if (severity > 15) {
|
||||
_old_garbage_threshold = ShenandoahOldGarbageThreshold - 2 * adjustment_potential / 3;
|
||||
} else {
|
||||
_old_garbage_threshold = ShenandoahOldGarbageThreshold - adjustment_potential / 3;
|
||||
}
|
||||
log_info(gc)("Adjusting old garbage threshold to %lu because Old Generation used regions represents %lu%% of heap",
|
||||
_old_garbage_threshold, percent_used);
|
||||
}
|
||||
}
|
||||
}
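
// Illustrative restatement (not part of this patch): the tiering implemented above, as a
// self-contained function. With the default ShenandoahOldGarbageThreshold of 25 and the
// local MinimumOldGarbageThreshold of 10, adjustment_potential is 15, so:
//   percent_used > 80  ->  25 - 15      = 10
//   percent_used > 65  ->  25 - 2*15/3  = 15
//   percent_used > 50  ->  25 - 15/3    = 20
//   otherwise          ->  25 (unchanged)
#include <cstdint>

static uint64_t adjusted_old_garbage_threshold_sketch(uint64_t percent_used,
                                                      uint64_t default_threshold = 25,
                                                      uint64_t minimum_threshold = 10) {
  uint64_t threshold = default_threshold;
  if (percent_used > 50 && minimum_threshold < default_threshold) {
    const uint64_t severity  = percent_used - 50;                      // mirrors 'severity' above
    const uint64_t potential = default_threshold - minimum_threshold;  // 15 with defaults
    if (severity > 30) {
      threshold = default_threshold - potential;
    } else if (severity > 15) {
      threshold = default_threshold - 2 * potential / 3;
    } else {
      threshold = default_threshold - potential / 3;
    }
  }
  return threshold;
}
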
void ShenandoahOldHeuristics::record_success_concurrent() {
|
||||
|
||||
@@ -102,6 +102,17 @@ private:
|
||||
size_t _fragmentation_first_old_region;
|
||||
size_t _fragmentation_last_old_region;
|
||||
|
||||
// The value of command-line argument ShenandoahOldGarbageThreshold represents the percent of garbage that must
// be present within an old-generation region before that region is considered a good candidate for inclusion in
// the collection set under normal circumstances. For our purposes, normal circumstances are when the memory consumed
// by the old generation is less than 50% of the soft heap capacity. When the old generation grows beyond the 50%
// threshold, we dynamically adjust the old garbage threshold, allowing us to invest in packing the old generation
// more tightly so that more memory can be made available to the more frequent young GC cycles. This variable
// is used in place of ShenandoahOldGarbageThreshold. Under normal circumstances, its value is equal to
// ShenandoahOldGarbageThreshold. When the GC is under duress, this value may be adjusted to a smaller value,
// as scaled according to the severity of duress that we are experiencing.
uintx _old_garbage_threshold;

// Compare by live is used to prioritize compaction of old-gen regions. With old-gen compaction, the goal is
|
||||
// to tightly pack long-lived objects into available regions. In most cases, there has not been an accumulation
|
||||
// of garbage within old-gen regions. The more likely opportunity will be to combine multiple sparsely populated
|
||||
@@ -200,9 +211,28 @@ public:
|
||||
|
||||
bool is_experimental() override;
|
||||
|
||||
// Returns the current value of a dynamically adjusted threshold percentage of garbage above which an old region is
|
||||
// deemed eligible for evacuation.
|
||||
inline uintx get_old_garbage_threshold() { return _old_garbage_threshold; }
|
||||
|
||||
private:
|
||||
void slide_pinned_regions_to_front();
|
||||
bool all_candidates_are_pinned();
|
||||
|
||||
// The normal old_garbage_threshold is specified by ShenandoahOldGarbageThreshold command-line argument, with default
// value 25, denoting that a region that has at least 25% garbage is eligible for evacuation. With default values for
// all command-line arguments, we make the following adjustments:
// 1. If the old generation has grown to consume more than 80% of the soft max capacity, adjust threshold to 10%
// 2. Otherwise, if the old generation has grown to consume more than 65%, adjust threshold to 15%
// 3. Otherwise, if the old generation has grown to consume more than 50%, adjust threshold to 20%
// The effect is to compact the old generation more aggressively as the old generation consumes larger percentages
// of the available heap memory. In these circumstances, we pack the old generation more tightly in order to make
// more memory available to the young generation so that the more frequent young collections can operate more
// efficiently.
//
// If the ShenandoahOldGarbageThreshold is specified on the command line, the effect of adjusting the old garbage
// threshold is scaled linearly.
void adjust_old_garbage_threshold();
};

#endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHOLDHEURISTICS_HPP
|
||||
|
||||
@@ -83,16 +83,15 @@ public:
|
||||
return "PLAB";
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// When ShenandoahElasticTLAB is enabled, the request cannot be made smaller than _min_size.
|
||||
size_t _min_size;
|
||||
size_t const _min_size;
|
||||
|
||||
// The size of the request in words.
|
||||
size_t _requested_size;
|
||||
size_t const _requested_size;
|
||||
|
||||
// The allocation may be increased for padding or decreased to fit in the remaining space of a region.
|
||||
size_t _actual_size;
|
||||
@@ -104,7 +103,7 @@ private:
|
||||
size_t _waste;
|
||||
|
||||
// This is the type of the request.
|
||||
Type _alloc_type;
|
||||
Type const _alloc_type;
|
||||
|
||||
#ifdef ASSERT
|
||||
// Check that this is set before being read.
|
||||
@@ -209,6 +208,10 @@ public:
|
||||
return (_alloc_type & bit_old_alloc) == 0;
|
||||
}
|
||||
|
||||
inline bool is_cds() const {
|
||||
return _alloc_type == _alloc_cds;
|
||||
}
|
||||
|
||||
inline ShenandoahAffiliation affiliation() const {
|
||||
return (_alloc_type & bit_old_alloc) == 0 ? YOUNG_GENERATION : OLD_GENERATION ;
|
||||
}
|
||||
|
||||
@@ -128,8 +128,8 @@ public:
|
||||
void write_ref_array(HeapWord* start, size_t count);
|
||||
|
||||
private:
|
||||
template <class T>
|
||||
inline void arraycopy_marking(T* dst, size_t count);
|
||||
template <bool IS_GENERATIONAL, class T>
|
||||
void arraycopy_marking(T* dst, size_t count);
|
||||
template <class T>
|
||||
inline void arraycopy_evacuation(T* src, size_t count);
|
||||
template <class T>
|
||||
|
||||
@@ -429,7 +429,11 @@ void ShenandoahBarrierSet::arraycopy_barrier(T* src, T* dst, size_t count) {
|
||||
// If marking old or young, we must evaluate the SATB barrier. This will be the only
|
||||
// action if we are not marking old. If we are marking old, we must still evaluate the
|
||||
// load reference barrier for a young collection.
|
||||
arraycopy_marking(dst, count);
|
||||
if (_heap->mode()->is_generational()) {
|
||||
arraycopy_marking<true>(dst, count);
|
||||
} else {
|
||||
arraycopy_marking<false>(dst, count);
|
||||
}
|
||||
}
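
// Minimal sketch (an assumption, not this file's code) of why the call above dispatches on a
// template bool: the generational check is made once, outside the copy loop, and inside the
// templated body `if (IS_GENERATIONAL)` is a compile-time constant, so the unused branch is
// removed by the compiler. Names below are invented for illustration.
#include <cstddef>

template <bool IS_GENERATIONAL>
static void copy_barrier_sketch(int* dst, size_t count) {
  for (size_t i = 0; i < count; i++) {
    if (IS_GENERATIONAL) {
      // old-gen specific work would go here; compiled away when IS_GENERATIONAL is false
    }
    // common SATB-style work on dst[i] would go here
    (void) dst[i];
  }
}
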
|
||||
|
||||
if ((gc_state & ShenandoahHeap::EVACUATION) != 0) {
|
||||
@@ -441,11 +445,12 @@ void ShenandoahBarrierSet::arraycopy_barrier(T* src, T* dst, size_t count) {
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
template <bool IS_GENERATIONAL, class T>
|
||||
void ShenandoahBarrierSet::arraycopy_marking(T* dst, size_t count) {
|
||||
assert(_heap->is_concurrent_mark_in_progress(), "only during marking");
|
||||
if (ShenandoahSATBBarrier) {
|
||||
if (!_heap->marking_context()->allocated_after_mark_start(reinterpret_cast<HeapWord*>(dst))) {
|
||||
if (!_heap->marking_context()->allocated_after_mark_start(reinterpret_cast<HeapWord*>(dst)) ||
|
||||
(IS_GENERATIONAL && _heap->heap_region_containing(dst)->is_old() && _heap->is_concurrent_young_mark_in_progress())) {
|
||||
arraycopy_work<T, false, false, true>(dst, count);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,13 +144,12 @@ public:
|
||||
{
|
||||
ShenandoahReentrantLocker locker(nm_data->lock());
|
||||
|
||||
// Heal oops and disarm
|
||||
// Heal oops
|
||||
if (_bs->is_armed(nm)) {
|
||||
ShenandoahEvacOOMScope oom_evac_scope;
|
||||
ShenandoahNMethod::heal_nmethod_metadata(nm_data);
|
||||
// Code cache unloading needs to know about on-stack nmethods. Arm the nmethods to get
|
||||
// mark_as_maybe_on_stack() callbacks when they are used again.
|
||||
_bs->arm(nm);
|
||||
// Must remain armed to complete remaining work in nmethod entry barrier
|
||||
assert(_bs->is_armed(nm), "Should remain armed");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1369,7 +1369,7 @@ template<typename Iter>
|
||||
HeapWord* ShenandoahFreeSet::allocate_from_regions(Iter& iterator, ShenandoahAllocRequest &req, bool &in_new_region) {
|
||||
for (idx_t idx = iterator.current(); iterator.has_next(); idx = iterator.next()) {
|
||||
ShenandoahHeapRegion* r = _heap->get_region(idx);
|
||||
size_t min_size = (req.type() == ShenandoahAllocRequest::_alloc_tlab) ? req.min_size() : req.size();
|
||||
size_t min_size = req.is_lab_alloc() ? req.min_size() : req.size();
|
||||
if (alloc_capacity(r) >= min_size * HeapWordSize) {
|
||||
HeapWord* result = try_allocate_in(r, req, in_new_region);
|
||||
if (result != nullptr) {
|
||||
@@ -1501,7 +1501,7 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
|
||||
|
||||
if (in_new_region) {
|
||||
log_debug(gc, free)("Using new region (%zu) for %s (" PTR_FORMAT ").",
|
||||
r->index(), ShenandoahAllocRequest::alloc_type_to_string(req.type()), p2i(&req));
|
||||
r->index(), req.type_string(), p2i(&req));
|
||||
assert(!r->is_affiliated(), "New region %zu should be unaffiliated", r->index());
|
||||
r->set_affiliation(req.affiliation());
|
||||
if (r->is_old()) {
|
||||
@@ -1520,7 +1520,7 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
|
||||
assert(ctx->is_bitmap_range_within_region_clear(ctx->top_bitmap(r), r->end()), "Bitmap above top_bitmap() must be clear");
|
||||
#endif
|
||||
log_debug(gc, free)("Using new region (%zu) for %s (" PTR_FORMAT ").",
|
||||
r->index(), ShenandoahAllocRequest::alloc_type_to_string(req.type()), p2i(&req));
|
||||
r->index(), req.type_string(), p2i(&req));
|
||||
} else {
|
||||
assert(r->is_affiliated(), "Region %zu that is not new should be affiliated", r->index());
|
||||
if (r->affiliation() != req.affiliation()) {
|
||||
@@ -1534,8 +1534,8 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
|
||||
if (req.is_lab_alloc()) {
|
||||
size_t adjusted_size = req.size();
|
||||
size_t free = r->free(); // free represents bytes available within region r
|
||||
if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
|
||||
// This is a PLAB allocation
|
||||
if (req.is_old()) {
|
||||
// This is a PLAB allocation (lab alloc in old gen)
|
||||
assert(_heap->mode()->is_generational(), "PLABs are only for generational mode");
|
||||
assert(_partitions.in_free_set(ShenandoahFreeSetPartitionId::OldCollector, r->index()),
|
||||
"PLABS must be allocated in old_collector_free regions");
|
||||
@@ -1596,8 +1596,6 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
|
||||
r->set_update_watermark(r->top());
|
||||
if (r->is_old()) {
|
||||
_partitions.increase_used(ShenandoahFreeSetPartitionId::OldCollector, (req.actual_size() + req.waste()) * HeapWordSize);
|
||||
assert(req.type() != ShenandoahAllocRequest::_alloc_gclab, "old-gen allocations use PLAB or shared allocation");
|
||||
// for plabs, we'll sort the difference between evac and promotion usage when we retire the plab
|
||||
} else {
|
||||
_partitions.increase_used(ShenandoahFreeSetPartitionId::Collector, (req.actual_size() + req.waste()) * HeapWordSize);
|
||||
}
|
||||
|
||||
@@ -505,10 +505,10 @@ inline void assert_no_in_place_promotions() {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Preselect for inclusion into the collection set regions whose age is at or above tenure age which contain more than
|
||||
// ShenandoahOldGarbageThreshold amounts of garbage. We identify these regions by setting the appropriate entry of
|
||||
// the collection set's preselected regions array to true. All entries are initialized to false before calling this
|
||||
// function.
|
||||
// Preselect for inclusion into the collection set all regions whose age is at or above tenure age and for which the
|
||||
// garbage percentage exceeds a dynamically adjusted threshold (known as the old-garbage threshold percentage). We
|
||||
// identify these regions by setting the appropriate entry of the collection set's preselected regions array to true.
|
||||
// All entries are initialized to false before calling this function.
|
||||
//
|
||||
// During the subsequent selection of the collection set, we give priority to these promotion set candidates.
|
||||
// Without this prioritization, we found that the aged regions tend to be ignored because they typically have
|
||||
@@ -531,7 +531,8 @@ size_t ShenandoahGeneration::select_aged_regions(const size_t old_promotion_rese
|
||||
bool* const candidate_regions_for_promotion_by_copy = heap->collection_set()->preselected_regions();
|
||||
ShenandoahMarkingContext* const ctx = heap->marking_context();
|
||||
|
||||
const size_t old_garbage_threshold = (ShenandoahHeapRegion::region_size_bytes() * ShenandoahOldGarbageThreshold) / 100;
|
||||
const size_t old_garbage_threshold =
|
||||
(ShenandoahHeapRegion::region_size_bytes() * heap->old_generation()->heuristics()->get_old_garbage_threshold()) / 100;
|
||||
|
||||
const size_t pip_used_threshold = (ShenandoahHeapRegion::region_size_bytes() * ShenandoahGenerationalMinPIPUsage) / 100;
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ private:
|
||||
// garbage-dense regions, including those that satisfy criteria 1 & 2 below,
|
||||
// and whose live bytes will fit within old_available budget:
|
||||
// Criterion 1. region age >= tenuring threshold
|
||||
// Criterion 2. region garbage percentage > ShenandoahOldGarbageThreshold
|
||||
// Criterion 2. region garbage percentage > old garbage threshold
|
||||
//
|
||||
// Identifies regions eligible for promotion in place,
|
||||
// being those of at least tenuring_threshold age that have lower garbage
|
||||
|
||||
@@ -145,7 +145,7 @@ void ShenandoahGenerationalEvacuationTask::maybe_promote_region(ShenandoahHeapRe
|
||||
// triggers the load-reference barrier (LRB) to copy on reference fetch.
|
||||
//
|
||||
// Aged humongous continuation regions are handled with their start region. If an aged regular region has
|
||||
// more garbage than ShenandoahOldGarbageThreshold, we'll promote by evacuation. If there is room for evacuation
|
||||
// more garbage than the old garbage threshold, we'll promote by evacuation. If there is room for evacuation
|
||||
// in this cycle, the region will be in the collection set. If there is not room, the region will be promoted
|
||||
// by evacuation in some future GC cycle.
|
||||
|
||||
@@ -177,7 +177,8 @@ void ShenandoahGenerationalEvacuationTask::promote_in_place(ShenandoahHeapRegion
|
||||
size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes();
|
||||
|
||||
{
|
||||
const size_t old_garbage_threshold = (region_size_bytes * ShenandoahOldGarbageThreshold) / 100;
|
||||
const size_t old_garbage_threshold =
|
||||
(region_size_bytes * _heap->old_generation()->heuristics()->get_old_garbage_threshold()) / 100;
|
||||
assert(!_heap->is_concurrent_old_mark_in_progress(), "Cannot promote in place during old marking");
|
||||
assert(region->garbage_before_padded_for_promote() < old_garbage_threshold,
|
||||
"Region %zu has too much garbage for promotion", region->index());
|
||||
|
||||
@@ -83,7 +83,7 @@ void ShenandoahGenerationalFullGC::handle_completion(ShenandoahHeap* heap) {
|
||||
assert_usage_not_more_than_regions_used(young);
|
||||
|
||||
// Establish baseline for next old-has-grown trigger.
|
||||
old->set_live_bytes_after_last_mark(old->used());
|
||||
old->set_live_bytes_at_last_mark(old->used());
|
||||
}
|
||||
|
||||
void ShenandoahGenerationalFullGC::rebuild_remembered_set(ShenandoahHeap* heap) {
|
||||
|
||||
@@ -985,7 +985,7 @@ HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
|
||||
|
||||
assert (req.is_lab_alloc() || (requested == actual),
|
||||
"Only LAB allocations are elastic: %s, requested = %zu, actual = %zu",
|
||||
ShenandoahAllocRequest::alloc_type_to_string(req.type()), requested, actual);
|
||||
req.type_string(), requested, actual);
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -1014,8 +1014,9 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
|
||||
|
||||
// Record the plab configuration for this result and register the object.
|
||||
if (result != nullptr && req.is_old()) {
|
||||
old_generation()->configure_plab_for_current_thread(req);
|
||||
if (!req.is_lab_alloc()) {
|
||||
if (req.is_lab_alloc()) {
|
||||
old_generation()->configure_plab_for_current_thread(req);
|
||||
} else {
|
||||
// Register the newly allocated object while we're holding the global lock since there's no synchronization
|
||||
// built in to the implementation of register_object(). There are potential races when multiple independent
|
||||
// threads are allocating objects, some of which might span the same card region. For example, consider
|
||||
@@ -1035,6 +1036,13 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
|
||||
// last-start representing object b while first-start represents object c. This is why we need to require all
|
||||
// register_object() invocations to be "mutually exclusive" with respect to each card's memory range.
|
||||
old_generation()->card_scan()->register_object(result);
|
||||
|
||||
if (req.is_promotion()) {
|
||||
// Shared promotion.
|
||||
const size_t actual_size = req.actual_size() * HeapWordSize;
|
||||
log_debug(gc, plab)("Expend shared promotion of %zu bytes", actual_size);
|
||||
old_generation()->expend_promoted(actual_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -447,7 +447,7 @@ public:
|
||||
return (bottom() <= p) && (p < top());
|
||||
}
|
||||
|
||||
inline void adjust_alloc_metadata(ShenandoahAllocRequest::Type type, size_t);
|
||||
inline void adjust_alloc_metadata(const ShenandoahAllocRequest &req, size_t);
|
||||
void reset_alloc_metadata();
|
||||
size_t get_shared_allocs() const;
|
||||
size_t get_tlab_allocs() const;
|
||||
|
||||
@@ -71,7 +71,7 @@ HeapWord* ShenandoahHeapRegion::allocate_aligned(size_t size, ShenandoahAllocReq
|
||||
}
|
||||
|
||||
make_regular_allocation(req.affiliation());
|
||||
adjust_alloc_metadata(req.type(), size);
|
||||
adjust_alloc_metadata(req, size);
|
||||
|
||||
HeapWord* new_top = aligned_obj + size;
|
||||
assert(new_top <= end(), "PLAB cannot span end of heap region");
|
||||
@@ -111,7 +111,7 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque
|
||||
HeapWord* obj = top();
|
||||
if (pointer_delta(end(), obj) >= size) {
|
||||
make_regular_allocation(req.affiliation());
|
||||
adjust_alloc_metadata(req.type(), size);
|
||||
adjust_alloc_metadata(req, size);
|
||||
|
||||
HeapWord* new_top = obj + size;
|
||||
set_top(new_top);
|
||||
@@ -125,26 +125,16 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, const ShenandoahAllocReque
|
||||
}
|
||||
}
|
||||
|
||||
inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::Type type, size_t size) {
|
||||
switch (type) {
|
||||
case ShenandoahAllocRequest::_alloc_shared:
|
||||
case ShenandoahAllocRequest::_alloc_shared_gc:
|
||||
case ShenandoahAllocRequest::_alloc_shared_gc_old:
|
||||
case ShenandoahAllocRequest::_alloc_shared_gc_promotion:
|
||||
case ShenandoahAllocRequest::_alloc_cds:
|
||||
// Counted implicitly by tlab/gclab allocs
|
||||
break;
|
||||
case ShenandoahAllocRequest::_alloc_tlab:
|
||||
inline void ShenandoahHeapRegion::adjust_alloc_metadata(const ShenandoahAllocRequest &req, size_t size) {
|
||||
// Only need to update alloc metadata for lab alloc, shared alloc is counted implicitly by tlab/gclab allocs
|
||||
if (req.is_lab_alloc()) {
|
||||
if (req.is_mutator_alloc()) {
|
||||
_tlab_allocs += size;
|
||||
break;
|
||||
case ShenandoahAllocRequest::_alloc_gclab:
|
||||
_gclab_allocs += size;
|
||||
break;
|
||||
case ShenandoahAllocRequest::_alloc_plab:
|
||||
} else if (req.is_old()) {
|
||||
_plab_allocs += size;
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
} else {
|
||||
_gclab_allocs += size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -116,11 +116,10 @@ ShenandoahOldGeneration::ShenandoahOldGeneration(uint max_queues)
|
||||
_is_parsable(true),
|
||||
_card_scan(nullptr),
|
||||
_state(WAITING_FOR_BOOTSTRAP),
|
||||
_growth_before_compaction(INITIAL_GROWTH_BEFORE_COMPACTION),
|
||||
_min_growth_before_compaction ((ShenandoahMinOldGenGrowthPercent * FRACTIONAL_DENOMINATOR) / 100)
|
||||
_growth_percent_before_collection(INITIAL_GROWTH_PERCENT_BEFORE_COLLECTION)
|
||||
{
|
||||
assert(type() == ShenandoahGenerationType::OLD, "OO sanity");
|
||||
_live_bytes_after_last_mark = ShenandoahHeap::heap()->capacity() * INITIAL_LIVE_FRACTION / FRACTIONAL_DENOMINATOR;
|
||||
_live_bytes_at_last_mark = (ShenandoahHeap::heap()->soft_max_capacity() * INITIAL_LIVE_PERCENT) / 100;
|
||||
// Always clear references for old generation
|
||||
ref_processor()->set_soft_reference_policy(true);
|
||||
|
||||
@@ -168,7 +167,7 @@ size_t ShenandoahOldGeneration::get_promoted_expended() const {
|
||||
}
|
||||
|
||||
bool ShenandoahOldGeneration::can_allocate(const ShenandoahAllocRequest &req) const {
|
||||
assert(req.type() != ShenandoahAllocRequest::_alloc_gclab, "GCLAB pertains only to young-gen memory");
|
||||
assert(req.is_old(), "Must be old allocation request");
|
||||
|
||||
const size_t requested_bytes = req.size() * HeapWordSize;
|
||||
// The promotion reserve may also be used for evacuations. If we can promote this object,
|
||||
@@ -180,7 +179,7 @@ bool ShenandoahOldGeneration::can_allocate(const ShenandoahAllocRequest &req) co
|
||||
return true;
|
||||
}
|
||||
|
||||
if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
|
||||
if (req.is_lab_alloc()) {
|
||||
// The promotion reserve cannot accommodate this plab request. Check if we still have room for
|
||||
// evacuations. Note that we cannot really know how much of the plab will be used for evacuations,
|
||||
// so here we only check that some evacuation reserve still exists.
|
||||
@@ -195,54 +194,46 @@ bool ShenandoahOldGeneration::can_allocate(const ShenandoahAllocRequest &req) co
|
||||
|
||||
void
|
||||
ShenandoahOldGeneration::configure_plab_for_current_thread(const ShenandoahAllocRequest &req) {
|
||||
// Note: Even when a mutator is performing a promotion outside a LAB, we use a 'shared_gc' request.
|
||||
if (req.is_gc_alloc()) {
|
||||
const size_t actual_size = req.actual_size() * HeapWordSize;
|
||||
if (req.type() == ShenandoahAllocRequest::_alloc_plab) {
|
||||
// We've created a new plab. Now we configure it whether it will be used for promotions
|
||||
// and evacuations - or just evacuations.
|
||||
Thread* thread = Thread::current();
|
||||
ShenandoahThreadLocalData::reset_plab_promoted(thread);
|
||||
assert(req.is_gc_alloc() && req.is_old() && req.is_lab_alloc(), "Must be a plab alloc request");
|
||||
const size_t actual_size = req.actual_size() * HeapWordSize;
|
||||
// We've created a new plab. Now we configure it whether it will be used for promotions
|
||||
// and evacuations - or just evacuations.
|
||||
Thread* thread = Thread::current();
|
||||
ShenandoahThreadLocalData::reset_plab_promoted(thread);
|
||||
|
||||
// The actual size of the allocation may be larger than the requested bytes (due to alignment on card boundaries).
|
||||
// If this puts us over our promotion budget, we need to disable future PLAB promotions for this thread.
|
||||
if (can_promote(actual_size)) {
|
||||
// Assume the entirety of this PLAB will be used for promotion. This prevents promotion from overreach.
|
||||
// When we retire this plab, we'll unexpend what we don't really use.
|
||||
log_debug(gc, plab)("Thread can promote using PLAB of %zu bytes. Expended: %zu, available: %zu",
|
||||
actual_size, get_promoted_expended(), get_promoted_reserve());
|
||||
expend_promoted(actual_size);
|
||||
ShenandoahThreadLocalData::enable_plab_promotions(thread);
|
||||
ShenandoahThreadLocalData::set_plab_actual_size(thread, actual_size);
|
||||
} else {
|
||||
// Disable promotions in this thread because entirety of this PLAB must be available to hold old-gen evacuations.
|
||||
ShenandoahThreadLocalData::disable_plab_promotions(thread);
|
||||
ShenandoahThreadLocalData::set_plab_actual_size(thread, 0);
|
||||
log_debug(gc, plab)("Thread cannot promote using PLAB of %zu bytes. Expended: %zu, available: %zu, mixed evacuations? %s",
|
||||
actual_size, get_promoted_expended(), get_promoted_reserve(), BOOL_TO_STR(ShenandoahHeap::heap()->collection_set()->has_old_regions()));
|
||||
}
|
||||
} else if (req.is_promotion()) {
|
||||
// Shared promotion.
|
||||
log_debug(gc, plab)("Expend shared promotion of %zu bytes", actual_size);
|
||||
expend_promoted(actual_size);
|
||||
}
|
||||
// The actual size of the allocation may be larger than the requested bytes (due to alignment on card boundaries).
|
||||
// If this puts us over our promotion budget, we need to disable future PLAB promotions for this thread.
|
||||
if (can_promote(actual_size)) {
|
||||
// Assume the entirety of this PLAB will be used for promotion. This prevents promotion from overreach.
|
||||
// When we retire this plab, we'll unexpend what we don't really use.
|
||||
log_debug(gc, plab)("Thread can promote using PLAB of %zu bytes. Expended: %zu, available: %zu",
|
||||
actual_size, get_promoted_expended(), get_promoted_reserve());
|
||||
expend_promoted(actual_size);
|
||||
ShenandoahThreadLocalData::enable_plab_promotions(thread);
|
||||
ShenandoahThreadLocalData::set_plab_actual_size(thread, actual_size);
|
||||
} else {
|
||||
// Disable promotions in this thread because entirety of this PLAB must be available to hold old-gen evacuations.
|
||||
ShenandoahThreadLocalData::disable_plab_promotions(thread);
|
||||
ShenandoahThreadLocalData::set_plab_actual_size(thread, 0);
|
||||
log_debug(gc, plab)("Thread cannot promote using PLAB of %zu bytes. Expended: %zu, available: %zu, mixed evacuations? %s",
|
||||
actual_size, get_promoted_expended(), get_promoted_reserve(), BOOL_TO_STR(ShenandoahHeap::heap()->collection_set()->has_old_regions()));
|
||||
}
|
||||
}
|
||||
|
||||
size_t ShenandoahOldGeneration::get_live_bytes_after_last_mark() const {
|
||||
return _live_bytes_after_last_mark;
|
||||
size_t ShenandoahOldGeneration::get_live_bytes_at_last_mark() const {
|
||||
return _live_bytes_at_last_mark;
|
||||
}
|
||||
|
||||
void ShenandoahOldGeneration::set_live_bytes_after_last_mark(size_t bytes) {
|
||||
void ShenandoahOldGeneration::set_live_bytes_at_last_mark(size_t bytes) {
|
||||
if (bytes == 0) {
|
||||
// Restart search for best old-gen size to the initial state
|
||||
_live_bytes_after_last_mark = ShenandoahHeap::heap()->capacity() * INITIAL_LIVE_FRACTION / FRACTIONAL_DENOMINATOR;
|
||||
_growth_before_compaction = INITIAL_GROWTH_BEFORE_COMPACTION;
|
||||
_live_bytes_at_last_mark = (ShenandoahHeap::heap()->soft_max_capacity() * INITIAL_LIVE_PERCENT) / 100;
|
||||
_growth_percent_before_collection = INITIAL_GROWTH_PERCENT_BEFORE_COLLECTION;
|
||||
} else {
|
||||
_live_bytes_after_last_mark = bytes;
|
||||
_growth_before_compaction /= 2;
|
||||
if (_growth_before_compaction < _min_growth_before_compaction) {
|
||||
_growth_before_compaction = _min_growth_before_compaction;
|
||||
_live_bytes_at_last_mark = bytes;
|
||||
_growth_percent_before_collection /= 2;
|
||||
if (_growth_percent_before_collection < ShenandoahMinOldGenGrowthPercent) {
|
||||
_growth_percent_before_collection = ShenandoahMinOldGenGrowthPercent;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -252,7 +243,19 @@ void ShenandoahOldGeneration::handle_failed_transfer() {
|
||||
}
|
||||
|
||||
size_t ShenandoahOldGeneration::usage_trigger_threshold() const {
|
||||
size_t result = _live_bytes_after_last_mark + (_live_bytes_after_last_mark * _growth_before_compaction) / FRACTIONAL_DENOMINATOR;
|
||||
size_t threshold_by_relative_growth =
|
||||
_live_bytes_at_last_mark + (_live_bytes_at_last_mark * _growth_percent_before_collection) / 100;
|
||||
size_t soft_max_capacity = ShenandoahHeap::heap()->soft_max_capacity();
|
||||
size_t threshold_by_growth_into_percent_remaining;
|
||||
if (_live_bytes_at_last_mark < soft_max_capacity) {
|
||||
threshold_by_growth_into_percent_remaining = (size_t)
|
||||
(_live_bytes_at_last_mark + ((soft_max_capacity - _live_bytes_at_last_mark)
|
||||
* ShenandoahMinOldGenGrowthRemainingHeapPercent / 100.0));
|
||||
} else {
|
||||
// we're already consuming more than soft max capacity, so we should start old GC right away.
|
||||
threshold_by_growth_into_percent_remaining = soft_max_capacity;
|
||||
}
|
||||
size_t result = MIN2(threshold_by_relative_growth, threshold_by_growth_into_percent_remaining);
|
||||
return result;
|
||||
}
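
// Illustrative sketch (not part of this patch): the trigger threshold computed above is the
// smaller of two candidates. With assumed inputs of a 1000 MB soft max capacity, 200 MB live
// at the last old mark, a 50% growth trigger, and ShenandoahMinOldGenGrowthRemainingHeapPercent
// of 35, the relative-growth candidate is 200 + 200 * 50 / 100 = 300 MB, the remaining-heap
// candidate is 200 + (1000 - 200) * 35 / 100 = 480 MB, and the threshold is min(300, 480) = 300 MB.
#include <algorithm>
#include <cstddef>

static size_t usage_trigger_threshold_sketch(size_t live_at_last_mark,
                                             size_t soft_max_capacity,
                                             size_t growth_percent,           // e.g. 50
                                             double remaining_heap_percent) { // e.g. 35.0
  const size_t by_relative_growth =
      live_at_last_mark + (live_at_last_mark * growth_percent) / 100;
  const size_t by_remaining_heap = (live_at_last_mark < soft_max_capacity)
      ? (size_t)(live_at_last_mark +
                 (soft_max_capacity - live_at_last_mark) * remaining_heap_percent / 100.0)
      : soft_max_capacity;
  return std::min(by_relative_growth, by_remaining_heap);
}
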
@@ -287,28 +287,23 @@ public:
|
||||
private:
|
||||
State _state;
|
||||
|
||||
static const size_t FRACTIONAL_DENOMINATOR = 65536;
|
||||
|
||||
// During initialization of the JVM, we search for the correct old-gen size by initially performing old-gen
|
||||
// collection when old-gen usage is 50% more (INITIAL_GROWTH_BEFORE_COMPACTION) than the initial old-gen size
|
||||
// estimate (3.125% of heap). The next old-gen trigger occurs when old-gen grows 25% larger than its live
|
||||
// memory at the end of the first old-gen collection. Then we trigger again when old-gen grows 12.5%
|
||||
// more than its live memory at the end of the previous old-gen collection. Thereafter, we trigger each time
|
||||
// old-gen grows more than 12.5% following the end of its previous old-gen collection.
|
||||
static const size_t INITIAL_GROWTH_BEFORE_COMPACTION = FRACTIONAL_DENOMINATOR / 2; // 50.0%
|
||||
// collection when old-gen usage is 50% more (INITIAL_GROWTH_PERCENT_BEFORE_COLLECTION) than the initial old-gen size
|
||||
// estimate (16% of heap). With each successive old-gen collection, we divide the growth trigger by two, but
|
||||
// never use a growth trigger smaller than ShenandoahMinOldGenGrowthPercent.
|
||||
static const size_t INITIAL_GROWTH_PERCENT_BEFORE_COLLECTION = 50;
|
||||
|
||||
// INITIAL_LIVE_FRACTION represents the initial guess of how large old-gen should be. We estimate that old-gen
|
||||
// needs to consume 6.25% of the total heap size. And we "pretend" that we start out with this amount of live
|
||||
// INITIAL_LIVE_PERCENT represents the initial guess of how large old-gen should be. We estimate that old gen
|
||||
// needs to consume 16% of the total heap size. And we "pretend" that we start out with this amount of live
|
||||
// old-gen memory. The first old-collection trigger will occur when old-gen occupies 50% more than this initial
|
||||
// approximation of the old-gen memory requirement, in other words when old-gen usage is 150% of 6.25%, which
|
||||
// is 9.375% of the total heap size.
|
||||
static const uint16_t INITIAL_LIVE_FRACTION = FRACTIONAL_DENOMINATOR / 16; // 6.25%
|
||||
// approximation of the old-gen memory requirement, in other words when old-gen usage is 150% of 16%, which
|
||||
// is 24% of the heap size.
|
||||
static const size_t INITIAL_LIVE_PERCENT = 16;
|
||||
|
||||
size_t _live_bytes_after_last_mark;
|
||||
size_t _live_bytes_at_last_mark;
|
||||
|
||||
// How much growth in usage before we trigger old collection, per FRACTIONAL_DENOMINATOR (65_536)
|
||||
size_t _growth_before_compaction;
|
||||
const size_t _min_growth_before_compaction; // Default is 12.5%
|
||||
// How much growth in usage before we trigger old collection as a percent of soft_max_capacity
|
||||
size_t _growth_percent_before_collection;
|
||||
|
||||
void validate_transition(State new_state) NOT_DEBUG_RETURN;
|
||||
|
||||
@@ -323,8 +318,8 @@ public:
|
||||
|
||||
void transition_to(State new_state);
|
||||
|
||||
size_t get_live_bytes_after_last_mark() const;
|
||||
void set_live_bytes_after_last_mark(size_t new_live);
|
||||
size_t get_live_bytes_at_last_mark() const;
|
||||
void set_live_bytes_at_last_mark(size_t new_live);
|
||||
|
||||
size_t usage_trigger_threshold() const;
|
||||
|
||||
|
||||
@@ -335,7 +335,6 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
|
||||
if (ctx->is_marked(p)) {
|
||||
oop obj = cast_to_oop(p);
|
||||
assert(oopDesc::is_oop(obj), "Should be an object");
|
||||
assert(Klass::is_valid(obj->klass()), "Not a valid klass ptr");
|
||||
assert(p + obj->size() > left, "This object should span start of card");
|
||||
assert(p < right, "Result must precede right");
|
||||
return p;
|
||||
@@ -362,15 +361,15 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
|
||||
|
||||
// Recall that we already dealt with the co-initial object case above
|
||||
assert(p < left, "obj should start before left");
|
||||
// While it is safe to ask an object its size in the loop that
|
||||
// follows, the (ifdef'd out) loop should never be needed.
|
||||
// While it is safe to ask an object its size in the block that
|
||||
// follows, the (ifdef'd out) block should never be needed.
|
||||
// 1. we ask this question only for regions in the old generation, and those
|
||||
// that are not humongous regions
|
||||
// 2. there is no direct allocation ever by mutators in old generation
|
||||
// regions walked by this code. Only GC will ever allocate in old regions,
|
||||
// and then too only during promotion/evacuation phases. Thus there is no danger
|
||||
// of races between reading from and writing to the object start array,
|
||||
// or of asking partially initialized objects their size (in the loop below).
|
||||
// or of asking partially initialized objects their size (in the ifdef below).
|
||||
// Furthermore, humongous regions (and their dirty cards) are never processed
|
||||
// by this code.
|
||||
// 3. only GC asks this question during phases when it is not concurrently
|
||||
@@ -382,15 +381,6 @@ HeapWord* ShenandoahCardCluster::first_object_start(const size_t card_index, con
|
||||
#ifdef ASSERT
|
||||
oop obj = cast_to_oop(p);
|
||||
assert(oopDesc::is_oop(obj), "Should be an object");
|
||||
while (p + obj->size() < left) {
|
||||
p += obj->size();
|
||||
obj = cast_to_oop(p);
|
||||
assert(oopDesc::is_oop(obj), "Should be an object");
|
||||
assert(Klass::is_valid(obj->klass()), "Not a valid klass ptr");
|
||||
// Check assumptions in previous block comment if this assert fires
|
||||
fatal("Should never need forward walk in block start");
|
||||
}
|
||||
assert(p <= left, "p should start at or before left end of card");
|
||||
assert(p + obj->size() > left, "obj should end after left end of card");
|
||||
#endif // ASSERT
|
||||
return p;
|
||||
|
||||
@@ -29,11 +29,7 @@
|
||||
#include "memory/allocation.hpp"
|
||||
#include "runtime/atomicAccess.hpp"
|
||||
|
||||
typedef jbyte ShenandoahSharedValue;
|
||||
|
||||
// Needed for cooperation with generated code.
|
||||
STATIC_ASSERT(sizeof(ShenandoahSharedValue) == 1);
|
||||
|
||||
typedef int32_t ShenandoahSharedValue;
|
||||
typedef struct ShenandoahSharedFlag {
|
||||
enum {
|
||||
UNSET = 0,
|
||||
|
||||
@@ -59,15 +59,29 @@
|
||||
"fail, resulting in stop-the-world full GCs.") \
|
||||
range(0,100) \
|
||||
\
|
||||
product(double, ShenandoahMinOldGenGrowthPercent, 12.5, EXPERIMENTAL, \
|
||||
product(double, ShenandoahMinOldGenGrowthPercent, 50, EXPERIMENTAL, \
|
||||
"(Generational mode only) If the usage within old generation " \
|
||||
"has grown by at least this percent of its live memory size " \
|
||||
"at completion of the most recent old-generation marking " \
|
||||
"effort, heuristics may trigger the start of a new old-gen " \
|
||||
"collection.") \
|
||||
"at the start of the previous old-generation marking effort, " \
|
||||
"heuristics may trigger the start of a new old-gen collection.") \
|
||||
range(0.0,100.0) \
|
||||
\
|
||||
product(uintx, ShenandoahIgnoreOldGrowthBelowPercentage,10, EXPERIMENTAL, \
|
||||
product(double, ShenandoahMinOldGenGrowthRemainingHeapPercent, \
|
||||
35, EXPERIMENTAL, \
|
||||
"(Generational mode only) If the usage within old generation " \
|
||||
"has grown to exceed this percent of the remaining heap that " \
|
||||
"was not marked live within the old generation at the time " \
|
||||
"of the last old-generation marking effort, heuristics may " \
|
||||
"trigger the start of a new old-gen collection. Setting " \
|
||||
"this value to a smaller value may cause back-to-back old " \
|
||||
"generation marking triggers, since the typical memory used " \
|
||||
"by the old generation is about 30% larger than the live " \
|
||||
"memory contained within the old generation (because default " \
|
||||
"value of ShenandoahOldGarbageThreshold is 25.") \
|
||||
range(0.0,100.0) \
|
||||
\
|
||||
product(uintx, ShenandoahIgnoreOldGrowthBelowPercentage, \
|
||||
40, EXPERIMENTAL, \
|
||||
"(Generational mode only) If the total usage of the old " \
|
||||
"generation is smaller than this percent, we do not trigger " \
|
||||
"old gen collections even if old has grown, except when " \
|
||||
@@ -77,12 +91,13 @@
|
||||
range(0,100) \
|
||||
\
|
||||
product(uintx, ShenandoahDoNotIgnoreGrowthAfterYoungCycles, \
|
||||
50, EXPERIMENTAL, \
|
||||
"(Generational mode only) Even if the usage of old generation " \
|
||||
"is below ShenandoahIgnoreOldGrowthBelowPercentage, " \
|
||||
"trigger an old-generation mark if old has grown and this " \
|
||||
"many consecutive young-gen collections have been " \
|
||||
"completed following the preceding old-gen collection.") \
|
||||
100, EXPERIMENTAL, \
|
||||
"(Generational mode only) Trigger an old-generation mark " \
|
||||
"if old has grown and this many consecutive young-gen " \
|
||||
"collections have been completed following the preceding " \
|
||||
"old-gen collection. We perform this old-generation mark " \
|
||||
"evvort even if the usage of old generation is below " \
|
||||
"ShenandoahIgnoreOldGrowthBelowPercentage.") \
|
||||
\
|
||||
product(bool, ShenandoahGenerationalAdaptiveTenuring, true, EXPERIMENTAL, \
|
||||
"(Generational mode only) Dynamically adapt tenuring age.") \
|
||||
|
||||
@@ -87,6 +87,9 @@ JVM_InternString(JNIEnv *env, jstring str);
|
||||
/*
|
||||
* java.lang.System
|
||||
*/
|
||||
JNIEXPORT jboolean JNICALL
|
||||
JVM_AOTEndRecording(JNIEnv *env);
|
||||
|
||||
JNIEXPORT jlong JNICALL
|
||||
JVM_CurrentTimeMillis(JNIEnv *env, jclass ignored);
|
||||
|
||||
|
||||
@@ -24,11 +24,11 @@
|
||||
*/
|
||||
|
||||
#include "jfr/recorder/service/jfrEventThrottler.hpp"
|
||||
#include "jfr/utilities/jfrSpinlockHelper.hpp"
|
||||
#include "jfrfiles/jfrEventIds.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/spinCriticalSection.hpp"
|
||||
|
||||
constexpr static const JfrSamplerParams _disabled_params = {
|
||||
0, // sample points per window
|
||||
@@ -128,7 +128,7 @@ JfrEventThrottler* JfrEventThrottler::create_throttler(JfrEventId id) {
|
||||
* - period_ms time period expressed in milliseconds
|
||||
*/
|
||||
void JfrEventThrottler::configure(int64_t sample_size, int64_t period_ms) {
|
||||
JfrSpinlockHelper mutex(&_lock);
|
||||
SpinCriticalSection scs(&_lock);
|
||||
_sample_size = sample_size;
|
||||
_period_ms = period_ms;
|
||||
_update = true;
|
||||
|
||||
@@ -25,13 +25,13 @@
|
||||
|
||||
#include "jfr/support/jfrAdaptiveSampler.hpp"
|
||||
#include "jfr/utilities/jfrRandom.inline.hpp"
|
||||
#include "jfr/utilities/jfrSpinlockHelper.hpp"
|
||||
#include "jfr/utilities/jfrTime.hpp"
|
||||
#include "jfr/utilities/jfrTimeConverter.hpp"
|
||||
#include "jfr/utilities/jfrTryLock.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "runtime/atomicAccess.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/spinCriticalSection.hpp"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
@@ -342,7 +342,7 @@ JfrGTestFixedRateSampler::JfrGTestFixedRateSampler(size_t sample_points_per_wind
|
||||
|
||||
bool JfrGTestFixedRateSampler::initialize() {
|
||||
const bool result = JfrAdaptiveSampler::initialize();
|
||||
JfrSpinlockHelper mutex(&_lock);
|
||||
SpinCriticalSection scs(&_lock);
|
||||
reconfigure();
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -36,7 +36,6 @@
|
||||
#include "jfr/recorder/storage/jfrStorage.hpp"
|
||||
#include "jfr/support/jfrThreadId.inline.hpp"
|
||||
#include "jfr/support/jfrThreadLocal.hpp"
|
||||
#include "jfr/utilities/jfrSpinlockHelper.hpp"
|
||||
#include "jfr/writers/jfrJavaEventWriter.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
|
||||
@@ -223,6 +223,7 @@
|
||||
volatile_nonstatic_field(InstanceKlass, _init_state, InstanceKlass::ClassState) \
|
||||
volatile_nonstatic_field(InstanceKlass, _init_thread, JavaThread*) \
|
||||
nonstatic_field(InstanceKlass, _misc_flags._flags, u2) \
|
||||
nonstatic_field(InstanceKlass, _access_flags, AccessFlags) \
|
||||
nonstatic_field(InstanceKlass, _annotations, Annotations*) \
|
||||
\
|
||||
volatile_nonstatic_field(JavaFrameAnchor, _last_Java_sp, intptr_t*) \
|
||||
@@ -282,7 +283,6 @@
|
||||
nonstatic_field(Klass, _name, Symbol*) \
|
||||
volatile_nonstatic_field(Klass, _next_sibling, Klass*) \
|
||||
nonstatic_field(Klass, _java_mirror, OopHandle) \
|
||||
nonstatic_field(Klass, _access_flags, AccessFlags) \
|
||||
nonstatic_field(Klass, _class_loader_data, ClassLoaderData*) \
|
||||
nonstatic_field(Klass, _secondary_supers_bitmap, uintx) \
|
||||
nonstatic_field(Klass, _hash_slot, uint8_t) \
|
||||
|
||||
@@ -544,7 +544,7 @@ void Universe::genesis(TRAPS) {
|
||||
// Only modify the global variable inside the mutex.
|
||||
// If we had a race to here, the other dummy_array instances
|
||||
// and their elements just get dropped on the floor, which is fine.
|
||||
MutexLocker ml(THREAD, FullGCALot_lock);
|
||||
MutexLocker ml(THREAD, FullGCALot_lock, Mutex::_no_safepoint_check_flag);
|
||||
if (_fullgc_alot_dummy_array.is_empty()) {
|
||||
_fullgc_alot_dummy_array = OopHandle(vm_global(), dummy_array());
|
||||
}
|
||||
@@ -1458,7 +1458,7 @@ uintptr_t Universe::verify_mark_bits() {
|
||||
#ifdef ASSERT
|
||||
// Release dummy object(s) at bottom of heap
|
||||
bool Universe::release_fullgc_alot_dummy() {
|
||||
MutexLocker ml(FullGCALot_lock);
|
||||
MutexLocker ml(FullGCALot_lock, Mutex::_no_safepoint_check_flag);
|
||||
objArrayOop fullgc_alot_dummy_array = (objArrayOop)_fullgc_alot_dummy_array.resolve();
|
||||
if (fullgc_alot_dummy_array != nullptr) {
|
||||
if (_fullgc_alot_dummy_next >= fullgc_alot_dummy_array->length()) {
|
||||
|
||||
@@ -99,7 +99,8 @@ ArrayKlass::ArrayKlass(Symbol* name, KlassKind kind) :
|
||||
set_name(name);
|
||||
set_super(Universe::is_bootstrapping() ? nullptr : vmClasses::Object_klass());
|
||||
set_layout_helper(Klass::_lh_neutral_value);
|
||||
set_is_cloneable(); // All arrays are considered to be cloneable (See JLS 20.1.5)
|
||||
// All arrays are considered to be cloneable (See JLS 20.1.5)
|
||||
set_is_cloneable_fast();
|
||||
JFR_ONLY(INIT_ID(this);)
|
||||
log_array_class_load(this);
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
#include "memory/allocation.hpp"
|
||||
#include "oops/typeArrayOop.hpp"
|
||||
#include "utilities/accessFlags.hpp"
|
||||
#include "utilities/unsigned5.hpp"
|
||||
#include "utilities/vmEnums.hpp"
|
||||
|
||||
|
||||
@@ -552,6 +552,17 @@ InstanceKlass::InstanceKlass(const ClassFileParser& parser, KlassKind kind, Refe
|
||||
assert(size_helper() == parser.layout_size(), "incorrect size_helper?");
|
||||
}
|
||||
|
||||
void InstanceKlass::set_is_cloneable() {
|
||||
if (name() == vmSymbols::java_lang_invoke_MemberName()) {
|
||||
assert(is_final(), "no subclasses allowed");
|
||||
// MemberName cloning should not be intrinsified and always happen in JVM_Clone.
|
||||
} else if (reference_type() != REF_NONE) {
|
||||
// Reference cloning should not be intrinsified and always happen in JVM_Clone.
|
||||
} else {
|
||||
set_is_cloneable_fast();
|
||||
}
|
||||
}
|
||||
|
||||
void InstanceKlass::deallocate_methods(ClassLoaderData* loader_data,
|
||||
Array<Method*>* methods) {
|
||||
if (methods != nullptr && methods != Universe::the_empty_method_array() &&
|
||||
|
||||
@@ -229,7 +229,9 @@ class InstanceKlass: public Klass {
|
||||
// _idnum_allocated_count.
|
||||
volatile ClassState _init_state; // state of class
|
||||
|
||||
u1 _reference_type; // reference type
|
||||
u1 _reference_type; // reference type
|
||||
|
||||
AccessFlags _access_flags; // Access flags. The class/interface distinction is stored here.
|
||||
|
||||
// State is set either at parse time or while executing, atomically to not disturb other state
|
||||
InstanceKlassFlags _misc_flags;
|
||||
@@ -305,6 +307,22 @@ class InstanceKlass: public Klass {
|
||||
// Sets finalization state
|
||||
static void set_finalization_enabled(bool val) { _finalization_enabled = val; }
|
||||
|
||||
// Access flags
|
||||
AccessFlags access_flags() const { return _access_flags; }
|
||||
void set_access_flags(AccessFlags flags) { _access_flags = flags; }
|
||||
|
||||
bool is_public() const { return _access_flags.is_public(); }
|
||||
bool is_final() const { return _access_flags.is_final(); }
|
||||
bool is_interface() const { return _access_flags.is_interface(); }
|
||||
bool is_abstract() const { return _access_flags.is_abstract(); }
|
||||
bool is_super() const { return _access_flags.is_super(); }
|
||||
bool is_synthetic() const { return _access_flags.is_synthetic(); }
|
||||
void set_is_synthetic() { _access_flags.set_is_synthetic(); }
|
||||
|
||||
static ByteSize access_flags_offset() { return byte_offset_of(InstanceKlass, _access_flags); }
|
||||
|
||||
void set_is_cloneable();
|
||||
|
||||
// Quick checks for the loader that defined this class (without switching on this->class_loader())
|
||||
bool defined_by_boot_loader() const { return _misc_flags.defined_by_boot_loader(); }
|
||||
bool defined_by_platform_loader() const { return _misc_flags.defined_by_platform_loader(); }
|
||||
|
||||
@@ -72,17 +72,6 @@ bool Klass::is_cloneable() const {
|
||||
is_subtype_of(vmClasses::Cloneable_klass());
|
||||
}
|
||||
|
||||
void Klass::set_is_cloneable() {
|
||||
if (name() == vmSymbols::java_lang_invoke_MemberName()) {
|
||||
assert(is_final(), "no subclasses allowed");
|
||||
// MemberName cloning should not be intrinsified and always happen in JVM_Clone.
|
||||
} else if (is_instance_klass() && InstanceKlass::cast(this)->reference_type() != REF_NONE) {
|
||||
// Reference cloning should not be intrinsified and always happen in JVM_Clone.
|
||||
} else {
|
||||
_misc_flags.set_is_cloneable_fast(true);
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t Klass::compute_hash_slot(Symbol* n) {
|
||||
uint hash_code;
|
||||
// Special cases for the two superclasses of all Array instances.
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
#include "oops/metadata.hpp"
|
||||
#include "oops/oop.hpp"
|
||||
#include "oops/oopHandle.hpp"
|
||||
#include "utilities/accessFlags.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#if INCLUDE_JFR
|
||||
#include "jfr/support/jfrTraceIdExtension.hpp"
|
||||
@@ -120,9 +119,8 @@ class Klass : public Metadata {
|
||||
// - Various type checking in the JVM
|
||||
const KlassKind _kind;
|
||||
|
||||
AccessFlags _access_flags; // Access flags. The class/interface distinction is stored here.
|
||||
// Some flags created by the JVM, not in the class file itself,
|
||||
// are in _misc_flags below.
|
||||
// Some flags created by the JVM, not in the class file itself,
|
||||
// are in _misc_flags below.
|
||||
KlassFlags _misc_flags;
|
||||
|
||||
// The fields _super_check_offset, _secondary_super_cache, _secondary_supers
|
||||
@@ -453,7 +451,6 @@ protected:
|
||||
static ByteSize java_mirror_offset() { return byte_offset_of(Klass, _java_mirror); }
|
||||
static ByteSize class_loader_data_offset() { return byte_offset_of(Klass, _class_loader_data); }
|
||||
static ByteSize layout_helper_offset() { return byte_offset_of(Klass, _layout_helper); }
|
||||
static ByteSize access_flags_offset() { return byte_offset_of(Klass, _access_flags); }
|
||||
#if INCLUDE_JVMCI
|
||||
static ByteSize subklass_offset() { return byte_offset_of(Klass, _subklass); }
|
||||
static ByteSize next_sibling_offset() { return byte_offset_of(Klass, _next_sibling); }
|
||||
@@ -707,17 +704,10 @@ public:
|
||||
bool is_typeArray_klass() const { return assert_same_query( _kind == TypeArrayKlassKind, is_typeArray_klass_slow()); }
|
||||
#undef assert_same_query
|
||||
|
||||
// Access flags
|
||||
AccessFlags access_flags() const { return _access_flags; }
|
||||
void set_access_flags(AccessFlags flags) { _access_flags = flags; }
|
||||
|
||||
bool is_public() const { return _access_flags.is_public(); }
|
||||
bool is_final() const { return _access_flags.is_final(); }
|
||||
bool is_interface() const { return _access_flags.is_interface(); }
|
||||
bool is_abstract() const { return _access_flags.is_abstract(); }
|
||||
bool is_super() const { return _access_flags.is_super(); }
|
||||
bool is_synthetic() const { return _access_flags.is_synthetic(); }
|
||||
void set_is_synthetic() { _access_flags.set_is_synthetic(); }
|
||||
virtual bool is_interface() const { return false; }
|
||||
virtual bool is_abstract() const { return false; }
|
||||
|
||||
bool has_finalizer() const { return _misc_flags.has_finalizer(); }
|
||||
void set_has_finalizer() { _misc_flags.set_has_finalizer(true); }
|
||||
bool is_hidden() const { return _misc_flags.is_hidden_class(); }
|
||||
@@ -730,7 +720,7 @@ public:
|
||||
inline bool is_non_strong_hidden() const;
|
||||
|
||||
bool is_cloneable() const;
|
||||
void set_is_cloneable();
|
||||
void set_is_cloneable_fast() { _misc_flags.set_is_cloneable_fast(true); }
|
||||
|
||||
inline markWord prototype_header() const;
|
||||
inline void set_prototype_header(markWord header);
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "castnode.hpp"
|
||||
#include "opto/addnode.hpp"
|
||||
#include "opto/callnode.hpp"
|
||||
#include "opto/castnode.hpp"
|
||||
@@ -35,12 +34,22 @@
|
||||
#include "opto/type.hpp"
|
||||
#include "utilities/checkedCast.hpp"
|
||||
|
||||
const ConstraintCastNode::DependencyType ConstraintCastNode::DependencyType::FloatingNarrowing(true, true, "floating narrowing dependency"); // not pinned, narrows type
|
||||
const ConstraintCastNode::DependencyType ConstraintCastNode::DependencyType::FloatingNonNarrowing(true, false, "floating non-narrowing dependency"); // not pinned, doesn't narrow type
|
||||
const ConstraintCastNode::DependencyType ConstraintCastNode::DependencyType::NonFloatingNarrowing(false, true, "non-floating narrowing dependency"); // pinned, narrows type
|
||||
const ConstraintCastNode::DependencyType ConstraintCastNode::DependencyType::NonFloatingNonNarrowing(false, false, "non-floating non-narrowing dependency"); // pinned, doesn't narrow type
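
// Minimal sketch (an assumption, not the declaration added by this patch): from the four
// constants above and the call sites later in this diff (is_floating(), narrows_type(),
// with_pinned_dependency(), hash(), cmp()), DependencyType appears to pair a "floating" flag
// (may the cast move independently of its control?) with a "narrows type" flag (does the cast
// carry a type assertion?). The class below only illustrates that shape.
class DependencyTypeSketch {
  bool        _floating;
  bool        _narrows_type;
  const char* _description;
public:
  constexpr DependencyTypeSketch(bool floating, bool narrows_type, const char* description)
    : _floating(floating), _narrows_type(narrows_type), _description(description) {}
  bool is_floating() const  { return _floating; }
  bool narrows_type() const { return _narrows_type; }
  DependencyTypeSketch with_pinned_dependency() const {
    return DependencyTypeSketch(false, _narrows_type, _description);
  }
  unsigned hash() const { return (_floating ? 2u : 0u) + (_narrows_type ? 1u : 0u); }
  bool cmp(const DependencyTypeSketch& other) const {
    return _floating == other._floating && _narrows_type == other._narrows_type;
  }
};
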
//=============================================================================
|
||||
// If input is already higher or equal to cast type, then this is an identity.
|
||||
Node* ConstraintCastNode::Identity(PhaseGVN* phase) {
|
||||
if (_dependency == UnconditionalDependency) {
|
||||
if (!_dependency.narrows_type()) {
|
||||
// If this cast doesn't carry a type dependency (i.e. not used for type narrowing), we cannot optimize it.
|
||||
return this;
|
||||
}
|
||||
|
||||
// This cast node carries a type dependency. We can remove it if:
|
||||
// - Its input has a narrower type
|
||||
// - There's a dominating cast with same input but narrower type
|
||||
Node* dom = dominating_cast(phase, phase);
|
||||
if (dom != nullptr) {
|
||||
return dom;
|
||||
@@ -109,7 +118,7 @@ Node* ConstraintCastNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
}
|
||||
|
||||
uint ConstraintCastNode::hash() const {
|
||||
return TypeNode::hash() + (int)_dependency + (_extra_types != nullptr ? _extra_types->hash() : 0);
|
||||
return TypeNode::hash() + _dependency.hash() + (_extra_types != nullptr ? _extra_types->hash() : 0);
|
||||
}
|
||||
|
||||
bool ConstraintCastNode::cmp(const Node &n) const {
|
||||
@@ -117,7 +126,7 @@ bool ConstraintCastNode::cmp(const Node &n) const {
|
||||
return false;
|
||||
}
|
||||
ConstraintCastNode& cast = (ConstraintCastNode&) n;
|
||||
if (cast._dependency != _dependency) {
|
||||
if (!cast._dependency.cmp(_dependency)) {
|
||||
return false;
|
||||
}
|
||||
if (_extra_types == nullptr || cast._extra_types == nullptr) {
|
||||
@@ -130,7 +139,7 @@ uint ConstraintCastNode::size_of() const {
|
||||
return sizeof(*this);
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::make_cast_for_basic_type(Node* c, Node* n, const Type* t, DependencyType dependency, BasicType bt) {
|
||||
Node* ConstraintCastNode::make_cast_for_basic_type(Node* c, Node* n, const Type* t, const DependencyType& dependency, BasicType bt) {
|
||||
switch(bt) {
|
||||
case T_INT:
|
||||
return new CastIINode(c, n, t, dependency);
|
||||
@@ -143,9 +152,9 @@ Node* ConstraintCastNode::make_cast_for_basic_type(Node* c, Node* n, const Type*
|
||||
}
|
||||
|
||||
TypeNode* ConstraintCastNode::dominating_cast(PhaseGVN* gvn, PhaseTransform* pt) const {
|
||||
if (_dependency == UnconditionalDependency) {
|
||||
return nullptr;
|
||||
}
|
||||
// See discussion at definition of ConstraintCastNode::DependencyType: replacing this cast with a dominating one is
|
||||
// not safe if _dependency.narrows_type() is not true.
|
||||
assert(_dependency.narrows_type(), "cast can't be replaced by dominating one");
|
||||
Node* val = in(1);
|
||||
Node* ctl = in(0);
|
||||
int opc = Opcode();
|
||||
@@ -205,30 +214,21 @@ void ConstraintCastNode::dump_spec(outputStream *st) const {
|
||||
st->print(" extra types: ");
|
||||
_extra_types->dump_on(st);
|
||||
}
|
||||
if (_dependency != RegularDependency) {
|
||||
st->print(" %s dependency", _dependency == StrongDependency ? "strong" : "unconditional");
|
||||
}
|
||||
st->print(" ");
|
||||
_dependency.dump_on(st);
|
||||
}
|
||||
#endif
|
||||
|
||||
const Type* CastIINode::Value(PhaseGVN* phase) const {
|
||||
const Type *res = ConstraintCastNode::Value(phase);
|
||||
if (res == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
assert(res->isa_int(), "res must be int");
|
||||
|
||||
// Similar to ConvI2LNode::Value() for the same reasons
|
||||
// see if we can remove type assertion after loop opts
|
||||
res = widen_type(phase, res, T_INT);
|
||||
|
||||
return res;
|
||||
CastIINode* CastIINode::make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const {
|
||||
return new CastIINode(in(0), parent, type, dependency, _range_check_dependency, _extra_types);
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::find_or_make_integer_cast(PhaseIterGVN* igvn, Node* parent, const TypeInteger* type) const {
|
||||
Node* n = clone();
|
||||
n->set_req(1, parent);
|
||||
n->as_ConstraintCast()->set_type(type);
|
||||
CastLLNode* CastLLNode::make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const {
|
||||
return new CastLLNode(in(0), parent, type, dependency, _extra_types);
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::find_or_make_integer_cast(PhaseIterGVN* igvn, Node* parent, const TypeInteger* type, const DependencyType& dependency) const {
|
||||
Node* n = make_with(parent, type, dependency);
|
||||
Node* existing = igvn->hash_find_insert(n);
|
||||
if (existing != nullptr) {
|
||||
n->destruct(igvn);
|
||||
@@ -242,14 +242,13 @@ Node *CastIINode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
if (progress != nullptr) {
|
||||
return progress;
|
||||
}
|
||||
if (can_reshape && !phase->C->post_loop_opts_phase()) {
|
||||
// makes sure we run ::Value to potentially remove type assertion after loop opts
|
||||
if (!phase->C->post_loop_opts_phase()) {
|
||||
// makes sure we run widen_type() to potentially common type assertions after loop opts
|
||||
phase->C->record_for_post_loop_opts_igvn(this);
|
||||
}
|
||||
if (!_range_check_dependency || phase->C->post_loop_opts_phase()) {
|
||||
return optimize_integer_cast(phase, T_INT);
|
||||
}
|
||||
phase->C->record_for_post_loop_opts_igvn(this);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -279,9 +278,9 @@ void CastIINode::dump_spec(outputStream* st) const {
|
||||
#endif
|
||||
|
||||
CastIINode* CastIINode::pin_array_access_node() const {
|
||||
assert(_dependency == RegularDependency, "already pinned");
|
||||
assert(_dependency.is_floating(), "already pinned");
|
||||
if (has_range_check()) {
|
||||
return new CastIINode(in(0), in(1), bottom_type(), StrongDependency, has_range_check());
|
||||
return new CastIINode(in(0), in(1), bottom_type(), _dependency.with_pinned_dependency(), has_range_check());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -315,16 +314,6 @@ void CastIINode::remove_range_check_cast(Compile* C) {
|
||||
}
|
||||
|
||||
|
||||
const Type* CastLLNode::Value(PhaseGVN* phase) const {
|
||||
const Type* res = ConstraintCastNode::Value(phase);
|
||||
if (res == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
assert(res->isa_long(), "res must be long");
|
||||
|
||||
return widen_type(phase, res, T_LONG);
|
||||
}
|
||||
|
||||
bool CastLLNode::is_inner_loop_backedge(ProjNode* proj) {
|
||||
if (proj != nullptr) {
|
||||
Node* ctrl_use = proj->unique_ctrl_out_or_null();
|
||||
@@ -392,7 +381,7 @@ Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
return progress;
|
||||
}
|
||||
if (!phase->C->post_loop_opts_phase()) {
|
||||
// makes sure we run ::Value to potentially remove type assertion after loop opts
|
||||
// makes sure we run widen_type() to potentially common type assertions after loop opts
|
||||
phase->C->record_for_post_loop_opts_igvn(this);
|
||||
}
|
||||
// transform (CastLL (ConvI2L ..)) into (ConvI2L (CastII ..)) if the type of the CastLL is narrower than the type of
|
||||
@@ -543,7 +532,7 @@ Node* CastP2XNode::Identity(PhaseGVN* phase) {
|
||||
return this;
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::make_cast_for_type(Node* c, Node* in, const Type* type, DependencyType dependency,
|
||||
Node* ConstraintCastNode::make_cast_for_type(Node* c, Node* in, const Type* type, const DependencyType& dependency,
|
||||
const TypeTuple* types) {
|
||||
if (type->isa_int()) {
|
||||
return new CastIINode(c, in, type, dependency, false, types);
|
||||
@@ -564,7 +553,7 @@ Node* ConstraintCastNode::make_cast_for_type(Node* c, Node* in, const Type* type
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* ConstraintCastNode::optimize_integer_cast(PhaseGVN* phase, BasicType bt) {
|
||||
Node* ConstraintCastNode::optimize_integer_cast_of_add(PhaseGVN* phase, BasicType bt) {
|
||||
PhaseIterGVN *igvn = phase->is_IterGVN();
|
||||
const TypeInteger* this_type = this->type()->isa_integer(bt);
|
||||
if (this_type == nullptr) {
|
||||
@@ -586,8 +575,42 @@ Node* ConstraintCastNode::optimize_integer_cast(PhaseGVN* phase, BasicType bt) {
Node* x = z->in(1);
Node* y = z->in(2);

Node* cx = find_or_make_integer_cast(igvn, x, rx);
Node* cy = find_or_make_integer_cast(igvn, y, ry);
const TypeInteger* tx = phase->type(x)->is_integer(bt);
const TypeInteger* ty = phase->type(y)->is_integer(bt);

// (Cast (Add x y) tz) is transformed into (Add (Cast x rx) (Cast y ry))
//
// tz = [tzlo, tzhi]
// rx = [rxlo, rxhi]
// ry = [rylo, ryhi]
// with type of x, tx = [txlo, txhi]
// with type of y, ty = [tylo, tyhi]
//
// From Compile::push_thru_add():
// rxlo = max(tzlo - tyhi, txlo)
// rxhi = min(tzhi - tylo, txhi)
// rylo = max(tzlo - txhi, tylo)
// ryhi = min(tzhi - txlo, tyhi)
//
// If x is a constant, then txlo = txhi
// rxlo = txlo, rxhi = txhi
// The bounds of the type of the Add after transformation then are:
// rxlo + rylo >= txlo + tzlo - txhi >= tzlo
// rxhi + ryhi <= txhi + tzhi - txlo <= tzhi
// The resulting type is not wider than the type of the Cast
// before transformation
//
// If neither x nor y is constant, then the type of the resulting
// Add can be wider than the type of the Cast before
// transformation.
// For instance, tx = [0, 10], ty = [0, 10], tz = [0, 10]
// then rx = [0, 10], ry = [0, 10]
// and rx + ry = [0, 20] which is wider than tz
//
// The same reasoning applies to (Cast (Sub x y) tz)
const DependencyType& dependency = (!tx->is_con() && !ty->is_con()) ? _dependency.with_non_narrowing() : _dependency;
Node* cx = find_or_make_integer_cast(igvn, x, rx, dependency);
Node* cy = find_or_make_integer_cast(igvn, y, ry, dependency);
if (op == Op_Add(bt)) {
return AddNode::make(cx, cy, bt);
} else {
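A minimal standalone check of the interval reasoning above (Interval and push_thru_add are illustrative stand-ins, not the HotSpot API):

#include <algorithm>
#include <cassert>

struct Interval { long lo, hi; };   // stand-in for a C2 integer type

// rx (or ry) as computed by the push_thru_add() formulas quoted in the comment above:
// the range for one addend, given the cast's range tz and the other addend's range.
Interval push_thru_add(Interval tz, Interval tthis, Interval tother) {
  return { std::max(tz.lo - tother.hi, tthis.lo),
           std::min(tz.hi - tother.lo, tthis.hi) };
}

int main() {
  Interval tx{0, 10}, ty{0, 10}, tz{0, 10};
  Interval rx = push_thru_add(tz, tx, ty);   // [0, 10]
  Interval ry = push_thru_add(tz, ty, tx);   // [0, 10]
  // Neither input is a constant: the pushed-through Add covers [0, 20], wider than tz.
  assert(rx.lo + ry.lo == 0 && rx.hi + ry.hi == 20);
  // If x is the constant 7 (tx = [7, 7]), the Add's range stays within tz.
  Interval cx7{7, 7};
  Interval rx7 = push_thru_add(tz, cx7, ty);  // [7, 7]
  Interval ry7 = push_thru_add(tz, ty, cx7);  // [0, 3]
  assert(rx7.lo + ry7.lo >= tz.lo && rx7.hi + ry7.hi <= tz.hi);
  return 0;
}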
@@ -599,11 +622,26 @@ Node* ConstraintCastNode::optimize_integer_cast(PhaseGVN* phase, BasicType bt) {
return nullptr;
}

const Type* ConstraintCastNode::widen_type(const PhaseGVN* phase, const Type* res, BasicType bt) const {
if (!phase->C->post_loop_opts_phase()) {
Node* ConstraintCastNode::optimize_integer_cast(PhaseGVN* phase, BasicType bt) {
Node* res = optimize_integer_cast_of_add(phase, bt);
if (res != nullptr) {
return res;
}
const Type* t = Value(phase);
if (t != Type::TOP && phase->C->post_loop_opts_phase()) {
const Type* bottom_t = bottom_type();
const TypeInteger* wide_t = widen_type(phase, bottom_t, bt);
if (wide_t != bottom_t) {
// Widening the type of the Cast (to allow some commoning) changes how the Cast can be optimized: if the
// type of its input is narrower than the Cast's type, we must not remove it, or we would lose the control dependency.
return make_with(in(1), wide_t, _dependency.with_non_narrowing());
}
}
return nullptr;
}

const TypeInteger* ConstraintCastNode::widen_type(const PhaseGVN* phase, const Type* res, BasicType bt) const {
const TypeInteger* this_type = res->is_integer(bt);
// At VerifyConstraintCasts == 1, we verify the ConstraintCastNodes that are present during code
// emission. This allows us to detect possible mis-scheduling due to these nodes being pinned at
// the wrong control nodes.
@@ -612,10 +650,9 @@ const Type* ConstraintCastNode::widen_type(const PhaseGVN* phase, const Type* re
// mis-transformations that may happen due to these nodes being pinned at the wrong control
// nodes.
if (VerifyConstraintCasts > 1) {
return res;
return this_type;
}

const TypeInteger* this_type = res->is_integer(bt);
const TypeInteger* in_type = phase->type(in(1))->isa_integer(bt);
if (in_type != nullptr &&
(in_type->lo_as_long() != this_type->lo_as_long() ||
@@ -636,5 +673,5 @@ const Type* ConstraintCastNode::widen_type(const PhaseGVN* phase, const Type* re
MIN2(in_type->hi_as_long(), hi1),
MAX2((int)in_type->_widen, w1), bt);
}
return res;
return this_type;
}

@@ -33,21 +33,119 @@
// cast to a different range
class ConstraintCastNode: public TypeNode {
public:
enum DependencyType {
RegularDependency, // if cast doesn't improve input type, cast can be removed
StrongDependency, // leave cast in even if _type doesn't improve input type, can be replaced by stricter dominating cast if one exist
UnconditionalDependency // leave cast in unconditionally
// Cast nodes are subject to a few optimizations:
//
// 1- if the type carried by the Cast doesn't narrow the type of its input, the cast can be replaced by its input.
// Similarly, if a dominating Cast with the same input and a narrower type constraint is found, it can replace the
// current cast.
//
// 2- if the condition that the Cast is control dependent on is hoisted, the Cast is hoisted as well
//
// 1- and 2- are not always applied, depending on what constraints are applied to the Cast: there are cases where 1-
// and 2- apply, where neither 1- nor 2- apply, and where one or the other applies. This class abstracts away these
// details.
//
// If _narrows_type is true, the cast carries a type dependency: "after" the control the cast is dependent on, its data
// input is known to have a narrower type (stored in the cast node itself). Optimization 1- above only applies to cast
// nodes for which _narrows_type is true.
// If _floating is true, the cast only depends on a single control: its control input. Otherwise, it is pinned at its
// current location. Optimization 2- only applies to cast nodes for which _floating is true.
// _floating here is similar to Node::depends_only_on_test().
// All 4 combinations of _narrows_type/_floating true/false have uses. See below, at the end of this class
// definition, for examples.
class DependencyType {
private:
const bool _floating; // Does this Cast depend on its control input, or is it pinned?
const bool _narrows_type; // Does this Cast narrow the type, i.e. if the input type is narrower, can it be removed?
const char* _desc;
DependencyType(bool depends_on_test, bool narrows_type, const char* desc)
: _floating(depends_on_test),
_narrows_type(narrows_type),
_desc(desc) {
}
NONCOPYABLE(DependencyType);

public:

bool is_floating() const {
return _floating;
}

bool narrows_type() const {
return _narrows_type;
}

void dump_on(outputStream *st) const {
st->print("%s", _desc);
}

uint hash() const {
return (_floating ? 1 : 0) + (_narrows_type ? 2 : 0);
}

bool cmp(const DependencyType& other) const {
return _floating == other._floating && _narrows_type == other._narrows_type;
}

const DependencyType& with_non_narrowing() const {
if (_floating) {
return FloatingNonNarrowing;
}
return NonFloatingNonNarrowing;
}

const DependencyType& with_pinned_dependency() const {
if (_narrows_type) {
return NonFloatingNarrowing;
}
return NonFloatingNonNarrowing;
}

// All the possible combinations of floating/narrowing with example use cases:

// Use case example: Range Check CastII
// Floating: The Cast is only dependent on the single range check. If the range check was ever to be hoisted, it
// would be safe to let the Cast float to where the range check is hoisted up to.
// Narrowing: The Cast narrows the type to a positive index. If the input to the Cast is narrower, we can safely
// remove the cast because the array access will be safe.
static const DependencyType FloatingNarrowing;
// Use case example: Widening Cast nodes' types after loop opts: We want to common Casts with slightly different types.
// Floating: These Casts only depend on the single control.
// NonNarrowing: Even when the input type is narrower, we are not removing the Cast. Otherwise, the dependency
// on the single control is lost, and an array access could float above its range check because we
// just removed the dependency on the range check by removing the Cast. This could lead to an
// out-of-bounds access.
static const DependencyType FloatingNonNarrowing;
// Use case example: An array access that is no longer dependent on a single range check (e.g. range check smearing).
// NonFloating: The array access must be pinned below all the checks it depends on. If the check it directly depends
// on with a control input is hoisted, we do not hoist the Cast as well. If we allowed the Cast to float,
// we risk that the array access ends up above another check it depends on (we cannot model two control
// dependencies for a node in the IR). This could lead to an out-of-bounds access.
// Narrowing: If the Cast does not narrow the input type, then it's safe to remove the cast because the array access
// will be safe.
static const DependencyType NonFloatingNarrowing;
// Use case example: Sinking nodes out of a loop
// Non-Floating & Non-Narrowing: We don't want the Cast that forces the node to be out of the loop to be removed in any
// case. Otherwise, the sunk node could float back into the loop, undoing the sinking.
// This Cast is only used for pinning, without caring about narrowing types.
static const DependencyType NonFloatingNonNarrowing;

};

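// Illustrative only (hypothetical variable names, not part of the patch): how call sites in this
// change pick one of the four dependency kinds:
//
//   // Range-check CastII: may float with its check, and may be removed if the index type is already narrow enough.
//   Node* idx_cast = new CastIINode(rc_ctrl, index, idx_type, ConstraintCastNode::DependencyType::FloatingNarrowing);
//
//   // Cast used only to pin a node sunk out of a loop: must neither float nor be removed.
//   Node* pin_cast = ConstraintCastNode::make_cast_for_type(x_ctrl, in, in_t,
//                      ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, nullptr);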
protected:
|
||||
const DependencyType _dependency;
|
||||
protected:
|
||||
const DependencyType& _dependency;
|
||||
virtual bool cmp( const Node &n ) const;
|
||||
virtual uint size_of() const;
|
||||
virtual uint hash() const; // Check the type
|
||||
const Type* widen_type(const PhaseGVN* phase, const Type* res, BasicType bt) const;
|
||||
Node* find_or_make_integer_cast(PhaseIterGVN* igvn, Node* parent, const TypeInteger* type) const;
|
||||
const TypeInteger* widen_type(const PhaseGVN* phase, const Type* res, BasicType bt) const;
|
||||
|
||||
virtual ConstraintCastNode* make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const {
|
||||
ShouldNotReachHere(); // Only implemented for CastII and CastLL
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* find_or_make_integer_cast(PhaseIterGVN* igvn, Node* parent, const TypeInteger* type, const DependencyType& dependency) const;
|
||||
|
||||
private:
|
||||
// PhiNode::Ideal() transforms a Phi that merges a single uncasted value into a single cast pinned at the region.
|
||||
// The types of cast nodes eliminated as a consequence of this transformation are collected and stored here so the
|
||||
// type dependencies carried by the cast are known. The cast can then be eliminated if the type of its input is
|
||||
@@ -55,7 +153,7 @@ public:
|
||||
const TypeTuple* _extra_types;
|
||||
|
||||
public:
|
||||
ConstraintCastNode(Node* ctrl, Node* n, const Type* t, ConstraintCastNode::DependencyType dependency,
|
||||
ConstraintCastNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency,
|
||||
const TypeTuple* extra_types)
|
||||
: TypeNode(t,2), _dependency(dependency), _extra_types(extra_types) {
|
||||
init_class_id(Class_ConstraintCast);
|
||||
@@ -67,18 +165,21 @@ public:
|
||||
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const = 0;
|
||||
virtual bool depends_only_on_test() const { return _dependency == RegularDependency; }
|
||||
bool carry_dependency() const { return _dependency != RegularDependency; }
|
||||
bool carry_dependency() const { return !_dependency.cmp(DependencyType::FloatingNarrowing); }
|
||||
// A cast node depends_only_on_test if and only if it is floating
|
||||
virtual bool depends_only_on_test() const { return _dependency.is_floating(); }
|
||||
const DependencyType& dependency() const { return _dependency; }
|
||||
TypeNode* dominating_cast(PhaseGVN* gvn, PhaseTransform* pt) const;
|
||||
static Node* make_cast_for_basic_type(Node* c, Node* n, const Type* t, DependencyType dependency, BasicType bt);
|
||||
static Node* make_cast_for_basic_type(Node* c, Node* n, const Type* t, const DependencyType& dependency, BasicType bt);
|
||||
|
||||
#ifndef PRODUCT
|
||||
virtual void dump_spec(outputStream *st) const;
|
||||
#endif
|
||||
|
||||
static Node* make_cast_for_type(Node* c, Node* in, const Type* type, DependencyType dependency,
|
||||
static Node* make_cast_for_type(Node* c, Node* in, const Type* type, const DependencyType& dependency,
|
||||
const TypeTuple* types);
|
||||
|
||||
Node* optimize_integer_cast_of_add(PhaseGVN* phase, BasicType bt);
|
||||
Node* optimize_integer_cast(PhaseGVN* phase, BasicType bt);
|
||||
|
||||
bool higher_equal_types(PhaseGVN* phase, const Node* other) const;
|
||||
@@ -102,7 +203,7 @@ class CastIINode: public ConstraintCastNode {
|
||||
virtual uint size_of() const;
|
||||
|
||||
public:
|
||||
CastIINode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, bool range_check_dependency = false, const TypeTuple* types = nullptr)
|
||||
CastIINode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, bool range_check_dependency = false, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types), _range_check_dependency(range_check_dependency) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastII);
|
||||
@@ -110,7 +211,7 @@ class CastIINode: public ConstraintCastNode {
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const { return Op_RegI; }
|
||||
virtual Node* Identity(PhaseGVN* phase);
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
|
||||
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
|
||||
bool has_range_check() const {
|
||||
#ifdef _LP64
|
||||
@@ -122,6 +223,7 @@ class CastIINode: public ConstraintCastNode {
|
||||
}
|
||||
|
||||
CastIINode* pin_array_access_node() const;
|
||||
CastIINode* make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const;
|
||||
void remove_range_check_cast(Compile* C);
|
||||
|
||||
#ifndef PRODUCT
|
||||
@@ -131,14 +233,12 @@ class CastIINode: public ConstraintCastNode {
|
||||
|
||||
class CastLLNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastLLNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastLLNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastLL);
|
||||
}
|
||||
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
|
||||
static bool is_inner_loop_backedge(ProjNode* proj);
|
||||
|
||||
static bool cmp_used_at_inner_loop_exit_test(CmpNode* cmp);
|
||||
@@ -147,11 +247,12 @@ public:
|
||||
virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const { return Op_RegL; }
|
||||
CastLLNode* make_with(Node* parent, const TypeInteger* type, const DependencyType& dependency) const;
|
||||
};
|
||||
|
||||
class CastHHNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastHHNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastHHNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastHH);
|
||||
@@ -162,7 +263,7 @@ public:
|
||||
|
||||
class CastFFNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastFFNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastFFNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastFF);
|
||||
@@ -173,7 +274,7 @@ public:
|
||||
|
||||
class CastDDNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastDDNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastDDNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastDD);
|
||||
@@ -184,7 +285,7 @@ public:
|
||||
|
||||
class CastVVNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastVVNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastVVNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CastVV);
|
||||
@@ -198,7 +299,7 @@ public:
|
||||
// cast pointer to pointer (different type)
|
||||
class CastPPNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastPPNode (Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CastPPNode (Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
init_class_id(Class_CastPP);
|
||||
}
|
||||
@@ -210,7 +311,7 @@ class CastPPNode: public ConstraintCastNode {
|
||||
// for _checkcast, cast pointer to pointer (different type), without JOIN,
|
||||
class CheckCastPPNode: public ConstraintCastNode {
|
||||
public:
|
||||
CheckCastPPNode(Node* ctrl, Node* n, const Type* t, DependencyType dependency = RegularDependency, const TypeTuple* types = nullptr)
|
||||
CheckCastPPNode(Node* ctrl, Node* n, const Type* t, const DependencyType& dependency = DependencyType::FloatingNarrowing, const TypeTuple* types = nullptr)
|
||||
: ConstraintCastNode(ctrl, n, t, dependency, types) {
|
||||
assert(ctrl != nullptr, "control must be set");
|
||||
init_class_id(Class_CheckCastPP);
|
||||
|
||||
@@ -2192,7 +2192,7 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
if (phi_type->isa_ptr()) {
|
||||
const Type* uin_type = phase->type(uin);
|
||||
if (!phi_type->isa_oopptr() && !uin_type->isa_oopptr()) {
|
||||
cast = new CastPPNode(r, uin, phi_type, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = new CastPPNode(r, uin, phi_type, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
} else {
|
||||
// Use a CastPP for a cast to not null and a CheckCastPP for
|
||||
// a cast to a new klass (and both if both null-ness and
|
||||
@@ -2202,7 +2202,7 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
// null, uin's type must be casted to not null
|
||||
if (phi_type->join(TypePtr::NOTNULL) == phi_type->remove_speculative() &&
|
||||
uin_type->join(TypePtr::NOTNULL) != uin_type->remove_speculative()) {
|
||||
cast = new CastPPNode(r, uin, TypePtr::NOTNULL, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = new CastPPNode(r, uin, TypePtr::NOTNULL, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
}
|
||||
|
||||
// If the type of phi and uin, both casted to not null,
|
||||
@@ -2214,14 +2214,14 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
cast = phase->transform(cast);
|
||||
n = cast;
|
||||
}
|
||||
cast = new CheckCastPPNode(r, n, phi_type, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = new CheckCastPPNode(r, n, phi_type, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
}
|
||||
if (cast == nullptr) {
|
||||
cast = new CastPPNode(r, uin, phi_type, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = new CastPPNode(r, uin, phi_type, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cast = ConstraintCastNode::make_cast_for_type(r, uin, phi_type, ConstraintCastNode::StrongDependency, extra_types);
|
||||
cast = ConstraintCastNode::make_cast_for_type(r, uin, phi_type, ConstraintCastNode::DependencyType::NonFloatingNarrowing, extra_types);
|
||||
}
|
||||
assert(cast != nullptr, "cast should be set");
|
||||
cast = phase->transform(cast);
|
||||
|
||||
@@ -1726,8 +1726,6 @@ Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_cr
|
||||
}
|
||||
if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
if (flat->offset() == in_bytes(Klass::access_flags_offset()))
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
if (flat->offset() == in_bytes(Klass::misc_flags_offset()))
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
if (flat->offset() == in_bytes(Klass::java_mirror_offset()))
|
||||
@@ -1735,6 +1733,12 @@ Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_cr
|
||||
if (flat->offset() == in_bytes(Klass::secondary_super_cache_offset()))
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
}
|
||||
|
||||
if (flat->isa_instklassptr()) {
|
||||
if (flat->offset() == in_bytes(InstanceKlass::access_flags_offset())) {
|
||||
alias_type(idx)->set_rewritable(false);
|
||||
}
|
||||
}
|
||||
// %%% (We would like to finalize JavaThread::threadObj_offset(),
|
||||
// but the base pointer type is not distinctive enough to identify
|
||||
// references into JavaThread.)
|
||||
@@ -4578,7 +4582,7 @@ Node* Compile::constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt*
|
||||
// node from floating above the range check during loop optimizations. Otherwise, the
|
||||
// ConvI2L node may be eliminated independently of the range check, causing the data path
|
||||
// to become TOP while the control path is still there (although it's unreachable).
|
||||
value = new CastIINode(ctrl, value, itype, carry_dependency ? ConstraintCastNode::StrongDependency : ConstraintCastNode::RegularDependency, true /* range check dependency */);
|
||||
value = new CastIINode(ctrl, value, itype, carry_dependency ? ConstraintCastNode::DependencyType::NonFloatingNarrowing : ConstraintCastNode::DependencyType::FloatingNarrowing, true /* range check dependency */);
|
||||
value = phase->transform(value);
|
||||
}
|
||||
const TypeLong* ltype = TypeLong::make(itype->_lo, itype->_hi, itype->_widen);
|
||||
|
||||
@@ -984,7 +984,8 @@ public:
|
||||
JVMState* jvms, bool allow_inline, float profile_factor, ciKlass* speculative_receiver_type = nullptr,
|
||||
bool allow_intrinsics = true);
|
||||
bool should_delay_inlining(ciMethod* call_method, JVMState* jvms) {
|
||||
return should_delay_string_inlining(call_method, jvms) ||
|
||||
return C->directive()->should_delay_inline(call_method) ||
|
||||
should_delay_string_inlining(call_method, jvms) ||
|
||||
should_delay_boxing_inlining(call_method, jvms) ||
|
||||
should_delay_vector_inlining(call_method, jvms);
|
||||
}
|
||||
|
||||
@@ -26,97 +26,114 @@
|
||||
#include "opto/opcodes.hpp"
|
||||
#include "opto/phaseX.hpp"
|
||||
#include "opto/type.hpp"
|
||||
#include "utilities/count_leading_zeros.hpp"
|
||||
#include "utilities/count_trailing_zeros.hpp"
|
||||
#include "utilities/population_count.hpp"
|
||||
|
||||
static int count_leading_zeros_int(jint i) {
|
||||
return i == 0 ? BitsPerInt : count_leading_zeros(i);
|
||||
}
|
||||
|
||||
static int count_leading_zeros_long(jlong l) {
|
||||
return l == 0 ? BitsPerLong : count_leading_zeros(l);
|
||||
}
|
||||
|
||||
static int count_trailing_zeros_int(jint i) {
|
||||
return i == 0 ? BitsPerInt : count_trailing_zeros(i);
|
||||
}
|
||||
|
||||
static int count_trailing_zeros_long(jlong l) {
|
||||
return l == 0 ? BitsPerLong : count_trailing_zeros(l);
|
||||
}
|
||||
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* CountLeadingZerosINode::Value(PhaseGVN* phase) const {
|
||||
const Type* t = phase->type(in(1));
|
||||
if (t == Type::TOP) return Type::TOP;
|
||||
const TypeInt* ti = t->isa_int();
|
||||
if (ti && ti->is_con()) {
|
||||
jint i = ti->get_con();
|
||||
// HD, Figure 5-6
|
||||
if (i == 0)
|
||||
return TypeInt::make(BitsPerInt);
|
||||
int n = 1;
|
||||
unsigned int x = i;
|
||||
if (x >> 16 == 0) { n += 16; x <<= 16; }
|
||||
if (x >> 24 == 0) { n += 8; x <<= 8; }
|
||||
if (x >> 28 == 0) { n += 4; x <<= 4; }
|
||||
if (x >> 30 == 0) { n += 2; x <<= 2; }
|
||||
n -= x >> 31;
|
||||
return TypeInt::make(n);
|
||||
if (t == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
return TypeInt::INT;
|
||||
|
||||
// To minimize `count_leading_zeros(x)`, we should make the highest 1 bit in x
// as far to the left as possible. A bit in x can be 1 iff this bit is not
// forced to be 0, i.e. the corresponding bit in `x._bits._zeros` is 0. Thus:
// min(clz(x)) = number of bits to the left of the highest 0 bit in x._bits._zeros
// = count_leading_ones(x._bits._zeros) = clz(~x._bits._zeros)
//
// To maximize `count_leading_zeros(x)`, we should make the leading zeros as
// many as possible. A bit in x can be 0 iff this bit is not forced to be 1,
// i.e. the corresponding bit in `x._bits._ones` is 0. Thus:
// max(clz(x)) = clz(x._bits._ones)
//
// Therefore, the range of `count_leading_zeros(x)` is:
// [clz(~x._bits._zeros), clz(x._bits._ones)]
//
// A more detailed proof using Z3 can be found at:
// https://github.com/openjdk/jdk/pull/25928#discussion_r2256750507
const TypeInt* ti = t->is_int();
return TypeInt::make(count_leading_zeros_int(~ti->_bits._zeros),
count_leading_zeros_int(ti->_bits._ones),
ti->_widen);
}
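A short standalone C++20 check of this derivation (KnownBits32 is an illustrative stand-in for the C2 known-bits representation, not the real type):

#include <bit>
#include <cassert>
#include <cstdint>

struct KnownBits32 {
  uint32_t zeros;  // bit set => that bit of the value is known to be 0
  uint32_t ones;   // bit set => that bit of the value is known to be 1
};

// Range of count_leading_zeros over all values compatible with the known bits.
int clz_min(KnownBits32 k) { return std::countl_one(k.zeros); }   // = clz(~zeros)
int clz_max(KnownBits32 k) { return std::countl_zero(k.ones); }

int main() {
  // Bits 31..28 known to be 0, bit 3 known to be 1, everything else unknown.
  KnownBits32 k{0xF0000000u, 0x00000008u};
  assert(clz_min(k) == 4);   // best case: bit 27 may be 1
  assert(clz_max(k) == 28);  // worst case: only bit 3 is set
  return 0;
}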
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* CountLeadingZerosLNode::Value(PhaseGVN* phase) const {
|
||||
const Type* t = phase->type(in(1));
|
||||
if (t == Type::TOP) return Type::TOP;
|
||||
const TypeLong* tl = t->isa_long();
|
||||
if (tl && tl->is_con()) {
|
||||
jlong l = tl->get_con();
|
||||
// HD, Figure 5-6
|
||||
if (l == 0)
|
||||
return TypeInt::make(BitsPerLong);
|
||||
int n = 1;
|
||||
unsigned int x = (((julong) l) >> 32);
|
||||
if (x == 0) { n += 32; x = (int) l; }
|
||||
if (x >> 16 == 0) { n += 16; x <<= 16; }
|
||||
if (x >> 24 == 0) { n += 8; x <<= 8; }
|
||||
if (x >> 28 == 0) { n += 4; x <<= 4; }
|
||||
if (x >> 30 == 0) { n += 2; x <<= 2; }
|
||||
n -= x >> 31;
|
||||
return TypeInt::make(n);
|
||||
if (t == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
return TypeInt::INT;
|
||||
|
||||
// The proof of correctness is the same as in the comments
// in `CountLeadingZerosINode::Value` above.
const TypeLong* tl = t->is_long();
|
||||
return TypeInt::make(count_leading_zeros_long(~tl->_bits._zeros),
|
||||
count_leading_zeros_long(tl->_bits._ones),
|
||||
tl->_widen);
|
||||
}
|
||||
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* CountTrailingZerosINode::Value(PhaseGVN* phase) const {
|
||||
const Type* t = phase->type(in(1));
|
||||
if (t == Type::TOP) return Type::TOP;
|
||||
const TypeInt* ti = t->isa_int();
|
||||
if (ti && ti->is_con()) {
|
||||
jint i = ti->get_con();
|
||||
// HD, Figure 5-14
|
||||
int y;
|
||||
if (i == 0)
|
||||
return TypeInt::make(BitsPerInt);
|
||||
int n = 31;
|
||||
y = i << 16; if (y != 0) { n = n - 16; i = y; }
|
||||
y = i << 8; if (y != 0) { n = n - 8; i = y; }
|
||||
y = i << 4; if (y != 0) { n = n - 4; i = y; }
|
||||
y = i << 2; if (y != 0) { n = n - 2; i = y; }
|
||||
y = i << 1; if (y != 0) { n = n - 1; }
|
||||
return TypeInt::make(n);
|
||||
if (t == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
return TypeInt::INT;
|
||||
|
||||
// To minimize `count_trailing_zeros(x)`, we should make the lowest 1 bit in x
// as far to the right as possible. A bit in x can be 1 iff this bit is not
// forced to be 0, i.e. the corresponding bit in `x._bits._zeros` is 0. Thus:
// min(ctz(x)) = number of bits to the right of the lowest 0 bit in x._bits._zeros
// = count_trailing_ones(x._bits._zeros) = ctz(~x._bits._zeros)
//
// To maximize `count_trailing_zeros(x)`, we should make the trailing zeros as
// many as possible. A bit in x can be 0 iff this bit is not forced to be 1,
// i.e. the corresponding bit in `x._bits._ones` is 0. Thus:
// max(ctz(x)) = ctz(x._bits._ones)
//
// Therefore, the range of `count_trailing_zeros(x)` is:
// [ctz(~x._bits._zeros), ctz(x._bits._ones)]
//
// A more detailed proof using Z3 can be found at:
// https://github.com/openjdk/jdk/pull/25928#discussion_r2256750507
const TypeInt* ti = t->is_int();
return TypeInt::make(count_trailing_zeros_int(~ti->_bits._zeros),
count_trailing_zeros_int(ti->_bits._ones),
ti->_widen);
}
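The trailing-zero analogue, again as a standalone check rather than HotSpot code:

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  // Illustrative known bits: low 4 bits known to be 0, bit 8 known to be 1, rest unknown.
  uint32_t zeros = 0x0000000Fu, ones = 0x00000100u;
  assert(std::countr_one(zeros) == 4);   // min(ctz): bit 4 may be 1
  assert(std::countr_zero(ones) == 8);   // max(ctz): bit 8 is the lowest guaranteed 1
  return 0;
}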
|
||||
//------------------------------Value------------------------------------------
|
||||
const Type* CountTrailingZerosLNode::Value(PhaseGVN* phase) const {
|
||||
const Type* t = phase->type(in(1));
|
||||
if (t == Type::TOP) return Type::TOP;
|
||||
const TypeLong* tl = t->isa_long();
|
||||
if (tl && tl->is_con()) {
|
||||
jlong l = tl->get_con();
|
||||
// HD, Figure 5-14
|
||||
int x, y;
|
||||
if (l == 0)
|
||||
return TypeInt::make(BitsPerLong);
|
||||
int n = 63;
|
||||
y = (int) l; if (y != 0) { n = n - 32; x = y; } else x = (((julong) l) >> 32);
|
||||
y = x << 16; if (y != 0) { n = n - 16; x = y; }
|
||||
y = x << 8; if (y != 0) { n = n - 8; x = y; }
|
||||
y = x << 4; if (y != 0) { n = n - 4; x = y; }
|
||||
y = x << 2; if (y != 0) { n = n - 2; x = y; }
|
||||
y = x << 1; if (y != 0) { n = n - 1; }
|
||||
return TypeInt::make(n);
|
||||
if (t == Type::TOP) {
|
||||
return Type::TOP;
|
||||
}
|
||||
return TypeInt::INT;
|
||||
|
||||
// The proof of correctness is the same as in the comments
// in `CountTrailingZerosINode::Value` above.
const TypeLong* tl = t->is_long();
|
||||
return TypeInt::make(count_trailing_zeros_long(~tl->_bits._zeros),
|
||||
count_trailing_zeros_long(tl->_bits._ones),
|
||||
tl->_widen);
|
||||
}
|
||||
|
||||
// We use the KnownBits information from the integer types to derive how many one bits
|
||||
// we have at least and at most.
|
||||
// From the definition of KnownBits, we know:
|
||||
|
||||
@@ -192,7 +192,7 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
|
||||
// Try inlining a bytecoded method:
|
||||
if (!call_does_dispatch) {
|
||||
InlineTree* ilt = InlineTree::find_subtree_from_root(this->ilt(), jvms->caller(), jvms->method());
|
||||
bool should_delay = C->should_delay_inlining();
|
||||
bool should_delay = C->should_delay_inlining() || C->directive()->should_delay_inline(callee);
|
||||
if (ilt->ok_to_inline(callee, jvms, profile, should_delay)) {
|
||||
CallGenerator* cg = CallGenerator::for_inline(callee, expected_uses);
|
||||
// For optimized virtual calls assert at runtime that receiver object
|
||||
|
||||
@@ -748,7 +748,7 @@ Node* ConnectionGraph::specialize_castpp(Node* castpp, Node* base, Node* current
|
||||
_igvn->_worklist.push(current_control);
|
||||
_igvn->_worklist.push(control_successor);
|
||||
|
||||
return _igvn->transform(ConstraintCastNode::make_cast_for_type(not_eq_control, base, _igvn->type(castpp), ConstraintCastNode::UnconditionalDependency, nullptr));
|
||||
return _igvn->transform(ConstraintCastNode::make_cast_for_type(not_eq_control, base, _igvn->type(castpp), ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, nullptr));
|
||||
}
|
||||
|
||||
Node* ConnectionGraph::split_castpp_load_through_phi(Node* curr_addp, Node* curr_load, Node* region, GrowableArray<Node*>* bases_for_loads, GrowableArray<Node *> &alloc_worklist) {
|
||||
@@ -1235,7 +1235,7 @@ bool ConnectionGraph::reduce_phi_on_safepoints_helper(Node* ophi, Node* cast, No
|
||||
Node* nsr_merge_pointer = ophi;
|
||||
if (cast != nullptr) {
|
||||
const Type* new_t = merge_t->meet(TypePtr::NULL_PTR);
|
||||
nsr_merge_pointer = _igvn->transform(ConstraintCastNode::make_cast_for_type(cast->in(0), cast->in(1), new_t, ConstraintCastNode::RegularDependency, nullptr));
|
||||
nsr_merge_pointer = _igvn->transform(ConstraintCastNode::make_cast_for_type(cast->in(0), cast->in(1), new_t, ConstraintCastNode::DependencyType::FloatingNarrowing, nullptr));
|
||||
}
|
||||
|
||||
for (uint spi = 0; spi < safepoints.size(); spi++) {
|
||||
@@ -1376,7 +1376,7 @@ void ConnectionGraph::reset_scalar_replaceable_entries(PhiNode* ophi) {
|
||||
}
|
||||
|
||||
if (change) {
|
||||
Node* new_cast = ConstraintCastNode::make_cast_for_type(out->in(0), out->in(1), out_new_t, ConstraintCastNode::StrongDependency, nullptr);
|
||||
Node* new_cast = ConstraintCastNode::make_cast_for_type(out->in(0), out->in(1), out_new_t, ConstraintCastNode::DependencyType::NonFloatingNarrowing, nullptr);
|
||||
_igvn->replace_node(out, new_cast);
|
||||
_igvn->register_new_node_with_optimizer(new_cast);
|
||||
}
|
||||
|
||||
@@ -1183,7 +1183,7 @@ bool LibraryCallKit::inline_preconditions_checkIndex(BasicType bt) {
|
||||
jlong upper_bound = _gvn.type(length)->is_integer(bt)->hi_as_long();
|
||||
Node* casted_length = ConstraintCastNode::make_cast_for_basic_type(
|
||||
control(), length, TypeInteger::make(0, upper_bound, Type::WidenMax, bt),
|
||||
ConstraintCastNode::RegularDependency, bt);
|
||||
ConstraintCastNode::DependencyType::FloatingNarrowing, bt);
|
||||
casted_length = _gvn.transform(casted_length);
|
||||
replace_in_map(length, casted_length);
|
||||
length = casted_length;
|
||||
@@ -1213,7 +1213,7 @@ bool LibraryCallKit::inline_preconditions_checkIndex(BasicType bt) {
|
||||
// index is now known to be >= 0 and < length, cast it
|
||||
Node* result = ConstraintCastNode::make_cast_for_basic_type(
|
||||
control(), index, TypeInteger::make(0, upper_bound, Type::WidenMax, bt),
|
||||
ConstraintCastNode::RegularDependency, bt);
|
||||
ConstraintCastNode::DependencyType::FloatingNarrowing, bt);
|
||||
result = _gvn.transform(result);
|
||||
set_result(result);
|
||||
replace_in_map(index, result);
|
||||
@@ -4020,7 +4020,7 @@ Node* LibraryCallKit::generate_klass_flags_guard(Node* kls, int modifier_mask, i
|
||||
}
|
||||
Node* LibraryCallKit::generate_interface_guard(Node* kls, RegionNode* region) {
|
||||
return generate_klass_flags_guard(kls, JVM_ACC_INTERFACE, 0, region,
|
||||
Klass::access_flags_offset(), TypeInt::CHAR, T_CHAR);
|
||||
InstanceKlass::access_flags_offset(), TypeInt::CHAR, T_CHAR);
|
||||
}
|
||||
|
||||
// Use this for testing if Klass is_hidden, has_finalizer, and is_cloneable_fast.
|
||||
@@ -4132,12 +4132,16 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
|
||||
// Arrays store an intermediate super as _super, but must report Object.
|
||||
// Other types can report the actual _super.
|
||||
// (To verify this code sequence, check the asserts in JVM_IsInterface.)
|
||||
if (generate_interface_guard(kls, region) != nullptr)
|
||||
// A guard was added. If the guard is taken, it was an interface.
|
||||
phi->add_req(null());
|
||||
if (generate_array_guard(kls, region) != nullptr)
|
||||
if (generate_array_guard(kls, region) != nullptr) {
|
||||
// A guard was added. If the guard is taken, it was an array.
|
||||
phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
|
||||
}
|
||||
// Check for interface after array since this checks AccessFlags offset into InstanceKlass.
|
||||
// In other words, we are accessing subtype-specific information, so we need to determine the subtype first.
|
||||
if (generate_interface_guard(kls, region) != nullptr) {
|
||||
// A guard was added. If the guard is taken, it was an interface.
|
||||
phi->add_req(null());
|
||||
}
|
||||
// If we fall through, it's a plain class. Get its _super.
|
||||
p = basic_plus_adr(kls, in_bytes(Klass::super_offset()));
|
||||
kls = _gvn.transform(LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeInstKlassPtr::OBJECT_OR_NULL));
|
||||
|
||||
@@ -1366,7 +1366,7 @@ Node *PhaseIdealLoop::clone_up_backedge_goo(Node *back_ctrl, Node *preheader_ctr
|
||||
// the backedge of the main or post loop is removed, a Div node won't be able to float above the zero trip guard of the
|
||||
// loop and can't execute even if the loop is not reached.
|
||||
void PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, CountedLoopNode* loop) {
|
||||
Node* castii = new CastIINode(ctrl, incr, TypeInt::INT, ConstraintCastNode::UnconditionalDependency);
|
||||
Node* castii = new CastIINode(ctrl, incr, TypeInt::INT, ConstraintCastNode::DependencyType::NonFloatingNonNarrowing);
|
||||
register_new_node(castii, ctrl);
|
||||
Node* phi = loop->phi();
|
||||
assert(phi->in(LoopNode::EntryControl) == incr, "replacing wrong input?");
|
||||
@@ -3262,7 +3262,7 @@ bool IdealLoopTree::do_remove_empty_loop(PhaseIdealLoop *phase) {
|
||||
Node* cast_ii = ConstraintCastNode::make_cast_for_basic_type(
|
||||
cl->in(LoopNode::EntryControl), exact_limit,
|
||||
phase->_igvn.type(exact_limit),
|
||||
ConstraintCastNode::UnconditionalDependency, T_INT);
|
||||
ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, T_INT);
|
||||
phase->register_new_node(cast_ii, cl->in(LoopNode::EntryControl));
|
||||
|
||||
Node* final_iv = new SubINode(cast_ii, cl->stride());
|
||||
|
||||
@@ -1001,7 +1001,7 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
|
||||
// a negative stride). We add a CastII here to guarantee that, when the counted loop is created in a subsequent loop
|
||||
// opts pass, an accurate range of values for the limits is found.
|
||||
const TypeInt* inner_iters_actual_int_range = TypeInt::make(0, iters_limit, Type::WidenMin);
|
||||
inner_iters_actual_int = new CastIINode(outer_head, inner_iters_actual_int, inner_iters_actual_int_range, ConstraintCastNode::UnconditionalDependency);
|
||||
inner_iters_actual_int = new CastIINode(outer_head, inner_iters_actual_int, inner_iters_actual_int_range, ConstraintCastNode::DependencyType::NonFloatingNonNarrowing);
|
||||
_igvn.register_new_node_with_optimizer(inner_iters_actual_int);
|
||||
} else {
|
||||
inner_iters_actual_int = inner_iters_actual;
|
||||
@@ -1315,7 +1315,7 @@ bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint strid
|
||||
register_new_node(bol, iff->in(0));
|
||||
new_limit = ConstraintCastNode::make_cast_for_basic_type(new_predicate_proj, new_limit,
|
||||
TypeInteger::make(1, iters_limit_long, Type::WidenMin, bt),
|
||||
ConstraintCastNode::UnconditionalDependency, bt);
|
||||
ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, bt);
|
||||
register_new_node(new_limit, new_predicate_proj);
|
||||
|
||||
#ifndef PRODUCT
|
||||
@@ -1334,7 +1334,7 @@ bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint strid
|
||||
const TypeLong* new_limit_t = new_limit->Value(&_igvn)->is_long();
|
||||
new_limit = ConstraintCastNode::make_cast_for_basic_type(predicates.entry(), new_limit,
|
||||
TypeLong::make(0, new_limit_t->_hi, new_limit_t->_widen),
|
||||
ConstraintCastNode::UnconditionalDependency, bt);
|
||||
ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, bt);
|
||||
register_new_node(new_limit, predicates.entry());
|
||||
} else {
|
||||
assert(bt == T_INT && known_short_running_loop, "only CountedLoop statically known to be short running");
|
||||
|
||||
@@ -1174,7 +1174,7 @@ Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
|
||||
if ( nn ) return nn;
|
||||
}
|
||||
|
||||
if (n->is_ConstraintCast()) {
|
||||
if (n->is_ConstraintCast() && n->as_ConstraintCast()->dependency().narrows_type()) {
|
||||
Node* dom_cast = n->as_ConstraintCast()->dominating_cast(&_igvn, this);
|
||||
// ConstraintCastNode::dominating_cast() uses node control input to determine domination.
|
||||
// Node control inputs don't necessarily agree with loop control info (due to
|
||||
@@ -1837,7 +1837,7 @@ void PhaseIdealLoop::try_sink_out_of_loop(Node* n) {
|
||||
if (in != nullptr && ctrl_is_member(n_loop, in)) {
|
||||
const Type* in_t = _igvn.type(in);
|
||||
cast = ConstraintCastNode::make_cast_for_type(x_ctrl, in, in_t,
|
||||
ConstraintCastNode::UnconditionalDependency, nullptr);
|
||||
ConstraintCastNode::DependencyType::NonFloatingNonNarrowing, nullptr);
|
||||
}
|
||||
if (cast != nullptr) {
|
||||
Node* prev = _igvn.hash_find_insert(cast);
|
||||
|
||||
@@ -1914,7 +1914,8 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
|
||||
transform_later(cache_adr);
|
||||
cache_adr = new CastP2XNode(needgc_false, cache_adr);
|
||||
transform_later(cache_adr);
|
||||
// Address is aligned to execute prefetch to the beginning of cache line size.
|
||||
// Address is aligned to execute prefetch to the beginning of cache line size
|
||||
// (it is important when BIS instruction is used on SPARC as prefetch).
|
||||
Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
|
||||
cache_adr = new AndXNode(cache_adr, mask);
|
||||
transform_later(cache_adr);
|
||||
|
||||
@@ -233,7 +233,7 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode
|
||||
Node* inline_block = generate_guard(ctrl, bol_le, nullptr, PROB_FAIR);
|
||||
Node* stub_block = *ctrl;
|
||||
|
||||
Node* casted_length = new CastLLNode(inline_block, length, inline_range, ConstraintCastNode::RegularDependency);
|
||||
Node* casted_length = new CastLLNode(inline_block, length, inline_range, ConstraintCastNode::DependencyType::FloatingNarrowing);
|
||||
transform_later(casted_length);
|
||||
Node* mask_gen = VectorMaskGenNode::make(casted_length, type);
|
||||
transform_later(mask_gen);
|
||||
|
||||
@@ -329,6 +329,10 @@ public:
|
||||
|
||||
static bool match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt);
|
||||
|
||||
// Determines if a vector operation needs to be partially implemented with a mask
// ensuring that only the lanes in the range [0, vector_length) are processed. This applies
// to operations whose vector length is less than the hardware-supported maximum
// vector length. Returns true if the operation requires masking, false otherwise.
static bool vector_needs_partial_operations(Node* node, const TypeVect* vt);

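A minimal sketch of the lane-masking idea (partial_lane_mask is a made-up helper, not the Matcher API):

#include <cassert>
#include <cstdint>

// Build a lane mask enabling lanes [0, vlen) out of at most 64 lanes.
uint64_t partial_lane_mask(int vlen) {
  assert(vlen >= 0 && vlen <= 64);
  return vlen == 64 ? ~uint64_t{0} : ((uint64_t{1} << vlen) - 1);
}
// Example: an 8-lane operation on hardware whose widest vector has 16 lanes
// runs under partial_lane_mask(8) == 0xFF; the upper 8 lanes are left untouched.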
static bool vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen);
Some files were not shown because too many files have changed in this diff.