Mirror of https://github.com/JetBrains/JetBrainsRuntime.git (synced 2025-12-19 07:49:41 +01:00)

Compare commits (57 commits)
| SHA1 |
|---|
| cf948548c3 |
| 7ece9e90c0 |
| 9320ef9b29 |
| 2a565ff368 |
| 493b5bd2fd |
| f573f6d233 |
| 8a0a6f8c25 |
| 3d9d353edb |
| 1b621f5527 |
| 5463c9cd9a |
| ac07355f55 |
| 4fb5c12813 |
| d5a96e3f49 |
| aadf36809c |
| a3447ec656 |
| b25ed57b76 |
| df4ed7eff7 |
| 5718039a46 |
| c51685267c |
| 7d903964fb |
| 6f4824068d |
| e1fd663f22 |
| d5214a4288 |
| 2611a49ea1 |
| b8c0b2fd8c |
| 973bcdab81 |
| 6359b4ec23 |
| ce4b257fa5 |
| b270f30d10 |
| 486594d427 |
| ce8399fd60 |
| 3c6459e1de |
| 92fd490f22 |
| d13302f8b0 |
| ce108446ca |
| 5c12a182e3 |
| 71800884f6 |
| 0c178beb69 |
| 6c13a3032f |
| 5e6bfc5eaa |
| 2c2d4d2cde |
| 0eb299af79 |
| b893a2b2f7 |
| 05f950934e |
| 701bc3bbbe |
| 9e48b90c7f |
| bad5edf146 |
| f577385fc8 |
| 86623aa41d |
| af5c49226c |
| cb7e3d263a |
| 25dc4762b4 |
| 11e4a925be |
| 354ea4c28f |
| 959a443a9e |
| 4ed38f5ad5 |
| fe4c0a2f04 |
@@ -58,7 +58,7 @@ DEMO_MANIFEST := $(SUPPORT_OUTPUTDIR)/demos/java-main-manifest.mf
 # This rule will be depended on due to the MANIFEST line in SetupBuildDemo
 # and SetupBuildJvmtiDemo.
 $(eval $(call SetupTextFileProcessing, BUILD_JAVA_MANIFEST, \
-    SOURCE_FILES := $(TOPDIR)/make/data/mainmanifest/manifest.mf, \
+    SOURCE_FILES := $(TOPDIR)/make/data/mainmanifest/manifest.mf.template, \
     OUTPUT_FILE := $(DEMO_MANIFEST), \
     REPLACEMENTS := \
         @@VERSION_SPECIFICATION@@ => $(VERSION_SPECIFICATION) ; \
@@ -33,7 +33,7 @@ include TextFileProcessing.gmk
 
 # This rule will be depended on due to the MANIFEST line
 $(eval $(call SetupTextFileProcessing, BUILD_JAVA_MANIFEST, \
-    SOURCE_FILES := $(TOPDIR)/make/data/mainmanifest/manifest.mf, \
+    SOURCE_FILES := $(TOPDIR)/make/data/mainmanifest/manifest.mf.template, \
     OUTPUT_FILE := $(SUPPORT_OUTPUTDIR)/java-main-manifest.mf, \
     REPLACEMENTS := \
         @@VERSION_SPECIFICATION@@ => $(VERSION_SPECIFICATION) ; \
@@ -69,7 +69,7 @@ ifeq ($(call isTargetOs, macosx), true)
 ))
 
 $(eval $(call SetupTextFileProcessing, BUILD_JDK_PLIST, \
-    SOURCE_FILES := $(MACOSX_PLIST_SRC)/JDK-Info.plist, \
+    SOURCE_FILES := $(MACOSX_PLIST_SRC)/JDK-Info.plist.template, \
     OUTPUT_FILE := $(JDK_MACOSX_CONTENTS_DIR)/Info.plist, \
     REPLACEMENTS := \
         @@ID@@ => $(MACOSX_BUNDLE_ID_BASE).jdk ; \
@@ -82,7 +82,7 @@ ifeq ($(call isTargetOs, macosx), true)
 ))
 
 $(eval $(call SetupTextFileProcessing, BUILD_JRE_PLIST, \
-    SOURCE_FILES := $(MACOSX_PLIST_SRC)/JRE-Info.plist, \
+    SOURCE_FILES := $(MACOSX_PLIST_SRC)/JRE-Info.plist.template, \
     OUTPUT_FILE := $(JRE_MACOSX_CONTENTS_DIR)/Info.plist, \
     REPLACEMENTS := \
         @@ID@@ => $(MACOSX_BUNDLE_ID_BASE).jre ; \
@@ -744,9 +744,16 @@ endif
 
 $(eval $(call SetupTarget, build-test-lib, \
     MAKEFILE := test/BuildTestLib, \
+    TARGET := build-test-lib, \
     DEPS := exploded-image, \
 ))
 
+$(eval $(call SetupTarget, test-image-lib, \
+    MAKEFILE := test/BuildTestLib, \
+    TARGET := test-image-lib, \
+    DEPS := build-test-lib, \
+))
+
 ifeq ($(BUILD_FAILURE_HANDLER), true)
   # Builds the failure handler jtreg extension
   $(eval $(call SetupTarget, build-test-failure-handler, \
@@ -781,7 +788,7 @@ endif
 
 $(eval $(call SetupTarget, build-microbenchmark, \
     MAKEFILE := test/BuildMicrobenchmark, \
-    DEPS := interim-langtools exploded-image, \
+    DEPS := interim-langtools exploded-image build-test-lib, \
 ))
 
 ################################################################################
@@ -1264,7 +1271,7 @@ all-docs-bundles: docs-jdk-bundles docs-javase-bundles docs-reference-bundles
 # This target builds the test image
 test-image: prepare-test-image test-image-jdk-jtreg-native \
     test-image-demos-jdk test-image-libtest-jtreg-native \
-    test-image-lib-native
+    test-image-lib test-image-lib-native
 
 ifneq ($(JVM_TEST_IMAGE_TARGETS), )
   # If JVM_TEST_IMAGE_TARGETS is externally defined, use it instead of the
@@ -448,17 +448,17 @@ AC_DEFUN_ONCE([BASIC_SETUP_OUTPUT_DIR],
   AC_SUBST(CONFIGURESUPPORT_OUTPUTDIR)
 
   # The spec.gmk file contains all variables for the make system.
-  AC_CONFIG_FILES([$OUTPUTDIR/spec.gmk:$AUTOCONF_DIR/spec.gmk.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/spec.gmk:$AUTOCONF_DIR/spec.gmk.template])
   # The bootcycle-spec.gmk file contains support for boot cycle builds.
-  AC_CONFIG_FILES([$OUTPUTDIR/bootcycle-spec.gmk:$AUTOCONF_DIR/bootcycle-spec.gmk.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/bootcycle-spec.gmk:$AUTOCONF_DIR/bootcycle-spec.gmk.template])
   # The buildjdk-spec.gmk file contains support for building a buildjdk when cross compiling.
-  AC_CONFIG_FILES([$OUTPUTDIR/buildjdk-spec.gmk:$AUTOCONF_DIR/buildjdk-spec.gmk.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/buildjdk-spec.gmk:$AUTOCONF_DIR/buildjdk-spec.gmk.template])
   # The compare.sh is used to compare the build output to other builds.
-  AC_CONFIG_FILES([$OUTPUTDIR/compare.sh:$AUTOCONF_DIR/compare.sh.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/compare.sh:$AUTOCONF_DIR/compare.sh.template])
   # The generated Makefile knows where the spec.gmk is and where the source is.
   # You can run make from the OUTPUTDIR, or from the top-level Makefile
   # which will look for generated configurations
-  AC_CONFIG_FILES([$OUTPUTDIR/Makefile:$AUTOCONF_DIR/Makefile.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/Makefile:$AUTOCONF_DIR/Makefile.template])
 ])
 
 ###############################################################################
@@ -110,6 +110,15 @@ AC_DEFUN_ONCE([JDKVER_SETUP_JDK_VERSION_NUMBERS],
       CHECK_VALUE: [UTIL_CHECK_STRING_NON_EMPTY_PRINTABLE])
   AC_SUBST(COMPANY_NAME)
 
+  # Set the JDK RC Company name
+  # Otherwise uses the value set for "vendor-name".
+  UTIL_ARG_WITH(NAME: jdk-rc-company-name, TYPE: string,
+      DEFAULT: $COMPANY_NAME,
+      DESC: [Set JDK RC company name. This is used for CompanyName properties of MS Windows binaries.],
+      DEFAULT_DESC: [from branding.conf],
+      CHECK_VALUE: [UTIL_CHECK_STRING_NON_EMPTY_PRINTABLE])
+  AC_SUBST(JDK_RC_COMPANY_NAME)
+
   # The vendor URL, if any
   # Only set VENDOR_URL if '--with-vendor-url' was used and is not empty.
   # Otherwise we will use the value from "branding.conf" included above.
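As a usage sketch (the value shown is hypothetical): `bash configure --with-jdk-rc-company-name="Example Corp"` overrides the CompanyName stamped into Windows binaries, while omitting the option falls back to COMPANY_NAME from branding.conf, per the DEFAULT and DEFAULT_DESC arguments above.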
@@ -191,6 +191,7 @@ PRODUCT_NAME := @PRODUCT_NAME@
 PRODUCT_SUFFIX := @PRODUCT_SUFFIX@
 JDK_RC_PLATFORM_NAME := @JDK_RC_PLATFORM_NAME@
 JDK_RC_NAME := @JDK_RC_NAME@
+JDK_RC_COMPANY_NAME:=@JDK_RC_COMPANY_NAME@
 COMPANY_NAME := @COMPANY_NAME@
 HOTSPOT_VM_DISTRO := @HOTSPOT_VM_DISTRO@
 MACOSX_BUNDLE_NAME_BASE := @MACOSX_BUNDLE_NAME_BASE@
@@ -98,7 +98,7 @@ GLOBAL_VERSION_INFO_RESOURCE := $(TOPDIR)/src/java.base/windows/native/common/ve
 
 JDK_RCFLAGS=$(RCFLAGS) \
     -D"JDK_VERSION_STRING=$(VERSION_STRING)" \
-    -D"JDK_COMPANY=$(COMPANY_NAME)" \
+    -D"JDK_COMPANY=$(JDK_RC_COMPANY_NAME)" \
    -D"JDK_VER=$(VERSION_NUMBER_FOUR_POSITIONS)" \
    -D"JDK_COPYRIGHT=Copyright \xA9 $(COPYRIGHT_YEAR)" \
    -D"JDK_NAME=$(JDK_RC_NAME) $(VERSION_SHORT)" \
@@ -112,7 +112,7 @@ define SetupBuildLauncherBody
   $1_PLIST_FILE := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/$1/Info.plist
 
   $$(eval $$(call SetupTextFileProcessing, BUILD_PLIST_$1, \
-      SOURCE_FILES := $(TOPDIR)/make/data/bundle/cmdline-Info.plist, \
+      SOURCE_FILES := $(TOPDIR)/make/data/bundle/cmdline-Info.plist.template, \
      OUTPUT_FILE := $$($1_PLIST_FILE), \
      REPLACEMENTS := \
          @@ID@@ => $(MACOSX_BUNDLE_ID_BASE).$1 ; \
@@ -1206,7 +1206,7 @@ var getJibProfilesDependencies = function (input, common) {
 
         jcov: {
             organization: common.organization,
-            revision: "3.0-15-jdk-asm+1.0",
+            revision: "3.0-16-jdk-asm+1.0",
             ext: "zip",
             environment_name: "JCOV_HOME",
         },
@@ -48,7 +48,7 @@ $(eval $(call IncludeCustomExtension, hotspot/gensrc/GenerateSources.gmk))
 
 # Setup the hotspot launcher script for developer use
 $(eval $(call SetupTextFileProcessing, CREATE_HOTSPOT_LAUNCHER, \
-    SOURCE_FILES := $(TOPDIR)/make/scripts/hotspot.sh, \
+    SOURCE_FILES := $(TOPDIR)/make/scripts/hotspot.sh.template, \
    OUTPUT_FILE := $(JVM_OUTPUTDIR)/hotspot, \
    REPLACEMENTS := \
        @@LIBARCH@@ => $(OPENJDK_TARGET_CPU_LEGACY_LIB) ; \
@@ -245,7 +245,7 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64)+$(INCLUDE_COMPILER2)
     TOOLCHAIN := TOOLCHAIN_LINK_CXX, \
     OPTIMIZATION := HIGH, \
     CFLAGS := $(CFLAGS_JDKLIB), \
-    CXXFLAGS := $(CXXFLAGS_JDKLIB), \
+    CXXFLAGS := $(CXXFLAGS_JDKLIB) -std=c++17, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
     LIBS := $(LIBCXX), \
@@ -53,11 +53,10 @@ JMH_UNPACKED_DIR := $(MICROBENCHMARK_OUTPUT)/jmh_jars
 JMH_UNPACKED_JARS_DONE := $(JMH_UNPACKED_DIR)/_unpacked.marker
 
 # External dependencies
-JMH_COMPILE_JARS := $(JMH_CORE_JAR) $(JMH_GENERATOR_JAR)
+WHITEBOX_JAR := $(SUPPORT_OUTPUTDIR)/test/lib/wb.jar
+JMH_COMPILE_JARS := $(JMH_CORE_JAR) $(JMH_GENERATOR_JAR) $(WHITEBOX_JAR)
 JMH_RUNTIME_JARS := $(JMH_CORE_JAR) $(JMH_COMMONS_MATH_JAR) $(JMH_JOPT_SIMPLE_JAR)
 
-MICROBENCHMARK_CLASSPATH := $(call PathList, $(JMH_COMPILE_JARS))
-
 # Native dependencies
 MICROBENCHMARK_NATIVE_SRC_DIRS := $(MICROBENCHMARK_SRC)
 MICROBENCHMARK_NATIVE_OUTPUT := $(MICROBENCHMARK_OUTPUT)/native
@@ -92,24 +91,28 @@ $(eval $(call SetupJavaCompilation, BUILD_INDIFY, \
 $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \
     TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \
     SMALL_JAVA := false, \
-    CLASSPATH := $(MICROBENCHMARK_CLASSPATH), \
-    DISABLED_WARNINGS := restricted this-escape processing rawtypes cast serial preview, \
+    CLASSPATH := $(JMH_COMPILE_JARS), \
+    DISABLED_WARNINGS := restricted this-escape processing rawtypes cast \
+        serial preview, \
     SRC := $(MICROBENCHMARK_SRC), \
     BIN := $(MICROBENCHMARK_CLASSES), \
-    JAVAC_FLAGS := --add-exports java.base/sun.security.util=ALL-UNNAMED \
-        --add-exports java.base/sun.invoke.util=ALL-UNNAMED \
+    JAVAC_FLAGS := \
         --add-exports java.base/jdk.internal.classfile.impl=ALL-UNNAMED \
-        --add-exports java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED \
-        --add-exports java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED \
-        --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
-        --add-exports java.base/jdk.internal.misc=ALL-UNNAMED \
         --add-exports java.base/jdk.internal.event=ALL-UNNAMED \
         --add-exports java.base/jdk.internal.foreign=ALL-UNNAMED \
+        --add-exports java.base/jdk.internal.misc=ALL-UNNAMED \
+        --add-exports java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED \
+        --add-exports java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED \
+        --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
+        --add-exports java.base/sun.invoke.util=ALL-UNNAMED \
+        --add-exports java.base/sun.security.util=ALL-UNNAMED \
         --enable-preview \
         -processor org.openjdk.jmh.generators.BenchmarkProcessor, \
-    JAVA_FLAGS := --add-modules jdk.unsupported --limit-modules java.management \
+    JAVA_FLAGS := \
         --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
-        --enable-preview, \
+        --add-modules jdk.unsupported \
+        --enable-preview \
+        --limit-modules java.management, \
 ))
 
 $(BUILD_JDK_MICROBENCHMARK): $(JMH_COMPILE_JARS)
@@ -23,12 +23,22 @@
 # questions.
 #
 
+################################################################################
+# This file builds the Java components of testlib.
+# It also covers the test-image part, where the built files are copied to the
+# test image.
+################################################################################
+
 default: all
 
 include $(SPEC)
 include MakeBase.gmk
 include JavaCompilation.gmk
 
+################################################################################
+# Targets for building the test lib jars
+################################################################################
+
 TARGETS :=
 
 TEST_LIB_SOURCE_DIR := $(TOPDIR)/test/lib
@@ -63,8 +73,21 @@ $(eval $(call SetupJavaCompilation, BUILD_TEST_LIB_JAR, \
 
 TARGETS += $(BUILD_TEST_LIB_JAR)
 
-##########################################################################################
+build-test-lib: $(TARGETS)
 
-all: $(TARGETS)
+################################################################################
+# Targets for building test-image.
+################################################################################
 
-.PHONY: default all
+# Copy the jars to the test image.
+$(eval $(call SetupCopyFiles, COPY_LIBTEST_JARS, \
+    DEST := $(TEST_IMAGE_DIR)/lib-test, \
+    FILES := $(BUILD_WB_JAR_JAR) $(BUILD_TEST_LIB_JAR_JAR), \
+))
+#
+
+test-image-lib: $(COPY_LIBTEST_JARS)
+
+all: build-test-lib
+
+.PHONY: default all build-test-lib test-image-lib
@@ -193,4 +193,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_AARCH64_MATCHER_AARCH64_HPP
@@ -303,15 +303,19 @@ void InterpreterMacroAssembler::load_field_entry(Register cache, Register index,
 }
 
 void InterpreterMacroAssembler::load_method_entry(Register cache, Register index, int bcp_offset) {
+  assert_different_registers(cache, index);
+
   // Get index out of bytecode pointer
   get_index_at_bcp(index, bcp_offset, cache /* as tmp */, sizeof(u2));
+
+  // sizeof(ResolvedMethodEntry) is not a power of 2 on Arm, so can't use shift
   mov(cache, sizeof(ResolvedMethodEntry));
   mul(index, index, cache); // Scale the index to be the entry index * sizeof(ResolvedMethodEntry)
 
   // load constant pool cache pointer
   ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));
   // Get address of method entries array
-  ldr(cache, Address(cache, ConstantPoolCache::method_entries_offset()));
+  ldr(cache, Address(cache, in_bytes(ConstantPoolCache::method_entries_offset())));
   add(cache, cache, Array<ResolvedMethodEntry>::base_offset_in_bytes());
   add(cache, cache, index);
 }
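The new comment above is the heart of this hunk: an index can be scaled to a byte offset with a single shift only when the element size is a power of two, which sizeof(ResolvedMethodEntry) is not on Arm, hence the explicit mov/mul pair. A generic C++ sketch of the two scaling strategies (illustration only, not HotSpot code; `__builtin_ctzll` is a GCC/Clang builtin):

```cpp
#include <cstddef>
#include <cstdint>

// Scale an entry index to a byte offset. A power-of-two element size can be
// folded into a shift; anything else needs a real multiply, as in
// load_method_entry() above.
template <typename Entry>
uintptr_t entry_byte_offset(size_t index) {
  constexpr size_t size = sizeof(Entry);
  if constexpr ((size & (size - 1)) == 0) {
    return index << __builtin_ctzll(size);  // size == 2^k: single shift
  } else {
    return index * size;                    // general case: multiply
  }
}
```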
@@ -186,4 +186,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_ARM_MATCHER_ARM_HPP
@@ -370,16 +370,15 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
   if (index_size == sizeof(u4)) {
     __ load_resolved_indy_entry(Rcache, Rindex);
     __ ldrh(Rcache, Address(Rcache, in_bytes(ResolvedIndyEntry::num_parameters_offset())));
-    __ check_stack_top();
-    __ add(Rstack_top, Rstack_top, AsmOperand(Rcache, lsl, Interpreter::logStackElementSize));
   } else {
     // Pop N words from the stack
     assert(index_size == sizeof(u2), "Can only be u2");
     __ load_method_entry(Rcache, Rindex);
-    __ ldrh(Rcache, Address(Rcache, in_bytes(ResolvedIndyEntry::num_parameters_offset())));
+    __ ldrh(Rcache, Address(Rcache, in_bytes(ResolvedMethodEntry::num_parameters_offset())));
+  }
+
+  __ check_stack_top();
+  __ add(Rstack_top, Rstack_top, AsmOperand(Rcache, lsl, Interpreter::logStackElementSize));
-  }
 
   __ convert_retval_to_tos(state);
 
@@ -3666,15 +3666,15 @@ void TemplateTable::prepare_invoke(Register Rcache, Register recv) {
   // load receiver if needed (after extra argument is pushed so parameter size is correct)
   if (load_receiver) {
     __ ldrh(recv, Address(Rcache, in_bytes(ResolvedMethodEntry::num_parameters_offset())));
-    Address recv_addr = __ receiver_argument_address(Rstack_top, Rtemp, recv);
-    __ ldr(recv, recv_addr);
+    __ add(recv, Rstack_top, AsmOperand(recv, lsl, Interpreter::logStackElementSize));
+    __ ldr(recv, Address(recv, -Interpreter::stackElementSize));
     __ verify_oop(recv);
   }
 
   // load return address
   { const address table = (address) Interpreter::invoke_return_entry_table_for(code);
-    __ mov_slow(Rtemp, table);
-    __ ldr(LR, Address::indexed_ptr(Rtemp, ret_type));
+    __ mov_slow(LR, table);
+    __ ldr(LR, Address::indexed_ptr(LR, ret_type));
   }
 }
 
@@ -3744,10 +3744,13 @@ void TemplateTable::invokevirtual(int byte_no) {
 void TemplateTable::invokespecial(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
 
   const Register Rrecv = R2_tmp;
-  load_resolved_method_entry_special_or_static(R2_tmp,  // ResolvedMethodEntry*
+  const Register Rflags = R3_tmp;
+
+  load_resolved_method_entry_special_or_static(Rrecv,   // ResolvedMethodEntry*
                                                Rmethod, // Method*
-                                               R3_tmp); // Flags
+                                               Rflags); // Flags
   prepare_invoke(Rrecv, Rrecv);
   __ verify_oop(Rrecv);
   __ null_check(Rrecv, Rtemp);
@@ -3760,12 +3763,16 @@ void TemplateTable::invokespecial(int byte_no) {
 void TemplateTable::invokestatic(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
-  load_resolved_method_entry_special_or_static(R2_tmp,  // ResolvedMethodEntry*
+
+  const Register Rrecv = R2_tmp;
+  const Register Rflags = R3_tmp;
+
+  load_resolved_method_entry_special_or_static(Rrecv,   // ResolvedMethodEntry*
                                                Rmethod, // Method*
-                                               R3_tmp); // Flags
-  prepare_invoke(R2_tmp, R2_tmp);
+                                               Rflags); // Flags
+  prepare_invoke(Rrecv, Rrecv);
   // do the call
-  __ profile_call(R2_tmp);
+  __ profile_call(Rrecv);
   __ jump_from_interpreted(Rmethod);
 }
 
@@ -3788,10 +3795,10 @@ void TemplateTable::invokeinterface(int byte_no) {
   const Register Rflags = R3_tmp;
   const Register Rklass = R2_tmp; // Note! Same register with Rrecv
 
-  load_resolved_method_entry_interface(R2_tmp,  // ResolvedMethodEntry*
-                                       R1_tmp,  // Klass*
+  load_resolved_method_entry_interface(Rrecv,   // ResolvedMethodEntry*
+                                       Rinterf, // Klass*
                                        Rmethod, // Method* or itable/vtable index
-                                       R3_tmp); // Flags
+                                       Rflags); // Flags
   prepare_invoke(Rrecv, Rrecv);
 
   // First check for Object case, then private interface method,
@@ -195,4 +195,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_PPC_MATCHER_PPC_HPP
@@ -1459,6 +1459,112 @@ void C2_MacroAssembler::string_equals(Register a1, Register a2,
   BLOCK_COMMENT("} string_equals");
 }
 
+// jdk.internal.util.ArraysSupport.vectorizedHashCode
+void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register result,
+                                        Register tmp1, Register tmp2, Register tmp3,
+                                        Register tmp4, Register tmp5, Register tmp6,
+                                        BasicType eltype)
+{
+  assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, t0, t1);
+
+  const int elsize = arrays_hashcode_elsize(eltype);
+  const int chunks_end_shift = exact_log2(elsize);
+
+  switch (eltype) {
+  case T_BOOLEAN: BLOCK_COMMENT("arrays_hashcode(unsigned byte) {"); break;
+  case T_CHAR:    BLOCK_COMMENT("arrays_hashcode(char) {");          break;
+  case T_BYTE:    BLOCK_COMMENT("arrays_hashcode(byte) {");          break;
+  case T_SHORT:   BLOCK_COMMENT("arrays_hashcode(short) {");         break;
+  case T_INT:     BLOCK_COMMENT("arrays_hashcode(int) {");           break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  const int stride = 4;
+  const Register pow31_4 = tmp1;
+  const Register pow31_3 = tmp2;
+  const Register pow31_2 = tmp3;
+  const Register chunks  = tmp4;
+  const Register chunks_end = chunks;
+
+  Label DONE, TAIL, TAIL_LOOP, WIDE_LOOP;
+
+  // result has a value initially
+
+  beqz(cnt, DONE);
+
+  andi(chunks, cnt, ~(stride-1));
+  beqz(chunks, TAIL);
+
+  mv(pow31_4, 923521);           // [31^^4]
+  mv(pow31_3, 29791);            // [31^^3]
+  mv(pow31_2, 961);              // [31^^2]
+
+  slli(chunks_end, chunks, chunks_end_shift);
+  add(chunks_end, ary, chunks_end);
+  andi(cnt, cnt, stride-1);      // don't forget about tail!
+
+  bind(WIDE_LOOP);
+  mulw(result, result, pow31_4); // 31^^4 * h
+  arrays_hashcode_elload(t0,   Address(ary, 0 * elsize), eltype);
+  arrays_hashcode_elload(t1,   Address(ary, 1 * elsize), eltype);
+  arrays_hashcode_elload(tmp5, Address(ary, 2 * elsize), eltype);
+  arrays_hashcode_elload(tmp6, Address(ary, 3 * elsize), eltype);
+  mulw(t0, t0, pow31_3);         // 31^^3 * ary[i+0]
+  addw(result, result, t0);
+  mulw(t1, t1, pow31_2);         // 31^^2 * ary[i+1]
+  addw(result, result, t1);
+  slli(t0, tmp5, 5);             // optimize 31^^1 * ary[i+2]
+  subw(tmp5, t0, tmp5);          // with ary[i+2]<<5 - ary[i+2]
+  addw(result, result, tmp5);
+  addw(result, result, tmp6);    // 31^^4 * h + 31^^3 * ary[i+0] + 31^^2 * ary[i+1]
+                                 //           + 31^^1 * ary[i+2] + 31^^0 * ary[i+3]
+  addi(ary, ary, elsize * stride);
+  bne(ary, chunks_end, WIDE_LOOP);
+  beqz(cnt, DONE);
+
+  bind(TAIL);
+  slli(chunks_end, cnt, chunks_end_shift);
+  add(chunks_end, ary, chunks_end);
+
+  bind(TAIL_LOOP);
+  arrays_hashcode_elload(t0, Address(ary), eltype);
+  slli(t1, result, 5);           // optimize 31 * result
+  subw(result, t1, result);      // with result<<5 - result
+  addw(result, result, t0);
+  addi(ary, ary, elsize);
+  bne(ary, chunks_end, TAIL_LOOP);
+
+  bind(DONE);
+  BLOCK_COMMENT("} // arrays_hashcode");
+}
+
+int C2_MacroAssembler::arrays_hashcode_elsize(BasicType eltype) {
+  switch (eltype) {
+  case T_BOOLEAN: return sizeof(jboolean);
+  case T_BYTE:    return sizeof(jbyte);
+  case T_SHORT:   return sizeof(jshort);
+  case T_CHAR:    return sizeof(jchar);
+  case T_INT:     return sizeof(jint);
+  default:
+    ShouldNotReachHere();
+    return -1;
+  }
+}
+
+void C2_MacroAssembler::arrays_hashcode_elload(Register dst, Address src, BasicType eltype) {
+  switch (eltype) {
+  // T_BOOLEAN used as surrogate for unsigned byte
+  case T_BOOLEAN: lbu(dst, src); break;
+  case T_BYTE:    lb(dst, src);  break;
+  case T_SHORT:   lh(dst, src);  break;
+  case T_CHAR:    lhu(dst, src); break;
+  case T_INT:     lw(dst, src);  break;
+  default:
+    ShouldNotReachHere();
+  }
+}
+
 typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far);
 typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label,
                                                               bool is_far, bool is_unordered);
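The wide loop above evaluates four steps of Java's polynomial array hash per iteration, using h = 31^4*h + 31^3*a[i] + 31^2*a[i+1] + 31*a[i+2] + a[i+3], with 923521 = 31^4, 29791 = 31^3, 961 = 31^2, and 31*x strength-reduced to (x << 5) - x. A scalar C++ model of the same recurrence, shown only to make the unrolling readable (not the intrinsic itself; unsigned arithmetic mimics Java's wrapping int):

```cpp
#include <cstddef>
#include <cstdint>

int32_t polynomial_hash(const int32_t* a, size_t n, int32_t initial) {
  uint32_t h = static_cast<uint32_t>(initial);
  size_t i = 0;
  for (; i + 4 <= n; i += 4) {  // mirrors WIDE_LOOP: 4 elements per pass
    uint32_t x2 = static_cast<uint32_t>(a[i + 2]);
    h = 923521u * h                               // 31^4 * h
      + 29791u * static_cast<uint32_t>(a[i])      // 31^3 * a[i]
      + 961u   * static_cast<uint32_t>(a[i + 1])  // 31^2 * a[i+1]
      + ((x2 << 5) - x2)                          // 31 * a[i+2]
      + static_cast<uint32_t>(a[i + 3]);          // a[i+3]
  }
  for (; i < n; i++) {          // mirrors TAIL_LOOP: h = 31*h + a[i]
    h = (h << 5) - h + static_cast<uint32_t>(a[i]);
  }
  return static_cast<int32_t>(h);
}
```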
@@ -82,6 +82,15 @@
                      Register result, Register cnt1,
                      int elem_size);
 
+  void arrays_hashcode(Register ary, Register cnt, Register result,
+                       Register tmp1, Register tmp2,
+                       Register tmp3, Register tmp4,
+                       Register tmp5, Register tmp6,
+                       BasicType eltype);
+  // helper function for arrays_hashcode
+  int arrays_hashcode_elsize(BasicType eltype);
+  void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
+
   void string_equals(Register r1, Register r2,
                      Register result, Register cnt1,
                      int elem_size);
@@ -192,4 +192,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_RISCV_MATCHER_RISCV_HPP
@@ -10371,6 +10371,26 @@ instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
   ins_pipe(pipe_class_memory);
 %}
 
+// fast ArraysSupport.vectorizedHashCode
+instruct arrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI basic_type,
+                         iRegLNoSp tmp1, iRegLNoSp tmp2,
+                         iRegLNoSp tmp3, iRegLNoSp tmp4,
+                         iRegLNoSp tmp5, iRegLNoSp tmp6, rFlagsReg cr)
+%{
+  match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
+         USE_KILL ary, USE_KILL cnt, USE basic_type, KILL cr);
+
+  format %{ "Array HashCode array[] $ary,$cnt,$result,$basic_type -> $result // KILL all" %}
+  ins_encode %{
+    __ arrays_hashcode($ary$$Register, $cnt$$Register, $result$$Register,
+                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                       $tmp4$$Register, $tmp5$$Register, $tmp6$$Register,
+                       (BasicType)$basic_type$$constant);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
 // ============================================================================
 // Safepoint Instructions
 
@@ -315,6 +315,10 @@ void VM_Version::c2_initialize() {
     }
   }
 
+  if (FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
+    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
+  }
+
   if (!UseZicbop) {
     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
       warning("Zicbop is not available on this CPU");
@@ -184,4 +184,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_S390_MATCHER_S390_HPP
@@ -920,6 +920,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
     case 0x11: // movups
     case 0x12: // movlps
     case 0x28: // movaps
+    case 0x29: // movaps
     case 0x2E: // ucomiss
     case 0x2F: // comiss
     case 0x54: // andps
@@ -969,7 +970,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
       assert(which == call32_operand, "jcc has no disp32 or imm");
       return ip;
     default:
-      ShouldNotReachHere();
+      fatal("not handled: 0x0F%2X", 0xFF & *(ip-1));
     }
     break;
 
@@ -248,4 +248,17 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    if (VM_Version::supports_avx512dq()) {
+      return true;
+    }
+    else if (VM_Version::supports_avx2() && !is_double_word_type(bt)) {
+      return true;
+    }
+    else {
+      return false;
+    }
+  }
+
 #endif // CPU_X86_MATCHER_X86_HPP
@@ -4193,22 +4193,23 @@ void StubGenerator::generate_compiler_stubs() {
         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
   }
 
-  // Load x86_64_sort library on supported hardware to enable avx512 sort and partition intrinsics
-  if (VM_Version::is_intel() && VM_Version::supports_avx512dq()) {
+  // Load x86_64_sort library on supported hardware to enable SIMD sort and partition intrinsics
+  if (VM_Version::is_intel() && (VM_Version::supports_avx512dq() || VM_Version::supports_avx2())) {
     void *libsimdsort = nullptr;
     char ebuf_[1024];
     char dll_name_simd_sort[JVM_MAXPATHLEN];
     if (os::dll_locate_lib(dll_name_simd_sort, sizeof(dll_name_simd_sort), Arguments::get_dll_dir(), "simdsort")) {
       libsimdsort = os::dll_load(dll_name_simd_sort, ebuf_, sizeof ebuf_);
     }
-    // Get addresses for avx512 sort and partition routines
+    // Get addresses for SIMD sort and partition routines
     if (libsimdsort != nullptr) {
       log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "simdsort" JNI_LIB_SUFFIX, p2i(libsimdsort));
 
-      snprintf(ebuf_, sizeof(ebuf_), "avx512_sort");
+      snprintf(ebuf_, sizeof(ebuf_), VM_Version::supports_avx512dq() ? "avx512_sort" : "avx2_sort");
       StubRoutines::_array_sort = (address)os::dll_lookup(libsimdsort, ebuf_);
 
-      snprintf(ebuf_, sizeof(ebuf_), "avx512_partition");
+      snprintf(ebuf_, sizeof(ebuf_), VM_Version::supports_avx512dq() ? "avx512_partition" : "avx2_partition");
       StubRoutines::_array_partition = (address)os::dll_lookup(libsimdsort, ebuf_);
     }
   }
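The stub generator resolves the sort and partition entry points from one simdsort library, choosing the symbol by CPU feature at startup. The same pattern in plain POSIX terms, as a hedged sketch (the real entry-point signature does not appear in this diff, so the typedef below is an assumption):

```cpp
#include <dlfcn.h>

// Hypothetical stand-in for the real simdsort entry-point signature.
typedef void (*sort_stub_t)(void* array, int elem_type, int from, int to);

sort_stub_t resolve_sort_stub(bool has_avx512dq) {
  // One library ships both code paths; the symbol name picks one.
  void* lib = dlopen("libsimdsort.so", RTLD_NOW);
  if (lib == nullptr) {
    return nullptr;  // no library: the VM keeps its portable sort
  }
  const char* sym = has_avx512dq ? "avx512_sort" : "avx2_sort";
  return reinterpret_cast<sort_stub_t>(dlsym(lib, sym));
}
```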
@@ -858,7 +858,7 @@ void VM_Version::get_processor_features() {
 
   // Check if processor has Intel Ecore
   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
-    (_model == 0x97 || _model == 0xAC || _model == 0xAF)) {
+    (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
   }
 
@@ -1130,6 +1130,7 @@ void VM_Version::get_processor_features() {
     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
   }
 
+#ifdef _LP64
   // ChaCha20 Intrinsics
   // As long as the system supports AVX as a baseline we can do a
   // SIMD-enabled block function. StubGenerator makes the determination
@@ -1145,6 +1146,13 @@ void VM_Version::get_processor_features() {
     }
     FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
   }
+#else
+  // No support currently for ChaCha20 intrinsics on 32-bit platforms
+  if (UseChaCha20Intrinsics) {
+    warning("ChaCha20 intrinsics are not available on this CPU.");
+    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
+  }
+#endif // _LP64
 
   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
   if (UseAVX >= 2) {
@@ -52,7 +52,7 @@ public:
   static void initialize() NOT_CDS_RETURN;
   static void check_system_property(const char* key, const char* value) NOT_CDS_RETURN;
   static void check_unsupported_dumping_properties() NOT_CDS_RETURN;
-  static bool check_vm_args_consistency(bool patch_mod_javabase, bool mode_flag_cmd_line) NOT_CDS_RETURN_(false);
+  static bool check_vm_args_consistency(bool patch_mod_javabase, bool mode_flag_cmd_line) NOT_CDS_RETURN_(true);
 
   // Basic CDS features
   static bool is_dumping_archive() { return is_dumping_static_archive() || is_dumping_dynamic_archive(); }
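The one-character change matters because NOT_CDS_RETURN_ supplies the entire function body when CDS is compiled out. A simplified reconstruction of the macro pair from HotSpot's utilities/macros.hpp (abridged from memory, shown only to make the (false) to (true) flip readable):

```cpp
#if INCLUDE_CDS
// With CDS compiled in, the real body lives in the .cpp file and the
// macro expands to nothing (the declaration just ends with ';').
#define NOT_CDS_RETURN_(code)
#else
// Without CDS the macro is the body, so its argument becomes the return
// value: check_vm_args_consistency() now reports success, not failure,
// in builds that exclude CDS.
#define NOT_CDS_RETURN_(code) { return code; }
#endif
```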
@@ -129,7 +129,23 @@ CDSHeapVerifier::CDSHeapVerifier() : _archived_objs(0), _problems(0)
   // This just points to an empty Map
   ADD_EXCL("jdk/internal/reflect/Reflection", "methodFilterMap"); // E
   ADD_EXCL("jdk/internal/util/StaticProperty", "FILE_ENCODING", // C
-           "JAVA_LOCALE_USE_OLD_ISO_CODES"); // C
+           "JAVA_LOCALE_USE_OLD_ISO_CODES", // C
+           "USER_LANGUAGE", // C
+           "USER_LANGUAGE_DISPLAY", // C
+           "USER_LANGUAGE_FORMAT", // C
+           "USER_SCRIPT", // C
+           "USER_SCRIPT_DISPLAY", // C
+           "USER_SCRIPT_FORMAT", // C
+           "USER_COUNTRY", // C
+           "USER_COUNTRY_DISPLAY", // C
+           "USER_COUNTRY_FORMAT", // C
+           "USER_VARIANT", // C
+           "USER_VARIANT_DISPLAY", // C
+           "USER_VARIANT_FORMAT", // C
+           "USER_EXTENSIONS", // C
+           "USER_EXTENSIONS_DISPLAY", // C
+           "USER_EXTENSIONS_FORMAT", // C
+           "USER_REGION"); // C
 
   // Integer for 0 and 1 are in java/lang/Integer$IntegerCache and are archived
   ADD_EXCL("sun/invoke/util/ValueConversions", "ONE_INT", // E
@@ -1465,7 +1465,7 @@ BitMapView FileMapRegion::ptrmap_view() {
   return bitmap_view(false);
 }
 
-bool FileMapRegion::check_region_crc() const {
+bool FileMapRegion::check_region_crc(char* base) const {
   // This function should be called after the region has been properly
   // loaded into memory via FileMapInfo::map_region() or FileMapInfo::read_region().
   // I.e., this->mapped_base() must be valid.
@@ -1474,8 +1474,8 @@ bool FileMapRegion::check_region_crc() const {
     return true;
   }
 
-  assert(mapped_base() != nullptr, "must be initialized");
-  int crc = ClassLoader::crc32(0, mapped_base(), (jint)sz);
+  assert(base != nullptr, "must be initialized");
+  int crc = ClassLoader::crc32(0, base, (jint)sz);
   if (crc != this->crc()) {
     log_warning(cds)("Checksum verification failed.");
     return false;
@@ -1760,13 +1760,13 @@ bool FileMapInfo::read_region(int i, char* base, size_t size, bool do_commit) {
     return false;
   }
 
-  r->set_mapped_from_file(false);
-  r->set_mapped_base(base);
-
-  if (VerifySharedSpaces && !r->check_region_crc()) {
+  if (VerifySharedSpaces && !r->check_region_crc(base)) {
     return false;
   }
 
+  r->set_mapped_from_file(false);
+  r->set_mapped_base(base);
+
   return true;
 }
 
@@ -1803,6 +1803,7 @@ MapArchiveResult FileMapInfo::map_region(int i, intx addr_delta, char* mapped_ba
       return MAP_ARCHIVE_OTHER_FAILURE; // oom or I/O error.
     } else {
       assert(r->mapped_base() != nullptr, "must be initialized");
+      return MAP_ARCHIVE_SUCCESS;
     }
   } else {
     // Note that this may either be a "fresh" mapping into unreserved address
@@ -1817,16 +1818,17 @@ MapArchiveResult FileMapInfo::map_region(int i, intx addr_delta, char* mapped_ba
       _memory_mapping_failed = true;
       return MAP_ARCHIVE_MMAP_FAILURE;
     }
-    r->set_mapped_from_file(true);
-    r->set_mapped_base(requested_addr);
-  }
 
-    if (VerifySharedSpaces && !r->check_region_crc()) {
+    if (VerifySharedSpaces && !r->check_region_crc(requested_addr)) {
       return MAP_ARCHIVE_OTHER_FAILURE;
     }
 
+    r->set_mapped_from_file(true);
+    r->set_mapped_base(requested_addr);
+
     return MAP_ARCHIVE_SUCCESS;
   }
+}
 
 // The return value is the location of the archive relocation bitmap.
 char* FileMapInfo::map_bitmap_region() {
@@ -1843,8 +1845,7 @@ char* FileMapInfo::map_bitmap_region() {
     return nullptr;
   }
 
-  r->set_mapped_base(bitmap_base);
-  if (VerifySharedSpaces && !r->check_region_crc()) {
+  if (VerifySharedSpaces && !r->check_region_crc(bitmap_base)) {
     log_error(cds)("relocation bitmap CRC error");
     if (!os::unmap_memory(bitmap_base, r->used_aligned())) {
       fatal("os::unmap_memory of relocation bitmap failed");
@@ -1853,6 +1854,7 @@ char* FileMapInfo::map_bitmap_region() {
   }
 
   r->set_mapped_from_file(true);
+  r->set_mapped_base(bitmap_base);
   log_info(cds)("Mapped %s region #%d at base " INTPTR_FORMAT " top " INTPTR_FORMAT " (%s)",
                 is_static() ? "static " : "dynamic",
                 MetaspaceShared::bm, p2i(r->mapped_base()), p2i(r->mapped_end()),
@@ -2128,13 +2130,14 @@ bool FileMapInfo::map_heap_region_impl() {
     return false;
   }
 
-  r->set_mapped_base(base);
-  if (VerifySharedSpaces && !r->check_region_crc()) {
+  if (VerifySharedSpaces && !r->check_region_crc(base)) {
     dealloc_heap_region();
     log_info(cds)("UseSharedSpaces: mapped heap region is corrupt");
     return false;
   }
 
+  r->set_mapped_base(base);
+
   // If the requested range is different from the range allocated by GC, then
   // the pointers need to be patched.
   address mapped_start = (address) _mapped_heap_memregion.start();
@@ -170,7 +170,7 @@ public:
   BitMapView ptrmap_view();
   bool has_ptrmap() { return _ptrmap_size_in_bits != 0; }
 
-  bool check_region_crc() const;
+  bool check_region_crc(char* base) const;
   void print(outputStream* st, int region_index);
 };
 
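Taken together, the filemap changes in this compare follow one pattern: check_region_crc() now verifies the raw base pointer handed in by the caller, and set_mapped_base()/set_mapped_from_file() run only after the checksum passes, so a corrupt region is never observable as mapped. A condensed sketch of the resulting shape (Region and its methods are hypothetical stand-ins, not the FileMapRegion API):

```cpp
struct Region {
  bool check_crc(const char* base) const;  // hypothetical stand-ins
  void set_mapped_base(char* base);
  void set_mapped_from_file(bool v);
};

bool load_region_checked(Region* r, char* base, bool verify) {
  if (verify && !r->check_crc(base)) {
    return false;                // fail before the region is published
  }
  r->set_mapped_from_file(true);
  r->set_mapped_base(base);      // publish only after the checksum passed
  return true;
}
```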
@@ -175,7 +175,7 @@ G1ConcurrentRefine::G1ConcurrentRefine(G1Policy* policy) :
 {}
 
 jint G1ConcurrentRefine::initialize() {
-  return _thread_control.initialize(this, max_num_threads());
+  return _thread_control.initialize(this, G1ConcRefinementThreads);
 }
 
 G1ConcurrentRefine* G1ConcurrentRefine::create(G1Policy* policy, jint* ecode) {
@@ -199,10 +199,6 @@ void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
   _thread_control.worker_threads_do(tc);
 }
 
-uint G1ConcurrentRefine::max_num_threads() {
-  return G1ConcRefinementThreads;
-}
-
 void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms,
                                                      size_t processed_logged_cards,
                                                      size_t predicted_thread_buffer_cards,
@@ -215,9 +215,6 @@ public:
 
   // Iterate over all concurrent refinement threads applying the given closure.
   void threads_do(ThreadClosure *tc);
-
-  // Maximum number of refinement threads.
-  static uint max_num_threads();
 };
 
 #endif // SHARE_GC_G1_G1CONCURRENTREFINE_HPP
@@ -81,7 +81,7 @@ void G1FromCardCache::print(outputStream* out) {
 #endif
 
 uint G1FromCardCache::num_par_rem_sets() {
-  return G1DirtyCardQueueSet::num_par_ids() + G1ConcurrentRefine::max_num_threads() + MAX2(ConcGCThreads, ParallelGCThreads);
+  return G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads + MAX2(ConcGCThreads, ParallelGCThreads);
 }
 
 void G1FromCardCache::clear(uint region_idx) {
@@ -91,11 +91,6 @@ class G1RemSetScanState : public CHeapObj<mtGC> {
 
   size_t _max_reserved_regions;
 
-  // Has this region that is part of the regions in the collection set been processed yet.
-  typedef bool G1RemsetIterState;
-
-  G1RemsetIterState volatile* _collection_set_iter_state;
-
   // Card table iteration claim for each heap region, from 0 (completely unscanned)
   // to (>=) HeapRegion::CardsPerRegion (completely scanned).
   uint volatile* _card_table_scan_state;
@@ -67,7 +67,7 @@ double G1RemSetSummary::rs_thread_vtime(uint thread) const {
 }
 
 G1RemSetSummary::G1RemSetSummary(bool should_update) :
-  _num_vtimes(G1ConcurrentRefine::max_num_threads()),
+  _num_vtimes(G1ConcRefinementThreads),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)) {
 
   memset(_rs_threads_vtimes, 0, sizeof(double) * _num_vtimes);
@@ -38,18 +38,16 @@ bool G1RemSetTrackingPolicy::needs_scan_for_rebuild(HeapRegion* r) const {
 }
 
 void G1RemSetTrackingPolicy::update_at_allocate(HeapRegion* r) {
-  if (r->is_young()) {
-    // Always collect remembered set for young regions.
-    r->rem_set()->set_state_complete();
-  } else if (r->is_humongous()) {
-    // Collect remembered sets for humongous regions by default to allow eager reclaim.
-    r->rem_set()->set_state_complete();
-  } else if (r->is_old()) {
+  assert(r->is_young() || r->is_humongous() || r->is_old(),
+         "Region %u with unexpected heap region type %s", r->hrm_index(), r->get_type_str());
+  if (r->is_old()) {
     // By default, do not create remembered set for new old regions.
     r->rem_set()->set_state_untracked();
-  } else {
-    guarantee(false, "Unhandled region %u with heap region type %s", r->hrm_index(), r->get_type_str());
+    return;
   }
+  // Always collect remembered set for young regions and for humongous regions.
+  // Humongous regions need that for eager reclaim.
+  r->rem_set()->set_state_complete();
 }
 
 void G1RemSetTrackingPolicy::update_at_free(HeapRegion* r) {
@@ -117,7 +117,7 @@
       "Confidence level for MMU/pause predictions") \
       range(0, 100) \
       \
-  product(intx, G1SummarizeRSetStatsPeriod, 0, DIAGNOSTIC, \
+  product(uintx, G1SummarizeRSetStatsPeriod, 0, DIAGNOSTIC, \
       "The period (in number of GCs) at which we will generate " \
       "update buffer processing info " \
      "(0 means do not periodically generate this info); " \
@@ -148,7 +148,7 @@
      "Number of entries in an SATB log buffer.") \
      constraint(G1SATBBufferSizeConstraintFunc, AtParse) \
      \
-  develop(intx, G1SATBProcessCompletedThreshold, 20, \
+  develop(uintx, G1SATBProcessCompletedThreshold, 20, \
      "Number of completed buffers that triggers log processing.") \
      range(0, max_jint) \
      \
@@ -344,17 +344,11 @@ class AdaptiveSizePolicy : public CHeapObj<mtGC> {
   AdaptiveWeightedAverage* avg_eden_live() const { return _avg_eden_live; }
   AdaptiveWeightedAverage* avg_old_live() const { return _avg_old_live; }

-  AdaptivePaddedAverage* avg_survived() const { return _avg_survived; }
-  AdaptivePaddedNoZeroDevAverage* avg_pretenured() { return _avg_pretenured; }
-
   // Methods indicating events of interest to the adaptive size policy,
   // called by GC algorithms. It is the responsibility of users of this
   // policy to call these methods at the correct times!
   virtual void minor_collection_begin();
   virtual void minor_collection_end(GCCause::Cause gc_cause);
-  virtual LinearLeastSquareFit* minor_pause_old_estimator() const {
-    return _minor_pause_old_estimator;
-  }

   LinearLeastSquareFit* minor_pause_young_estimator() {
     return _minor_pause_young_estimator;
@@ -404,10 +398,6 @@ class AdaptiveSizePolicy : public CHeapObj<mtGC> {
     _overhead_checker.set_gc_overhead_limit_exceeded(v);
   }

-  bool gc_overhead_limit_near() {
-    return _overhead_checker.gc_overhead_limit_near();
-  }
-
   void reset_gc_overhead_limit_count() {
     _overhead_checker.reset_gc_overhead_limit_count();
   }
@@ -105,13 +105,15 @@ static void commit(HelperType& helper) {
   assert(thread != nullptr, "invariant");
   if (thread->is_Java_thread()) {
     JavaThread* jt = JavaThread::cast(thread);
-    if (jt->thread_state() != _thread_in_vm) {
-      assert(jt->thread_state() == _thread_in_native, "invariant");
+    if (jt->thread_state() == _thread_in_native) {
       // For a JavaThread to take a JFR stacktrace, it must be in _thread_in_vm. Can safepoint here.
       ThreadInVMfromNative transition(jt);
       event.commit();
       return;
     }
+    // If a thread comes here still _thread_in_Java, which can happen for example
+    // when loading the disassembler library in response to traps in JIT code - all is ok.
+    // Since there is no ljf, an event will be committed without a stacktrace.
   }
   event.commit();
 }
@@ -53,8 +53,8 @@
 // * store_at: Store a value in an internal pointer relative to a base object.
 // * atomic_cmpxchg: Atomically compare-and-swap a new value at an address if previous value matched the compared value.
 // * atomic_cmpxchg_at: Atomically compare-and-swap a new value at an internal pointer address if previous value matched the compared value.
-// * atomic_xchg: Atomically swap a new value at an address if previous value matched the compared value.
-// * atomic_xchg_at: Atomically swap a new value at an internal pointer address if previous value matched the compared value.
+// * atomic_xchg: Atomically swap a new value at an address without checking the previous value.
+// * atomic_xchg_at: Atomically swap a new value at an internal pointer address without checking the previous value.
 // * arraycopy: Copy data from one heap array to another heap array. The ArrayAccess class has convenience functions for this.
 // * clone: Clone the contents of an object to a newly allocated object.
 //
@@ -83,12 +83,11 @@
 // and whether the access is performed on the heap or outside. Then the
 // appropriate BarrierSet::AccessBarrier is called to perform the access.
 //
-// The implementation of step 1-4 resides in in accessBackend.hpp, to allow selected
+// The implementation of step 1-4 resides in accessBackend.hpp, to allow selected
 // accesses to be accessible from only access.hpp, as opposed to access.inline.hpp.
 // Steps 5.a and 5.b require knowledge about the GC backends, and therefore needs to
 // include the various GC backend .inline.hpp headers. Their implementation resides in
-// access.inline.hpp. The accesses that are allowed through the access.hpp file
-// must be instantiated in access.cpp using the INSTANTIATE_HPP_ACCESS macro.
+// access.inline.hpp.

 template <DecoratorSet decorators = DECORATORS_NONE>
 class Access: public AllStatic {
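The comment fix above is purely about documented semantics: atomic_xchg stores unconditionally and returns the previous value, while atomic_cmpxchg stores only when the current value matches. A minimal standalone sketch of that difference using plain std::atomic (not the HotSpot Access API):

    #include <atomic>
    #include <cassert>

    int main() {
      std::atomic<int> v{1};

      // xchg: store unconditionally, return the previous value.
      int prev = v.exchange(2);
      assert(prev == 1 && v.load() == 2);

      // cmpxchg: store only if the current value equals 'expected'.
      int expected = 99;
      bool swapped = v.compare_exchange_strong(expected, 3);
      assert(!swapped && v.load() == 2);  // mismatch: nothing stored

      expected = 2;
      swapped = v.compare_exchange_strong(expected, 3);
      assert(swapped && v.load() == 3);   // match: store performed
      return 0;
    }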
@@ -365,10 +365,10 @@
       "Level of detail of the ideal graph printout. " \
       "System-wide value, -1=printing is disabled, " \
       "0=print nothing except IGVPrintLevel directives, " \
-      "5=all details printed. " \
+      "6=all details printed. " \
       "Level of detail of printouts can be set on a per-method level " \
       "as well by using CompileCommand=option.") \
-      range(-1, 5) \
+      range(-1, 6) \
       \
   notproduct(intx, PrintIdealGraphPort, 4444, \
       "Ideal graph printer to network port") \
@@ -1041,6 +1041,10 @@ void Compile::Init(bool aliasing) {
   Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
   set_decompile_count(0);

+#ifndef PRODUCT
+  Copy::zero_to_bytes(_igv_phase_iter, sizeof(_igv_phase_iter));
+#endif
+
   set_do_freq_based_layout(_directive->BlockLayoutByFrequencyOption);
   _loop_opts_cnt = LoopOptsCount;
   set_do_inlining(Inline);
@@ -2397,6 +2401,7 @@ void Compile::Optimize() {
   if (failing()) return;

   // Conditional Constant Propagation;
+  print_method(PHASE_BEFORE_CCP1, 2);
   PhaseCCP ccp( &igvn );
   assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
   {
@@ -2972,6 +2977,8 @@ void Compile::Code_Gen() {
     if (failing()) {
       return;
     }
+
+    print_method(PHASE_REGISTER_ALLOCATION, 2);
   }

   // Prior to register allocation we kept empty basic blocks in case the
@@ -2989,6 +2996,7 @@ void Compile::Code_Gen() {
     cfg.fixup_flow();
     cfg.remove_unreachable_blocks();
     cfg.verify_dominator_tree();
+    print_method(PHASE_BLOCK_ORDERING, 3);
   }

   // Apply peephole optimizations
@@ -2996,12 +3004,14 @@ void Compile::Code_Gen() {
     TracePhase tp("peephole", &timers[_t_peephole]);
     PhasePeephole peep( _regalloc, cfg);
     peep.do_transform();
+    print_method(PHASE_PEEPHOLE, 3);
   }

   // Do late expand if CPU requires this.
   if (Matcher::require_postalloc_expand) {
     TracePhase tp("postalloc_expand", &timers[_t_postalloc_expand]);
     cfg.postalloc_expand(_regalloc);
+    print_method(PHASE_POSTALLOC_EXPAND, 3);
   }

   // Convert Nodes to instruction bits in a buffer
@@ -5102,6 +5112,10 @@ void Compile::print_method(CompilerPhaseType cpt, int level, Node* n) {
   ResourceMark rm;
   stringStream ss;
   ss.print_raw(CompilerPhaseTypeHelper::to_description(cpt));
+  int iter = ++_igv_phase_iter[cpt];
+  if (iter > 1) {
+    ss.print(" %d", iter);
+  }
   if (n != nullptr) {
     ss.print(": %d %s ", n->_idx, NodeClassNames[n->Opcode()]);
   }
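The _igv_phase_iter change above makes repeated printouts of the same phase distinguishable in IGV by appending an iteration count to the second and later occurrences. A self-contained sketch of just that naming scheme (a hypothetical mirror, not the HotSpot code itself):

    #include <cstdio>

    enum PhaseType { PHASE_AFTER_ITER_GVN_STEP, PHASE_NUM_TYPES };
    static int igv_phase_iter[PHASE_NUM_TYPES] = {0};

    // First occurrence keeps the bare description; repeats get " 2", " 3", ...
    static void print_phase(PhaseType t, const char* desc) {
      int iter = ++igv_phase_iter[t];
      if (iter > 1) {
        std::printf("%s %d\n", desc, iter);
      } else {
        std::printf("%s\n", desc);
      }
    }

    int main() {
      print_phase(PHASE_AFTER_ITER_GVN_STEP, "After Iter GVN Step"); // "After Iter GVN Step"
      print_phase(PHASE_AFTER_ITER_GVN_STEP, "After Iter GVN Step"); // "After Iter GVN Step 2"
      return 0;
    }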
@@ -343,6 +343,7 @@ class Compile : public Phase {
   bool _print_intrinsics;        // True if we should print intrinsics for this compilation
 #ifndef PRODUCT
   uint _igv_idx;                 // Counter for IGV node identifiers
+  uint _igv_phase_iter[PHASE_NUM_TYPES]; // Counters for IGV phase iterations
   bool _trace_opto_output;
   bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
 #endif
@@ -531,6 +532,7 @@ private:

 #ifndef PRODUCT
   IdealGraphPrinter* igv_printer() { return _igv_printer; }
+  void reset_igv_phase_iter(CompilerPhaseType cpt) { _igv_phase_iter[cpt] = 0; }
 #endif

   void log_late_inline(CallGenerator* cg);
@@ -1563,6 +1563,11 @@ Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
   if (((bt == T_OBJECT) && C->do_escape_analysis()) || C->eliminate_boxing()) {
     // Improve graph before escape analysis and boxing elimination.
     record_for_igvn(ld);
+    if (ld->is_DecodeN()) {
+      // Also record the actual load (LoadN) in case ld is DecodeN
+      assert(ld->in(1)->Opcode() == Op_LoadN, "Assumption invalid: input to DecodeN is not LoadN");
+      record_for_igvn(ld->in(1));
+    }
   }
   return ld;
 }
@@ -5387,6 +5387,10 @@ bool LibraryCallKit::inline_array_partition() {
   const TypeInstPtr* elem_klass = gvn().type(elementType)->isa_instptr();
   ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
   BasicType bt = elem_type->basic_type();
+  // Disable the intrinsic if the CPU does not support SIMD sort
+  if (!Matcher::supports_simd_sort(bt)) {
+    return false;
+  }
   address stubAddr = nullptr;
   stubAddr = StubRoutines::select_array_partition_function();
   // stub not loaded
@@ -5440,6 +5444,10 @@ bool LibraryCallKit::inline_array_sort() {
   const TypeInstPtr* elem_klass = gvn().type(elementType)->isa_instptr();
   ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
   BasicType bt = elem_type->basic_type();
+  // Disable the intrinsic if the CPU does not support SIMD sort
+  if (!Matcher::supports_simd_sort(bt)) {
+    return false;
+  }
   address stubAddr = nullptr;
   stubAddr = StubRoutines::select_arraysort_function();
   //stub not loaded
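Both hunks above follow the usual C2 intrinsic pattern: returning false from an inline_* method makes the compiler skip the intrinsic and keep the plain bytecode implementation. A hedged sketch of that gating shape, with made-up names standing in for Matcher::supports_simd_sort and the stub lookup:

    #include <cstdio>

    // Hypothetical stand-ins for Matcher::supports_simd_sort and the stub
    // lookup; the names are illustrative only.
    static bool cpu_supports_simd_sort(int /*element_kind*/) { return false; }
    static const void* lookup_sort_stub(int /*element_kind*/) { return nullptr; }

    // Mirrors the bail-out shape of the hunks above: any failed precondition
    // returns false, which keeps the non-intrinsic implementation.
    static bool try_use_sort_intrinsic(int element_kind) {
      if (!cpu_supports_simd_sort(element_kind)) {
        return false;  // CPU gate, as added by the change above
      }
      if (lookup_sort_stub(element_kind) == nullptr) {
        return false;  // stub not loaded
      }
      return true;
    }

    int main() {
      std::printf("use intrinsic: %s\n", try_use_sort_intrinsic(0) ? "yes" : "no");
      return 0;
    }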
@@ -1180,6 +1180,7 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
   }
   BoolNode* bol = test->as_Bool();
   if (invar.is_invariant(bol)) {
+    C->print_method(PHASE_BEFORE_LOOP_PREDICATION_IC, 4, iff);
     // Invariant test
     new_predicate_proj = create_new_if_for_predicate(parse_predicate_proj, nullptr,
                                                      reason,
@@ -1197,6 +1198,9 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
     IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If();
     _igvn.hash_delete(new_predicate_iff);
     new_predicate_iff->set_req(1, new_predicate_bol);
+
+    C->print_method(PHASE_AFTER_LOOP_PREDICATION_IC, 4, new_predicate_proj->in(0));
+
 #ifndef PRODUCT
     if (TraceLoopPredicate) {
       tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx);
@@ -1207,6 +1211,7 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
     }
 #endif
   } else if (cl != nullptr && loop->is_range_check_if(if_success_proj, this, invar DEBUG_ONLY(COMMA parse_predicate_proj))) {
+    C->print_method(PHASE_BEFORE_LOOP_PREDICATION_RC, 4, iff);
     // Range check for counted loops
     assert(if_success_proj->is_IfTrue(), "trap must be on false projection for a range check");
     const Node* cmp = bol->in(1)->as_Cmp();
@@ -1270,6 +1275,8 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
     new_predicate_proj = add_template_assertion_predicate(iff, loop, if_success_proj, parse_predicate_proj, upper_bound_proj, scale,
                                                           offset, init, limit, stride, rng, overflow, reason);

+    C->print_method(PHASE_AFTER_LOOP_PREDICATION_RC, 4, new_predicate_proj->in(0));
+
 #ifndef PRODUCT
     if (TraceLoopOpts && !TraceLoopPredicate) {
       tty->print("Predicate RC ");
@@ -703,6 +703,9 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
   }
 #endif
   LoopNode* head = loop->_head->as_Loop();
+
+  C->print_method(PHASE_BEFORE_LOOP_PEELING, 4, head);
+
   bool counted_loop = head->is_CountedLoop();
   if (counted_loop) {
     CountedLoopNode *cl = head->as_CountedLoop();
@@ -795,6 +798,8 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
   peeled_dom_test_elim(loop,old_new);

   loop->record_for_igvn();
+
+  C->print_method(PHASE_AFTER_LOOP_PEELING, 4, new_head);
 }

 //------------------------------policy_maximally_unroll------------------------
@@ -1629,6 +1634,8 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n
   CountedLoopEndNode *main_end = main_head->loopexit();
   assert(main_end->outcnt() == 2, "1 true, 1 false path only");

+  C->print_method(PHASE_BEFORE_PRE_MAIN_POST, 4, main_head);
+
   Node *pre_header= main_head->in(LoopNode::EntryControl);
   Node *init = main_head->init_trip();
   Node *incr = main_end->incr();
@@ -1825,6 +1832,8 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n
   // finds some, but we _know_ they are all useless.
   peeled_dom_test_elim(loop,old_new);
   loop->record_for_igvn();
+
+  C->print_method(PHASE_AFTER_PRE_MAIN_POST, 4, main_head);
 }

 //------------------------------insert_vector_post_loop------------------------
@@ -2127,6 +2136,9 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
   assert(LoopUnrollLimit, "");
   CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
   CountedLoopEndNode *loop_end = loop_head->loopexit();
+
+  C->print_method(PHASE_BEFORE_LOOP_UNROLLING, 4, loop_head);
+
 #ifndef PRODUCT
   if (PrintOpto && VerifyLoopOptimizations) {
     tty->print("Unrolling ");
@@ -2374,6 +2386,8 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
     }
   }
 #endif
+
+  C->print_method(PHASE_AFTER_LOOP_UNROLLING, 4, clone_head);
 }

 //------------------------------do_maximally_unroll----------------------------
@@ -3003,6 +3017,8 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
       // stride_con and scale_con can be negative which will flip about the
       // sense of the test.

+      C->print_method(PHASE_BEFORE_RANGE_CHECK_ELIMINATION, 4, iff);
+
       // Perform the limit computations in jlong to avoid overflow
       jlong lscale_con = scale_con;
       Node* int_offset = offset;
@@ -3103,6 +3119,9 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
           --imax;
         }
       }
+
+      C->print_method(PHASE_AFTER_RANGE_CHECK_ELIMINATION, 4, cl);
+
     } // End of is IF
   }
   if (loop_entry != cl->skip_strip_mined()->in(LoopNode::EntryControl)) {
@@ -134,6 +134,8 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
   }
 #endif

+  C->print_method(PHASE_BEFORE_LOOP_UNSWITCHING, 4, head);
+
   // Need to revert back to normal loop
   if (head->is_CountedLoop() && !head->as_CountedLoop()->is_normal_loop()) {
     head->as_CountedLoop()->set_normal_loop();
@@ -200,6 +202,8 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
   }
 #endif

+  C->print_method(PHASE_AFTER_LOOP_UNSWITCHING, 4, head_clone);
+
   C->set_major_progress();
 }

@@ -1446,7 +1446,12 @@ void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
       }

       // Now split the IF
+      C->print_method(PHASE_BEFORE_SPLIT_IF, 4, iff);
+      if ((PrintOpto && VerifyLoopOptimizations) || TraceLoopOpts) {
+        tty->print_cr("Split-If");
+      }
       do_split_if(iff);
+      C->print_method(PHASE_AFTER_SPLIT_IF, 4, iff);
       return;
     }

@@ -3625,6 +3630,9 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
     }
   }
 #endif
+
+  C->print_method(PHASE_BEFORE_PARTIAL_PEELING, 4, head);
+
   VectorSet peel;
   VectorSet not_peel;
   Node_List peel_list;
@@ -3919,6 +3927,9 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
     }
   }
 #endif
+
+  C->print_method(PHASE_AFTER_PARTIAL_PEELING, 4, new_head_clone);
+
   return true;
 }

@@ -2779,7 +2779,7 @@ void Parse::do_one_bytecode() {
   }

 #ifndef PRODUCT
-  constexpr int perBytecode = 5;
+  constexpr int perBytecode = 6;
   if (C->should_print_igv(perBytecode)) {
     IdealGraphPrinter* printer = C->igv_printer();
     char buffer[256];
@@ -894,7 +894,7 @@ void PhaseIterGVN::verify_step(Node* n) {
 void PhaseIterGVN::trace_PhaseIterGVN(Node* n, Node* nn, const Type* oldtype) {
   const Type* newtype = type_or_null(n);
   if (nn != n || oldtype != newtype) {
-    C->print_method(PHASE_AFTER_ITER_GVN_STEP, 4, n);
+    C->print_method(PHASE_AFTER_ITER_GVN_STEP, 5, n);
   }
   if (TraceIterativeGVN) {
     uint wlsize = _worklist.size();
@@ -1025,6 +1025,7 @@ void PhaseIterGVN::trace_PhaseIterGVN_verbose(Node* n, int num_processed) {
 void PhaseIterGVN::optimize() {
   DEBUG_ONLY(uint num_processed = 0;)
   NOT_PRODUCT(init_verifyPhaseIterGVN();)
+  NOT_PRODUCT(C->reset_igv_phase_iter(PHASE_AFTER_ITER_GVN_STEP);)
   C->print_method(PHASE_BEFORE_ITER_GVN, 3);
   if (StressIGVN) {
     shuffle_worklist();
@@ -49,6 +49,27 @@
   flags(ITER_GVN_AFTER_VECTOR, "Iter GVN after vector box elimination") \
   flags(BEFORE_BEAUTIFY_LOOPS, "Before beautify loops") \
   flags(AFTER_BEAUTIFY_LOOPS, "After beautify loops") \
+  flags(BEFORE_LOOP_UNROLLING, "Before Loop Unrolling") \
+  flags(AFTER_LOOP_UNROLLING, "After Loop Unrolling") \
+  flags(BEFORE_SPLIT_IF, "Before Split-If") \
+  flags(AFTER_SPLIT_IF, "After Split-If") \
+  flags(BEFORE_LOOP_PREDICATION_IC, "Before Loop Predication IC") \
+  flags(AFTER_LOOP_PREDICATION_IC, "After Loop Predication IC") \
+  flags(BEFORE_LOOP_PREDICATION_RC, "Before Loop Predication RC") \
+  flags(AFTER_LOOP_PREDICATION_RC, "After Loop Predication RC") \
+  flags(BEFORE_PARTIAL_PEELING, "Before Partial Peeling") \
+  flags(AFTER_PARTIAL_PEELING, "After Partial Peeling") \
+  flags(BEFORE_LOOP_PEELING, "Before Loop Peeling") \
+  flags(AFTER_LOOP_PEELING, "After Loop Peeling") \
+  flags(BEFORE_LOOP_UNSWITCHING, "Before Loop Unswitching") \
+  flags(AFTER_LOOP_UNSWITCHING, "After Loop Unswitching") \
+  flags(BEFORE_RANGE_CHECK_ELIMINATION, "Before Range Check Elimination") \
+  flags(AFTER_RANGE_CHECK_ELIMINATION, "After Range Check Elimination") \
+  flags(BEFORE_PRE_MAIN_POST, "Before Pre/Main/Post Loops") \
+  flags(AFTER_PRE_MAIN_POST, "After Pre/Main/Post Loops") \
+  flags(SUPERWORD1_BEFORE_SCHEDULE, "Superword 1, Before Schedule") \
+  flags(SUPERWORD2_BEFORE_OUTPUT, "Superword 2, Before Output") \
+  flags(SUPERWORD3_AFTER_OUTPUT, "Superword 3, After Output") \
   flags(BEFORE_CLOOPS, "Before CountedLoop") \
   flags(AFTER_CLOOPS, "After CountedLoop") \
   flags(PHASEIDEAL_BEFORE_EA, "PhaseIdealLoop before EA") \
@@ -58,6 +79,7 @@
   flags(PHASEIDEALLOOP1, "PhaseIdealLoop 1") \
   flags(PHASEIDEALLOOP2, "PhaseIdealLoop 2") \
   flags(PHASEIDEALLOOP3, "PhaseIdealLoop 3") \
+  flags(BEFORE_CCP1, "Before PhaseCCP 1") \
   flags(CCP1, "PhaseCCP 1") \
   flags(ITER_GVN2, "Iter GVN 2") \
   flags(PHASEIDEALLOOP_ITERATIONS, "PhaseIdealLoop iterations") \
@@ -67,6 +89,10 @@
   flags(BEFORE_MATCHING, "Before matching") \
   flags(MATCHING, "After matching") \
   flags(GLOBAL_CODE_MOTION, "Global code motion") \
+  flags(REGISTER_ALLOCATION, "Register Allocation") \
+  flags(BLOCK_ORDERING, "Block Ordering") \
+  flags(PEEPHOLE, "Peephole") \
+  flags(POSTALLOC_EXPAND, "Post-Allocation Expand") \
   flags(MACH_ANALYSIS, "After mach analysis") \
   flags(FINAL_CODE, "Final Code") \
   flags(END, "End") \
@@ -591,12 +591,6 @@ void PhaseIdealLoop::handle_use( Node *use, Node *def, small_cache *cache, Node
 // Found an If getting its condition-code input from a Phi in the same block.
 // Split thru the Region.
 void PhaseIdealLoop::do_split_if(Node* iff, RegionNode** new_false_region, RegionNode** new_true_region) {
-  if (PrintOpto && VerifyLoopOptimizations) {
-    tty->print_cr("Split-if");
-  }
-  if (TraceLoopOpts) {
-    tty->print_cr("SplitIf");
-  }

   C->set_major_progress();
   RegionNode *region = iff->in(0)->as_Region();
@@ -2381,6 +2381,9 @@ void SuperWord::schedule() {
   }
 #endif

+  CountedLoopNode* cl = lpt()->_head->as_CountedLoop();
+  _phase->C->print_method(PHASE_SUPERWORD1_BEFORE_SCHEDULE, 4, cl);
+
   // (4) Use the memops_schedule to re-order the memops in all slices.
   schedule_reorder_memops(memops_schedule);
 }
@@ -2488,6 +2491,7 @@ bool SuperWord::output() {
     lpt()->dump_head();
   }
 #endif
+  _phase->C->print_method(PHASE_SUPERWORD2_BEFORE_OUTPUT, 4, cl);

   // Ensure main loop's initial value is properly aligned
   // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
@@ -2808,6 +2812,8 @@ bool SuperWord::output() {
     }
   }

+  _phase->C->print_method(PHASE_SUPERWORD3_AFTER_OUTPUT, 4, cl);
+
   return true;
 }

@@ -390,7 +390,10 @@ UNSAFE_ENTRY_SCOPED(void, Unsafe_SetMemory0(JNIEnv *env, jobject unsafe, jobject
   oop base = JNIHandles::resolve(obj);
   void* p = index_oop_from_field_offset_long(base, offset);

-  Copy::fill_to_memory_atomic(p, sz, value);
+  {
+    GuardUnsafeAccess guard(thread);
+    Copy::fill_to_memory_atomic(p, sz, value);
+  }
 } UNSAFE_END

 UNSAFE_ENTRY_SCOPED(void, Unsafe_CopyMemory0(JNIEnv *env, jobject unsafe, jobject srcObj, jlong srcOffset, jobject dstObj, jlong dstOffset, jlong size)) {
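GuardUnsafeAccess is an RAII helper; the new braces above scope it to the fill alone, so the thread is marked as being inside an Unsafe access only for that call. A minimal analogue of the pattern (hypothetical type, not the HotSpot class):

    #include <cstddef>
    #include <cstring>

    // Hypothetical RAII mark: set on entry, cleared on scope exit, mirroring
    // how GuardUnsafeAccess brackets the Copy::fill_to_memory_atomic call.
    struct ScopedUnsafeAccessMark {
      bool& flag;
      explicit ScopedUnsafeAccessMark(bool& f) : flag(f) { flag = true; }
      ~ScopedUnsafeAccessMark() { flag = false; }
    };

    void set_memory(void* p, std::size_t sz, unsigned char value, bool& in_unsafe_access) {
      {
        ScopedUnsafeAccessMark guard(in_unsafe_access); // active only for the fill
        std::memset(p, value, sz);                      // stands in for the atomic fill
      }
      // guard destroyed here: the thread is no longer marked
    }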
@@ -35,12 +35,6 @@

 class Prefetch : AllStatic {
  public:
-  enum style {
-    do_none,  // Do no prefetching
-    do_read,  // Do read prefetching
-    do_write  // Do write prefetching
-  };
-
   // Prefetch anticipating read; must not fault, semantically a no-op
   static void read(const void* loc, intx interval);

@@ -1473,6 +1473,25 @@ void SymbolTableDumper::do_symbol(Symbol** p) {
   }
 }

+// Support class used to generate HPROF_GC_CLASS_DUMP records
+
+class ClassDumper : public KlassClosure {
+ private:
+  AbstractDumpWriter* _writer;
+  AbstractDumpWriter* writer() const { return _writer; }
+
+ public:
+  ClassDumper(AbstractDumpWriter* writer) : _writer(writer) {}
+
+  void do_klass(Klass* k) {
+    if (k->is_instance_klass()) {
+      DumperSupport::dump_instance_class(writer(), k);
+    } else {
+      DumperSupport::dump_array_class(writer(), k);
+    }
+  }
+};
+
 // Support class used to generate HPROF_GC_ROOT_JNI_LOCAL records

 class JNILocalsDumper : public OopClosure {
@@ -1860,21 +1879,25 @@ vframe* ThreadDumper::get_top_frame() const {
   return nullptr;
 }

-class VM_HeapDumper;
+// Callback to dump thread-related data for unmounted virtual threads;
+// implemented by VM_HeapDumper.
+class UnmountedVThreadDumper {
+ public:
+  virtual void dump_vthread(oop vt, AbstractDumpWriter* segment_writer) = 0;
+};

-// Support class using when iterating over the heap.
+// Support class used when iterating over the heap.
 class HeapObjectDumper : public ObjectClosure {
  private:
   AbstractDumpWriter* _writer;
   AbstractDumpWriter* writer() { return _writer; }
+  UnmountedVThreadDumper* _vthread_dumper;

   DumperClassCacheTable _class_cache;

  public:
-  HeapObjectDumper(AbstractDumpWriter* writer) {
-    _writer = writer;
-  }
+  HeapObjectDumper(AbstractDumpWriter* writer, UnmountedVThreadDumper* vthread_dumper)
+    : _writer(writer), _vthread_dumper(vthread_dumper) {}

   // called for each object in the heap
   void do_object(oop o);
@@ -1895,6 +1918,9 @@ void HeapObjectDumper::do_object(oop o) {
   if (o->is_instance()) {
     // create a HPROF_GC_INSTANCE record for each object
     DumperSupport::dump_instance(writer(), o, &_class_cache);
+    if (java_lang_VirtualThread::is_instance(o) && ThreadDumper::should_dump_vthread(o)) {
+      _vthread_dumper->dump_vthread(o, writer());
+    }
   } else if (o->is_objArray()) {
     // create a HPROF_GC_OBJ_ARRAY_DUMP record for each object array
     DumperSupport::dump_object_array(writer(), objArrayOop(o));
@@ -1908,16 +1934,52 @@ void HeapObjectDumper::do_object(oop o) {
 class DumperController : public CHeapObj<mtInternal> {
  private:
   Monitor* _lock;
+  Mutex* _global_writer_lock;
+
   const uint _dumper_number;
   uint _complete_number;
+  bool _started; // VM dumper started and acquired global writer lock

  public:
   DumperController(uint number) :
-    _lock(new (std::nothrow) PaddedMonitor(Mutex::safepoint, "DumperController_lock")),
+    // _lock and _global_writer_lock are used for synchronization between GC worker threads inside safepoint,
+    // so we lock with _no_safepoint_check_flag.
+    // signal_start() acquires _lock when global writer is locked,
+    // its rank must be less than _global_writer_lock rank.
+    _lock(new (std::nothrow) PaddedMonitor(Mutex::nosafepoint - 1, "DumperController_lock")),
+    _global_writer_lock(new (std::nothrow) Mutex(Mutex::nosafepoint, "DumpWriter_lock")),
     _dumper_number(number),
-    _complete_number(0) { }
+    _complete_number(0),
+    _started(false)
+  {}

-  ~DumperController() { delete _lock; }
+  ~DumperController() {
+    delete _lock;
+    delete _global_writer_lock;
+  }
+
+  // parallel (non VM) dumpers must wait until VM dumper acquires global writer lock
+  void wait_for_start_signal() {
+    MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag);
+    while (_started == false) {
+      ml.wait();
+    }
+  }
+
+  void signal_start() {
+    MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag);
+    _started = true;
+    ml.notify_all();
+  }
+
+  void lock_global_writer() {
+    _global_writer_lock->lock_without_safepoint_check();
+  }
+
+  void unlock_global_writer() {
+    _global_writer_lock->unlock();
+  }

   void dumper_complete(DumpWriter* local_writer, DumpWriter* global_writer) {
     MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag);
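The start/signal protocol above has a simple shape: the VM dumper takes the global writer lock and broadcasts _started, and every other dumper blocks until then. A portable sketch of the same handshake with std::mutex and std::condition_variable (the real code uses HotSpot Monitor/Mutex with no-safepoint-check locking, so this is an analogy, not the implementation):

    #include <condition_variable>
    #include <mutex>

    class StartGate {
      std::mutex _m;
      std::condition_variable _cv;
      bool _started = false;

     public:
      // Non-VM dumpers block here until the VM dumper holds the writer lock.
      void wait_for_start_signal() {
        std::unique_lock<std::mutex> lk(_m);
        _cv.wait(lk, [this] { return _started; });
      }

      // Called once by the VM dumper, after acquiring the global writer lock.
      void signal_start() {
        { std::lock_guard<std::mutex> lk(_m); _started = true; }
        _cv.notify_all();
      }
    };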
@@ -1946,7 +2008,7 @@ private:
   int _dump_seq;

 private:
-  void merge_file(char* path);
+  void merge_file(const char* path);
   void merge_done();
   void set_error(const char* msg);

@@ -1958,8 +2020,28 @@ public:
     _dump_seq(dump_seq) {}

   void do_merge();
+
+  // returns path for the parallel DumpWriter (resource allocated)
+  static char* get_writer_path(const char* base_path, int seq);
 };

+char* DumpMerger::get_writer_path(const char* base_path, int seq) {
+  // approximate required buffer size
+  size_t buf_size = strlen(base_path)
+                  + 2   // ".p"
+                  + 10  // number (that's enough for 2^32 parallel dumpers)
+                  + 1;  // '\0'
+
+  char* path = NEW_RESOURCE_ARRAY(char, buf_size);
+  memset(path, 0, buf_size);
+
+  os::snprintf(path, buf_size, "%s.p%d", base_path, seq);
+
+  return path;
+}
+
 void DumpMerger::merge_done() {
   // Writes the HPROF_HEAP_DUMP_END record.
   if (!_has_error) {
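get_writer_path gives every parallel dumper a deterministic segment file name next to the requested dump: the base path plus ".p<seq>". For example, with an assumed base path of "java_pid1234.hprof", dumper 0 writes java_pid1234.hprof.p0, dumper 1 writes .p1, and DumpMerger later concatenates the segments in sequence order. A quick standalone check of the formula:

    #include <cstdio>

    int main() {
      const char* base = "java_pid1234.hprof";  // assumed example base path
      char path[64];
      for (int seq = 0; seq < 3; seq++) {
        std::snprintf(path, sizeof(path), "%s.p%d", base, seq);
        std::puts(path);  // java_pid1234.hprof.p0, .p1, .p2
      }
      return 0;
    }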
@@ -1980,8 +2062,7 @@ void DumpMerger::set_error(const char* msg) {
 // Merge segmented heap files via sendfile, it's more efficient than the
 // read+write combination, which would require transferring data to and from
 // user space.
-void DumpMerger::merge_file(char* path) {
-  assert(!SafepointSynchronize::is_at_safepoint(), "merging happens outside safepoint");
+void DumpMerger::merge_file(const char* path) {
   TraceTime timer("Merge segmented heap file directly", TRACETIME_LOG(Info, heapdump));

   int segment_fd = os::open(path, O_RDONLY, 0);
@@ -2018,8 +2099,7 @@ void DumpMerger::merge_file(char* path) {
 }
 #else
 // Generic implementation using read+write
-void DumpMerger::merge_file(char* path) {
-  assert(!SafepointSynchronize::is_at_safepoint(), "merging happens outside safepoint");
+void DumpMerger::merge_file(const char* path) {
   TraceTime timer("Merge segmented heap file", TRACETIME_LOG(Info, heapdump));

   fileStream segment_fs(path, "rb");
@@ -2044,7 +2124,6 @@ void DumpMerger::merge_file(char* path) {
 #endif

 void DumpMerger::do_merge() {
-  assert(!SafepointSynchronize::is_at_safepoint(), "merging happens outside safepoint");
   TraceTime timer("Merge heap files complete", TRACETIME_LOG(Info, heapdump));

   // Since contents in segmented heap file were already zipped, we don't need to zip
@@ -2054,10 +2133,9 @@ void DumpMerger::do_merge() {

   // Merge the content of the remaining files into base file. Regardless of whether
   // the merge process is successful or not, these segmented files will be deleted.
-  char path[JVM_MAXPATHLEN];
   for (int i = 0; i < _dump_seq; i++) {
-    memset(path, 0, JVM_MAXPATHLEN);
-    os::snprintf(path, JVM_MAXPATHLEN, "%s.p%d", _path, i);
+    ResourceMark rm;
+    const char* path = get_writer_path(_path, i);
     if (!_has_error) {
       merge_file(path);
     }
@@ -2087,7 +2165,7 @@ public:
 };

 // The VM operation that performs the heap dump
-class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
+class VM_HeapDumper : public VM_GC_Operation, public WorkerTask, public UnmountedVThreadDumper {
  private:
   static VM_HeapDumper* _global_dumper;
   static DumpWriter* _global_writer;
@@ -2107,10 +2185,15 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
   uint _num_dumper_threads;
   DumperController* _dumper_controller;
   ParallelObjectIterator* _poi;
-  // worker id of VMDumper thread.
-  static const size_t VMDumperWorkerId = 0;
+
+  // Dumper id of VMDumper thread.
+  static const int VMDumperId = 0;
   // VM dumper dumps both heap and non-heap data, other dumpers dump heap-only data.
-  static bool is_vm_dumper(uint worker_id) { return worker_id == VMDumperWorkerId; }
+  static bool is_vm_dumper(int dumper_id) { return dumper_id == VMDumperId; }
+  // the 1st dumper calling get_next_dumper_id becomes VM dumper
+  int get_next_dumper_id() {
+    return Atomic::fetch_then_add(&_dump_seq, 1);
+  }

   // accessors and setters
   static VM_HeapDumper* dumper() { assert(_global_dumper != nullptr, "Error"); return _global_dumper; }
@@ -2129,17 +2212,11 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {

   bool skip_operation() const;

-  // create dump writer for every parallel dump thread
-  DumpWriter* create_local_writer();
-
-  // writes a HPROF_LOAD_CLASS record
+  // writes a HPROF_LOAD_CLASS record to global writer
   static void do_load_class(Klass* k);

-  // writes a HPROF_GC_CLASS_DUMP record for the given class
-  static void do_class_dump(Klass* k);
-
   // HPROF_GC_ROOT_THREAD_OBJ records for platform and mounted virtual threads
-  void dump_threads();
+  void dump_threads(AbstractDumpWriter* writer);

   void add_class_serial_number(Klass* k, int serial_num) {
     _klass_map->at_put_grow(serial_num, k);
@@ -2150,7 +2227,7 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
   }

   // HPROF_TRACE and HPROF_FRAME records for platform and mounted virtual threads
-  void dump_stack_traces();
+  void dump_stack_traces(AbstractDumpWriter* writer);

  public:
   VM_HeapDumper(DumpWriter* writer, bool gc_before_heap_dump, bool oome, uint num_dump_threads) :
@@ -2168,7 +2245,7 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
     _thread_serial_num = 1;
     _frame_serial_num = 1;

-    _dump_seq = 0;
+    _dump_seq = VMDumperId;
     _num_dumper_threads = num_dump_threads;
     _dumper_controller = nullptr;
     _poi = nullptr;
@@ -2202,12 +2279,15 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
   }
   int dump_seq() { return _dump_seq; }
   bool is_parallel_dump() { return _num_dumper_threads > 1; }
-  bool can_parallel_dump(WorkerThreads* workers);
+  void prepare_parallel_dump(WorkerThreads* workers);

   VMOp_Type type() const { return VMOp_HeapDumper; }
   virtual bool doit_prologue();
   void doit();
   void work(uint worker_id);
+
+  // UnmountedVThreadDumper implementation
+  void dump_vthread(oop vt, AbstractDumpWriter* segment_writer);
 };

 VM_HeapDumper* VM_HeapDumper::_global_dumper = nullptr;
@@ -2251,21 +2331,12 @@ void VM_HeapDumper::do_load_class(Klass* k) {
     writer()->write_symbolID(name);
 }

-// writes a HPROF_GC_CLASS_DUMP record for the given class
-void VM_HeapDumper::do_class_dump(Klass* k) {
-  if (k->is_instance_klass()) {
-    DumperSupport::dump_instance_class(writer(), k);
-  } else {
-    DumperSupport::dump_array_class(writer(), k);
-  }
-}
-
 // Write a HPROF_GC_ROOT_THREAD_OBJ record for platform/carrier and mounted virtual threads.
 // Then walk the stack so that locals and JNI locals are dumped.
-void VM_HeapDumper::dump_threads() {
+void VM_HeapDumper::dump_threads(AbstractDumpWriter* writer) {
   for (int i = 0; i < _thread_dumpers_count; i++) {
-    _thread_dumpers[i]->dump_thread_obj(writer());
-    _thread_dumpers[i]->dump_stack_refs(writer());
+    _thread_dumpers[i]->dump_thread_obj(writer);
+    _thread_dumpers[i]->dump_stack_refs(writer);
   }
 }

@@ -2280,31 +2351,21 @@ bool VM_HeapDumper::doit_prologue() {
   return VM_GC_Operation::doit_prologue();
 }

-bool VM_HeapDumper::can_parallel_dump(WorkerThreads* workers) {
-  bool can_parallel = true;
+void VM_HeapDumper::prepare_parallel_dump(WorkerThreads* workers) {
   uint num_active_workers = workers != nullptr ? workers->active_workers() : 0;
   uint num_requested_dump_threads = _num_dumper_threads;
   // check if we can dump in parallel based on requested and active threads
   if (num_active_workers <= 1 || num_requested_dump_threads <= 1) {
     _num_dumper_threads = 1;
-    can_parallel = false;
   } else {
-    // check if we have extra path room to accommodate segmented heap files
-    const char* base_path = writer()->get_file_path();
-    assert(base_path != nullptr, "sanity check");
-    if ((strlen(base_path) + 7/*.p\d\d\d\d\0*/) >= JVM_MAXPATHLEN) {
-      _num_dumper_threads = 1;
-      can_parallel = false;
-    } else {
-      _num_dumper_threads = clamp(num_requested_dump_threads, 2U, num_active_workers);
-    }
+    _num_dumper_threads = clamp(num_requested_dump_threads, 2U, num_active_workers);
   }
+  _dumper_controller = new (std::nothrow) DumperController(_num_dumper_threads);
+  bool can_parallel = _num_dumper_threads > 1;
   log_info(heapdump)("Requested dump threads %u, active dump threads %u, "
                      "actual dump threads %u, parallelism %s",
                      num_requested_dump_threads, num_active_workers,
                      _num_dumper_threads, can_parallel ? "true" : "false");
-  return can_parallel;
 }

 // The VM operation that dumps the heap. The dump consists of the following
@@ -2352,11 +2413,11 @@ void VM_HeapDumper::doit() {
   set_global_writer();

   WorkerThreads* workers = ch->safepoint_workers();
-  if (!can_parallel_dump(workers)) {
-    work(VMDumperWorkerId);
+  prepare_parallel_dump(workers);
+
+  if (!is_parallel_dump()) {
+    work(VMDumperId);
   } else {
-    uint heap_only_dumper_threads = _num_dumper_threads - 1 /* VMDumper thread */;
-    _dumper_controller = new (std::nothrow) DumperController(heap_only_dumper_threads);
     ParallelObjectIterator poi(_num_dumper_threads);
     _poi = &poi;
     workers->run_task(this, _num_dumper_threads);
@@ -2368,26 +2429,19 @@ void VM_HeapDumper::doit() {
   clear_global_writer();
 }

-// prepare DumpWriter for every parallel dump thread
-DumpWriter* VM_HeapDumper::create_local_writer() {
-  char* path = NEW_RESOURCE_ARRAY(char, JVM_MAXPATHLEN);
-  memset(path, 0, JVM_MAXPATHLEN);
-
-  // generate segmented heap file path
-  const char* base_path = writer()->get_file_path();
-  // share global compressor, local DumpWriter is not responsible for its life cycle
-  AbstractCompressor* compressor = writer()->compressor();
-  int seq = Atomic::fetch_then_add(&_dump_seq, 1);
-  os::snprintf(path, JVM_MAXPATHLEN, "%s.p%d", base_path, seq);
-
-  // create corresponding writer for that
-  DumpWriter* local_writer = new DumpWriter(path, writer()->is_overwrite(), compressor);
-  return local_writer;
-}
-
 void VM_HeapDumper::work(uint worker_id) {
   // VM Dumper works on all non-heap data dumping and part of heap iteration.
-  if (is_vm_dumper(worker_id)) {
+  int dumper_id = get_next_dumper_id();
+
+  if (is_vm_dumper(dumper_id)) {
+    // lock global writer, it will be unlocked after VM Dumper finishes with non-heap data
+    _dumper_controller->lock_global_writer();
+    _dumper_controller->signal_start();
+  } else {
+    _dumper_controller->wait_for_start_signal();
+  }
+
+  if (is_vm_dumper(dumper_id)) {
     TraceTime timer("Dump non-objects", TRACETIME_LOG(Info, heapdump));
     // Write the file header - we always use 1.0.2
     const char* header = "JAVA PROFILE 1.0.2";
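The lock_global_writer()/signal_start()/wait_for_start_signal() calls added above form a start gate: the VM dumper holds the shared writer while it emits non-heap records, and the other workers block until it signals. DumperController's implementation is not part of this diff, so the following self-contained C++ sketch of that pattern, built on standard primitives, is an assumption-level stand-in only.

#include <condition_variable>
#include <mutex>

class StartGateSketch {
    std::mutex              _lock;
    std::condition_variable _cv;
    bool                    _started = false;

public:
    // Called once by the "VM dumper" worker after it acquires the global writer.
    void signal_start() {
        {
            std::lock_guard<std::mutex> g(_lock);
            _started = true;
        }
        _cv.notify_all();
    }

    // Called by every other worker before touching shared state.
    void wait_for_start_signal() {
        std::unique_lock<std::mutex> g(_lock);
        _cv.wait(g, [this] { return _started; });
    }
};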
@@ -2409,21 +2463,31 @@ void VM_HeapDumper::work(uint worker_id) {

     // write HPROF_FRAME and HPROF_TRACE records
     // this must be called after _klass_map is built when iterating the classes above.
-    dump_stack_traces();
+    dump_stack_traces(writer());

+    // unlock global writer, so parallel dumpers can dump stack traces of unmounted virtual threads
+    _dumper_controller->unlock_global_writer();
+  }

   // HPROF_HEAP_DUMP/HPROF_HEAP_DUMP_SEGMENT starts here
+
+  ResourceMark rm;
+  // share global compressor, local DumpWriter is not responsible for its life cycle
+  DumpWriter segment_writer(DumpMerger::get_writer_path(writer()->get_file_path(), dumper_id),
+                            writer()->is_overwrite(), writer()->compressor());
+  if (!segment_writer.has_error()) {
+    if (is_vm_dumper(dumper_id)) {
+      // dump some non-heap subrecords to heap dump segment
+      TraceTime timer("Dump non-objects (part 2)", TRACETIME_LOG(Info, heapdump));
       // Writes HPROF_GC_CLASS_DUMP records
-      {
-        LockedClassesDo locked_dump_class(&do_class_dump);
-        ClassLoaderDataGraph::classes_do(&locked_dump_class);
-      }
+      ClassDumper class_dumper(&segment_writer);
+      ClassLoaderDataGraph::classes_do(&class_dumper);

       // HPROF_GC_ROOT_THREAD_OBJ + frames + jni locals
-      dump_threads();
+      dump_threads(&segment_writer);

       // HPROF_GC_ROOT_JNI_GLOBAL
-      JNIGlobalsDumper jni_dumper(writer());
+      JNIGlobalsDumper jni_dumper(&segment_writer);
       JNIHandles::oops_do(&jni_dumper);
       // technically not jni roots, but global roots
       // for things like preallocated throwable backtraces
@@ -2431,8 +2495,8 @@ void VM_HeapDumper::work(uint worker_id) {
       // HPROF_GC_ROOT_STICKY_CLASS
       // These should be classes in the null class loader data, and not all classes
       // if !ClassUnloading
-      StickyClassDumper class_dumper(writer());
-      ClassLoaderData::the_null_class_loader_data()->classes_do(&class_dumper);
+      StickyClassDumper stiky_class_dumper(&segment_writer);
+      ClassLoaderData::the_null_class_loader_data()->classes_do(&stiky_class_dumper);
     }

     // Heap iteration.
@@ -2442,46 +2506,39 @@ void VM_HeapDumper::work(uint worker_id) {
     // segment is started.
     // The HPROF_GC_CLASS_DUMP and HPROF_GC_INSTANCE_DUMP are the vast bulk
     // of the heap dump.

+    TraceTime timer(is_parallel_dump() ? "Dump heap objects in parallel" : "Dump heap objects", TRACETIME_LOG(Info, heapdump));
+    HeapObjectDumper obj_dumper(&segment_writer, this);
     if (!is_parallel_dump()) {
-      assert(is_vm_dumper(worker_id), "must be");
-      // == Serial dump
-      ResourceMark rm;
-      TraceTime timer("Dump heap objects", TRACETIME_LOG(Info, heapdump));
-      HeapObjectDumper obj_dumper(writer());
       Universe::heap()->object_iterate(&obj_dumper);
-      writer()->finish_dump_segment();
-      // Writes the HPROF_HEAP_DUMP_END record because merge does not happen in serial dump
-      DumperSupport::end_of_dump(writer());
-      writer()->flush();
     } else {
       // == Parallel dump
-      ResourceMark rm;
-      TraceTime timer("Dump heap objects in parallel", TRACETIME_LOG(Info, heapdump));
-      DumpWriter* local_writer = is_vm_dumper(worker_id) ? writer() : create_local_writer();
-      if (!local_writer->has_error()) {
-        HeapObjectDumper obj_dumper(local_writer);
       _poi->object_iterate(&obj_dumper, worker_id);
-        local_writer->finish_dump_segment();
-        local_writer->flush();
-      }
-      if (is_vm_dumper(worker_id)) {
-        _dumper_controller->wait_all_dumpers_complete();
-      } else {
-        _dumper_controller->dumper_complete(local_writer, writer());
-        delete local_writer;
-        return;
-      }
     }
+
+    segment_writer.finish_dump_segment();
+    segment_writer.flush();
+  }
+
+  _dumper_controller->dumper_complete(&segment_writer, writer());
+
+  if (is_vm_dumper(dumper_id)) {
+    _dumper_controller->wait_all_dumpers_complete();
+
+    // flush global writer
+    writer()->flush();
+
     // At this point, all fragments of the heapdump have been written to separate files.
     // We need to merge them into a complete heapdump and write HPROF_HEAP_DUMP_END at that time.
   }
+}

-void VM_HeapDumper::dump_stack_traces() {
+void VM_HeapDumper::dump_stack_traces(AbstractDumpWriter* writer) {
   // write a HPROF_TRACE record without any frames to be referenced as object alloc sites
-  DumperSupport::write_header(writer(), HPROF_TRACE, 3 * sizeof(u4));
-  writer()->write_u4((u4)STACK_TRACE_ID);
-  writer()->write_u4(0); // thread number
-  writer()->write_u4(0); // frame count
+  DumperSupport::write_header(writer, HPROF_TRACE, 3 * sizeof(u4));
+  writer->write_u4((u4)STACK_TRACE_ID);
+  writer->write_u4(0); // thread number
+  writer->write_u4(0); // frame count

   // max number if every platform thread is carrier with mounted virtual thread
   _thread_dumpers = NEW_C_HEAP_ARRAY(ThreadDumper*, Threads::number_of_threads() * 2, mtInternal);
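The four writes above emit one empty HPROF_TRACE record. A byte-layout sketch follows, assuming the HPROF 1.0.2 framing of tag/timestamp/length ahead of the three u4 body fields written here; real output is serialized field by field in big-endian order, not as a packed struct.

#include <cstdint>

struct HprofEmptyTraceRecord {
    uint8_t  tag;          // HPROF_TRACE (0x05)
    uint32_t micros;       // time offset, part of the record header
    uint32_t body_length;  // 3 * sizeof(u4) == 12, as passed to write_header
    uint32_t serial_num;   // STACK_TRACE_ID, referenced by object alloc sites
    uint32_t thread_num;   // 0: not tied to any thread
    uint32_t frame_count;  // 0: no HPROF_FRAME records follow
};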
@@ -2505,7 +2562,7 @@ void VM_HeapDumper::dump_stack_traces() {
         add_oom_frame = false;
       }
       thread_dumper->init_serial_nums(&_thread_serial_num, &_frame_serial_num);
-      thread_dumper->dump_stack_traces(writer(), _klass_map);
+      thread_dumper->dump_stack_traces(writer, _klass_map);
     }

     // platform or carrier thread
@@ -2515,11 +2572,27 @@ void VM_HeapDumper::dump_stack_traces() {
       thread_dumper->add_oom_frame(_oome_constructor);
     }
     thread_dumper->init_serial_nums(&_thread_serial_num, &_frame_serial_num);
-    thread_dumper->dump_stack_traces(writer(), _klass_map);
+    thread_dumper->dump_stack_traces(writer, _klass_map);
     }
   }
 }

+void VM_HeapDumper::dump_vthread(oop vt, AbstractDumpWriter* segment_writer) {
+  // unmounted vthread has no JavaThread
+  ThreadDumper thread_dumper(ThreadDumper::ThreadType::UnmountedVirtual, nullptr, vt);
+  thread_dumper.init_serial_nums(&_thread_serial_num, &_frame_serial_num);
+
+  // write HPROF_TRACE/HPROF_FRAME records to global writer
+  _dumper_controller->lock_global_writer();
+  thread_dumper.dump_stack_traces(writer(), _klass_map);
+  _dumper_controller->unlock_global_writer();
+
+  // write HPROF_GC_ROOT_THREAD_OBJ/HPROF_GC_ROOT_JAVA_FRAME/HPROF_GC_ROOT_JNI_LOCAL subrecord
+  // to segment writer
+  thread_dumper.dump_thread_obj(segment_writer);
+  thread_dumper.dump_stack_refs(segment_writer);
+}
+
 // dump the heap to given path.
 int HeapDumper::dump(const char* path, outputStream* out, int compression, bool overwrite, uint num_dump_threads) {
   assert(path != nullptr && strlen(path) > 0, "path missing");
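dump_vthread above splits its output between two writers: trace and frame records must go through the shared global writer under the controller's lock, while the GC-root subrecords target the worker's private segment writer with no locking. A generic sketch of that contract, with the writer actions passed in as callables since the real writer types are not part of this hunk:

#include <mutex>

template <typename WriteGlobalFn, typename WriteSegmentFn>
void dump_one_vthread(std::mutex& global_writer_lock,
                      WriteGlobalFn write_trace_and_frames,
                      WriteSegmentFn write_root_subrecords) {
    {
        // serialize HPROF_TRACE / HPROF_FRAME output on the shared writer
        std::lock_guard<std::mutex> g(global_writer_lock);
        write_trace_and_frames();
    }
    // HPROF_GC_ROOT_* subrecords go to this worker's own segment: no lock
    write_root_subrecords();
}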
@@ -2561,16 +2634,14 @@ int HeapDumper::dump(const char* path, outputStream* out, int compression, bool
   // record any error that the writer may have encountered
   set_error(writer.error());

-  // For serial dump, once VM_HeapDumper completes, the whole heap dump process
-  // is done, no further phases needed. For parallel dump, the whole heap dump
-  // process is done in two phases
+  // Heap dump process is done in two phases
   //
   // Phase 1: Concurrent threads directly write heap data to multiple heap files.
   //          This is done by VM_HeapDumper, which is performed within safepoint.
   //
   // Phase 2: Merge multiple heap files into one complete heap dump file.
   //          This is done by DumpMerger, which is performed outside safepoint
+
-  if (dumper.is_parallel_dump()) {
   DumpMerger merger(path, &writer, dumper.dump_seq());
   Thread* current_thread = Thread::current();
   if (current_thread->is_AttachListener_thread()) {
@@ -2583,6 +2654,7 @@ int HeapDumper::dump(const char* path, outputStream* out, int compression, bool
     VM_HeapDumpMerge op(&merger);
     VMThread::execute(&op);
   }
+  if (writer.error() != nullptr) {
     set_error(writer.error());
   }

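A standalone sketch of the phase-2 merge described in the comments above: each dumper thread produced one segment in phase 1, and the merger concatenates them into the final file. The "<path>.p<seq>" naming follows the "%s.p%d" convention visible in the removed create_local_writer code; stream handling, compression, and the trailing HPROF_HEAP_DUMP_END record are simplified assumptions.

#include <cstdio>
#include <string>

static void merge_segments_sketch(const std::string& path, int num_segments) {
    std::FILE* out = std::fopen(path.c_str(), "ab");  // global prologue already written
    if (out == nullptr) return;
    for (int seq = 0; seq < num_segments; seq++) {
        std::string segment = path + ".p" + std::to_string(seq);
        if (std::FILE* in = std::fopen(segment.c_str(), "rb")) {
            char buf[1 << 16];
            size_t n;
            while ((n = std::fread(buf, 1, sizeof buf, in)) > 0) {
                std::fwrite(buf, 1, n, out);
            }
            std::fclose(in);
            std::remove(segment.c_str());  // segments are temporary
        }
    }
    // the real DumpMerger appends the HPROF_HEAP_DUMP_END record here
    std::fclose(out);
}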
src/java.base/linux/native/libsimdsort/avx2-32bit-qsort.hpp (new file, 367 lines)
@@ -0,0 +1,367 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef AVX2_QSORT_32BIT
#define AVX2_QSORT_32BIT

#include "avx2-emu-funcs.hpp"
#include "xss-common-qsort.h"

/*
 * Constants used in sorting 8 elements in a ymm registers. Based on Bitonic
 * sorting network (see
 * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg)
 */

// ymm                  7, 6, 5, 4, 3, 2, 1, 0
#define NETWORK_32BIT_AVX2_1 4, 5, 6, 7, 0, 1, 2, 3
#define NETWORK_32BIT_AVX2_2 0, 1, 2, 3, 4, 5, 6, 7
#define NETWORK_32BIT_AVX2_3 5, 4, 7, 6, 1, 0, 3, 2
#define NETWORK_32BIT_AVX2_4 3, 2, 1, 0, 7, 6, 5, 4

/*
 * Assumes ymm is random and performs a full sorting network defined in
 * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg
 */
template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_INLINE reg_t sort_ymm_32bit(reg_t ymm) {
    const typename vtype::opmask_t oxAA = _mm256_set_epi32(
        0xFFFFFFFF, 0, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0);
    const typename vtype::opmask_t oxCC = _mm256_set_epi32(
        0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
    const typename vtype::opmask_t oxF0 = _mm256_set_epi32(
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0);

    const typename vtype::ymmi_t rev_index = vtype::seti(NETWORK_32BIT_AVX2_2);
    ymm = cmp_merge<vtype>(
        ymm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(ymm), oxAA);
    ymm = cmp_merge<vtype>(
        ymm, vtype::permutexvar(vtype::seti(NETWORK_32BIT_AVX2_1), ymm), oxCC);
    ymm = cmp_merge<vtype>(
        ymm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(ymm), oxAA);
    ymm = cmp_merge<vtype>(ymm, vtype::permutexvar(rev_index, ymm), oxF0);
    ymm = cmp_merge<vtype>(
        ymm, vtype::permutexvar(vtype::seti(NETWORK_32BIT_AVX2_3), ymm), oxCC);
    ymm = cmp_merge<vtype>(
        ymm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(ymm), oxAA);
    return ymm;
}

struct avx2_32bit_swizzle_ops;

template <>
struct avx2_vector<int32_t> {
    using type_t = int32_t;
    using reg_t = __m256i;
    using ymmi_t = __m256i;
    using opmask_t = __m256i;
    static const uint8_t numlanes = 8;
#ifdef XSS_MINIMAL_NETWORK_SORT
    static constexpr int network_sort_threshold = numlanes;
#else
    static constexpr int network_sort_threshold = 256;
#endif
    static constexpr int partition_unroll_factor = 4;

    using swizzle_ops = avx2_32bit_swizzle_ops;

    static type_t type_max() { return X86_SIMD_SORT_MAX_INT32; }
    static type_t type_min() { return X86_SIMD_SORT_MIN_INT32; }
    static reg_t zmm_max() {
        return _mm256_set1_epi32(type_max());
    }  // TODO: this should broadcast bits as is?
    static opmask_t get_partial_loadmask(uint64_t num_to_read) {
        auto mask = ((0x1ull << num_to_read) - 0x1ull);
        return convert_int_to_avx2_mask(mask);
    }
    static ymmi_t seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
                       int v8) {
        return _mm256_set_epi32(v1, v2, v3, v4, v5, v6, v7, v8);
    }
    static opmask_t kxor_opmask(opmask_t x, opmask_t y) {
        return _mm256_xor_si256(x, y);
    }
    static opmask_t ge(reg_t x, reg_t y) {
        opmask_t equal = eq(x, y);
        opmask_t greater = _mm256_cmpgt_epi32(x, y);
        return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(equal),
                                                _mm256_castsi256_ps(greater)));
    }
    static opmask_t gt(reg_t x, reg_t y) { return _mm256_cmpgt_epi32(x, y); }
    static opmask_t eq(reg_t x, reg_t y) { return _mm256_cmpeq_epi32(x, y); }
    template <int scale>
    static reg_t mask_i64gather(reg_t src, opmask_t mask, __m256i index,
                                void const *base) {
        return _mm256_mask_i32gather_epi32(src, base, index, mask, scale);
    }
    template <int scale>
    static reg_t i64gather(__m256i index, void const *base) {
        return _mm256_i32gather_epi32((int const *)base, index, scale);
    }
    static reg_t loadu(void const *mem) {
        return _mm256_loadu_si256((reg_t const *)mem);
    }
    static reg_t max(reg_t x, reg_t y) { return _mm256_max_epi32(x, y); }
    static void mask_compressstoreu(void *mem, opmask_t mask, reg_t x) {
        return avx2_emu_mask_compressstoreu32<type_t>(mem, mask, x);
    }
    static reg_t maskz_loadu(opmask_t mask, void const *mem) {
        return _mm256_maskload_epi32((const int *)mem, mask);
    }
    static reg_t mask_loadu(reg_t x, opmask_t mask, void const *mem) {
        reg_t dst = _mm256_maskload_epi32((type_t *)mem, mask);
        return mask_mov(x, mask, dst);
    }
    static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y) {
        return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(x),
                                                    _mm256_castsi256_ps(y),
                                                    _mm256_castsi256_ps(mask)));
    }
    static void mask_storeu(void *mem, opmask_t mask, reg_t x) {
        return _mm256_maskstore_epi32((type_t *)mem, mask, x);
    }
    static reg_t min(reg_t x, reg_t y) { return _mm256_min_epi32(x, y); }
    static reg_t permutexvar(__m256i idx, reg_t ymm) {
        return _mm256_permutevar8x32_epi32(ymm, idx);
        // return avx2_emu_permutexvar_epi32(idx, ymm);
    }
    static reg_t permutevar(reg_t ymm, __m256i idx) {
        return _mm256_permutevar8x32_epi32(ymm, idx);
    }
    static reg_t reverse(reg_t ymm) {
        const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);
        return permutexvar(rev_index, ymm);
    }
    static type_t reducemax(reg_t v) {
        return avx2_emu_reduce_max32<type_t>(v);
    }
    static type_t reducemin(reg_t v) {
        return avx2_emu_reduce_min32<type_t>(v);
    }
    static reg_t set1(type_t v) { return _mm256_set1_epi32(v); }
    template <uint8_t mask>
    static reg_t shuffle(reg_t ymm) {
        return _mm256_shuffle_epi32(ymm, mask);
    }
    static void storeu(void *mem, reg_t x) {
        _mm256_storeu_si256((__m256i *)mem, x);
    }
    static reg_t sort_vec(reg_t x) {
        return sort_ymm_32bit<avx2_vector<type_t>>(x);
    }
    static reg_t cast_from(__m256i v) { return v; }
    static __m256i cast_to(reg_t v) { return v; }
    static int double_compressstore(type_t *left_addr, type_t *right_addr,
                                    opmask_t k, reg_t reg) {
        return avx2_double_compressstore32<type_t>(left_addr, right_addr, k,
                                                   reg);
    }
};

template <>
struct avx2_vector<float> {
    using type_t = float;
    using reg_t = __m256;
    using ymmi_t = __m256i;
    using opmask_t = __m256i;
    static const uint8_t numlanes = 8;
#ifdef XSS_MINIMAL_NETWORK_SORT
    static constexpr int network_sort_threshold = numlanes;
#else
    static constexpr int network_sort_threshold = 256;
#endif
    static constexpr int partition_unroll_factor = 4;

    using swizzle_ops = avx2_32bit_swizzle_ops;

    static type_t type_max() { return X86_SIMD_SORT_INFINITYF; }
    static type_t type_min() { return -X86_SIMD_SORT_INFINITYF; }
    static reg_t zmm_max() { return _mm256_set1_ps(type_max()); }

    static ymmi_t seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
                       int v8) {
        return _mm256_set_epi32(v1, v2, v3, v4, v5, v6, v7, v8);
    }

    static reg_t maskz_loadu(opmask_t mask, void const *mem) {
        return _mm256_maskload_ps((const float *)mem, mask);
    }
    static opmask_t ge(reg_t x, reg_t y) {
        return _mm256_castps_si256(_mm256_cmp_ps(x, y, _CMP_GE_OQ));
    }
    static opmask_t gt(reg_t x, reg_t y) {
        return _mm256_castps_si256(_mm256_cmp_ps(x, y, _CMP_GT_OQ));
    }
    static opmask_t eq(reg_t x, reg_t y) {
        return _mm256_castps_si256(_mm256_cmp_ps(x, y, _CMP_EQ_OQ));
    }
    static opmask_t get_partial_loadmask(uint64_t num_to_read) {
        auto mask = ((0x1ull << num_to_read) - 0x1ull);
        return convert_int_to_avx2_mask(mask);
    }
    static int32_t convert_mask_to_int(opmask_t mask) {
        return convert_avx2_mask_to_int(mask);
    }
    template <int type>
    static opmask_t fpclass(reg_t x) {
        if constexpr (type == (0x01 | 0x80)) {
            return _mm256_castps_si256(_mm256_cmp_ps(x, x, _CMP_UNORD_Q));
        } else {
            static_assert(type == (0x01 | 0x80), "should not reach here");
        }
    }
    template <int scale>
    static reg_t mask_i64gather(reg_t src, opmask_t mask, __m256i index,
                                void const *base) {
        return _mm256_mask_i32gather_ps(src, base, index,
                                        _mm256_castsi256_ps(mask), scale);
        ;
    }
    template <int scale>
    static reg_t i64gather(__m256i index, void const *base) {
        return _mm256_i32gather_ps((float *)base, index, scale);
    }
    static reg_t loadu(void const *mem) {
        return _mm256_loadu_ps((float const *)mem);
    }
    static reg_t max(reg_t x, reg_t y) { return _mm256_max_ps(x, y); }
    static void mask_compressstoreu(void *mem, opmask_t mask, reg_t x) {
        return avx2_emu_mask_compressstoreu32<type_t>(mem, mask, x);
    }
    static reg_t mask_loadu(reg_t x, opmask_t mask, void const *mem) {
        reg_t dst = _mm256_maskload_ps((type_t *)mem, mask);
        return mask_mov(x, mask, dst);
    }
    static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y) {
        return _mm256_blendv_ps(x, y, _mm256_castsi256_ps(mask));
    }
    static void mask_storeu(void *mem, opmask_t mask, reg_t x) {
        return _mm256_maskstore_ps((type_t *)mem, mask, x);
    }
    static reg_t min(reg_t x, reg_t y) { return _mm256_min_ps(x, y); }
    static reg_t permutexvar(__m256i idx, reg_t ymm) {
        return _mm256_permutevar8x32_ps(ymm, idx);
    }
    static reg_t permutevar(reg_t ymm, __m256i idx) {
        return _mm256_permutevar8x32_ps(ymm, idx);
    }
    static reg_t reverse(reg_t ymm) {
        const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);
        return permutexvar(rev_index, ymm);
    }
    static type_t reducemax(reg_t v) {
        return avx2_emu_reduce_max32<type_t>(v);
    }
    static type_t reducemin(reg_t v) {
        return avx2_emu_reduce_min32<type_t>(v);
    }
    static reg_t set1(type_t v) { return _mm256_set1_ps(v); }
    template <uint8_t mask>
    static reg_t shuffle(reg_t ymm) {
        return _mm256_castsi256_ps(
            _mm256_shuffle_epi32(_mm256_castps_si256(ymm), mask));
    }
    static void storeu(void *mem, reg_t x) {
        _mm256_storeu_ps((float *)mem, x);
    }
    static reg_t sort_vec(reg_t x) {
        return sort_ymm_32bit<avx2_vector<type_t>>(x);
    }
    static reg_t cast_from(__m256i v) { return _mm256_castsi256_ps(v); }
    static __m256i cast_to(reg_t v) { return _mm256_castps_si256(v); }
    static int double_compressstore(type_t *left_addr, type_t *right_addr,
                                    opmask_t k, reg_t reg) {
        return avx2_double_compressstore32<type_t>(left_addr, right_addr, k,
                                                   reg);
    }
};

struct avx2_32bit_swizzle_ops {
    template <typename vtype, int scale>
    X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(
        typename vtype::reg_t reg) {
        __m256i v = vtype::cast_to(reg);

        if constexpr (scale == 2) {
            __m256 vf = _mm256_castsi256_ps(v);
            vf = _mm256_permute_ps(vf, 0b10110001);
            v = _mm256_castps_si256(vf);
        } else if constexpr (scale == 4) {
            __m256 vf = _mm256_castsi256_ps(v);
            vf = _mm256_permute_ps(vf, 0b01001110);
            v = _mm256_castps_si256(vf);
        } else if constexpr (scale == 8) {
            v = _mm256_permute2x128_si256(v, v, 0b00000001);
        } else {
            static_assert(scale == -1, "should not be reached");
        }

        return vtype::cast_from(v);
    }

    template <typename vtype, int scale>
    X86_SIMD_SORT_INLINE typename vtype::reg_t reverse_n(
        typename vtype::reg_t reg) {
        __m256i v = vtype::cast_to(reg);

        if constexpr (scale == 2) {
            return swap_n<vtype, 2>(reg);
        } else if constexpr (scale == 4) {
            constexpr uint64_t mask = 0b00011011;
            __m256 vf = _mm256_castsi256_ps(v);
            vf = _mm256_permute_ps(vf, mask);
            v = _mm256_castps_si256(vf);
        } else if constexpr (scale == 8) {
            return vtype::reverse(reg);
        } else {
            static_assert(scale == -1, "should not be reached");
        }

        return vtype::cast_from(v);
    }

    template <typename vtype, int scale>
    X86_SIMD_SORT_INLINE typename vtype::reg_t merge_n(
        typename vtype::reg_t reg, typename vtype::reg_t other) {
        __m256i v1 = vtype::cast_to(reg);
        __m256i v2 = vtype::cast_to(other);

        if constexpr (scale == 2) {
            v1 = _mm256_blend_epi32(v1, v2, 0b01010101);
        } else if constexpr (scale == 4) {
            v1 = _mm256_blend_epi32(v1, v2, 0b00110011);
        } else if constexpr (scale == 8) {
            v1 = _mm256_blend_epi32(v1, v2, 0b00001111);
        } else {
            static_assert(scale == -1, "should not be reached");
        }

        return vtype::cast_from(v1);
    }
};

#endif // AVX2_QSORT_32BIT
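The sort_ymm_32bit network above performs six cmp_merge stages over eight lanes. Below is a scalar sketch of the same classic 8-input bitonic network: each coex is one compare-exchange, and the stage shapes follow the network linked in the file's comment, though the AVX2 version expresses its lane pairs through shuffle masks rather than index pairs, so the exact pairings here are illustrative.

#include <algorithm>
#include <cstdio>

static void coex(int& a, int& b) {  // compare-exchange: a becomes min, b becomes max
    if (a > b) std::swap(a, b);
}

static void sort8_bitonic(int v[8]) {
    // stage 1: ascending/descending pairs
    coex(v[0], v[1]); coex(v[3], v[2]); coex(v[4], v[5]); coex(v[7], v[6]);
    // stage 2: merge to an ascending half and a descending half
    coex(v[0], v[2]); coex(v[1], v[3]); coex(v[6], v[4]); coex(v[7], v[5]);
    coex(v[0], v[1]); coex(v[2], v[3]); coex(v[5], v[4]); coex(v[7], v[6]);
    // stage 3: final bitonic merge, fully ascending
    coex(v[0], v[4]); coex(v[1], v[5]); coex(v[2], v[6]); coex(v[3], v[7]);
    coex(v[0], v[2]); coex(v[1], v[3]); coex(v[4], v[6]); coex(v[5], v[7]);
    coex(v[0], v[1]); coex(v[2], v[3]); coex(v[4], v[5]); coex(v[6], v[7]);
}

int main() {
    int v[8] = {5, 2, 7, 0, 6, 1, 4, 3};
    sort8_bitonic(v);
    for (int x : v) std::printf("%d ", x);  // prints: 0 1 2 3 4 5 6 7
    return 0;
}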
src/java.base/linux/native/libsimdsort/avx2-emu-funcs.hpp (new file, 183 lines)
@@ -0,0 +1,183 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef AVX2_EMU_FUNCS
#define AVX2_EMU_FUNCS

#include <array>
#include <utility>

#include "xss-common-qsort.h"

constexpr auto avx2_mask_helper_lut32 = [] {
    std::array<std::array<int32_t, 8>, 256> lut{};
    for (int64_t i = 0; i <= 0xFF; i++) {
        std::array<int32_t, 8> entry{};
        for (int j = 0; j < 8; j++) {
            if (((i >> j) & 1) == 1)
                entry[j] = 0xFFFFFFFF;
            else
                entry[j] = 0;
        }
        lut[i] = entry;
    }
    return lut;
}();

constexpr auto avx2_compressstore_lut32_gen = [] {
    std::array<std::array<std::array<int32_t, 8>, 256>, 2> lutPair{};
    auto &permLut = lutPair[0];
    auto &leftLut = lutPair[1];
    for (int64_t i = 0; i <= 0xFF; i++) {
        std::array<int32_t, 8> indices{};
        std::array<int32_t, 8> leftEntry = {0, 0, 0, 0, 0, 0, 0, 0};
        int right = 7;
        int left = 0;
        for (int j = 0; j < 8; j++) {
            bool ge = (i >> j) & 1;
            if (ge) {
                indices[right] = j;
                right--;
            } else {
                indices[left] = j;
                leftEntry[left] = 0xFFFFFFFF;
                left++;
            }
        }
        permLut[i] = indices;
        leftLut[i] = leftEntry;
    }
    return lutPair;
}();

constexpr auto avx2_compressstore_lut32_perm = avx2_compressstore_lut32_gen[0];
constexpr auto avx2_compressstore_lut32_left = avx2_compressstore_lut32_gen[1];

X86_SIMD_SORT_INLINE
__m256i convert_int_to_avx2_mask(int32_t m) {
    return _mm256_loadu_si256(
        (const __m256i *)avx2_mask_helper_lut32[m].data());
}

X86_SIMD_SORT_INLINE
int32_t convert_avx2_mask_to_int(__m256i m) {
    return _mm256_movemask_ps(_mm256_castsi256_ps(m));
}

// Emulators for intrinsics missing from AVX2 compared to AVX512
template <typename T>
T avx2_emu_reduce_max32(typename avx2_vector<T>::reg_t x) {
    using vtype = avx2_vector<T>;
    using reg_t = typename vtype::reg_t;

    reg_t inter1 =
        vtype::max(x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
    reg_t inter2 = vtype::max(
        inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
    T arr[vtype::numlanes];
    vtype::storeu(arr, inter2);
    return std::max(arr[0], arr[7]);
}

template <typename T>
T avx2_emu_reduce_min32(typename avx2_vector<T>::reg_t x) {
    using vtype = avx2_vector<T>;
    using reg_t = typename vtype::reg_t;

    reg_t inter1 =
        vtype::min(x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
    reg_t inter2 = vtype::min(
        inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
    T arr[vtype::numlanes];
    vtype::storeu(arr, inter2);
    return std::min(arr[0], arr[7]);
}

template <typename T>
void avx2_emu_mask_compressstoreu32(void *base_addr,
                                    typename avx2_vector<T>::opmask_t k,
                                    typename avx2_vector<T>::reg_t reg) {
    using vtype = avx2_vector<T>;

    T *leftStore = (T *)base_addr;

    int32_t shortMask = convert_avx2_mask_to_int(k);
    const __m256i &perm = _mm256_loadu_si256(
        (const __m256i *)avx2_compressstore_lut32_perm[shortMask].data());
    const __m256i &left = _mm256_loadu_si256(
        (const __m256i *)avx2_compressstore_lut32_left[shortMask].data());

    typename vtype::reg_t temp = vtype::permutevar(reg, perm);

    vtype::mask_storeu(leftStore, left, temp);
}

template <typename T>
int avx2_double_compressstore32(void *left_addr, void *right_addr,
                                typename avx2_vector<T>::opmask_t k,
                                typename avx2_vector<T>::reg_t reg) {
    using vtype = avx2_vector<T>;

    T *leftStore = (T *)left_addr;
    T *rightStore = (T *)right_addr;

    int32_t shortMask = convert_avx2_mask_to_int(k);
    const __m256i &perm = _mm256_loadu_si256(
        (const __m256i *)avx2_compressstore_lut32_perm[shortMask].data());

    typename vtype::reg_t temp = vtype::permutevar(reg, perm);

    vtype::storeu(leftStore, temp);
    vtype::storeu(rightStore, temp);

    return _mm_popcnt_u32(shortMask);
}

template <typename T>
typename avx2_vector<T>::reg_t avx2_emu_max(typename avx2_vector<T>::reg_t x,
                                            typename avx2_vector<T>::reg_t y) {
    using vtype = avx2_vector<T>;
    typename vtype::opmask_t nlt = vtype::gt(x, y);
    return _mm256_castpd_si256(_mm256_blendv_pd(_mm256_castsi256_pd(y),
                                                _mm256_castsi256_pd(x),
                                                _mm256_castsi256_pd(nlt)));
}

template <typename T>
typename avx2_vector<T>::reg_t avx2_emu_min(typename avx2_vector<T>::reg_t x,
                                            typename avx2_vector<T>::reg_t y) {
    using vtype = avx2_vector<T>;
    typename vtype::opmask_t nlt = vtype::gt(x, y);
    return _mm256_castpd_si256(_mm256_blendv_pd(_mm256_castsi256_pd(x),
                                                _mm256_castsi256_pd(y),
                                                _mm256_castsi256_pd(nlt)));
}

#endif
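avx2_double_compressstore32 above uses the permutation LUT to split one 8-lane register around a comparison mask and returns the popcount. A scalar model of the same contract follows: lanes with a 0 mask bit land in the left output, lanes with a 1 bit in the right output, with the caveat that the SIMD version may order lanes within each side differently.

#include <cstdint>

template <typename T>
int scalar_double_compressstore8(T* left, T* right, uint8_t mask, const T (&v)[8]) {
    int l = 0, r = 0;
    for (int j = 0; j < 8; j++) {
        if ((mask >> j) & 1) {
            right[r++] = v[j];  // "greater-or-equal" lanes
        } else {
            left[l++] = v[j];   // "less-than" lanes
        }
    }
    return r;  // == popcount(mask), matching _mm_popcnt_u32(shortMask)
}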
src/java.base/linux/native/libsimdsort/avx2-linux-qsort.cpp (new file, 66 lines)
@@ -0,0 +1,66 @@
/*
 * Copyright (c) 2023 Intel Corporation. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "simdsort-support.hpp"
#ifdef __SIMDSORT_SUPPORTED_LINUX

#pragma GCC target("avx2")
#include "avx2-32bit-qsort.hpp"
#include "classfile_constants.h"


#define DLL_PUBLIC __attribute__((visibility("default")))
#define INSERTION_SORT_THRESHOLD_32BIT 16

extern "C" {

    DLL_PUBLIC void avx2_sort(void *array, int elem_type, int32_t from_index, int32_t to_index) {
        switch (elem_type) {
            case JVM_T_INT:
                avx2_fast_sort((int32_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
                break;
            case JVM_T_FLOAT:
                avx2_fast_sort((float*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
                break;
            default:
                assert(false, "Unexpected type");
        }
    }

    DLL_PUBLIC void avx2_partition(void *array, int elem_type, int32_t from_index, int32_t to_index, int32_t *pivot_indices, int32_t index_pivot1, int32_t index_pivot2) {
        switch (elem_type) {
            case JVM_T_INT:
                avx2_fast_partition((int32_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
                break;
            case JVM_T_FLOAT:
                avx2_fast_partition((float*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
                break;
            default:
                assert(false, "Unexpected type");
        }
    }

}

#endif
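A hypothetical caller-side sketch for the exported entry points above. In the JDK these functions are resolved from the library and driven by the Java sort intrinsics; here avx2_sort is simply invoked directly. The numeric value 10 for JVM_T_INT and the half-open [from_index, to_index) range are assumptions based on classfile_constants.h and the parameter names.

#include <cstdint>
#include <cstdio>

extern "C" void avx2_sort(void* array, int elem_type, int32_t from_index, int32_t to_index);

int main() {
    int32_t data[] = {9, 3, 7, 1, 8, 2, 6, 4, 5, 0};
    avx2_sort(data, /* JVM_T_INT */ 10, 0, 10);    // sort the whole array
    for (int32_t x : data) std::printf("%d ", x);  // expected: 0 1 2 ... 9
    return 0;
}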
@@ -28,7 +28,7 @@
 #ifndef AVX512_QSORT_32BIT
 #define AVX512_QSORT_32BIT

-#include "avx512-common-qsort.h"
+#include "xss-common-qsort.h"

 /*
  * Constants used in sorting 16 elements in a ZMM registers. Based on Bitonic
@@ -43,130 +43,204 @@
 #define NETWORK_32BIT_6 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4
 #define NETWORK_32BIT_7 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8

+template <typename vtype, typename reg_t>
+X86_SIMD_SORT_INLINE reg_t sort_zmm_32bit(reg_t zmm);
+
+struct avx512_32bit_swizzle_ops;
+
 template <>
 struct zmm_vector<int32_t> {
     using type_t = int32_t;
-    using zmm_t = __m512i;
-    using ymm_t = __m256i;
+    using reg_t = __m512i;
+    using halfreg_t = __m256i;
     using opmask_t = __mmask16;
     static const uint8_t numlanes = 16;
+#ifdef XSS_MINIMAL_NETWORK_SORT
+    static constexpr int network_sort_threshold = numlanes;
+#else
+    static constexpr int network_sort_threshold = 512;
+#endif
+    static constexpr int partition_unroll_factor = 8;
+
+    using swizzle_ops = avx512_32bit_swizzle_ops;

     static type_t type_max() { return X86_SIMD_SORT_MAX_INT32; }
     static type_t type_min() { return X86_SIMD_SORT_MIN_INT32; }
-    static zmm_t zmm_max() { return _mm512_set1_epi32(type_max()); }
+    static reg_t zmm_max() { return _mm512_set1_epi32(type_max()); }

     static opmask_t knot_opmask(opmask_t x) { return _mm512_knot(x); }
-    static opmask_t ge(zmm_t x, zmm_t y) {
+    static opmask_t ge(reg_t x, reg_t y) {
         return _mm512_cmp_epi32_mask(x, y, _MM_CMPINT_NLT);
     }
-    static opmask_t gt(zmm_t x, zmm_t y) {
+    static opmask_t gt(reg_t x, reg_t y) {
         return _mm512_cmp_epi32_mask(x, y, _MM_CMPINT_GT);
     }
+    static opmask_t get_partial_loadmask(uint64_t num_to_read) {
+        return ((0x1ull << num_to_read) - 0x1ull);
+    }
     template <int scale>
-    static ymm_t i64gather(__m512i index, void const *base) {
+    static halfreg_t i64gather(__m512i index, void const *base) {
         return _mm512_i64gather_epi32(index, base, scale);
     }
-    static zmm_t merge(ymm_t y1, ymm_t y2) {
-        zmm_t z1 = _mm512_castsi256_si512(y1);
+    static reg_t merge(halfreg_t y1, halfreg_t y2) {
+        reg_t z1 = _mm512_castsi256_si512(y1);
         return _mm512_inserti32x8(z1, y2, 1);
     }
-    static zmm_t loadu(void const *mem) { return _mm512_loadu_si512(mem); }
-    static void mask_compressstoreu(void *mem, opmask_t mask, zmm_t x) {
+    static reg_t loadu(void const *mem) { return _mm512_loadu_si512(mem); }
+    static void mask_compressstoreu(void *mem, opmask_t mask, reg_t x) {
         return _mm512_mask_compressstoreu_epi32(mem, mask, x);
     }
-    static zmm_t mask_loadu(zmm_t x, opmask_t mask, void const *mem) {
+    static reg_t mask_loadu(reg_t x, opmask_t mask, void const *mem) {
         return _mm512_mask_loadu_epi32(x, mask, mem);
     }
-    static zmm_t mask_mov(zmm_t x, opmask_t mask, zmm_t y) {
+    static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y) {
         return _mm512_mask_mov_epi32(x, mask, y);
     }
-    static void mask_storeu(void *mem, opmask_t mask, zmm_t x) {
+    static void mask_storeu(void *mem, opmask_t mask, reg_t x) {
         return _mm512_mask_storeu_epi32(mem, mask, x);
     }
-    static zmm_t min(zmm_t x, zmm_t y) { return _mm512_min_epi32(x, y); }
-    static zmm_t max(zmm_t x, zmm_t y) { return _mm512_max_epi32(x, y); }
-    static zmm_t permutexvar(__m512i idx, zmm_t zmm) {
+    static reg_t min(reg_t x, reg_t y) { return _mm512_min_epi32(x, y); }
+    static reg_t max(reg_t x, reg_t y) { return _mm512_max_epi32(x, y); }
+    static reg_t permutexvar(__m512i idx, reg_t zmm) {
         return _mm512_permutexvar_epi32(idx, zmm);
     }
-    static type_t reducemax(zmm_t v) { return _mm512_reduce_max_epi32(v); }
-    static type_t reducemin(zmm_t v) { return _mm512_reduce_min_epi32(v); }
-    static zmm_t set1(type_t v) { return _mm512_set1_epi32(v); }
+    static type_t reducemax(reg_t v) { return _mm512_reduce_max_epi32(v); }
+    static type_t reducemin(reg_t v) { return _mm512_reduce_min_epi32(v); }
+    static reg_t set1(type_t v) { return _mm512_set1_epi32(v); }
     template <uint8_t mask>
-    static zmm_t shuffle(zmm_t zmm) {
+    static reg_t shuffle(reg_t zmm) {
         return _mm512_shuffle_epi32(zmm, (_MM_PERM_ENUM)mask);
     }
-    static void storeu(void *mem, zmm_t x) {
+    static void storeu(void *mem, reg_t x) {
         return _mm512_storeu_si512(mem, x);
     }

-    static ymm_t max(ymm_t x, ymm_t y) { return _mm256_max_epi32(x, y); }
-    static ymm_t min(ymm_t x, ymm_t y) { return _mm256_min_epi32(x, y); }
+    static halfreg_t max(halfreg_t x, halfreg_t y) {
+        return _mm256_max_epi32(x, y);
+    }
+    static halfreg_t min(halfreg_t x, halfreg_t y) {
+        return _mm256_min_epi32(x, y);
+    }
+    static reg_t reverse(reg_t zmm) {
+        const auto rev_index = _mm512_set_epi32(NETWORK_32BIT_5);
+        return permutexvar(rev_index, zmm);
+    }
+    static reg_t sort_vec(reg_t x) {
+        return sort_zmm_32bit<zmm_vector<type_t>>(x);
+    }
+    static reg_t cast_from(__m512i v) { return v; }
+    static __m512i cast_to(reg_t v) { return v; }
+    static int double_compressstore(type_t *left_addr, type_t *right_addr,
+                                    opmask_t k, reg_t reg) {
+        return avx512_double_compressstore<zmm_vector<type_t>>(
+            left_addr, right_addr, k, reg);
+    }
 };
 template <>
 struct zmm_vector<float> {
     using type_t = float;
-    using zmm_t = __m512;
-    using ymm_t = __m256;
+    using reg_t = __m512;
+    using halfreg_t = __m256;
     using opmask_t = __mmask16;
     static const uint8_t numlanes = 16;
+#ifdef XSS_MINIMAL_NETWORK_SORT
+    static constexpr int network_sort_threshold = numlanes;
+#else
+    static constexpr int network_sort_threshold = 512;
+#endif
+    static constexpr int partition_unroll_factor = 8;
+
+    using swizzle_ops = avx512_32bit_swizzle_ops;

     static type_t type_max() { return X86_SIMD_SORT_INFINITYF; }
     static type_t type_min() { return -X86_SIMD_SORT_INFINITYF; }
-    static zmm_t zmm_max() { return _mm512_set1_ps(type_max()); }
+    static reg_t zmm_max() { return _mm512_set1_ps(type_max()); }

     static opmask_t knot_opmask(opmask_t x) { return _mm512_knot(x); }
-    static opmask_t ge(zmm_t x, zmm_t y) {
+    static opmask_t ge(reg_t x, reg_t y) {
         return _mm512_cmp_ps_mask(x, y, _CMP_GE_OQ);
     }
-    static opmask_t gt(zmm_t x, zmm_t y) {
+    static opmask_t gt(reg_t x, reg_t y) {
         return _mm512_cmp_ps_mask(x, y, _CMP_GT_OQ);
     }
+    static opmask_t get_partial_loadmask(uint64_t num_to_read) {
+        return ((0x1ull << num_to_read) - 0x1ull);
+    }
+    static int32_t convert_mask_to_int(opmask_t mask) { return mask; }
+    template <int type>
+    static opmask_t fpclass(reg_t x) {
+        return _mm512_fpclass_ps_mask(x, type);
+    }
     template <int scale>
-    static ymm_t i64gather(__m512i index, void const *base) {
+    static halfreg_t i64gather(__m512i index, void const *base) {
         return _mm512_i64gather_ps(index, base, scale);
     }
-    static zmm_t merge(ymm_t y1, ymm_t y2) {
-        zmm_t z1 = _mm512_castsi512_ps(
+    static reg_t merge(halfreg_t y1, halfreg_t y2) {
+        reg_t z1 = _mm512_castsi512_ps(
             _mm512_castsi256_si512(_mm256_castps_si256(y1)));
         return _mm512_insertf32x8(z1, y2, 1);
     }
-    static zmm_t loadu(void const *mem) { return _mm512_loadu_ps(mem); }
-    static zmm_t max(zmm_t x, zmm_t y) { return _mm512_max_ps(x, y); }
-    static void mask_compressstoreu(void *mem, opmask_t mask, zmm_t x) {
+    static reg_t loadu(void const *mem) { return _mm512_loadu_ps(mem); }
+    static reg_t max(reg_t x, reg_t y) { return _mm512_max_ps(x, y); }
+    static void mask_compressstoreu(void *mem, opmask_t mask, reg_t x) {
         return _mm512_mask_compressstoreu_ps(mem, mask, x);
     }
-    static zmm_t mask_loadu(zmm_t x, opmask_t mask, void const *mem) {
+    static reg_t maskz_loadu(opmask_t mask, void const *mem) {
+        return _mm512_maskz_loadu_ps(mask, mem);
+    }
+    static reg_t mask_loadu(reg_t x, opmask_t mask, void const *mem) {
         return _mm512_mask_loadu_ps(x, mask, mem);
     }
-    static zmm_t mask_mov(zmm_t x, opmask_t mask, zmm_t y) {
+    static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y) {
         return _mm512_mask_mov_ps(x, mask, y);
     }
-    static void mask_storeu(void *mem, opmask_t mask, zmm_t x) {
+    static void mask_storeu(void *mem, opmask_t mask, reg_t x) {
         return _mm512_mask_storeu_ps(mem, mask, x);
     }
-    static zmm_t min(zmm_t x, zmm_t y) { return _mm512_min_ps(x, y); }
-    static zmm_t permutexvar(__m512i idx, zmm_t zmm) {
+    static reg_t min(reg_t x, reg_t y) { return _mm512_min_ps(x, y); }
+    static reg_t permutexvar(__m512i idx, reg_t zmm) {
         return _mm512_permutexvar_ps(idx, zmm);
     }
-    static type_t reducemax(zmm_t v) { return _mm512_reduce_max_ps(v); }
-    static type_t reducemin(zmm_t v) { return _mm512_reduce_min_ps(v); }
-    static zmm_t set1(type_t v) { return _mm512_set1_ps(v); }
+    static type_t reducemax(reg_t v) { return _mm512_reduce_max_ps(v); }
+    static type_t reducemin(reg_t v) { return _mm512_reduce_min_ps(v); }
+    static reg_t set1(type_t v) { return _mm512_set1_ps(v); }
     template <uint8_t mask>
-    static zmm_t shuffle(zmm_t zmm) {
+    static reg_t shuffle(reg_t zmm) {
         return _mm512_shuffle_ps(zmm, zmm, (_MM_PERM_ENUM)mask);
     }
-    static void storeu(void *mem, zmm_t x) { return _mm512_storeu_ps(mem, x); }
+    static void storeu(void *mem, reg_t x) { return _mm512_storeu_ps(mem, x); }

-    static ymm_t max(ymm_t x, ymm_t y) { return _mm256_max_ps(x, y); }
-    static ymm_t min(ymm_t x, ymm_t y) { return _mm256_min_ps(x, y); }
+    static halfreg_t max(halfreg_t x, halfreg_t y) {
+        return _mm256_max_ps(x, y);
+    }
+    static halfreg_t min(halfreg_t x, halfreg_t y) {
+        return _mm256_min_ps(x, y);
+    }
+    static reg_t reverse(reg_t zmm) {
+        const auto rev_index = _mm512_set_epi32(NETWORK_32BIT_5);
+        return permutexvar(rev_index, zmm);
+    }
+    static reg_t sort_vec(reg_t x) {
+        return sort_zmm_32bit<zmm_vector<type_t>>(x);
+    }
+    static reg_t cast_from(__m512i v) { return _mm512_castsi512_ps(v); }
+    static __m512i cast_to(reg_t v) { return _mm512_castps_si512(v); }
+    static int double_compressstore(type_t *left_addr, type_t *right_addr,
+                                    opmask_t k, reg_t reg) {
+        return avx512_double_compressstore<zmm_vector<type_t>>(
+            left_addr, right_addr, k, reg);
+    }
 };

 /*
  * Assumes zmm is random and performs a full sorting network defined in
  * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg
  */
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE zmm_t sort_zmm_32bit(zmm_t zmm) {
+template <typename vtype, typename reg_t = typename vtype::reg_t>
+X86_SIMD_SORT_INLINE reg_t sort_zmm_32bit(reg_t zmm) {
     zmm = cmp_merge<vtype>(
         zmm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(zmm), 0xAAAA);
     zmm = cmp_merge<vtype>(
@@ -193,249 +267,71 @@ X86_SIMD_SORT_INLINE zmm_t sort_zmm_32bit(zmm_t zmm) {
     return zmm;
 }

-// Assumes zmm is bitonic and performs a recursive half cleaner
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE zmm_t bitonic_merge_zmm_32bit(zmm_t zmm) {
-    // 1) half_cleaner[16]: compare 1-9, 2-10, 3-11 etc ..
-    zmm = cmp_merge<vtype>(
-        zmm, vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_7), zmm),
-        0xFF00);
-    // 2) half_cleaner[8]: compare 1-5, 2-6, 3-7 etc ..
-    zmm = cmp_merge<vtype>(
-        zmm, vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_6), zmm),
-        0xF0F0);
-    // 3) half_cleaner[4]
-    zmm = cmp_merge<vtype>(
-        zmm, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(zmm), 0xCCCC);
-    // 3) half_cleaner[1]
-    zmm = cmp_merge<vtype>(
-        zmm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(zmm), 0xAAAA);
-    return zmm;
-}
-
-// Assumes zmm1 and zmm2 are sorted and performs a recursive half cleaner
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE void bitonic_merge_two_zmm_32bit(zmm_t *zmm1,
-                                                      zmm_t *zmm2) {
-    // 1) First step of a merging network: coex of zmm1 and zmm2 reversed
-    *zmm2 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), *zmm2);
-    zmm_t zmm3 = vtype::min(*zmm1, *zmm2);
-    zmm_t zmm4 = vtype::max(*zmm1, *zmm2);
-    // 2) Recursive half cleaner for each
-    *zmm1 = bitonic_merge_zmm_32bit<vtype>(zmm3);
-    *zmm2 = bitonic_merge_zmm_32bit<vtype>(zmm4);
-}
-
-// Assumes [zmm0, zmm1] and [zmm2, zmm3] are sorted and performs a recursive
-// half cleaner
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE void bitonic_merge_four_zmm_32bit(zmm_t *zmm) {
-    zmm_t zmm2r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[2]);
-    zmm_t zmm3r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[3]);
-    zmm_t zmm_t1 = vtype::min(zmm[0], zmm3r);
-    zmm_t zmm_t2 = vtype::min(zmm[1], zmm2r);
-    zmm_t zmm_t3 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[1], zmm2r));
-    zmm_t zmm_t4 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[0], zmm3r));
-    zmm_t zmm0 = vtype::min(zmm_t1, zmm_t2);
-    zmm_t zmm1 = vtype::max(zmm_t1, zmm_t2);
-    zmm_t zmm2 = vtype::min(zmm_t3, zmm_t4);
-    zmm_t zmm3 = vtype::max(zmm_t3, zmm_t4);
-    zmm[0] = bitonic_merge_zmm_32bit<vtype>(zmm0);
-    zmm[1] = bitonic_merge_zmm_32bit<vtype>(zmm1);
-    zmm[2] = bitonic_merge_zmm_32bit<vtype>(zmm2);
-    zmm[3] = bitonic_merge_zmm_32bit<vtype>(zmm3);
-}
-
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE void bitonic_merge_eight_zmm_32bit(zmm_t *zmm) {
-    zmm_t zmm4r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[4]);
-    zmm_t zmm5r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[5]);
-    zmm_t zmm6r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[6]);
-    zmm_t zmm7r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[7]);
-    zmm_t zmm_t1 = vtype::min(zmm[0], zmm7r);
-    zmm_t zmm_t2 = vtype::min(zmm[1], zmm6r);
-    zmm_t zmm_t3 = vtype::min(zmm[2], zmm5r);
-    zmm_t zmm_t4 = vtype::min(zmm[3], zmm4r);
-    zmm_t zmm_t5 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[3], zmm4r));
-    zmm_t zmm_t6 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[2], zmm5r));
-    zmm_t zmm_t7 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[1], zmm6r));
-    zmm_t zmm_t8 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[0], zmm7r));
-    COEX<vtype>(zmm_t1, zmm_t3);
-    COEX<vtype>(zmm_t2, zmm_t4);
-    COEX<vtype>(zmm_t5, zmm_t7);
-    COEX<vtype>(zmm_t6, zmm_t8);
-    COEX<vtype>(zmm_t1, zmm_t2);
-    COEX<vtype>(zmm_t3, zmm_t4);
-    COEX<vtype>(zmm_t5, zmm_t6);
-    COEX<vtype>(zmm_t7, zmm_t8);
-    zmm[0] = bitonic_merge_zmm_32bit<vtype>(zmm_t1);
-    zmm[1] = bitonic_merge_zmm_32bit<vtype>(zmm_t2);
-    zmm[2] = bitonic_merge_zmm_32bit<vtype>(zmm_t3);
-    zmm[3] = bitonic_merge_zmm_32bit<vtype>(zmm_t4);
-    zmm[4] = bitonic_merge_zmm_32bit<vtype>(zmm_t5);
-    zmm[5] = bitonic_merge_zmm_32bit<vtype>(zmm_t6);
-    zmm[6] = bitonic_merge_zmm_32bit<vtype>(zmm_t7);
-    zmm[7] = bitonic_merge_zmm_32bit<vtype>(zmm_t8);
-}
-
-template <typename vtype, typename type_t>
-X86_SIMD_SORT_INLINE void sort_16_32bit(type_t *arr, int32_t N) {
+struct avx512_32bit_swizzle_ops {
+    template <typename vtype, int scale>
+    X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(
+        typename vtype::reg_t reg) {
+        __m512i v = vtype::cast_to(reg);
+
+        if constexpr (scale == 2) {
+            v = _mm512_shuffle_epi32(v, (_MM_PERM_ENUM)0b10110001);
+        } else if constexpr (scale == 4) {
+            v = _mm512_shuffle_epi32(v, (_MM_PERM_ENUM)0b01001110);
+        } else if constexpr (scale == 8) {
+            v = _mm512_shuffle_i64x2(v, v, 0b10110001);
+        } else if constexpr (scale == 16) {
+            v = _mm512_shuffle_i64x2(v, v, 0b01001110);
+        } else {
+            static_assert(scale == -1, "should not be reached");
+        }
+
+        return vtype::cast_from(v);
+    }
+
+    template <typename vtype, int scale>
+    X86_SIMD_SORT_INLINE typename vtype::reg_t reverse_n(
+        typename vtype::reg_t reg) {
+        __m512i v = vtype::cast_to(reg);
+
+        if constexpr (scale == 2) {
+            return swap_n<vtype, 2>(reg);
+        } else if constexpr (scale == 4) {
+            __m512i mask = _mm512_set_epi32(12, 13, 14, 15, 8, 9, 10, 11, 4, 5,
+                                            6, 7, 0, 1, 2, 3);
+            v = _mm512_permutexvar_epi32(mask, v);
+        } else if constexpr (scale == 8) {
+            __m512i mask = _mm512_set_epi32(8, 9, 10, 11, 12, 13, 14, 15, 0, 1,
+                                            2, 3, 4, 5, 6, 7);
+            v = _mm512_permutexvar_epi32(mask, v);
+        } else if constexpr (scale == 16) {
+            return vtype::reverse(reg);
+        } else {
+            static_assert(scale == -1, "should not be reached");
+        }
+
+        return vtype::cast_from(v);
+    }
+
+    template <typename vtype, int scale>
+    X86_SIMD_SORT_INLINE typename vtype::reg_t merge_n(
|
||||||
typename vtype::opmask_t load_mask = (0x0001 << N) - 0x0001;
|
typename vtype::reg_t reg, typename vtype::reg_t other) {
|
||||||
typename vtype::zmm_t zmm =
|
__m512i v1 = vtype::cast_to(reg);
|
||||||
vtype::mask_loadu(vtype::zmm_max(), load_mask, arr);
|
__m512i v2 = vtype::cast_to(other);
|
||||||
vtype::mask_storeu(arr, load_mask, sort_zmm_32bit<vtype>(zmm));
|
|
||||||
|
if constexpr (scale == 2) {
|
||||||
|
v1 = _mm512_mask_blend_epi32(0b0101010101010101, v1, v2);
|
||||||
|
} else if constexpr (scale == 4) {
|
||||||
|
v1 = _mm512_mask_blend_epi32(0b0011001100110011, v1, v2);
|
||||||
|
} else if constexpr (scale == 8) {
|
||||||
|
v1 = _mm512_mask_blend_epi32(0b0000111100001111, v1, v2);
|
||||||
|
} else if constexpr (scale == 16) {
|
||||||
|
v1 = _mm512_mask_blend_epi32(0b0000000011111111, v1, v2);
|
||||||
|
} else {
|
||||||
|
static_assert(scale == -1, "should not be reached");
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename vtype, typename type_t>
|
return vtype::cast_from(v1);
|
||||||
X86_SIMD_SORT_INLINE void sort_32_32bit(type_t *arr, int32_t N) {
|
|
||||||
if (N <= 16) {
|
|
||||||
sort_16_32bit<vtype>(arr, N);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
using zmm_t = typename vtype::zmm_t;
|
|
||||||
zmm_t zmm1 = vtype::loadu(arr);
|
|
||||||
typename vtype::opmask_t load_mask = (0x0001 << (N - 16)) - 0x0001;
|
|
||||||
zmm_t zmm2 = vtype::mask_loadu(vtype::zmm_max(), load_mask, arr + 16);
|
|
||||||
zmm1 = sort_zmm_32bit<vtype>(zmm1);
|
|
||||||
zmm2 = sort_zmm_32bit<vtype>(zmm2);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm1, &zmm2);
|
|
||||||
vtype::storeu(arr, zmm1);
|
|
||||||
vtype::mask_storeu(arr + 16, load_mask, zmm2);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename vtype, typename type_t>
|
|
||||||
X86_SIMD_SORT_INLINE void sort_64_32bit(type_t *arr, int32_t N) {
|
|
||||||
if (N <= 32) {
|
|
||||||
sort_32_32bit<vtype>(arr, N);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
using zmm_t = typename vtype::zmm_t;
|
|
||||||
using opmask_t = typename vtype::opmask_t;
|
|
||||||
zmm_t zmm[4];
|
|
||||||
zmm[0] = vtype::loadu(arr);
|
|
||||||
zmm[1] = vtype::loadu(arr + 16);
|
|
||||||
opmask_t load_mask1 = 0xFFFF, load_mask2 = 0xFFFF;
|
|
||||||
uint64_t combined_mask = (0x1ull << (N - 32)) - 0x1ull;
|
|
||||||
load_mask1 &= combined_mask & 0xFFFF;
|
|
||||||
load_mask2 &= (combined_mask >> 16) & 0xFFFF;
|
|
||||||
zmm[2] = vtype::mask_loadu(vtype::zmm_max(), load_mask1, arr + 32);
|
|
||||||
zmm[3] = vtype::mask_loadu(vtype::zmm_max(), load_mask2, arr + 48);
|
|
||||||
zmm[0] = sort_zmm_32bit<vtype>(zmm[0]);
|
|
||||||
zmm[1] = sort_zmm_32bit<vtype>(zmm[1]);
|
|
||||||
zmm[2] = sort_zmm_32bit<vtype>(zmm[2]);
|
|
||||||
zmm[3] = sort_zmm_32bit<vtype>(zmm[3]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[0], &zmm[1]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[2], &zmm[3]);
|
|
||||||
bitonic_merge_four_zmm_32bit<vtype>(zmm);
|
|
||||||
vtype::storeu(arr, zmm[0]);
|
|
||||||
vtype::storeu(arr + 16, zmm[1]);
|
|
||||||
vtype::mask_storeu(arr + 32, load_mask1, zmm[2]);
|
|
||||||
vtype::mask_storeu(arr + 48, load_mask2, zmm[3]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename vtype, typename type_t>
|
|
||||||
X86_SIMD_SORT_INLINE void sort_128_32bit(type_t *arr, int32_t N) {
|
|
||||||
if (N <= 64) {
|
|
||||||
sort_64_32bit<vtype>(arr, N);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
using zmm_t = typename vtype::zmm_t;
|
|
||||||
using opmask_t = typename vtype::opmask_t;
|
|
||||||
zmm_t zmm[8];
|
|
||||||
zmm[0] = vtype::loadu(arr);
|
|
||||||
zmm[1] = vtype::loadu(arr + 16);
|
|
||||||
zmm[2] = vtype::loadu(arr + 32);
|
|
||||||
zmm[3] = vtype::loadu(arr + 48);
|
|
||||||
zmm[0] = sort_zmm_32bit<vtype>(zmm[0]);
|
|
||||||
zmm[1] = sort_zmm_32bit<vtype>(zmm[1]);
|
|
||||||
zmm[2] = sort_zmm_32bit<vtype>(zmm[2]);
|
|
||||||
zmm[3] = sort_zmm_32bit<vtype>(zmm[3]);
|
|
||||||
opmask_t load_mask1 = 0xFFFF, load_mask2 = 0xFFFF;
|
|
||||||
opmask_t load_mask3 = 0xFFFF, load_mask4 = 0xFFFF;
|
|
||||||
if (N != 128) {
|
|
||||||
uint64_t combined_mask = (0x1ull << (N - 64)) - 0x1ull;
|
|
||||||
load_mask1 &= combined_mask & 0xFFFF;
|
|
||||||
load_mask2 &= (combined_mask >> 16) & 0xFFFF;
|
|
||||||
load_mask3 &= (combined_mask >> 32) & 0xFFFF;
|
|
||||||
load_mask4 &= (combined_mask >> 48) & 0xFFFF;
|
|
||||||
}
|
|
||||||
zmm[4] = vtype::mask_loadu(vtype::zmm_max(), load_mask1, arr + 64);
|
|
||||||
zmm[5] = vtype::mask_loadu(vtype::zmm_max(), load_mask2, arr + 80);
|
|
||||||
zmm[6] = vtype::mask_loadu(vtype::zmm_max(), load_mask3, arr + 96);
|
|
||||||
zmm[7] = vtype::mask_loadu(vtype::zmm_max(), load_mask4, arr + 112);
|
|
||||||
zmm[4] = sort_zmm_32bit<vtype>(zmm[4]);
|
|
||||||
zmm[5] = sort_zmm_32bit<vtype>(zmm[5]);
|
|
||||||
zmm[6] = sort_zmm_32bit<vtype>(zmm[6]);
|
|
||||||
zmm[7] = sort_zmm_32bit<vtype>(zmm[7]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[0], &zmm[1]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[2], &zmm[3]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[4], &zmm[5]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[6], &zmm[7]);
|
|
||||||
bitonic_merge_four_zmm_32bit<vtype>(zmm);
|
|
||||||
bitonic_merge_four_zmm_32bit<vtype>(zmm + 4);
|
|
||||||
bitonic_merge_eight_zmm_32bit<vtype>(zmm);
|
|
||||||
vtype::storeu(arr, zmm[0]);
|
|
||||||
vtype::storeu(arr + 16, zmm[1]);
|
|
||||||
vtype::storeu(arr + 32, zmm[2]);
|
|
||||||
vtype::storeu(arr + 48, zmm[3]);
|
|
||||||
vtype::mask_storeu(arr + 64, load_mask1, zmm[4]);
|
|
||||||
vtype::mask_storeu(arr + 80, load_mask2, zmm[5]);
|
|
||||||
vtype::mask_storeu(arr + 96, load_mask3, zmm[6]);
|
|
||||||
vtype::mask_storeu(arr + 112, load_mask4, zmm[7]);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename vtype, typename type_t>
|
|
||||||
static void qsort_32bit_(type_t *arr, int64_t left, int64_t right,
|
|
||||||
int64_t max_iters) {
|
|
||||||
/*
|
|
||||||
* Resort to std::sort if quicksort isnt making any progress
|
|
||||||
*/
|
|
||||||
if (max_iters <= 0) {
|
|
||||||
std::sort(arr + left, arr + right + 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Base case: use bitonic networks to sort arrays <= 128
|
|
||||||
*/
|
|
||||||
if (right + 1 - left <= 128) {
|
|
||||||
sort_128_32bit<vtype>(arr + left, (int32_t)(right + 1 - left));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
type_t pivot = get_pivot_scalar<type_t>(arr, left, right);
|
|
||||||
type_t smallest = vtype::type_max();
|
|
||||||
type_t biggest = vtype::type_min();
|
|
||||||
int64_t pivot_index = partition_avx512_unrolled<vtype, 2>(
|
|
||||||
arr, left, right + 1, pivot, &smallest, &biggest, false);
|
|
||||||
if (pivot != smallest)
|
|
||||||
qsort_32bit_<vtype>(arr, left, pivot_index - 1, max_iters - 1);
|
|
||||||
if (pivot != biggest)
|
|
||||||
qsort_32bit_<vtype>(arr, pivot_index, right, max_iters - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void inline avx512_qsort<int32_t>(int32_t *arr, int64_t fromIndex, int64_t toIndex) {
|
|
||||||
int64_t arrsize = toIndex - fromIndex;
|
|
||||||
if (arrsize > 1) {
|
|
||||||
qsort_32bit_<zmm_vector<int32_t>, int32_t>(arr, fromIndex, toIndex - 1,
|
|
||||||
2 * (int64_t)log2(arrsize));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void inline avx512_qsort<float>(float *arr, int64_t fromIndex, int64_t toIndex) {
|
|
||||||
int64_t arrsize = toIndex - fromIndex;
|
|
||||||
if (arrsize > 1) {
|
|
||||||
qsort_32bit_<zmm_vector<float>, float>(arr, fromIndex, toIndex - 1,
|
|
||||||
2 * (int64_t)log2(arrsize));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
#endif // AVX512_QSORT_32BIT
|
#endif // AVX512_QSORT_32BIT
|
||||||
|
|||||||
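The network code above is easier to check against a scalar model: each cmp_merge call is one compare-exchange layer of the bitonic network, keeping the per-lane minimum where the mask bit is 0 and the maximum where it is 1. A minimal standalone sketch of that layer (plain C++, no AVX-512; the 4-lane example and all names are illustrative, not part of the sources):

#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>

// Scalar model of one cmp_merge layer: pair each lane with its shuffled
// partner and keep min or max according to the corresponding mask bit.
template <size_t N>
std::array<int32_t, N> cmp_merge_model(std::array<int32_t, N> v,
                                       std::array<int, N> partner,
                                       uint32_t mask) {
    std::array<int32_t, N> out{};
    for (size_t i = 0; i < N; ++i) {
        int32_t a = v[i], b = v[partner[i]];
        // mask bit 1 -> lane receives max, bit 0 -> lane receives min
        out[i] = ((mask >> i) & 1) ? std::max(a, b) : std::min(a, b);
    }
    return out;
}

int main() {
    // 4-lane analogue of the first layer above: swap neighbors, mask 0b1010
    // (the 16-lane version uses 0xAAAA with SHUFFLE_MASK(2, 3, 0, 1)).
    std::array<int32_t, 4> v{3, 1, 4, 2};
    std::array<int, 4> partner{1, 0, 3, 2};
    for (int32_t x : cmp_merge_model(v, partner, 0b1010))
        std::cout << x << ' ';  // prints: 1 3 2 4
}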
@@ -1,212 +0,0 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef AVX512_64BIT_COMMON
#define AVX512_64BIT_COMMON
#include "avx512-common-qsort.h"

/*
 * Constants used in sorting 8 elements in a ZMM register. Based on Bitonic
 * sorting network (see
 * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg)
 */
// ZMM                  7, 6, 5, 4, 3, 2, 1, 0
#define NETWORK_64BIT_1 4, 5, 6, 7, 0, 1, 2, 3
#define NETWORK_64BIT_2 0, 1, 2, 3, 4, 5, 6, 7
#define NETWORK_64BIT_3 5, 4, 7, 6, 1, 0, 3, 2
#define NETWORK_64BIT_4 3, 2, 1, 0, 7, 6, 5, 4

template <>
struct zmm_vector<int64_t> {
    using type_t = int64_t;
    using zmm_t = __m512i;
    using zmmi_t = __m512i;
    using ymm_t = __m512i;
    using opmask_t = __mmask8;
    static const uint8_t numlanes = 8;

    static type_t type_max() { return X86_SIMD_SORT_MAX_INT64; }
    static type_t type_min() { return X86_SIMD_SORT_MIN_INT64; }
    static zmm_t zmm_max() {
        return _mm512_set1_epi64(type_max());
    }  // TODO: this should broadcast bits as is?

    static zmmi_t seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
                       int v8) {
        return _mm512_set_epi64(v1, v2, v3, v4, v5, v6, v7, v8);
    }
    static opmask_t kxor_opmask(opmask_t x, opmask_t y) {
        return _kxor_mask8(x, y);
    }
    static opmask_t knot_opmask(opmask_t x) { return _knot_mask8(x); }
    static opmask_t le(zmm_t x, zmm_t y) {
        return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_LE);
    }
    static opmask_t ge(zmm_t x, zmm_t y) {
        return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_NLT);
    }
    static opmask_t gt(zmm_t x, zmm_t y) {
        return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_GT);
    }
    static opmask_t eq(zmm_t x, zmm_t y) {
        return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_EQ);
    }
    template <int scale>
    static zmm_t mask_i64gather(zmm_t src, opmask_t mask, __m512i index,
                                void const *base) {
        return _mm512_mask_i64gather_epi64(src, mask, index, base, scale);
    }
    template <int scale>
    static zmm_t i64gather(__m512i index, void const *base) {
        return _mm512_i64gather_epi64(index, base, scale);
    }
    static zmm_t loadu(void const *mem) { return _mm512_loadu_si512(mem); }
    static zmm_t max(zmm_t x, zmm_t y) { return _mm512_max_epi64(x, y); }
    static void mask_compressstoreu(void *mem, opmask_t mask, zmm_t x) {
        return _mm512_mask_compressstoreu_epi64(mem, mask, x);
    }
    static zmm_t maskz_loadu(opmask_t mask, void const *mem) {
        return _mm512_maskz_loadu_epi64(mask, mem);
    }
    static zmm_t mask_loadu(zmm_t x, opmask_t mask, void const *mem) {
        return _mm512_mask_loadu_epi64(x, mask, mem);
    }
    static zmm_t mask_mov(zmm_t x, opmask_t mask, zmm_t y) {
        return _mm512_mask_mov_epi64(x, mask, y);
    }
    static void mask_storeu(void *mem, opmask_t mask, zmm_t x) {
        return _mm512_mask_storeu_epi64(mem, mask, x);
    }
    static zmm_t min(zmm_t x, zmm_t y) { return _mm512_min_epi64(x, y); }
    static zmm_t permutexvar(__m512i idx, zmm_t zmm) {
        return _mm512_permutexvar_epi64(idx, zmm);
    }
    static type_t reducemax(zmm_t v) { return _mm512_reduce_max_epi64(v); }
    static type_t reducemin(zmm_t v) { return _mm512_reduce_min_epi64(v); }
    static zmm_t set1(type_t v) { return _mm512_set1_epi64(v); }
    template <uint8_t mask>
    static zmm_t shuffle(zmm_t zmm) {
        __m512d temp = _mm512_castsi512_pd(zmm);
        return _mm512_castpd_si512(
            _mm512_shuffle_pd(temp, temp, (_MM_PERM_ENUM)mask));
    }
    static void storeu(void *mem, zmm_t x) { _mm512_storeu_si512(mem, x); }
};
template <>
struct zmm_vector<double> {
    using type_t = double;
    using zmm_t = __m512d;
    using zmmi_t = __m512i;
    using ymm_t = __m512d;
    using opmask_t = __mmask8;
    static const uint8_t numlanes = 8;

    static type_t type_max() { return X86_SIMD_SORT_INFINITY; }
    static type_t type_min() { return -X86_SIMD_SORT_INFINITY; }
    static zmm_t zmm_max() { return _mm512_set1_pd(type_max()); }

    static zmmi_t seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
                       int v8) {
        return _mm512_set_epi64(v1, v2, v3, v4, v5, v6, v7, v8);
    }

    static zmm_t maskz_loadu(opmask_t mask, void const *mem) {
        return _mm512_maskz_loadu_pd(mask, mem);
    }
    static opmask_t knot_opmask(opmask_t x) { return _knot_mask8(x); }
    static opmask_t ge(zmm_t x, zmm_t y) {
        return _mm512_cmp_pd_mask(x, y, _CMP_GE_OQ);
    }
    static opmask_t gt(zmm_t x, zmm_t y) {
        return _mm512_cmp_pd_mask(x, y, _CMP_GT_OQ);
    }
    static opmask_t eq(zmm_t x, zmm_t y) {
        return _mm512_cmp_pd_mask(x, y, _CMP_EQ_OQ);
    }
    template <int type>
    static opmask_t fpclass(zmm_t x) {
        return _mm512_fpclass_pd_mask(x, type);
    }
    template <int scale>
    static zmm_t mask_i64gather(zmm_t src, opmask_t mask, __m512i index,
                                void const *base) {
        return _mm512_mask_i64gather_pd(src, mask, index, base, scale);
    }
    template <int scale>
    static zmm_t i64gather(__m512i index, void const *base) {
        return _mm512_i64gather_pd(index, base, scale);
    }
    static zmm_t loadu(void const *mem) { return _mm512_loadu_pd(mem); }
    static zmm_t max(zmm_t x, zmm_t y) { return _mm512_max_pd(x, y); }
    static void mask_compressstoreu(void *mem, opmask_t mask, zmm_t x) {
        return _mm512_mask_compressstoreu_pd(mem, mask, x);
    }
    static zmm_t mask_loadu(zmm_t x, opmask_t mask, void const *mem) {
        return _mm512_mask_loadu_pd(x, mask, mem);
    }
    static zmm_t mask_mov(zmm_t x, opmask_t mask, zmm_t y) {
        return _mm512_mask_mov_pd(x, mask, y);
    }
    static void mask_storeu(void *mem, opmask_t mask, zmm_t x) {
        return _mm512_mask_storeu_pd(mem, mask, x);
    }
    static zmm_t min(zmm_t x, zmm_t y) { return _mm512_min_pd(x, y); }
    static zmm_t permutexvar(__m512i idx, zmm_t zmm) {
        return _mm512_permutexvar_pd(idx, zmm);
    }
    static type_t reducemax(zmm_t v) { return _mm512_reduce_max_pd(v); }
    static type_t reducemin(zmm_t v) { return _mm512_reduce_min_pd(v); }
    static zmm_t set1(type_t v) { return _mm512_set1_pd(v); }
    template <uint8_t mask>
    static zmm_t shuffle(zmm_t zmm) {
        return _mm512_shuffle_pd(zmm, zmm, (_MM_PERM_ENUM)mask);
    }
    static void storeu(void *mem, zmm_t x) { _mm512_storeu_pd(mem, x); }
};

/*
 * Assumes zmm is random and performs a full sorting network defined in
 * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg
 */
template <typename vtype, typename zmm_t = typename vtype::zmm_t>
X86_SIMD_SORT_INLINE zmm_t sort_zmm_64bit(zmm_t zmm) {
    const typename vtype::zmmi_t rev_index = vtype::seti(NETWORK_64BIT_2);
    zmm = cmp_merge<vtype>(
        zmm, vtype::template shuffle<SHUFFLE_MASK(1, 1, 1, 1)>(zmm), 0xAA);
    zmm = cmp_merge<vtype>(
        zmm, vtype::permutexvar(vtype::seti(NETWORK_64BIT_1), zmm), 0xCC);
    zmm = cmp_merge<vtype>(
        zmm, vtype::template shuffle<SHUFFLE_MASK(1, 1, 1, 1)>(zmm), 0xAA);
    zmm = cmp_merge<vtype>(zmm, vtype::permutexvar(rev_index, zmm), 0xF0);
    zmm = cmp_merge<vtype>(
        zmm, vtype::permutexvar(vtype::seti(NETWORK_64BIT_3), zmm), 0xCC);
    zmm = cmp_merge<vtype>(
        zmm, vtype::template shuffle<SHUFFLE_MASK(1, 1, 1, 1)>(zmm), 0xAA);
    return zmm;
}

#endif
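For reference, the NETWORK_64BIT_* constants above are index vectors for _mm512_permutexvar_epi64, whose effect is out[i] = in[idx[i]]; note that _mm512_set_epi64 takes its arguments highest lane first. A small scalar model (illustrative only, not part of the sources):

#include <array>
#include <cstdint>
#include <iostream>

// Scalar model of _mm512_permutexvar_epi64: out[i] = in[idx[i]].
std::array<int64_t, 8> permutexvar_model(std::array<int, 8> idx,
                                         std::array<int64_t, 8> in) {
    std::array<int64_t, 8> out{};
    for (int i = 0; i < 8; ++i) out[i] = in[idx[i]];
    return out;
}

int main() {
    // NETWORK_64BIT_2 is (0,1,2,3,4,5,6,7) in set_epi64 (high-to-low) order,
    // so the per-lane index vector is {7,6,5,4,3,2,1,0}: a full reversal.
    std::array<int, 8> rev{7, 6, 5, 4, 3, 2, 1, 0};
    std::array<int64_t, 8> v{10, 11, 12, 13, 14, 15, 16, 17};
    for (int64_t x : permutexvar_model(rev, v)) std::cout << x << ' ';
    // prints: 17 16 15 14 13 12 11 10
}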
File diff suppressed because it is too large
@@ -1,483 +0,0 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)
#ifndef AVX512_QSORT_COMMON
#define AVX512_QSORT_COMMON

/*
 * Quicksort using AVX-512. The ideas and code are based on these two research
 * papers [1] and [2]. On a high level, the idea is to vectorize quicksort
 * partitioning using AVX-512 compressstore instructions. If the array size is
 * < 128, then use Bitonic sorting network implemented on 512-bit registers.
 * The precise network definitions depend on the dtype and are defined in
 * separate files: avx512-16bit-qsort.hpp, avx512-32bit-qsort.hpp and
 * avx512-64bit-qsort.hpp. Article [4] is a good resource for bitonic sorting
 * network. The core implementations of the vectorized qsort functions
 * avx512_qsort<T>(T*, int64_t) are modified versions of avx2 quicksort
 * presented in the paper [2] and source code associated with that paper [3].
 *
 * [1] Fast and Robust Vectorized In-Place Sorting of Primitive Types
 *     https://drops.dagstuhl.de/opus/volltexte/2021/13775/
 *
 * [2] A Novel Hybrid Quicksort Algorithm Vectorized using AVX-512 on Intel
 *     Skylake https://arxiv.org/pdf/1704.08579.pdf
 *
 * [3] https://github.com/simd-sorting/fast-and-robust: SPDX-License-Identifier:
 *     MIT
 *
 * [4] http://mitp-content-server.mit.edu:18180/books/content/sectbyfn?collid=books_pres_0&fn=Chapter%2027.pdf&id=8030
 *
 */

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>

/*
Workaround for the bug in GCC12 (that was fixed in GCC 12.3.1).
More details are available at: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wuninitialized"
#include <immintrin.h>
#pragma GCC diagnostic pop

#define X86_SIMD_SORT_INFINITY std::numeric_limits<double>::infinity()
#define X86_SIMD_SORT_INFINITYF std::numeric_limits<float>::infinity()
#define X86_SIMD_SORT_INFINITYH 0x7c00
#define X86_SIMD_SORT_NEGINFINITYH 0xfc00
#define X86_SIMD_SORT_MAX_UINT16 std::numeric_limits<uint16_t>::max()
#define X86_SIMD_SORT_MAX_INT16 std::numeric_limits<int16_t>::max()
#define X86_SIMD_SORT_MIN_INT16 std::numeric_limits<int16_t>::min()
#define X86_SIMD_SORT_MAX_UINT32 std::numeric_limits<uint32_t>::max()
#define X86_SIMD_SORT_MAX_INT32 std::numeric_limits<int32_t>::max()
#define X86_SIMD_SORT_MIN_INT32 std::numeric_limits<int32_t>::min()
#define X86_SIMD_SORT_MAX_UINT64 std::numeric_limits<uint64_t>::max()
#define X86_SIMD_SORT_MAX_INT64 std::numeric_limits<int64_t>::max()
#define X86_SIMD_SORT_MIN_INT64 std::numeric_limits<int64_t>::min()
#define ZMM_MAX_DOUBLE _mm512_set1_pd(X86_SIMD_SORT_INFINITY)
#define ZMM_MAX_UINT64 _mm512_set1_epi64(X86_SIMD_SORT_MAX_UINT64)
#define ZMM_MAX_INT64 _mm512_set1_epi64(X86_SIMD_SORT_MAX_INT64)
#define ZMM_MAX_FLOAT _mm512_set1_ps(X86_SIMD_SORT_INFINITYF)
#define ZMM_MAX_UINT _mm512_set1_epi32(X86_SIMD_SORT_MAX_UINT32)
#define ZMM_MAX_INT _mm512_set1_epi32(X86_SIMD_SORT_MAX_INT32)
#define ZMM_MAX_HALF _mm512_set1_epi16(X86_SIMD_SORT_INFINITYH)
#define YMM_MAX_HALF _mm256_set1_epi16(X86_SIMD_SORT_INFINITYH)
#define ZMM_MAX_UINT16 _mm512_set1_epi16(X86_SIMD_SORT_MAX_UINT16)
#define ZMM_MAX_INT16 _mm512_set1_epi16(X86_SIMD_SORT_MAX_INT16)
#define SHUFFLE_MASK(a, b, c, d) (a << 6) | (b << 4) | (c << 2) | d

#ifdef _MSC_VER
#define X86_SIMD_SORT_INLINE static inline
#define X86_SIMD_SORT_FINLINE static __forceinline
#elif defined(__CYGWIN__)
/*
 * Force inline in cygwin to work around a compiler bug. See
 * https://github.com/numpy/numpy/pull/22315#issuecomment-1267757584
 */
#define X86_SIMD_SORT_INLINE static __attribute__((always_inline))
#define X86_SIMD_SORT_FINLINE static __attribute__((always_inline))
#elif defined(__GNUC__)
#define X86_SIMD_SORT_INLINE static inline
#define X86_SIMD_SORT_FINLINE static __attribute__((always_inline))
#else
#define X86_SIMD_SORT_INLINE static
#define X86_SIMD_SORT_FINLINE static
#endif

#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)

template <typename type>
struct zmm_vector;

template <typename type>
struct ymm_vector;

// Regular quicksort routines:
template <typename T>
void avx512_qsort(T *arr, int64_t arrsize);

template <typename T>
void inline avx512_qsort(T *arr, int64_t from_index, int64_t to_index);

template <typename T>
bool is_a_nan(T elem) {
    return std::isnan(elem);
}

template <typename T>
X86_SIMD_SORT_INLINE T get_pivot_scalar(T *arr, const int64_t left, const int64_t right) {
    // median of 8 equally spaced elements
    int64_t NUM_ELEMENTS = 8;
    int64_t MID = NUM_ELEMENTS / 2;
    int64_t size = (right - left) / NUM_ELEMENTS;
    T temp[NUM_ELEMENTS];
    for (int64_t i = 0; i < NUM_ELEMENTS; i++) temp[i] = arr[left + (i * size)];
    std::sort(temp, temp + NUM_ELEMENTS);
    return temp[MID];
}

template <typename vtype, typename T = typename vtype::type_t>
bool comparison_func_ge(const T &a, const T &b) {
    return a < b;
}

template <typename vtype, typename T = typename vtype::type_t>
bool comparison_func_gt(const T &a, const T &b) {
    return a <= b;
}

/*
 * COEX == Compare and Exchange two registers by swapping min and max values
 */
template <typename vtype, typename mm_t>
static void COEX(mm_t &a, mm_t &b) {
    mm_t temp = a;
    a = vtype::min(a, b);
    b = vtype::max(temp, b);
}
template <typename vtype, typename zmm_t = typename vtype::zmm_t,
          typename opmask_t = typename vtype::opmask_t>
static inline zmm_t cmp_merge(zmm_t in1, zmm_t in2, opmask_t mask) {
    zmm_t min = vtype::min(in2, in1);
    zmm_t max = vtype::max(in2, in1);
    return vtype::mask_mov(min, mask, max);  // 0 -> min, 1 -> max
}
/*
 * Partition one ZMM register based on the pivot and return the
 * number of elements that are greater than or equal to the pivot.
 */
template <typename vtype, typename type_t, typename zmm_t>
static inline int32_t partition_vec(type_t *arr, int64_t left, int64_t right,
                                    const zmm_t curr_vec, const zmm_t pivot_vec,
                                    zmm_t *smallest_vec, zmm_t *biggest_vec,
                                    bool use_gt) {
    /* which elements are larger than or equal to the pivot */
    typename vtype::opmask_t mask;
    if (use_gt) mask = vtype::gt(curr_vec, pivot_vec);
    else mask = vtype::ge(curr_vec, pivot_vec);
    int32_t amount_ge_pivot = _mm_popcnt_u32((int32_t)mask);
    vtype::mask_compressstoreu(arr + left, vtype::knot_opmask(mask),
                               curr_vec);
    vtype::mask_compressstoreu(arr + right - amount_ge_pivot, mask,
                               curr_vec);
    *smallest_vec = vtype::min(curr_vec, *smallest_vec);
    *biggest_vec = vtype::max(curr_vec, *biggest_vec);
    return amount_ge_pivot;
}
/*
 * Partition an array based on the pivot and return the index of the
 * first element that is greater than or equal to the pivot.
 */
template <typename vtype, typename type_t>
static inline int64_t partition_avx512(type_t *arr, int64_t left, int64_t right,
                                       type_t pivot, type_t *smallest,
                                       type_t *biggest, bool use_gt) {
    auto comparison_func = use_gt ? comparison_func_gt<vtype> : comparison_func_ge<vtype>;
    /* make array length divisible by vtype::numlanes, shortening the array */
    for (int32_t i = (right - left) % vtype::numlanes; i > 0; --i) {
        *smallest = std::min(*smallest, arr[left], comparison_func);
        *biggest = std::max(*biggest, arr[left], comparison_func);
        if (!comparison_func(arr[left], pivot)) {
            std::swap(arr[left], arr[--right]);
        } else {
            ++left;
        }
    }

    if (left == right)
        return left; /* less than vtype::numlanes elements in the array */

    using zmm_t = typename vtype::zmm_t;
    zmm_t pivot_vec = vtype::set1(pivot);
    zmm_t min_vec = vtype::set1(*smallest);
    zmm_t max_vec = vtype::set1(*biggest);

    if (right - left == vtype::numlanes) {
        zmm_t vec = vtype::loadu(arr + left);
        int32_t amount_ge_pivot =
            partition_vec<vtype>(arr, left, left + vtype::numlanes, vec,
                                 pivot_vec, &min_vec, &max_vec, use_gt);
        *smallest = vtype::reducemin(min_vec);
        *biggest = vtype::reducemax(max_vec);
        return left + (vtype::numlanes - amount_ge_pivot);
    }

    // first and last vtype::numlanes values are partitioned at the end
    zmm_t vec_left = vtype::loadu(arr + left);
    zmm_t vec_right = vtype::loadu(arr + (right - vtype::numlanes));
    // store points of the vectors
    int64_t r_store = right - vtype::numlanes;
    int64_t l_store = left;
    // indices for loading the elements
    left += vtype::numlanes;
    right -= vtype::numlanes;
    while (right - left != 0) {
        zmm_t curr_vec;
        /*
         * if fewer elements are stored on the right side of the array,
         * then next elements are loaded from the right side,
         * otherwise from the left side
         */
        if ((r_store + vtype::numlanes) - right < left - l_store) {
            right -= vtype::numlanes;
            curr_vec = vtype::loadu(arr + right);
        } else {
            curr_vec = vtype::loadu(arr + left);
            left += vtype::numlanes;
        }
        // partition the current vector and save it on both sides of the array
        int32_t amount_ge_pivot =
            partition_vec<vtype>(arr, l_store, r_store + vtype::numlanes,
                                 curr_vec, pivot_vec, &min_vec, &max_vec, use_gt);
        r_store -= amount_ge_pivot;
        l_store += (vtype::numlanes - amount_ge_pivot);
    }

    /* partition and save vec_left and vec_right */
    int32_t amount_ge_pivot =
        partition_vec<vtype>(arr, l_store, r_store + vtype::numlanes, vec_left,
                             pivot_vec, &min_vec, &max_vec, use_gt);
    l_store += (vtype::numlanes - amount_ge_pivot);
    amount_ge_pivot =
        partition_vec<vtype>(arr, l_store, l_store + vtype::numlanes, vec_right,
                             pivot_vec, &min_vec, &max_vec, use_gt);
    l_store += (vtype::numlanes - amount_ge_pivot);
    *smallest = vtype::reducemin(min_vec);
    *biggest = vtype::reducemax(max_vec);
    return l_store;
}

template <typename vtype, int num_unroll,
          typename type_t = typename vtype::type_t>
static inline int64_t partition_avx512_unrolled(type_t *arr, int64_t left,
                                                int64_t right, type_t pivot,
                                                type_t *smallest,
                                                type_t *biggest, bool use_gt) {
    if (right - left <= 2 * num_unroll * vtype::numlanes) {
        return partition_avx512<vtype>(arr, left, right, pivot, smallest,
                                       biggest, use_gt);
    }

    auto comparison_func = use_gt ? comparison_func_gt<vtype> : comparison_func_ge<vtype>;
    /* make array length divisible by 8*vtype::numlanes, shortening the array */
    for (int32_t i = ((right - left) % (num_unroll * vtype::numlanes)); i > 0;
         --i) {
        *smallest = std::min(*smallest, arr[left], comparison_func);
        *biggest = std::max(*biggest, arr[left], comparison_func);
        if (!comparison_func(arr[left], pivot)) {
            std::swap(arr[left], arr[--right]);
        } else {
            ++left;
        }
    }

    if (left == right)
        return left; /* less than vtype::numlanes elements in the array */

    using zmm_t = typename vtype::zmm_t;
    zmm_t pivot_vec = vtype::set1(pivot);
    zmm_t min_vec = vtype::set1(*smallest);
    zmm_t max_vec = vtype::set1(*biggest);

    // We will now have at least 16 registers worth of data to process:
    // left and right vtype::numlanes values are partitioned at the end
    zmm_t vec_left[num_unroll], vec_right[num_unroll];
#pragma GCC unroll 8
    for (int ii = 0; ii < num_unroll; ++ii) {
        vec_left[ii] = vtype::loadu(arr + left + vtype::numlanes * ii);
        vec_right[ii] =
            vtype::loadu(arr + (right - vtype::numlanes * (num_unroll - ii)));
    }
    // store points of the vectors
    int64_t r_store = right - vtype::numlanes;
    int64_t l_store = left;
    // indices for loading the elements
    left += num_unroll * vtype::numlanes;
    right -= num_unroll * vtype::numlanes;
    while (right - left != 0) {
        zmm_t curr_vec[num_unroll];
        /*
         * if fewer elements are stored on the right side of the array,
         * then next elements are loaded from the right side,
         * otherwise from the left side
         */
        if ((r_store + vtype::numlanes) - right < left - l_store) {
            right -= num_unroll * vtype::numlanes;
#pragma GCC unroll 8
            for (int ii = 0; ii < num_unroll; ++ii) {
                curr_vec[ii] = vtype::loadu(arr + right + ii * vtype::numlanes);
            }
        } else {
#pragma GCC unroll 8
            for (int ii = 0; ii < num_unroll; ++ii) {
                curr_vec[ii] = vtype::loadu(arr + left + ii * vtype::numlanes);
            }
            left += num_unroll * vtype::numlanes;
        }
        // partition the current vector and save it on both sides of the array
#pragma GCC unroll 8
        for (int ii = 0; ii < num_unroll; ++ii) {
            int32_t amount_ge_pivot = partition_vec<vtype>(
                arr, l_store, r_store + vtype::numlanes, curr_vec[ii],
                pivot_vec, &min_vec, &max_vec, use_gt);
            l_store += (vtype::numlanes - amount_ge_pivot);
            r_store -= amount_ge_pivot;
        }
    }

    /* partition and save vec_left[8] and vec_right[8] */
#pragma GCC unroll 8
    for (int ii = 0; ii < num_unroll; ++ii) {
        int32_t amount_ge_pivot =
            partition_vec<vtype>(arr, l_store, r_store + vtype::numlanes,
                                 vec_left[ii], pivot_vec, &min_vec, &max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        r_store -= amount_ge_pivot;
    }
#pragma GCC unroll 8
    for (int ii = 0; ii < num_unroll; ++ii) {
        int32_t amount_ge_pivot =
            partition_vec<vtype>(arr, l_store, r_store + vtype::numlanes,
                                 vec_right[ii], pivot_vec, &min_vec, &max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        r_store -= amount_ge_pivot;
    }
    *smallest = vtype::reducemin(min_vec);
    *biggest = vtype::reducemax(max_vec);
    return l_store;
}

// to_index (exclusive)
template <typename vtype, typename type_t>
static int64_t vectorized_partition(type_t *arr, int64_t from_index, int64_t to_index, type_t pivot, bool use_gt) {
    type_t smallest = vtype::type_max();
    type_t biggest = vtype::type_min();
    int64_t pivot_index = partition_avx512_unrolled<vtype, 2>(
        arr, from_index, to_index, pivot, &smallest, &biggest, use_gt);
    return pivot_index;
}

// partitioning functions
template <typename T>
void avx512_dual_pivot_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2){
    const T pivot1 = arr[index_pivot1];
    const T pivot2 = arr[index_pivot2];

    const int64_t low = from_index;
    const int64_t high = to_index;
    const int64_t start = low + 1;
    const int64_t end = high - 1;

    std::swap(arr[index_pivot1], arr[low]);
    std::swap(arr[index_pivot2], arr[end]);

    const int64_t pivot_index2 = vectorized_partition<zmm_vector<T>, T>(arr, start, end, pivot2, true); // use_gt = true
    std::swap(arr[end], arr[pivot_index2]);
    int64_t upper = pivot_index2;

    // if all other elements are greater than pivot2 (and pivot1), no need to do further partitioning
    if (upper == start) {
        pivot_indices[0] = low;
        pivot_indices[1] = upper;
        return;
    }

    const int64_t pivot_index1 = vectorized_partition<zmm_vector<T>, T>(arr, start, upper, pivot1, false); // use_ge (use_gt = false)
    int64_t lower = pivot_index1 - 1;
    std::swap(arr[low], arr[lower]);

    pivot_indices[0] = lower;
    pivot_indices[1] = upper;
}

template <typename T>
void avx512_single_pivot_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot){
    const T pivot = arr[index_pivot];

    const int64_t low = from_index;
    const int64_t high = to_index;
    const int64_t end = high - 1;

    const int64_t pivot_index1 = vectorized_partition<zmm_vector<T>, T>(arr, low, high, pivot, false); // use_gt = false (use_ge)
    int64_t lower = pivot_index1;

    const int64_t pivot_index2 = vectorized_partition<zmm_vector<T>, T>(arr, pivot_index1, high, pivot, true); // use_gt = true
    int64_t upper = pivot_index2;

    pivot_indices[0] = lower;
    pivot_indices[1] = upper;
}

template <typename T>
void inline avx512_fast_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2) {
    if (index_pivot1 != index_pivot2) {
        avx512_dual_pivot_partition<T>(arr, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
    }
    else {
        avx512_single_pivot_partition<T>(arr, from_index, to_index, pivot_indices, index_pivot1);
    }
}

template <typename T>
void inline insertion_sort(T *arr, int32_t from_index, int32_t to_index) {
    for (int i, k = from_index; ++k < to_index; ) {
        T ai = arr[i = k];

        if (ai < arr[i - 1]) {
            while (--i >= from_index && ai < arr[i]) {
                arr[i + 1] = arr[i];
            }
            arr[i + 1] = ai;
        }
    }
}

template <typename T>
void inline avx512_fast_sort(T *arr, int64_t from_index, int64_t to_index, const int32_t INS_SORT_THRESHOLD) {
    int32_t size = to_index - from_index;

    if (size <= INS_SORT_THRESHOLD) {
        insertion_sort<T>(arr, from_index, to_index);
    }
    else {
        avx512_qsort<T>(arr, from_index, to_index);
    }
}

#endif // AVX512_QSORT_COMMON
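The partition kernel above leans entirely on the two mask_compressstoreu calls: lanes below the pivot are packed contiguously at the left store cursor, and lanes greater than or equal to it just below the right cursor. A scalar sketch of one such block step (illustrative only, not part of the sources):

#include <cstdint>
#include <iostream>
#include <vector>

// Scalar model of partition_vec: given one block of lane values, write the
// ones below the pivot at the left store cursor and the rest just below the
// right store cursor, mirroring the two mask_compressstoreu calls.
int32_t partition_block_model(std::vector<int32_t> &arr, int64_t left,
                              int64_t right, const std::vector<int32_t> &block,
                              int32_t pivot) {
    std::vector<int32_t> lo, hi;
    for (int32_t v : block) (v >= pivot ? hi : lo).push_back(v);
    for (size_t i = 0; i < lo.size(); ++i) arr[left + i] = lo[i];
    for (size_t i = 0; i < hi.size(); ++i) arr[right - hi.size() + i] = hi[i];
    return (int32_t)hi.size();  // amount_ge_pivot
}

int main() {
    std::vector<int32_t> arr(8, 0);
    std::vector<int32_t> block{5, 1, 9, 3, 7, 2, 8, 4};
    int32_t ge = partition_block_model(arr, 0, 8, block, 5);
    std::cout << "amount_ge_pivot = " << ge << '\n';  // 4 (5, 9, 7, 8)
    for (int32_t v : arr) std::cout << v << ' ';      // 1 3 2 4 5 9 7 8
}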
@@ -21,12 +21,15 @@
 * questions.
 *
 */
#include "simdsort-support.hpp"
#ifdef __SIMDSORT_SUPPORTED_LINUX

#pragma GCC target("avx512dq", "avx512f")
#include "avx512-32bit-qsort.hpp"
#include "avx512-64bit-qsort.hpp"
#include "classfile_constants.h"


#define DLL_PUBLIC __attribute__((visibility("default")))
#define INSERTION_SORT_THRESHOLD_32BIT 16
#define INSERTION_SORT_THRESHOLD_64BIT 20
@@ -36,35 +39,41 @@ extern "C" {
DLL_PUBLIC void avx512_sort(void *array, int elem_type, int32_t from_index, int32_t to_index) {
    switch(elem_type) {
        case JVM_T_INT:
            avx512_fast_sort<int32_t>((int32_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
            avx512_fast_sort((int32_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
            break;
        case JVM_T_LONG:
            avx512_fast_sort<int64_t>((int64_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_64BIT);
            avx512_fast_sort((int64_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_64BIT);
            break;
        case JVM_T_FLOAT:
            avx512_fast_sort<float>((float*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
            avx512_fast_sort((float*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
            break;
        case JVM_T_DOUBLE:
            avx512_fast_sort<double>((double*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_64BIT);
            avx512_fast_sort((double*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_64BIT);
            break;
        default:
            assert(false, "Unexpected type");
    }
}

DLL_PUBLIC void avx512_partition(void *array, int elem_type, int32_t from_index, int32_t to_index, int32_t *pivot_indices, int32_t index_pivot1, int32_t index_pivot2) {
    switch(elem_type) {
        case JVM_T_INT:
            avx512_fast_partition<int32_t>((int32_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            avx512_fast_partition((int32_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            break;
        case JVM_T_LONG:
            avx512_fast_partition<int64_t>((int64_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            avx512_fast_partition((int64_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            break;
        case JVM_T_FLOAT:
            avx512_fast_partition<float>((float*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            avx512_fast_partition((float*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            break;
        case JVM_T_DOUBLE:
            avx512_fast_partition<double>((double*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            avx512_fast_partition((double*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            break;
        default:
            assert(false, "Unexpected type");
    }
}

}

#endif
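For reference, a hypothetical standalone caller of the exported entry point might look as follows. It is a sketch only: the array contents are made up, and it assumes linking against libsimdsort; JVM_T_INT comes from classfile_constants.h and to_index is exclusive.

// Hypothetical caller, not part of the diff.
#include <cstdint>
#include <iostream>
#include "classfile_constants.h"  // defines JVM_T_INT etc.

extern "C" void avx512_sort(void *array, int elem_type, int32_t from_index,
                            int32_t to_index);

int main() {
    int32_t data[] = {42, 7, 19, 3, 25, 11, 8, 30};
    avx512_sort(data, JVM_T_INT, 0, 8);  // sorts data[0, 8)
    for (int32_t v : data) std::cout << v << ' ';  // 3 7 8 11 19 25 30 42
}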
src/java.base/linux/native/libsimdsort/simdsort-support.hpp (new file, 39 lines)
@@ -0,0 +1,39 @@
/*
 * Copyright (c) 2023 Intel Corporation. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SIMDSORT_SUPPORT_HPP
#define SIMDSORT_SUPPORT_HPP
#include <stdio.h>
#include <stdlib.h>

#undef assert
#define assert(cond, msg) { if (!(cond)) { fprintf(stderr, "assert fails %s %d: %s\n", __FILE__, __LINE__, msg); abort(); }}


// GCC >= 7.5 is needed to build AVX2 portions of libsimdsort using C++17 features
#if defined(_LP64) && (defined(__GNUC__) && ((__GNUC__ > 7) || ((__GNUC__ == 7) && (__GNUC_MINOR__ >= 5))))
#define __SIMDSORT_SUPPORTED_LINUX
#endif

#endif //SIMDSORT_SUPPORT_HPP
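Note that the assert defined above takes a message argument, unlike the standard single-argument form, and aborts after printing the file and line. A minimal usage sketch (illustrative only; the condition and message are made up):

#include "simdsort-support.hpp"

int main() {
    int lanes = 16;
    // Prints "assert fails <file> <line>: ..." and aborts if the check fails.
    assert(lanes == 16, "expected 16 lanes per ZMM register for 32-bit keys");
    return 0;
}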
src/java.base/linux/native/libsimdsort/xss-common-includes.h (new file, 101 lines)
@@ -0,0 +1,101 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef XSS_COMMON_INCLUDES
#define XSS_COMMON_INCLUDES
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
/*
Workaround for the bug in GCC12 (that was fixed in GCC 12.3.1).
More details are available at:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wuninitialized"
#include <immintrin.h>
#pragma GCC diagnostic pop
#include <limits>
#include <vector>

#define X86_SIMD_SORT_INFINITY std::numeric_limits<double>::infinity()
#define X86_SIMD_SORT_INFINITYF std::numeric_limits<float>::infinity()
#define X86_SIMD_SORT_INFINITYH 0x7c00
#define X86_SIMD_SORT_NEGINFINITYH 0xfc00
#define X86_SIMD_SORT_MAX_UINT16 std::numeric_limits<uint16_t>::max()
#define X86_SIMD_SORT_MAX_INT16 std::numeric_limits<int16_t>::max()
#define X86_SIMD_SORT_MIN_INT16 std::numeric_limits<int16_t>::min()
#define X86_SIMD_SORT_MAX_UINT32 std::numeric_limits<uint32_t>::max()
#define X86_SIMD_SORT_MAX_INT32 std::numeric_limits<int32_t>::max()
#define X86_SIMD_SORT_MIN_INT32 std::numeric_limits<int32_t>::min()
#define X86_SIMD_SORT_MAX_UINT64 std::numeric_limits<uint64_t>::max()
#define X86_SIMD_SORT_MAX_INT64 std::numeric_limits<int64_t>::max()
#define X86_SIMD_SORT_MIN_INT64 std::numeric_limits<int64_t>::min()
#define ZMM_MAX_DOUBLE _mm512_set1_pd(X86_SIMD_SORT_INFINITY)
#define ZMM_MAX_UINT64 _mm512_set1_epi64(X86_SIMD_SORT_MAX_UINT64)
#define ZMM_MAX_INT64 _mm512_set1_epi64(X86_SIMD_SORT_MAX_INT64)
#define ZMM_MAX_FLOAT _mm512_set1_ps(X86_SIMD_SORT_INFINITYF)
#define ZMM_MAX_UINT _mm512_set1_epi32(X86_SIMD_SORT_MAX_UINT32)
#define ZMM_MAX_INT _mm512_set1_epi32(X86_SIMD_SORT_MAX_INT32)
#define ZMM_MAX_HALF _mm512_set1_epi16(X86_SIMD_SORT_INFINITYH)
#define YMM_MAX_HALF _mm256_set1_epi16(X86_SIMD_SORT_INFINITYH)
#define ZMM_MAX_UINT16 _mm512_set1_epi16(X86_SIMD_SORT_MAX_UINT16)
#define ZMM_MAX_INT16 _mm512_set1_epi16(X86_SIMD_SORT_MAX_INT16)
#define SHUFFLE_MASK(a, b, c, d) (a << 6) | (b << 4) | (c << 2) | d

#define PRAGMA(x) _Pragma(#x)
#define UNUSED(x) (void)(x)

/* Compiler specific macros */
#if defined(__GNUC__)
#define X86_SIMD_SORT_INLINE static inline
#define X86_SIMD_SORT_FINLINE static inline __attribute__((always_inline))
#else
#define X86_SIMD_SORT_INLINE static
#define X86_SIMD_SORT_FINLINE static
#endif

#if __GNUC__ >= 8
#define X86_SIMD_SORT_UNROLL_LOOP(num) PRAGMA(GCC unroll num)
#else
#define X86_SIMD_SORT_UNROLL_LOOP(num)
#endif

typedef size_t arrsize_t;

template <typename type>
struct zmm_vector;

template <typename type>
struct ymm_vector;

template <typename type>
struct avx2_vector;

#endif // XSS_COMMON_INCLUDES
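The unroll macro above expands to _Pragma("GCC unroll N") on GCC 8 and newer and to nothing otherwise, so the same loop compiles everywhere. A small usage sketch (illustrative; assumes the header above is on the include path):

#include <cstddef>
#include "xss-common-includes.h"

// On GCC >= 8 the pragma asks the compiler to unroll the loop by 4;
// elsewhere the macro vanishes and the loop is compiled as written.
float sum4(const float *p, size_t n) {
    float acc = 0.0f;
    X86_SIMD_SORT_UNROLL_LOOP(4)
    for (size_t i = 0; i < n; ++i) {
        acc += p[i];
    }
    return acc;
}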
src/java.base/linux/native/libsimdsort/xss-common-qsort.h (new file, 528 lines)
@@ -0,0 +1,528 @@
|
/*
|
||||||
|
* Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
|
||||||
|
* Copyright (c) 2021 Serge Sans Paille. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)
|
||||||
|
|
||||||
|
#ifndef XSS_COMMON_QSORT
|
||||||
|
#define XSS_COMMON_QSORT
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Quicksort using AVX-512. The ideas and code are based on these two research
|
||||||
|
* papers [1] and [2]. On a high level, the idea is to vectorize quicksort
|
||||||
|
* partitioning using AVX-512 compressstore instructions. If the array size is
|
||||||
|
* < 128, then use Bitonic sorting network implemented on 512-bit registers.
|
||||||
|
* The precise network definitions depend on the dtype and are defined in
|
||||||
|
* separate files: avx512-16bit-qsort.hpp, avx512-32bit-qsort.hpp and
|
||||||
|
* avx512-64bit-qsort.hpp. Article [4] is a good resource for bitonic sorting
|
||||||
|
* network. The core implementations of the vectorized qsort functions
|
||||||
|
* avx512_qsort<T>(T*, arrsize_t) are modified versions of avx2 quicksort
|
||||||
|
* presented in the paper [2] and source code associated with that paper [3].
|
||||||
|
*
|
||||||
|
* [1] Fast and Robust Vectorized In-Place Sorting of Primitive Types
|
||||||
|
* https://drops.dagstuhl.de/opus/volltexte/2021/13775/
|
||||||
|
*
|
||||||
|
* [2] A Novel Hybrid Quicksort Algorithm Vectorized using AVX-512 on Intel
|
||||||
|
* Skylake https://arxiv.org/pdf/1704.08579.pdf
|
||||||
|
*
|
||||||
|
* [3] https://github.com/simd-sorting/fast-and-robust: SPDX-License-Identifier:
|
||||||
|
* MIT
|
||||||
|
*
|
||||||
|
* [4] http://mitp-content-server.mit.edu:18180/books/content/sectbyfn?collid=books_pres_0&fn=Chapter%2027.pdf&id=8030
|
||||||
|
*
|
||||||
|
*/

#include "xss-common-includes.h"
#include "xss-pivot-selection.hpp"
#include "xss-network-qsort.hpp"


template <typename T>
bool is_a_nan(T elem) {
    return std::isnan(elem);
}

template <typename T>
X86_SIMD_SORT_INLINE T get_pivot_scalar(T *arr, const int64_t left, const int64_t right) {
    // median of 8 equally spaced elements
    int64_t NUM_ELEMENTS = 8;
    int64_t MID = NUM_ELEMENTS / 2;
    int64_t size = (right - left) / NUM_ELEMENTS;
    T temp[NUM_ELEMENTS];
    for (int64_t i = 0; i < NUM_ELEMENTS; i++) temp[i] = arr[left + (i * size)];
    std::sort(temp, temp + NUM_ELEMENTS);
    return temp[MID];
}

template <typename vtype, typename T = typename vtype::type_t>
bool comparison_func_ge(const T &a, const T &b) {
    return a < b;
}

template <typename vtype, typename T = typename vtype::type_t>
bool comparison_func_gt(const T &a, const T &b) {
    return a <= b;
}

/*
 * COEX == Compare and Exchange two registers by swapping min and max values
 */
template <typename vtype, typename mm_t>
X86_SIMD_SORT_INLINE void COEX(mm_t &a, mm_t &b) {
    mm_t temp = a;
    a = vtype::min(a, b);
    b = vtype::max(temp, b);
}

template <typename vtype, typename reg_t = typename vtype::reg_t,
          typename opmask_t = typename vtype::opmask_t>
X86_SIMD_SORT_INLINE reg_t cmp_merge(reg_t in1, reg_t in2, opmask_t mask) {
    reg_t min = vtype::min(in2, in1);
    reg_t max = vtype::max(in2, in1);
    return vtype::mask_mov(min, mask, max); // 0 -> min, 1 -> max
}

template <typename vtype, typename type_t, typename reg_t>
int avx512_double_compressstore(type_t *left_addr, type_t *right_addr,
                                typename vtype::opmask_t k, reg_t reg) {
    int amount_ge_pivot = _mm_popcnt_u32((int)k);

    vtype::mask_compressstoreu(left_addr, vtype::knot_opmask(k), reg);
    vtype::mask_compressstoreu(right_addr + vtype::numlanes - amount_ge_pivot,
                               k, reg);

    return amount_ge_pivot;
}
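The double compress-store above is the heart of the vectorized partition: one popcount tells how many lanes are >= the pivot, and two masked compress-stores scatter a vector's lanes to the two ends of the destination in a single pass. A minimal standalone sketch of the same idea for 16 int32 lanes (a hypothetical demo, assuming a CPU and compiler with AVX-512F, e.g. built with -mavx512f; not code from this header):

#include <immintrin.h>
#include <cstdio>

int main() {
    int src[16] = {9, 1, 8, 2, 7, 3, 6, 4, 5, 0, 15, 11, 14, 12, 13, 10};
    int dst[16];
    __m512i v   = _mm512_loadu_si512(src);
    __m512i piv = _mm512_set1_epi32(8);
    __mmask16 ge = _mm512_cmpge_epi32_mask(v, piv);          // lanes >= pivot
    int n_ge = _mm_popcnt_u32(ge);                           // how many go right
    _mm512_mask_compressstoreu_epi32(dst, (__mmask16)~ge, v);          // < pivot, packed left
    _mm512_mask_compressstoreu_epi32(dst + 16 - n_ge, ge, v);          // >= pivot, packed right
    // prints 1 2 7 3 6 4 5 0  9 8 15 11 14 12 13 10 (order within halves preserved)
    for (int i = 0; i < 16; i++) printf("%d ", dst[i]);
    printf("\n");
}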

// Generic function dispatches to AVX2 or AVX512 code
template <typename vtype, typename type_t,
          typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_INLINE arrsize_t partition_vec(type_t *l_store, type_t *r_store,
                                             const reg_t curr_vec,
                                             const reg_t pivot_vec,
                                             reg_t &smallest_vec,
                                             reg_t &biggest_vec, bool use_gt) {
    //typename vtype::opmask_t ge_mask = vtype::ge(curr_vec, pivot_vec);
    typename vtype::opmask_t mask;
    if (use_gt) mask = vtype::gt(curr_vec, pivot_vec);
    else mask = vtype::ge(curr_vec, pivot_vec);

    int amount_ge_pivot =
        vtype::double_compressstore(l_store, r_store, mask, curr_vec);

    smallest_vec = vtype::min(curr_vec, smallest_vec);
    biggest_vec = vtype::max(curr_vec, biggest_vec);

    return amount_ge_pivot;
}

/*
 * Partition an array based on the pivot and return the index of the
 * first element that is greater than or equal to the pivot.
 */
template <typename vtype, typename type_t>
X86_SIMD_SORT_INLINE arrsize_t partition_avx512(type_t *arr, arrsize_t left,
                                                arrsize_t right, type_t pivot,
                                                type_t *smallest,
                                                type_t *biggest,
                                                bool use_gt) {
    auto comparison_func = use_gt ? comparison_func_gt<vtype> : comparison_func_ge<vtype>;
    /* make array length divisible by vtype::numlanes, shortening the array */
    for (int32_t i = (right - left) % vtype::numlanes; i > 0; --i) {
        *smallest = std::min(*smallest, arr[left], comparison_func);
        *biggest = std::max(*biggest, arr[left], comparison_func);
        if (!comparison_func(arr[left], pivot)) {
            std::swap(arr[left], arr[--right]);
        } else {
            ++left;
        }
    }

    if (left == right)
        return left; /* less than vtype::numlanes elements in the array */

    using reg_t = typename vtype::reg_t;
    reg_t pivot_vec = vtype::set1(pivot);
    reg_t min_vec = vtype::set1(*smallest);
    reg_t max_vec = vtype::set1(*biggest);

    if (right - left == vtype::numlanes) {
        reg_t vec = vtype::loadu(arr + left);
        arrsize_t unpartitioned = right - left - vtype::numlanes;
        arrsize_t l_store = left;

        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 vec, pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        *smallest = vtype::reducemin(min_vec);
        *biggest = vtype::reducemax(max_vec);
        return l_store;
    }

    // first and last vtype::numlanes values are partitioned at the end
    reg_t vec_left = vtype::loadu(arr + left);
    reg_t vec_right = vtype::loadu(arr + (right - vtype::numlanes));
    // store points of the vectors
    arrsize_t unpartitioned = right - left - vtype::numlanes;
    arrsize_t l_store = left;
    // indices for loading the elements
    left += vtype::numlanes;
    right -= vtype::numlanes;
    while (right - left != 0) {
        reg_t curr_vec;
        /*
         * if fewer elements are stored on the right side of the array,
         * then the next elements are loaded from the right side,
         * otherwise from the left side
         */
        if ((l_store + unpartitioned + vtype::numlanes) - right <
            left - l_store) {
            right -= vtype::numlanes;
            curr_vec = vtype::loadu(arr + right);
        } else {
            curr_vec = vtype::loadu(arr + left);
            left += vtype::numlanes;
        }
        // partition the current vector and save it on both sides of the array
        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 curr_vec, pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        unpartitioned -= vtype::numlanes;
    }

    /* partition and save vec_left and vec_right */
    arrsize_t amount_ge_pivot =
        partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                             vec_left, pivot_vec, min_vec, max_vec, use_gt);
    l_store += (vtype::numlanes - amount_ge_pivot);
    unpartitioned -= vtype::numlanes;

    amount_ge_pivot =
        partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                             vec_right, pivot_vec, min_vec, max_vec, use_gt);
    l_store += (vtype::numlanes - amount_ge_pivot);
    unpartitioned -= vtype::numlanes;

    *smallest = vtype::reducemin(min_vec);
    *biggest = vtype::reducemax(max_vec);
    return l_store;
}
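Behaviourally, `partition_avx512` is a (much faster) stand-in for a scalar partition pass: with `use_gt == false`, the returned index is that of the first element >= the pivot, and `*smallest`/`*biggest` report the min and max seen, which the caller uses to skip degenerate recursion. A scalar reference for that contract using the standard library (illustrative only, not the vectorized algorithm):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> v{5, 9, 1, 7, 3, 8, 2, 6};
    int pivot = 5;
    // use_gt == false: elements < pivot end up left of the returned index,
    // elements >= pivot right of it (relative order is not preserved).
    auto mid = std::partition(v.begin(), v.end(),
                              [&](int x) { return x < pivot; });
    printf("first element >= pivot is at index %td\n", mid - v.begin());
}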

template <typename vtype, int num_unroll,
          typename type_t = typename vtype::type_t>
X86_SIMD_SORT_INLINE arrsize_t
partition_avx512_unrolled(type_t *arr, arrsize_t left, arrsize_t right,
                          type_t pivot, type_t *smallest, type_t *biggest, bool use_gt) {
    if constexpr (num_unroll == 0) {
        return partition_avx512<vtype>(arr, left, right, pivot, smallest,
                                       biggest, use_gt);
    }

    /* Use regular partition_avx512 for smaller arrays */
    if (right - left < 3 * num_unroll * vtype::numlanes) {
        return partition_avx512<vtype>(arr, left, right, pivot, smallest,
                                       biggest, use_gt);
    }

    auto comparison_func = use_gt ? comparison_func_gt<vtype> : comparison_func_ge<vtype>;
    /* make array length divisible by vtype::numlanes, shortening the array */
    for (int32_t i = ((right - left) % (vtype::numlanes)); i > 0; --i) {
        *smallest = std::min(*smallest, arr[left], comparison_func);
        *biggest = std::max(*biggest, arr[left], comparison_func);
        if (!comparison_func(arr[left], pivot)) {
            std::swap(arr[left], arr[--right]);
        } else {
            ++left;
        }
    }

    arrsize_t unpartitioned = right - left - vtype::numlanes;
    arrsize_t l_store = left;

    using reg_t = typename vtype::reg_t;
    reg_t pivot_vec = vtype::set1(pivot);
    reg_t min_vec = vtype::set1(*smallest);
    reg_t max_vec = vtype::set1(*biggest);

    /* Calculate and load more registers to make the rest of the array a
     * multiple of num_unroll. These registers will be partitioned at the very
     * end. */
    int vecsToPartition = ((right - left) / vtype::numlanes) % num_unroll;
    reg_t vec_align[num_unroll];
    for (int i = 0; i < vecsToPartition; i++) {
        vec_align[i] = vtype::loadu(arr + left + i * vtype::numlanes);
    }
    left += vecsToPartition * vtype::numlanes;

    /* We will now have at least 3*num_unroll registers worth of data to
     * process. Load left and right vtype::numlanes*num_unroll values into
     * registers to make space for the in-place partition. The vec_left and
     * vec_right registers are partitioned at the end */
    reg_t vec_left[num_unroll], vec_right[num_unroll];
    X86_SIMD_SORT_UNROLL_LOOP(8)
    for (int ii = 0; ii < num_unroll; ++ii) {
        vec_left[ii] = vtype::loadu(arr + left + vtype::numlanes * ii);
        vec_right[ii] =
            vtype::loadu(arr + (right - vtype::numlanes * (num_unroll - ii)));
    }
    /* indices for loading the elements */
    left += num_unroll * vtype::numlanes;
    right -= num_unroll * vtype::numlanes;
    while (right - left != 0) {
        reg_t curr_vec[num_unroll];
        /*
         * if fewer elements are stored on the right side of the array,
         * then the next elements are loaded from the right side,
         * otherwise from the left side
         */
        if ((l_store + unpartitioned + vtype::numlanes) - right <
            left - l_store) {
            right -= num_unroll * vtype::numlanes;
            X86_SIMD_SORT_UNROLL_LOOP(8)
            for (int ii = 0; ii < num_unroll; ++ii) {
                curr_vec[ii] = vtype::loadu(arr + right + ii * vtype::numlanes);
                /*
                 * error: '_mm_prefetch' needs target feature mmx on clang-cl
                 */
#if !(defined(_MSC_VER) && defined(__clang__))
                _mm_prefetch((char *)(arr + right + ii * vtype::numlanes -
                                      num_unroll * vtype::numlanes),
                             _MM_HINT_T0);
#endif
            }
        } else {
            X86_SIMD_SORT_UNROLL_LOOP(8)
            for (int ii = 0; ii < num_unroll; ++ii) {
                curr_vec[ii] = vtype::loadu(arr + left + ii * vtype::numlanes);
                /*
                 * error: '_mm_prefetch' needs target feature mmx on clang-cl
                 */
#if !(defined(_MSC_VER) && defined(__clang__))
                _mm_prefetch((char *)(arr + left + ii * vtype::numlanes +
                                      num_unroll * vtype::numlanes),
                             _MM_HINT_T0);
#endif
            }
            left += num_unroll * vtype::numlanes;
        }
        /* partition the current vectors and save them on both sides of the
         * array */
        X86_SIMD_SORT_UNROLL_LOOP(8)
        for (int ii = 0; ii < num_unroll; ++ii) {
            arrsize_t amount_ge_pivot = partition_vec<vtype>(
                arr + l_store, arr + l_store + unpartitioned, curr_vec[ii],
                pivot_vec, min_vec, max_vec, use_gt);
            l_store += (vtype::numlanes - amount_ge_pivot);
            unpartitioned -= vtype::numlanes;
        }
    }

    /* partition and save vec_left[num_unroll] and vec_right[num_unroll] */
    X86_SIMD_SORT_UNROLL_LOOP(8)
    for (int ii = 0; ii < num_unroll; ++ii) {
        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 vec_left[ii], pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        unpartitioned -= vtype::numlanes;
    }
    X86_SIMD_SORT_UNROLL_LOOP(8)
    for (int ii = 0; ii < num_unroll; ++ii) {
        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 vec_right[ii], pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        unpartitioned -= vtype::numlanes;
    }

    /* partition and save vec_align[vecsToPartition] */
    X86_SIMD_SORT_UNROLL_LOOP(8)
    for (int ii = 0; ii < vecsToPartition; ++ii) {
        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 vec_align[ii], pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        unpartitioned -= vtype::numlanes;
    }

    *smallest = vtype::reducemin(min_vec);
    *biggest = vtype::reducemax(max_vec);
    return l_store;
}

template <typename vtype, int maxN>
void sort_n(typename vtype::type_t *arr, int N);

template <typename vtype, typename type_t>
static void qsort_(type_t *arr, arrsize_t left, arrsize_t right,
                   arrsize_t max_iters) {
    /*
     * Resort to std::sort if quicksort isn't making any progress
     */
    if (max_iters <= 0) {
        std::sort(arr + left, arr + right + 1, comparison_func_ge<vtype>);
        return;
    }
    /*
     * Base case: use bitonic networks to sort arrays <=
     * vtype::network_sort_threshold
     */
    if (right + 1 - left <= vtype::network_sort_threshold) {
        sort_n<vtype, vtype::network_sort_threshold>(
            arr + left, (int32_t)(right + 1 - left));
        return;
    }

    type_t pivot = get_pivot_blocks<vtype, type_t>(arr, left, right);
    type_t smallest = vtype::type_max();
    type_t biggest = vtype::type_min();

    arrsize_t pivot_index =
        partition_avx512_unrolled<vtype, vtype::partition_unroll_factor>(
            arr, left, right + 1, pivot, &smallest, &biggest, false);

    if (pivot != smallest)
        qsort_<vtype>(arr, left, pivot_index - 1, max_iters - 1);
    if (pivot != biggest) qsort_<vtype>(arr, pivot_index, right, max_iters - 1);
}
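`qsort_` follows the classic introsort recipe: recurse while progress is being made, and fall back to `std::sort` once the depth budget `max_iters` (seeded with `2 * log2(n)` by `simd_fast_sort` below) is exhausted, which caps the worst case at O(n log n). A scalar sketch of the same pattern (assumed helper names, illustrative only):

#include <algorithm>
#include <cmath>
#include <cstddef>

template <typename T>
void intro_qsort(T *a, std::ptrdiff_t lo, std::ptrdiff_t hi, int depth) {
    if (hi - lo <= 16) {              // small base case (a sorting network in the real code)
        std::sort(a + lo, a + hi);
        return;
    }
    if (depth <= 0) {                 // no progress guarantee left: safe fallback
        std::sort(a + lo, a + hi);
        return;
    }
    T pivot = a[lo + (hi - lo) / 2];
    T *mid = std::partition(a + lo, a + hi,
                            [&](const T &x) { return x < pivot; });
    std::ptrdiff_t m = mid - a;
    if (m > lo) intro_qsort(a, lo, m, depth - 1);
    if (m < hi) intro_qsort(a, m, hi, depth - 1);
}
// usage: intro_qsort(arr, 0, n, 2 * (int)std::log2((double)n));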

// Hooks for OpenJDK sort
// to_index (exclusive)
template <typename vtype, typename type_t>
static int64_t vectorized_partition(type_t *arr, int64_t from_index, int64_t to_index, type_t pivot, bool use_gt) {
    type_t smallest = vtype::type_max();
    type_t biggest = vtype::type_min();
    int64_t pivot_index = partition_avx512_unrolled<vtype, 2>(
        arr, from_index, to_index, pivot, &smallest, &biggest, use_gt);
    return pivot_index;
}

// partitioning functions
template <typename vtype, typename T>
X86_SIMD_SORT_INLINE void simd_dual_pivot_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2) {
    const T pivot1 = arr[index_pivot1];
    const T pivot2 = arr[index_pivot2];

    const int64_t low = from_index;
    const int64_t high = to_index;
    const int64_t start = low + 1;
    const int64_t end = high - 1;

    std::swap(arr[index_pivot1], arr[low]);
    std::swap(arr[index_pivot2], arr[end]);

    const int64_t pivot_index2 = vectorized_partition<vtype, T>(arr, start, end, pivot2, true); // use_gt = true
    std::swap(arr[end], arr[pivot_index2]);
    int64_t upper = pivot_index2;

    // if all other elements are greater than pivot2 (and pivot1), no need to do further partitioning
    if (upper == start) {
        pivot_indices[0] = low;
        pivot_indices[1] = upper;
        return;
    }

    const int64_t pivot_index1 = vectorized_partition<vtype, T>(arr, start, upper, pivot1, false); // use_ge (use_gt = false)
    int64_t lower = pivot_index1 - 1;
    std::swap(arr[low], arr[lower]);

    pivot_indices[0] = lower;
    pivot_indices[1] = upper;
}

template <typename vtype, typename T>
X86_SIMD_SORT_INLINE void simd_single_pivot_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot) {
    const T pivot = arr[index_pivot];

    const int64_t low = from_index;
    const int64_t high = to_index;
    const int64_t end = high - 1;

    const int64_t pivot_index1 = vectorized_partition<vtype, T>(arr, low, high, pivot, false); // use_gt = false (use_ge)
    int64_t lower = pivot_index1;

    const int64_t pivot_index2 = vectorized_partition<vtype, T>(arr, pivot_index1, high, pivot, true); // use_gt = true
    int64_t upper = pivot_index2;

    pivot_indices[0] = lower;
    pivot_indices[1] = upper;
}

template <typename vtype, typename T>
X86_SIMD_SORT_INLINE void simd_fast_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2) {
    if (index_pivot1 != index_pivot2) {
        simd_dual_pivot_partition<vtype, T>(arr, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
    }
    else {
        simd_single_pivot_partition<vtype, T>(arr, from_index, to_index, pivot_indices, index_pivot1);
    }
}
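The two-pass structure above realizes a dual-pivot contract: roughly, `[from, pivot_indices[0])` holds elements below pivot1, `[pivot_indices[0], pivot_indices[1])` the middle band, and `[pivot_indices[1], to)` elements above pivot2. A scalar model of that contract (illustrative only, not the vectorized code):

#include <algorithm>
#include <cstdio>
#include <vector>

// Scalar model: idx[0] = first element >= p1, idx[1] = first element > p2.
static void dual_pivot_model(std::vector<int> &v, int p1, int p2, long idx[2]) {
    auto m2 = std::partition(v.begin(), v.end(),
                             [&](int x) { return x <= p2; }); // > p2 to the back
    auto m1 = std::partition(v.begin(), m2,
                             [&](int x) { return x < p1; });  // < p1 to the front
    idx[0] = m1 - v.begin();
    idx[1] = m2 - v.begin();
}

int main() {
    std::vector<int> v{9, 4, 1, 7, 5, 2, 8, 3, 6};
    long idx[2];
    dual_pivot_model(v, 3, 7, idx);   // bands: < 3, [3, 7], > 7
    printf("lower = %ld, upper = %ld\n", idx[0], idx[1]);
}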

template <typename T>
X86_SIMD_SORT_INLINE void insertion_sort(T *arr, int32_t from_index, int32_t to_index) {
    for (int i, k = from_index; ++k < to_index; ) {
        T ai = arr[i = k];
        if (ai < arr[i - 1]) {
            while (--i >= from_index && ai < arr[i]) {
                arr[i + 1] = arr[i];
            }
            arr[i + 1] = ai;
        }
    }
}

template <typename vtype, typename T>
X86_SIMD_SORT_INLINE void simd_fast_sort(T *arr, arrsize_t from_index, arrsize_t to_index, const arrsize_t INS_SORT_THRESHOLD)
{
    arrsize_t arrsize = to_index - from_index;
    if (arrsize <= INS_SORT_THRESHOLD) {
        insertion_sort<T>(arr, from_index, to_index);
    } else {
        qsort_<vtype, T>(arr, from_index, to_index - 1, 2 * (arrsize_t)log2(arrsize));
    }
}

#define DEFINE_METHODS(ISA, VTYPE) \
    template <typename T> \
    X86_SIMD_SORT_INLINE void ISA##_fast_sort( \
        T *arr, arrsize_t from_index, arrsize_t to_index, const arrsize_t INS_SORT_THRESHOLD) \
    { \
        simd_fast_sort<VTYPE, T>(arr, from_index, to_index, INS_SORT_THRESHOLD); \
    } \
    template <typename T> \
    X86_SIMD_SORT_INLINE void ISA##_fast_partition( \
        T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2) \
    { \
        simd_fast_partition<VTYPE, T>(arr, from_index, to_index, pivot_indices, index_pivot1, index_pivot2); \
    }

DEFINE_METHODS(avx2, avx2_vector<T>)
DEFINE_METHODS(avx512, zmm_vector<T>)

#endif // XSS_COMMON_QSORT
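`DEFINE_METHODS` stamps out the JDK-facing entry points: `avx2_fast_sort`/`avx2_fast_partition` and `avx512_fast_sort`/`avx512_fast_partition`. A hypothetical call site, assuming these headers are included and the CPU supports AVX-512 (the threshold value is illustrative, not the one the JDK uses):

#include <cstdint>
// assumes xss-common-qsort.h and a dtype-specific vector header are included

void sort_ints_avx512(int32_t *buf, int64_t n) {
    // runs of <= 64 elements go to insertion sort, larger runs to the
    // vectorized quicksort (threshold chosen arbitrarily for this sketch)
    avx512_fast_sort<int32_t>(buf, /*from_index=*/0, /*to_index=*/n,
                              /*INS_SORT_THRESHOLD=*/64);
}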

209 src/java.base/linux/native/libsimdsort/xss-network-qsort.hpp (new file)
@@ -0,0 +1,209 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef XSS_NETWORK_QSORT
#define XSS_NETWORK_QSORT

#include "xss-common-qsort.h"
#include "xss-optimal-networks.hpp"

template <typename vtype, int numVecs, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void bitonic_sort_n_vec(reg_t *regs) {
    if constexpr (numVecs == 1) {
        UNUSED(regs);
        return;
    } else if constexpr (numVecs == 2) {
        COEX<vtype>(regs[0], regs[1]);
    } else if constexpr (numVecs == 4) {
        optimal_sort_4<vtype>(regs);
    } else if constexpr (numVecs == 8) {
        optimal_sort_8<vtype>(regs);
    } else if constexpr (numVecs == 16) {
        optimal_sort_16<vtype>(regs);
    } else if constexpr (numVecs == 32) {
        optimal_sort_32<vtype>(regs);
    } else {
        static_assert(numVecs == -1, "should not reach here");
    }
}

/*
 * Swizzle ops explained:
 *
 * swap_n<scale>: swap neighbouring blocks of size <scale/2> within a block of
 * size <scale>:
 *     reg i = [7,6,5,4,3,2,1,0]
 *     swap_n<2>: = [[6,7],[4,5],[2,3],[0,1]]
 *     swap_n<4>: = [[5,4,7,6],[1,0,3,2]]
 *     swap_n<8>: = [[3,2,1,0,7,6,5,4]]
 *
 * reverse_n<scale>: reverse elements within a block of size <scale>:
 *     reg i = [7,6,5,4,3,2,1,0]
 *     rev_n<2>: = [[6,7],[4,5],[2,3],[0,1]]
 *     rev_n<4>: = [[4,5,6,7],[0,1,2,3]]
 *     rev_n<8>: = [[0,1,2,3,4,5,6,7]]
 *
 * merge_n<scale>: merge blocks of <scale/2> elements from two regs:
 *     reg b,a = [a,a,a,a,a,a,a,a], [b,b,b,b,b,b,b,b]
 *     merge_n<2> = [a,b,a,b,a,b,a,b]
 *     merge_n<4> = [a,a,b,b,a,a,b,b]
 *     merge_n<8> = [a,a,a,a,b,b,b,b]
 */
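The lane index patterns in the comment above are easier to see in a scalar model. For power-of-two `scale`, `swap_n` is an XOR of the lane index with `scale/2`, `reverse_n` an XOR with `scale-1`, and `merge_n` selects blocks of `scale/2` lanes alternately from its two inputs. A hypothetical reference model (not the intrinsic-based swizzle_ops; note it prints lane 0 first, while the comment lists lane 7 first):

#include <array>
#include <cstdio>

constexpr int N = 8; // model an 8-lane register

template <int scale>
std::array<int, N> swap_n(std::array<int, N> v) {
    std::array<int, N> r{};
    for (int i = 0; i < N; i++) r[i] = v[i ^ (scale / 2)]; // swap half-blocks
    return r;
}

template <int scale>
std::array<int, N> reverse_n(std::array<int, N> v) {
    std::array<int, N> r{};
    for (int i = 0; i < N; i++) r[i] = v[i ^ (scale - 1)]; // reverse inside block
    return r;
}

template <int scale>
std::array<int, N> merge_n(std::array<int, N> a, std::array<int, N> b) {
    std::array<int, N> r{};
    for (int i = 0; i < N; i++) r[i] = (i & (scale / 2)) ? a[i] : b[i];
    return r;
}

int main() {
    std::array<int, N> v{0, 1, 2, 3, 4, 5, 6, 7};
    auto s = swap_n<4>(v);    // 2 3 0 1 6 7 4 5
    auto r = reverse_n<4>(v); // 3 2 1 0 7 6 5 4
    for (int x : s) printf("%d ", x);
    printf("\n");
    for (int x : r) printf("%d ", x);
    printf("\n");
}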

template <typename vtype, int numVecs, int scale, bool first = true>
X86_SIMD_SORT_FINLINE void internal_merge_n_vec(typename vtype::reg_t *reg) {
    using reg_t = typename vtype::reg_t;
    using swizzle = typename vtype::swizzle_ops;
    if constexpr (scale <= 1) {
        UNUSED(reg);
        return;
    } else {
        if constexpr (first) {
            // Use reverse then merge
            X86_SIMD_SORT_UNROLL_LOOP(64)
            for (int i = 0; i < numVecs; i++) {
                reg_t &v = reg[i];
                reg_t rev = swizzle::template reverse_n<vtype, scale>(v);
                COEX<vtype>(rev, v);
                v = swizzle::template merge_n<vtype, scale>(v, rev);
            }
        } else {
            // Use swap then merge
            X86_SIMD_SORT_UNROLL_LOOP(64)
            for (int i = 0; i < numVecs; i++) {
                reg_t &v = reg[i];
                reg_t swap = swizzle::template swap_n<vtype, scale>(v);
                COEX<vtype>(swap, v);
                v = swizzle::template merge_n<vtype, scale>(v, swap);
            }
        }
        internal_merge_n_vec<vtype, numVecs, scale / 2, false>(reg);
    }
}

template <typename vtype, int numVecs, int scale,
          typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void merge_substep_n_vec(reg_t *regs) {
    using swizzle = typename vtype::swizzle_ops;
    if constexpr (numVecs <= 1) {
        UNUSED(regs);
        return;
    }

    // Reverse upper half of vectors
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = numVecs / 2; i < numVecs; i++) {
        regs[i] = swizzle::template reverse_n<vtype, scale>(regs[i]);
    }
    // Do compare exchanges
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = 0; i < numVecs / 2; i++) {
        COEX<vtype>(regs[i], regs[numVecs - 1 - i]);
    }

    merge_substep_n_vec<vtype, numVecs / 2, scale>(regs);
    merge_substep_n_vec<vtype, numVecs / 2, scale>(regs + numVecs / 2);
}

template <typename vtype, int numVecs, int scale,
          typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void merge_step_n_vec(reg_t *regs) {
    // Do cross vector merges
    merge_substep_n_vec<vtype, numVecs, scale>(regs);

    // Do internal vector merges
    internal_merge_n_vec<vtype, numVecs, scale>(regs);
}

template <typename vtype, int numVecs, int numPer = 2,
          typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void merge_n_vec(reg_t *regs) {
    if constexpr (numPer > vtype::numlanes) {
        UNUSED(regs);
        return;
    } else {
        merge_step_n_vec<vtype, numVecs, numPer>(regs);
        merge_n_vec<vtype, numVecs, numPer * 2>(regs);
    }
}

template <typename vtype, int numVecs, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_INLINE void sort_n_vec(typename vtype::type_t *arr, int N) {
    static_assert(numVecs > 0, "numVecs should be > 0");
    if constexpr (numVecs > 1) {
        if (N * 2 <= numVecs * vtype::numlanes) {
            sort_n_vec<vtype, numVecs / 2>(arr, N);
            return;
        }
    }

    reg_t vecs[numVecs];

    // Generate masks for loading and storing
    typename vtype::opmask_t ioMasks[numVecs - numVecs / 2];
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = numVecs / 2, j = 0; i < numVecs; i++, j++) {
        uint64_t num_to_read =
            std::min((uint64_t)std::max(0, N - i * vtype::numlanes),
                     (uint64_t)vtype::numlanes);
        ioMasks[j] = vtype::get_partial_loadmask(num_to_read);
    }

    // Unmasked part of the load
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = 0; i < numVecs / 2; i++) {
        vecs[i] = vtype::loadu(arr + i * vtype::numlanes);
    }
    // Masked part of the load
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = numVecs / 2, j = 0; i < numVecs; i++, j++) {
        vecs[i] = vtype::mask_loadu(vtype::zmm_max(), ioMasks[j],
                                    arr + i * vtype::numlanes);
    }

    /* Run the initial sorting network to sort the columns of the
     * [numVecs x num_lanes] matrix
     */
    bitonic_sort_n_vec<vtype, numVecs>(vecs);

    // Merge the vectors using bitonic merging networks
    merge_n_vec<vtype, numVecs>(vecs);

    // Unmasked part of the store
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = 0; i < numVecs / 2; i++) {
        vtype::storeu(arr + i * vtype::numlanes, vecs[i]);
    }
    // Masked part of the store
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = numVecs / 2, j = 0; i < numVecs; i++, j++) {
        vtype::mask_storeu(arr + i * vtype::numlanes, ioMasks[j], vecs[i]);
    }
}
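`sort_n_vec` handles a tail that is not a multiple of the lane count by building per-vector load masks: lanes past `N` are filled with `zmm_max()` padding, which sorts to the end and is never stored back. The mask math reduces to setting the low `num_to_read` bits; a standalone sketch of the assumed semantics of `get_partial_loadmask`, shown for a 16-lane vector:

#include <cstdint>
#include <cstdio>

// Assumed semantics: a mask with the low num_to_read bits set.
static uint16_t partial_loadmask16(uint64_t num_to_read) {
    return (uint16_t)((1u << num_to_read) - 1);
}

int main() {
    printf("%#x\n", partial_loadmask16(5));  // 0x1f: read 5 of 16 lanes
    printf("%#x\n", partial_loadmask16(16)); // 0xffff: full vector
}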

template <typename vtype, int maxN>
X86_SIMD_SORT_INLINE void sort_n(typename vtype::type_t *arr, int N) {
    constexpr int numVecs = maxN / vtype::numlanes;
    constexpr bool isMultiple = (maxN == (vtype::numlanes * numVecs));
    constexpr bool powerOfTwo = (numVecs != 0 && !(numVecs & (numVecs - 1)));
    static_assert(powerOfTwo == true && isMultiple == true,
                  "maxN must be vtype::numlanes times a power of 2");

    sort_n_vec<vtype, numVecs>(arr, N);
}
#endif

342 src/java.base/linux/native/libsimdsort/xss-optimal-networks.hpp (new file)
@@ -0,0 +1,342 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort).
// All of these source files are generated from the optimal networks described in
// https://bertdobbelaere.github.io/sorting_networks.html

template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void optimal_sort_4(reg_t *vecs) {
    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);

    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[3]);

    COEX<vtype>(vecs[1], vecs[2]);
}

template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void optimal_sort_8(reg_t *vecs) {
    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);
    COEX<vtype>(vecs[4], vecs[6]);
    COEX<vtype>(vecs[5], vecs[7]);

    COEX<vtype>(vecs[0], vecs[4]);
    COEX<vtype>(vecs[1], vecs[5]);
    COEX<vtype>(vecs[2], vecs[6]);
    COEX<vtype>(vecs[3], vecs[7]);

    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[3]);
    COEX<vtype>(vecs[4], vecs[5]);
    COEX<vtype>(vecs[6], vecs[7]);

    COEX<vtype>(vecs[2], vecs[4]);
    COEX<vtype>(vecs[3], vecs[5]);

    COEX<vtype>(vecs[1], vecs[4]);
    COEX<vtype>(vecs[3], vecs[6]);

    COEX<vtype>(vecs[1], vecs[2]);
    COEX<vtype>(vecs[3], vecs[4]);
    COEX<vtype>(vecs[5], vecs[6]);
}

template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void optimal_sort_16(reg_t *vecs) {
    COEX<vtype>(vecs[0], vecs[13]);
    COEX<vtype>(vecs[1], vecs[12]);
    COEX<vtype>(vecs[2], vecs[15]);
    COEX<vtype>(vecs[3], vecs[14]);
    COEX<vtype>(vecs[4], vecs[8]);
    COEX<vtype>(vecs[5], vecs[6]);
    COEX<vtype>(vecs[7], vecs[11]);
    COEX<vtype>(vecs[9], vecs[10]);

    COEX<vtype>(vecs[0], vecs[5]);
    COEX<vtype>(vecs[1], vecs[7]);
    COEX<vtype>(vecs[2], vecs[9]);
    COEX<vtype>(vecs[3], vecs[4]);
    COEX<vtype>(vecs[6], vecs[13]);
    COEX<vtype>(vecs[8], vecs[14]);
    COEX<vtype>(vecs[10], vecs[15]);
    COEX<vtype>(vecs[11], vecs[12]);

    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[3]);
    COEX<vtype>(vecs[4], vecs[5]);
    COEX<vtype>(vecs[6], vecs[8]);
    COEX<vtype>(vecs[7], vecs[9]);
    COEX<vtype>(vecs[10], vecs[11]);
    COEX<vtype>(vecs[12], vecs[13]);
    COEX<vtype>(vecs[14], vecs[15]);

    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);
    COEX<vtype>(vecs[4], vecs[10]);
    COEX<vtype>(vecs[5], vecs[11]);
    COEX<vtype>(vecs[6], vecs[7]);
    COEX<vtype>(vecs[8], vecs[9]);
    COEX<vtype>(vecs[12], vecs[14]);
    COEX<vtype>(vecs[13], vecs[15]);

    COEX<vtype>(vecs[1], vecs[2]);
    COEX<vtype>(vecs[3], vecs[12]);
    COEX<vtype>(vecs[4], vecs[6]);
    COEX<vtype>(vecs[5], vecs[7]);
    COEX<vtype>(vecs[8], vecs[10]);
    COEX<vtype>(vecs[9], vecs[11]);
    COEX<vtype>(vecs[13], vecs[14]);

    COEX<vtype>(vecs[1], vecs[4]);
    COEX<vtype>(vecs[2], vecs[6]);
    COEX<vtype>(vecs[5], vecs[8]);
    COEX<vtype>(vecs[7], vecs[10]);
    COEX<vtype>(vecs[9], vecs[13]);
    COEX<vtype>(vecs[11], vecs[14]);

    COEX<vtype>(vecs[2], vecs[4]);
    COEX<vtype>(vecs[3], vecs[6]);
    COEX<vtype>(vecs[9], vecs[12]);
    COEX<vtype>(vecs[11], vecs[13]);

    COEX<vtype>(vecs[3], vecs[5]);
    COEX<vtype>(vecs[6], vecs[8]);
    COEX<vtype>(vecs[7], vecs[9]);
    COEX<vtype>(vecs[10], vecs[12]);

    COEX<vtype>(vecs[3], vecs[4]);
    COEX<vtype>(vecs[5], vecs[6]);
    COEX<vtype>(vecs[7], vecs[8]);
    COEX<vtype>(vecs[9], vecs[10]);
    COEX<vtype>(vecs[11], vecs[12]);

    COEX<vtype>(vecs[6], vecs[7]);
    COEX<vtype>(vecs[8], vecs[9]);
}

template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void optimal_sort_32(reg_t *vecs) {
    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[3]);
    COEX<vtype>(vecs[4], vecs[5]);
    COEX<vtype>(vecs[6], vecs[7]);
    COEX<vtype>(vecs[8], vecs[9]);
    COEX<vtype>(vecs[10], vecs[11]);
    COEX<vtype>(vecs[12], vecs[13]);
    COEX<vtype>(vecs[14], vecs[15]);
    COEX<vtype>(vecs[16], vecs[17]);
    COEX<vtype>(vecs[18], vecs[19]);
    COEX<vtype>(vecs[20], vecs[21]);
    COEX<vtype>(vecs[22], vecs[23]);
    COEX<vtype>(vecs[24], vecs[25]);
    COEX<vtype>(vecs[26], vecs[27]);
    COEX<vtype>(vecs[28], vecs[29]);
    COEX<vtype>(vecs[30], vecs[31]);

    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);
    COEX<vtype>(vecs[4], vecs[6]);
    COEX<vtype>(vecs[5], vecs[7]);
    COEX<vtype>(vecs[8], vecs[10]);
    COEX<vtype>(vecs[9], vecs[11]);
    COEX<vtype>(vecs[12], vecs[14]);
    COEX<vtype>(vecs[13], vecs[15]);
    COEX<vtype>(vecs[16], vecs[18]);
    COEX<vtype>(vecs[17], vecs[19]);
    COEX<vtype>(vecs[20], vecs[22]);
    COEX<vtype>(vecs[21], vecs[23]);
    COEX<vtype>(vecs[24], vecs[26]);
    COEX<vtype>(vecs[25], vecs[27]);
    COEX<vtype>(vecs[28], vecs[30]);
    COEX<vtype>(vecs[29], vecs[31]);

    COEX<vtype>(vecs[0], vecs[4]);
    COEX<vtype>(vecs[1], vecs[5]);
    COEX<vtype>(vecs[2], vecs[6]);
    COEX<vtype>(vecs[3], vecs[7]);
    COEX<vtype>(vecs[8], vecs[12]);
    COEX<vtype>(vecs[9], vecs[13]);
    COEX<vtype>(vecs[10], vecs[14]);
    COEX<vtype>(vecs[11], vecs[15]);
    COEX<vtype>(vecs[16], vecs[20]);
    COEX<vtype>(vecs[17], vecs[21]);
    COEX<vtype>(vecs[18], vecs[22]);
    COEX<vtype>(vecs[19], vecs[23]);
    COEX<vtype>(vecs[24], vecs[28]);
    COEX<vtype>(vecs[25], vecs[29]);
    COEX<vtype>(vecs[26], vecs[30]);
    COEX<vtype>(vecs[27], vecs[31]);

    COEX<vtype>(vecs[0], vecs[8]);
    COEX<vtype>(vecs[1], vecs[9]);
    COEX<vtype>(vecs[2], vecs[10]);
    COEX<vtype>(vecs[3], vecs[11]);
    COEX<vtype>(vecs[4], vecs[12]);
    COEX<vtype>(vecs[5], vecs[13]);
    COEX<vtype>(vecs[6], vecs[14]);
    COEX<vtype>(vecs[7], vecs[15]);
    COEX<vtype>(vecs[16], vecs[24]);
    COEX<vtype>(vecs[17], vecs[25]);
    COEX<vtype>(vecs[18], vecs[26]);
    COEX<vtype>(vecs[19], vecs[27]);
    COEX<vtype>(vecs[20], vecs[28]);
    COEX<vtype>(vecs[21], vecs[29]);
    COEX<vtype>(vecs[22], vecs[30]);
    COEX<vtype>(vecs[23], vecs[31]);

    COEX<vtype>(vecs[0], vecs[16]);
    COEX<vtype>(vecs[1], vecs[8]);
    COEX<vtype>(vecs[2], vecs[4]);
    COEX<vtype>(vecs[3], vecs[12]);
    COEX<vtype>(vecs[5], vecs[10]);
    COEX<vtype>(vecs[6], vecs[9]);
    COEX<vtype>(vecs[7], vecs[14]);
    COEX<vtype>(vecs[11], vecs[13]);
    COEX<vtype>(vecs[15], vecs[31]);
    COEX<vtype>(vecs[17], vecs[24]);
    COEX<vtype>(vecs[18], vecs[20]);
    COEX<vtype>(vecs[19], vecs[28]);
    COEX<vtype>(vecs[21], vecs[26]);
    COEX<vtype>(vecs[22], vecs[25]);
    COEX<vtype>(vecs[23], vecs[30]);
    COEX<vtype>(vecs[27], vecs[29]);

    COEX<vtype>(vecs[1], vecs[2]);
    COEX<vtype>(vecs[3], vecs[5]);
    COEX<vtype>(vecs[4], vecs[8]);
    COEX<vtype>(vecs[6], vecs[22]);
    COEX<vtype>(vecs[7], vecs[11]);
    COEX<vtype>(vecs[9], vecs[25]);
    COEX<vtype>(vecs[10], vecs[12]);
    COEX<vtype>(vecs[13], vecs[14]);
    COEX<vtype>(vecs[17], vecs[18]);
    COEX<vtype>(vecs[19], vecs[21]);
    COEX<vtype>(vecs[20], vecs[24]);
    COEX<vtype>(vecs[23], vecs[27]);
    COEX<vtype>(vecs[26], vecs[28]);
    COEX<vtype>(vecs[29], vecs[30]);

    COEX<vtype>(vecs[1], vecs[17]);
    COEX<vtype>(vecs[2], vecs[18]);
    COEX<vtype>(vecs[3], vecs[19]);
    COEX<vtype>(vecs[4], vecs[20]);
    COEX<vtype>(vecs[5], vecs[10]);
    COEX<vtype>(vecs[7], vecs[23]);
    COEX<vtype>(vecs[8], vecs[24]);
    COEX<vtype>(vecs[11], vecs[27]);
    COEX<vtype>(vecs[12], vecs[28]);
    COEX<vtype>(vecs[13], vecs[29]);
    COEX<vtype>(vecs[14], vecs[30]);
    COEX<vtype>(vecs[21], vecs[26]);

    COEX<vtype>(vecs[3], vecs[17]);
    COEX<vtype>(vecs[4], vecs[16]);
    COEX<vtype>(vecs[5], vecs[21]);
    COEX<vtype>(vecs[6], vecs[18]);
    COEX<vtype>(vecs[7], vecs[9]);
    COEX<vtype>(vecs[8], vecs[20]);
    COEX<vtype>(vecs[10], vecs[26]);
    COEX<vtype>(vecs[11], vecs[23]);
    COEX<vtype>(vecs[13], vecs[25]);
    COEX<vtype>(vecs[14], vecs[28]);
    COEX<vtype>(vecs[15], vecs[27]);
    COEX<vtype>(vecs[22], vecs[24]);

    COEX<vtype>(vecs[1], vecs[4]);
    COEX<vtype>(vecs[3], vecs[8]);
    COEX<vtype>(vecs[5], vecs[16]);
    COEX<vtype>(vecs[7], vecs[17]);
    COEX<vtype>(vecs[9], vecs[21]);
    COEX<vtype>(vecs[10], vecs[22]);
    COEX<vtype>(vecs[11], vecs[19]);
    COEX<vtype>(vecs[12], vecs[20]);
    COEX<vtype>(vecs[14], vecs[24]);
    COEX<vtype>(vecs[15], vecs[26]);
    COEX<vtype>(vecs[23], vecs[28]);
    COEX<vtype>(vecs[27], vecs[30]);

    COEX<vtype>(vecs[2], vecs[5]);
    COEX<vtype>(vecs[7], vecs[8]);
    COEX<vtype>(vecs[9], vecs[18]);
    COEX<vtype>(vecs[11], vecs[17]);
    COEX<vtype>(vecs[12], vecs[16]);
    COEX<vtype>(vecs[13], vecs[22]);
    COEX<vtype>(vecs[14], vecs[20]);
    COEX<vtype>(vecs[15], vecs[19]);
    COEX<vtype>(vecs[23], vecs[24]);
    COEX<vtype>(vecs[26], vecs[29]);

    COEX<vtype>(vecs[2], vecs[4]);
    COEX<vtype>(vecs[6], vecs[12]);
    COEX<vtype>(vecs[9], vecs[16]);
    COEX<vtype>(vecs[10], vecs[11]);
    COEX<vtype>(vecs[13], vecs[17]);
    COEX<vtype>(vecs[14], vecs[18]);
    COEX<vtype>(vecs[15], vecs[22]);
    COEX<vtype>(vecs[19], vecs[25]);
    COEX<vtype>(vecs[20], vecs[21]);
    COEX<vtype>(vecs[27], vecs[29]);

    COEX<vtype>(vecs[5], vecs[6]);
    COEX<vtype>(vecs[8], vecs[12]);
    COEX<vtype>(vecs[9], vecs[10]);
    COEX<vtype>(vecs[11], vecs[13]);
    COEX<vtype>(vecs[14], vecs[16]);
    COEX<vtype>(vecs[15], vecs[17]);
    COEX<vtype>(vecs[18], vecs[20]);
    COEX<vtype>(vecs[19], vecs[23]);
    COEX<vtype>(vecs[21], vecs[22]);
    COEX<vtype>(vecs[25], vecs[26]);

    COEX<vtype>(vecs[3], vecs[5]);
    COEX<vtype>(vecs[6], vecs[7]);
    COEX<vtype>(vecs[8], vecs[9]);
    COEX<vtype>(vecs[10], vecs[12]);
    COEX<vtype>(vecs[11], vecs[14]);
    COEX<vtype>(vecs[13], vecs[16]);
    COEX<vtype>(vecs[15], vecs[18]);
    COEX<vtype>(vecs[17], vecs[20]);
    COEX<vtype>(vecs[19], vecs[21]);
    COEX<vtype>(vecs[22], vecs[23]);
    COEX<vtype>(vecs[24], vecs[25]);
    COEX<vtype>(vecs[26], vecs[28]);

    COEX<vtype>(vecs[3], vecs[4]);
    COEX<vtype>(vecs[5], vecs[6]);
    COEX<vtype>(vecs[7], vecs[8]);
    COEX<vtype>(vecs[9], vecs[10]);
    COEX<vtype>(vecs[11], vecs[12]);
    COEX<vtype>(vecs[13], vecs[14]);
    COEX<vtype>(vecs[15], vecs[16]);
    COEX<vtype>(vecs[17], vecs[18]);
    COEX<vtype>(vecs[19], vecs[20]);
    COEX<vtype>(vecs[21], vecs[22]);
    COEX<vtype>(vecs[23], vecs[24]);
    COEX<vtype>(vecs[25], vecs[26]);
    COEX<vtype>(vecs[27], vecs[28]);
}
@@ -0,0 +1,88 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

template <typename vtype, typename mm_t>
X86_SIMD_SORT_INLINE void COEX(mm_t &a, mm_t &b);

template <typename vtype, typename type_t>
X86_SIMD_SORT_INLINE type_t get_pivot(type_t *arr, const arrsize_t left,
                                      const arrsize_t right) {
    using reg_t = typename vtype::reg_t;
    type_t samples[vtype::numlanes];
    arrsize_t delta = (right - left) / vtype::numlanes;
    for (int i = 0; i < vtype::numlanes; i++) {
        samples[i] = arr[left + i * delta];
    }
    reg_t rand_vec = vtype::loadu(samples);
    reg_t sort = vtype::sort_vec(rand_vec);

    return ((type_t *)&sort)[vtype::numlanes / 2];
}

template <typename vtype, typename type_t>
X86_SIMD_SORT_INLINE type_t get_pivot_blocks(type_t *arr, const arrsize_t left,
                                             const arrsize_t right) {
    if (right - left <= 1024) {
        return get_pivot<vtype>(arr, left, right);
    }

    using reg_t = typename vtype::reg_t;
    constexpr int numVecs = 5;

    arrsize_t width = (right - vtype::numlanes) - left;
    arrsize_t delta = width / numVecs;

    reg_t vecs[numVecs];
    // Load data
    for (int i = 0; i < numVecs; i++) {
        vecs[i] = vtype::loadu(arr + left + delta * i);
    }

    // Implement sorting network (from
    // https://bertdobbelaere.github.io/sorting_networks.html)
    COEX<vtype>(vecs[0], vecs[3]);
    COEX<vtype>(vecs[1], vecs[4]);

    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);

    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[4]);

    COEX<vtype>(vecs[1], vecs[2]);
    COEX<vtype>(vecs[3], vecs[4]);

    COEX<vtype>(vecs[2], vecs[3]);

    // Calculate median of the middle vector
    reg_t &vec = vecs[numVecs / 2];
    vec = vtype::sort_vec(vec);

    type_t data[vtype::numlanes];
    vtype::storeu(data, vec);
    return data[vtype::numlanes / 2];
}
@@ -42,15 +42,10 @@ import static java.lang.String.LATIN1;
|
|||||||
|
|
||||||
final class StringUTF16 {
|
final class StringUTF16 {
|
||||||
|
|
||||||
|
// Return a new byte array for a UTF16-coded string for len chars
|
||||||
|
// Throw an exception if out of range
|
||||||
public static byte[] newBytesFor(int len) {
|
public static byte[] newBytesFor(int len) {
|
||||||
if (len < 0) {
|
return new byte[newBytesLength(len)];
|
||||||
throw new NegativeArraySizeException();
|
|
||||||
}
|
|
||||||
if (len > MAX_LENGTH) {
|
|
||||||
throw new OutOfMemoryError("UTF16 String size is " + len +
|
|
||||||
", should be less than " + MAX_LENGTH);
|
|
||||||
}
|
|
||||||
return new byte[len << 1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the size of a UTF16-coded string
|
// Check the size of a UTF16-coded string
|
||||||
@@ -59,7 +54,7 @@ final class StringUTF16 {
|
|||||||
if (len < 0) {
|
if (len < 0) {
|
||||||
throw new NegativeArraySizeException();
|
throw new NegativeArraySizeException();
|
||||||
}
|
}
|
||||||
if (len > MAX_LENGTH) {
|
if (len >= MAX_LENGTH) {
|
||||||
throw new OutOfMemoryError("UTF16 String size is " + len +
|
throw new OutOfMemoryError("UTF16 String size is " + len +
|
||||||
", should be less than " + MAX_LENGTH);
|
", should be less than " + MAX_LENGTH);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1647,7 +1647,7 @@ public class Thread implements Runnable {
|
|||||||
* interrupt the wait.
|
* interrupt the wait.
|
||||||
* For more information, see
|
* For more information, see
|
||||||
* <a href="{@docRoot}/java.base/java/lang/doc-files/threadPrimitiveDeprecation.html">Why
|
* <a href="{@docRoot}/java.base/java/lang/doc-files/threadPrimitiveDeprecation.html">Why
|
||||||
* are Thread.stop, Thread.suspend and Thread.resume Deprecated?</a>.
|
* is Thread.stop deprecated and the ability to stop a thread removed?</a>.
|
||||||
*/
|
*/
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
@Deprecated(since="1.2", forRemoval=true)
|
||||||
public final void stop() {
|
public final void stop() {
|
||||||
@@ -1788,44 +1788,6 @@ public class Thread implements Runnable {
|
|||||||
return eetop != 0;
|
return eetop != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @throws UnsupportedOperationException always
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to suspend a thread.
|
|
||||||
* It was inherently deadlock-prone. If the target thread held a lock on
|
|
||||||
* a monitor protecting a critical system resource when it was suspended,
|
|
||||||
* no thread could access the resource until the target thread was resumed.
|
|
||||||
* If the thread intending to resume the target thread attempted to lock
|
|
||||||
* the monitor prior to calling {@code resume}, deadlock would result.
|
|
||||||
* Such deadlocks typically manifested themselves as "frozen" processes.
|
|
||||||
* For more information, see
|
|
||||||
* <a href="{@docRoot}/java.base/java/lang/doc-files/threadPrimitiveDeprecation.html">Why
|
|
||||||
* are Thread.stop, Thread.suspend and Thread.resume Deprecated?</a>.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void suspend() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @throws UnsupportedOperationException always
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to resume a thread
|
|
||||||
* suspended with {@link #suspend()}. Suspending a thread was
|
|
||||||
* inherently deadlock-prone.
|
|
||||||
* For more information, see
|
|
||||||
* <a href="{@docRoot}/java.base/java/lang/doc-files/threadPrimitiveDeprecation.html">Why
|
|
||||||
* are Thread.stop, Thread.suspend and Thread.resume Deprecated?</a>.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void resume() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Changes the priority of this thread.
|
* Changes the priority of this thread.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1995, 2022, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1995, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@@ -554,17 +554,6 @@ public class ThreadGroup implements Thread.UncaughtExceptionHandler {
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to stop all threads in
|
|
||||||
* the thread group. It was inherently unsafe.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void stop() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupts all {@linkplain Thread#isAlive() live} platform threads in
|
* Interrupts all {@linkplain Thread#isAlive() live} platform threads in
|
||||||
* this thread group and its subgroups.
|
* this thread group and its subgroups.
|
||||||
@@ -587,28 +576,6 @@ public class ThreadGroup implements Thread.UncaughtExceptionHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to suspend all threads
|
|
||||||
* in the thread group.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void suspend() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to resume all threads
|
|
||||||
* in the thread group.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void resume() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Does nothing.
|
* Does nothing.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -202,7 +202,7 @@ public sealed interface ModuleAttribute
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the module flags
|
* Sets the module version
|
||||||
* @param version the module version
|
* @param version the module version
|
||||||
* @return this builder
|
* @return this builder
|
||||||
*/
|
*/
|
||||||
@@ -1,6 +1,6 @@
 <!doctype html>
 <!--
-Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

 This code is free software; you can redistribute it and/or modify it
@@ -158,173 +158,5 @@ wouldn't respond to <code>Thread.stop</code> either.</em> Such
 cases include deliberate denial-of-service attacks, and I/O
 operations for which thread.stop and thread.interrupt do not work
 properly.</p>
-<hr>
-<h2>Why are <code>Thread.suspend</code> and
-<code>Thread.resume</code> deprecated and the ability to suspend or
-resume a thread removed?</h2>
-<p><code>Thread.suspend</code> was inherently deadlock-prone. If the
-target thread held a lock on a monitor protecting a critical
-system resource when it is suspended, no thread could access the
-resource until the target thread was resumed. If the thread intending
-to resume the target thread attempted to lock the monitor prior
-to calling <code>resume</code>, deadlock resulted. Such deadlocks
-typically manifest themselves as "frozen" processes.</p>
-<hr>
-<h2>What should I use instead of <code>Thread.suspend</code> and
-<code>Thread.resume</code>?</h2>
-<p>As with <code>Thread.stop</code>, the prudent approach is to
-have the "target thread" poll a variable indicating the desired
-state of the thread (active or suspended). When the desired state
-is suspended, the thread waits using <code>Object.wait</code>. When
-the thread is resumed, the target thread is notified using
-<code>Object.notify</code>.</p>
-<p>For example, suppose your applet contains the following
-mousePressed event handler, which toggles the state of a thread
-called <code>blinker</code>:</p>
-<pre>
-    private boolean threadSuspended;
-
-    public void mousePressed(MouseEvent e) {
-        e.consume();
-
-        if (threadSuspended)
-            blinker.resume();
-        else
-            blinker.suspend(); // DEADLOCK-PRONE!
-
-        threadSuspended = !threadSuspended;
-    }
-</pre>
-You can avoid the use of <code>Thread.suspend</code> and
-<code>Thread.resume</code> by replacing the event handler above
-with:
-<pre>
-    public synchronized void mousePressed(MouseEvent e) {
-        e.consume();
-
-        threadSuspended = !threadSuspended;
-
-        if (!threadSuspended)
-            notify();
-    }
-</pre>
-and adding the following code to the "run loop":
-<pre>
-    synchronized(this) {
-        while (threadSuspended)
-            wait();
-    }
-</pre>
-The <code>wait</code> method throws the
-<code>InterruptedException</code>, so it must be inside a <code>try
-... catch</code> clause. It's fine to put it in the same clause as
-the <code>sleep</code>. The check should follow (rather than
-precede) the <code>sleep</code> so the window is immediately
-repainted when the thread is "resumed." The resulting
-<code>run</code> method follows:
-<pre>
-    public void run() {
-        while (true) {
-            try {
-                Thread.sleep(interval);
-
-                synchronized(this) {
-                    while (threadSuspended)
-                        wait();
-                }
-            } catch (InterruptedException e){
-            }
-            repaint();
-        }
-    }
-</pre>
-Note that the <code>notify</code> in the <code>mousePressed</code>
-method and the <code>wait</code> in the <code>run</code> method are
-inside <code>synchronized</code> blocks. This is required by the
-language, and ensures that <code>wait</code> and
-<code>notify</code> are properly serialized. In practical terms,
-this eliminates race conditions that could cause the "suspended"
-thread to miss a <code>notify</code> and remain suspended
-indefinitely.
-<p>While the cost of synchronization in Java is decreasing as the
-platform matures, it will never be free. A simple trick can be used
-to remove the synchronization that we've added to each iteration of
-the "run loop." The synchronized block that was added is replaced
-by a slightly more complex piece of code that enters a synchronized
-block only if the thread has actually been suspended:</p>
-<pre>
-    if (threadSuspended) {
-        synchronized(this) {
-            while (threadSuspended)
-                wait();
-        }
-    }
-</pre>
-<p>In the absence of explicit synchronization,
-<code>threadSuspended</code> must be made <code>volatile</code> to ensure
-prompt communication of the suspend-request.</p>
-The resulting <code>run</code> method is:
-<pre>
-    private volatile boolean threadSuspended;
-
-    public void run() {
-        while (true) {
-            try {
-                Thread.sleep(interval);
-
-                if (threadSuspended) {
-                    synchronized(this) {
-                        while (threadSuspended)
-                            wait();
-                    }
-                }
-            } catch (InterruptedException e){
-            }
-            repaint();
-        }
-    }
-</pre>
-<hr>
-<h2>Can I combine the two techniques to produce a thread that may
-be safely "stopped" or "suspended"?</h2>
-Yes, it's reasonably straightforward. The one subtlety is that the
-target thread may already be suspended at the time that another
-thread tries to stop it. If the <code>stop</code> method merely sets
-the state variable (<code>blinker</code>) to null, the target thread
-will remain suspended (waiting on the monitor), rather than exiting
-gracefully as it should. If the applet is restarted, multiple
-threads could end up waiting on the monitor at the same time,
-resulting in erratic behavior.
-<p>To rectify this situation, the <code>stop</code> method must ensure
-that the target thread resumes immediately if it is suspended. Once
-the target thread resumes, it must recognize immediately that it
-has been stopped, and exit gracefully. Here's how the resulting
-<code>run</code> and <code>stop</code> methods look:</p>
-<pre>
-    public void run() {
-        Thread thisThread = Thread.currentThread();
-        while (blinker == thisThread) {
-            try {
-                Thread.sleep(interval);
-
-                synchronized(this) {
-                    while (threadSuspended && blinker==thisThread)
-                        wait();
-                }
-            } catch (InterruptedException e){
-            }
-            repaint();
-        }
-    }
-
-    public synchronized void stop() {
-        blinker = null;
-        notify();
-    }
-</pre>
-If the <code>stop</code> method calls <code>Thread.interrupt</code>, as
-described above, it needn't call <code>notify</code> as well, but it
-still must be synchronized. This ensures that the target thread
-won't miss an interrupt due to a race condition.
 </body>
 </html>
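The FAQ deleted above documented the wait/notify replacement for suspension. The same cooperative idea can be stated more compactly with java.util.concurrent.locks.LockSupport; the Pauser class and its method names below are hypothetical, a minimal sketch rather than anything from this change:

    import java.util.concurrent.locks.LockSupport;

    // Cooperative suspend/resume without Thread.suspend()/resume():
    // the worker parks itself when asked, so it never blocks while
    // holding an application monitor, avoiding the classic deadlock.
    public class Pauser {
        private volatile boolean suspended;
        private volatile Thread worker;

        public void runLoop() {
            worker = Thread.currentThread();
            while (!Thread.currentThread().isInterrupted()) {
                while (suspended) {
                    LockSupport.park(this); // re-checked in a loop; spurious wakeups are harmless
                }
                // ... one unit of work ...
            }
        }

        public void suspendWorker() { suspended = true; }

        public void resumeWorker() {
            suspended = false;
            LockSupport.unpark(worker); // no-op if the worker is not currently parked
        }
    }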
@@ -631,6 +631,9 @@ public sealed interface MemoryLayout
  * <li>The accessed memory segment must be
  * {@link MemorySegment#isAccessibleBy(Thread) accessible} from the thread
  * performing the access operation, or a {@link WrongThreadException} is thrown.</li>
+ * <li>For write operations, the accessed memory segment must not be
+ * {@link MemorySegment#isReadOnly() read only}, or an
+ * {@link IllegalArgumentException} is thrown.</li>
  * <li>The {@linkplain MemorySegment#scope() scope} associated with the accessed
  * segment must be {@linkplain MemorySegment.Scope#isAlive() alive}, or an
  * {@link IllegalStateException} is thrown.</li>
@@ -869,7 +869,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * this segment is not {@linkplain Scope#isAlive() alive}
      * @throws WrongThreadException if this method is called from a thread {@code T},
      * such that {@code isAccessibleBy(T) == false}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     MemorySegment fill(byte value);
@@ -894,7 +894,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * {@code src} is not {@linkplain Scope#isAlive() alive}
      * @throws WrongThreadException if this method is called from a thread {@code T},
      * such that {@code src.isAccessibleBy(T) == false}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      * @return this segment
      */
@@ -1269,6 +1269,8 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * this segment is not {@linkplain Scope#isAlive() alive}
      * @throws WrongThreadException if this method is called from a thread {@code T},
      * such that {@code isAccessibleBy(T) == false}
+     * @throws IllegalArgumentException if this segment is
+     * {@linkplain #isReadOnly() read-only}
      */
     void setString(long offset, String str);

@@ -1306,6 +1308,8 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * such that {@code isAccessibleBy(T) == false}
      * @throws IllegalArgumentException if {@code charset} is not a
      * {@linkplain StandardCharsets standard charset}
+     * @throws IllegalArgumentException if this segment is
+     * {@linkplain #isReadOnly() read-only}
      */
     void setString(long offset, String str, Charset charset);

@@ -1493,7 +1497,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - bytes}
      * @throws IndexOutOfBoundsException if either {@code srcOffset},
      * {@code dstOffset} or {@code bytes} are {@code < 0}
-     * @throws UnsupportedOperationException if {@code dstSegment} is
+     * @throws IllegalArgumentException if {@code dstSegment} is
      * {@linkplain #isReadOnly() read-only}
      */
     @ForceInline
@@ -1552,7 +1556,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * {@code dstSegment} is not {@linkplain Scope#isAlive() alive}
      * @throws WrongThreadException if this method is called from a thread {@code T},
      * such that {@code dstSegment.isAccessibleBy(T) == false}
-     * @throws UnsupportedOperationException if {@code dstSegment} is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if {@code dstSegment} is {@linkplain #isReadOnly() read-only}
      * @throws IndexOutOfBoundsException if {@code elementCount * srcLayout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code elementCount * dtsLayout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code srcOffset > srcSegment.byteSize() - (elementCount * srcLayout.byteSize())}
@@ -1605,7 +1609,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfByte layout, long offset, byte value);
@@ -1643,7 +1647,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfBoolean layout, long offset, boolean value);
@@ -1681,7 +1685,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfChar layout, long offset, char value);
@@ -1719,7 +1723,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfShort layout, long offset, short value);
@@ -1757,7 +1761,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfInt layout, long offset, int value);
@@ -1795,7 +1799,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfFloat layout, long offset, float value);
@@ -1833,7 +1837,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfLong layout, long offset, long value);
@@ -1871,7 +1875,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfDouble layout, long offset, double value);
@@ -1921,8 +1925,10 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
      * @throws UnsupportedOperationException if this segment is
      * {@linkplain #isReadOnly() read-only}
-     * @throws UnsupportedOperationException if {@code value} is not a
+     * @throws IllegalArgumentException if {@code value} is not a
      * {@linkplain #isNative() native} segment
+     * @throws IllegalArgumentException if this segment is
+     * {@linkplain #isReadOnly() read-only}
      */
     void set(AddressLayout layout, long offset, MemorySegment value);

@@ -2055,7 +2061,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfByte layout, long index, byte value);

@@ -2078,7 +2084,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
     */
     void setAtIndex(ValueLayout.OfBoolean layout, long index, boolean value);

@@ -2101,7 +2107,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfShort layout, long index, short value);

@@ -2146,7 +2152,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfInt layout, long index, int value);

@@ -2191,7 +2197,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfFloat layout, long index, float value);

@@ -2236,7 +2242,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfLong layout, long index, long value);

@@ -2281,7 +2287,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfDouble layout, long index, double value);

@@ -2336,7 +2342,9 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
      * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
-     * @throws UnsupportedOperationException if {@code value} is not a {@linkplain #isNative() native} segment
+     * @throws IllegalArgumentException if {@code value} is not a {@linkplain #isNative() native} segment
+     * @throws IllegalArgumentException if this segment is
+     * {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(AddressLayout layout, long index, MemorySegment value);

@@ -2460,7 +2468,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the source element layout
      * @throws IllegalArgumentException if {@code dstLayout.byteAlignment() > dstLayout.byteSize()}
-     * @throws UnsupportedOperationException if {@code dstSegment} is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if {@code dstSegment} is {@linkplain #isReadOnly() read-only}
      * @throws IndexOutOfBoundsException if {@code elementCount * dstLayout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - (elementCount * dstLayout.byteSize())}
      * @throws IndexOutOfBoundsException if {@code srcIndex > srcArray.length - elementCount}
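Taken together, these javadoc hunks respecify read-only violations from UnsupportedOperationException to IllegalArgumentException. A minimal sketch of the observable difference, assuming a JDK that includes this change (my illustration, not part of the diff):

    import java.lang.foreign.Arena;
    import java.lang.foreign.MemorySegment;
    import java.lang.foreign.ValueLayout;

    public class ReadOnlyWriteDemo {
        public static void main(String[] args) {
            try (Arena arena = Arena.ofConfined()) {
                MemorySegment segment = arena.allocate(ValueLayout.JAVA_INT).asReadOnly();
                try {
                    segment.set(ValueLayout.JAVA_INT, 0, 42); // write to a read-only segment
                } catch (IllegalArgumentException e) {        // was UnsupportedOperationException
                    System.out.println("rejected: " + e.getMessage());
                }
            }
        }
    }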
@@ -350,7 +350,7 @@ public interface SegmentAllocator {
      *
      * @param layout the layout of the block of memory to be allocated
      * @param value the value to be set in the newly allocated memory segment
-     * @throws UnsupportedOperationException if {@code value} is not
+     * @throws IllegalArgumentException if {@code value} is not
      * a {@linkplain MemorySegment#isNative() native} segment
      */
     default MemorySegment allocateFrom(AddressLayout layout, MemorySegment value) {
@@ -670,9 +670,11 @@ public interface SegmentAllocator {
      *
      * @param segment the segment from which the returned allocator should slice from
      * @return a new slicing allocator
+     * @throws IllegalArgumentException if the {@code segment} is
+     * {@linkplain MemorySegment#isReadOnly() read-only}
      */
     static SegmentAllocator slicingAllocator(MemorySegment segment) {
-        Objects.requireNonNull(segment);
+        assertWritable(segment);
         return new SlicingAllocator(segment);
     }

@@ -700,9 +702,19 @@ public interface SegmentAllocator {
      * @param segment the memory segment to be recycled by the returned allocator
      * @return an allocator that recycles an existing segment upon each new
      * allocation request
+     * @throws IllegalArgumentException if the {@code segment} is
+     * {@linkplain MemorySegment#isReadOnly() read-only}
      */
     static SegmentAllocator prefixAllocator(MemorySegment segment) {
-        return (AbstractMemorySegmentImpl)Objects.requireNonNull(segment);
+        assertWritable(segment);
+        return (AbstractMemorySegmentImpl)segment;
+    }
+
+    private static void assertWritable(MemorySegment segment) {
+        // Implicit null check
+        if (segment.isReadOnly()) {
+            throw new IllegalArgumentException("read-only segment");
+        }
     }

     @ForceInline
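A short usage sketch of the new fail-fast behavior (my illustration, not from the patch): both factories now reject a read-only backing segment at construction time rather than on the first allocation.

    import java.lang.foreign.Arena;
    import java.lang.foreign.MemorySegment;
    import java.lang.foreign.SegmentAllocator;

    public class SlicingAllocatorDemo {
        public static void main(String[] args) {
            try (Arena arena = Arena.ofConfined()) {
                MemorySegment readOnly = arena.allocate(64).asReadOnly();
                try {
                    SegmentAllocator.slicingAllocator(readOnly);
                } catch (IllegalArgumentException e) {
                    System.out.println("rejected eagerly: " + e.getMessage()); // "read-only segment"
                }
            }
        }
    }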
@@ -1841,7 +1841,7 @@ public class MethodHandles {
      * <a href="MethodHandles.Lookup.html#secmgr">refuses access</a>
      * @throws NullPointerException if {@code bytes} is {@code null}
      * @since 9
-     * @see Lookup#privateLookupIn
+     * @see MethodHandles#privateLookupIn
      * @see Lookup#dropLookupMode
      * @see ClassLoader#defineClass(String,byte[],int,int,ProtectionDomain)
      */
@@ -29,6 +29,7 @@ package java.nio;

 import java.lang.foreign.MemorySegment;
 import java.util.Objects;
+import jdk.internal.util.ArraysSupport;

 /**
 #if[rw]
@@ -705,6 +706,9 @@ class Heap$Type$Buffer$RW$
                         addr, segment)));
     }

+    public int hashCode() {
+        return ArraysSupport.vectorizedHashCode(hb, ix(position()), remaining(), 1, ArraysSupport.T_BYTE);
+    }

 #end[byte]

@@ -733,6 +737,9 @@ class Heap$Type$Buffer$RW$
                 offset, segment);
     }

+    public int hashCode() {
+        return ArraysSupport.vectorizedHashCode(hb, ix(position()), remaining(), 1, ArraysSupport.T_CHAR);
+    }
 #end[char]


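The template change above overrides hashCode() for heap byte and char buffers with the intrinsified ArraysSupport.vectorizedHashCode, presumably for speed; the contract that equal buffers hash equally must still hold. A quick check, my illustration rather than part of the patch:

    import java.nio.ByteBuffer;

    public class BufferHashDemo {
        public static void main(String[] args) {
            ByteBuffer a = ByteBuffer.wrap(new byte[] {1, 2, 3, 4});
            ByteBuffer b = ByteBuffer.allocate(4).put(new byte[] {1, 2, 3, 4}).flip();
            // equals() compares the remaining elements, so equal buffers must hash equally
            System.out.println(a.equals(b) + " " + (a.hashCode() == b.hashCode())); // true true
        }
    }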
@@ -88,7 +88,6 @@ import java.util.Arrays;
  * <p>
  * Below is an example of constructing a ChoiceFormat with arrays to format
  * and parse values:
- * <blockquote>
  * {@snippet lang=java :
  * double[] limits = {1,2,3,4,5,6,7};
  * String[] dayOfWeekNames = {"Sun","Mon","Tue","Wed","Thur","Fri","Sat"};
@@ -100,34 +99,27 @@ import java.util.Arrays;
  *       + form.parse(form.format(i),status));
  * }
  * }
- * </blockquote>
+ *
+ * <p>
  * For more sophisticated patterns, {@code ChoiceFormat} can be used with
  * {@link MessageFormat} to produce accurate forms for singular and plural:
- * <blockquote>
  * {@snippet lang=java :
- * double[] filelimits = {0,1,2};
- * String[] filepart = {"are no files","is one file","are {2} files"};
- * ChoiceFormat fileform = new ChoiceFormat(filelimits, filepart);
- * Format[] testFormats = {fileform, null, NumberFormat.getInstance()};
- * MessageFormat pattform = new MessageFormat("There {0} on {1}");
- * pattform.setFormats(testFormats);
- * Object[] testArgs = {null, "ADisk", null};
- * for (int i = 0; i < 4; ++i) {
- *     testArgs[0] = Integer.valueOf(i);
- *     testArgs[2] = testArgs[0];
- *     System.out.println(pattform.format(testArgs));
- * }
+ * MessageFormat msgFmt = new MessageFormat("The disk \"{0}\" contains {1}.");
+ * double[] fileLimits = {0,1,2};
+ * String[] filePart = {"no files","one file","{1,number} files"};
+ * ChoiceFormat fileChoices = new ChoiceFormat(fileLimits, filePart);
+ * msgFmt.setFormatByArgumentIndex(1, fileChoices);
+ * Object[] args = {"MyDisk", 1273};
+ * System.out.println(msgFmt.format(args));
  * }
- * }
- * </blockquote>
- * Would output the following:
- * <blockquote>
- * <pre>{@code
- * There are no files on ADisk
- * There is one file on ADisk
- * There are 2 files on ADisk
- * There are 3 files on ADisk
- * }</pre>
- * </blockquote>
+ * The output with different values for {@code fileCount}:
+ * <blockquote><pre>
+ * The disk "MyDisk" contains no files.
+ * The disk "MyDisk" contains one file.
+ * The disk "MyDisk" contains 1,273 files.
+ * </pre></blockquote>
+ * See {@link MessageFormat##pattern_caveats MessageFormat} for caveats regarding
+ * {@code MessageFormat} patterns within a {@code ChoiceFormat} pattern.
  *
  * <h2><a id="patterns">Patterns</a></h2>
  * A {@code ChoiceFormat} pattern has the following syntax:
@@ -194,7 +186,6 @@ import java.util.Arrays;
  * {@code new ChoiceFormat("1# ''one'' ").format(1)} returns {@code " 'one' "}.
  *
  * <p>Below is an example of constructing a ChoiceFormat with a pattern:
- * <blockquote>
  * {@snippet lang=java :
  * ChoiceFormat fmt = new ChoiceFormat(
  *     "-1#is negative| 0#is zero or fraction | 1#is one |1.0<is 1+ |2#is two |2<is more than 2.");
@@ -210,7 +201,6 @@ import java.util.Arrays;
  * System.out.println(fmt.format(Double.NaN)); // outputs "is negative"
  * System.out.println(fmt.format(Double.POSITIVE_INFINITY)); // outputs "is more than 2."
  * }
- * </blockquote>
  *
  * <h2><a id="synchronization">Synchronization</a></h2>
  *
@@ -231,7 +231,6 @@ import java.util.Objects;
  * <p>
  * The first example uses the static method {@code MessageFormat.format},
  * which internally creates a {@code MessageFormat} for one-time use:
- * <blockquote>
  * {@snippet lang=java :
  * int planet = 7;
  * String event = "a disturbance in the Force";
@@ -240,7 +239,6 @@ import java.util.Objects;
  *     "At {1,time} on {1,date}, there was {2} on planet {0,number,integer}.",
  *     planet, new Date(), event);
  * }
- * </blockquote>
  * The output is:
  * <blockquote><pre>
  * At 12:30 PM on Jul 3, 2053, there was a disturbance in the Force on planet 7.
@@ -249,7 +247,6 @@ import java.util.Objects;
  * <p>
  * The following example creates a {@code MessageFormat} instance that
  * can be used repeatedly:
- * <blockquote>
  * {@snippet lang=java :
  * int fileCount = 1273;
  * String diskName = "MyDisk";
@@ -260,7 +257,6 @@ import java.util.Objects;
  *
  * System.out.println(form.format(testArgs));
  * }
- * </blockquote>
  * The output with different values for {@code fileCount}:
  * <blockquote><pre>
  * The disk "MyDisk" contains 0 file(s).
@@ -269,23 +265,17 @@ import java.util.Objects;
  * </pre></blockquote>
  *
  * <p>
- * For more sophisticated patterns, you can use a {@code ChoiceFormat}
- * to produce correct forms for singular and plural:
- * <blockquote>
+ * For more sophisticated patterns, {@link ChoiceFormat} can be used with
+ * {@code MessageFormat} to produce accurate forms for singular and plural:
  * {@snippet lang=java :
- * MessageFormat form = new MessageFormat("The disk \"{1}\" contains {0}.");
- * double[] filelimits = {0,1,2};
- * String[] filepart = {"no files","one file","{0,number} files"};
- * ChoiceFormat fileform = new ChoiceFormat(filelimits, filepart);
- * form.setFormatByArgumentIndex(0, fileform);
- *
- * int fileCount = 1273;
- * String diskName = "MyDisk";
- * Object[] testArgs = {Long.valueOf(fileCount), diskName};
- *
- * System.out.println(form.format(testArgs));
+ * MessageFormat msgFmt = new MessageFormat("The disk \"{0}\" contains {1}.");
+ * double[] fileLimits = {0,1,2};
+ * String[] filePart = {"no files","one file","{1,number} files"};
+ * ChoiceFormat fileChoices = new ChoiceFormat(fileLimits, filePart);
+ * msgFmt.setFormatByArgumentIndex(1, fileChoices);
+ * Object[] args = {"MyDisk", 1273};
+ * System.out.println(msgFmt.format(args));
  * }
- * </blockquote>
  * The output with different values for {@code fileCount}:
  * <blockquote><pre>
  * The disk "MyDisk" contains no files.
@@ -297,24 +287,26 @@ import java.util.Objects;
  * You can create the {@code ChoiceFormat} programmatically, as in the
  * above example, or by using a pattern. See {@link ChoiceFormat}
  * for more information.
- * <blockquote>
  * {@snippet lang=java :
- * form.applyPattern(
- *    "There {0,choice,0#are no files|1#is one file|1<are {0,number,integer} files}.");
+ * msgFmt.applyPattern(
+ *    "There {0,choice,0#are no files|1#is one file|1<are {1,number,integer} files}.");
  * }
- * </blockquote>
  *
  * <p>
- * <strong>Note:</strong> As we see above, the string produced
- * by a {@code ChoiceFormat} in {@code MessageFormat} is treated as special;
- * occurrences of '{' are used to indicate subformats, and cause recursion.
+ * <strong id="pattern_caveats">Notes:</strong> As seen in the previous snippet,
+ * the string produced by a {@code ChoiceFormat} in {@code MessageFormat} is
+ * treated as special; occurrences of '{' are used to indicate subformats, and
+ * cause recursion. If a {@code FormatElement} is defined in the {@code ChoiceFormat}
+ * pattern, it will only be formatted according to the {@code FormatType} and
+ * {@code FormatStyle} pattern provided. The associated subformats of the
+ * top level {@code MessageFormat} will not be applied to the {@code FormatElement}
+ * defined in the {@code ChoiceFormat} pattern.
  * If you create both a {@code MessageFormat} and {@code ChoiceFormat}
  * programmatically (instead of using the string patterns), then be careful not to
  * produce a format that recurses on itself, which will cause an infinite loop.
  * <p>
  * When a single argument is parsed more than once in the string, the last match
  * will be the final result of the parsing. For example,
- * <blockquote>
  * {@snippet lang=java :
  * MessageFormat mf = new MessageFormat("{0,number,#.##}, {0,number,#.#}");
  * Object[] objs = {Double.valueOf(3.1415)};
@@ -323,20 +315,17 @@ import java.util.Objects;
  * objs = mf.parse(result, new ParsePosition(0));
  * // objs now equals {Double.valueOf(3.1)}
  * }
- * </blockquote>
  *
  * <p>
  * Likewise, parsing with a {@code MessageFormat} object using patterns containing
  * multiple occurrences of the same argument would return the last match. For
  * example,
- * <blockquote>
  * {@snippet lang=java :
  * MessageFormat mf = new MessageFormat("{0}, {0}, {0}");
  * String forParsing = "x, y, z";
  * Object[] objs = mf.parse(forParsing, new ParsePosition(0));
  * // objs now equals {new String("z")}
  * }
- * </blockquote>
  *
  * <h3><a id="synchronization">Synchronization</a></h3>
  *
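A standalone, compilable version of the rewritten snippet; this is my assembly of the diff's own example into a main method, and the loop over several counts is my addition:

    import java.text.ChoiceFormat;
    import java.text.MessageFormat;

    public class FileCountDemo {
        public static void main(String[] args) {
            MessageFormat msgFmt = new MessageFormat("The disk \"{0}\" contains {1}.");
            double[] fileLimits = {0, 1, 2};
            String[] filePart = {"no files", "one file", "{1,number} files"};
            ChoiceFormat fileChoices = new ChoiceFormat(fileLimits, filePart);
            msgFmt.setFormatByArgumentIndex(1, fileChoices);
            for (Object count : new Object[] {0, 1, 1273}) {
                // "{1,number} files" contains '{', so it recurses into the MessageFormat
                System.out.println(msgFmt.format(new Object[] {"MyDisk", count}));
            }
        }
    }

Printed in order: "no files", "one file", and "1,273 files", matching the output block the new javadoc shows.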
@@ -50,6 +50,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.spi.LocaleNameProvider;
 import java.util.stream.Stream;

+import jdk.internal.util.StaticProperty;
 import jdk.internal.vm.annotation.Stable;

 import sun.security.action.GetPropertyAction;
@@ -1053,11 +1054,10 @@ public final class Locale implements Cloneable, Serializable {

     private static Locale initDefault() {
         String language, region, script, country, variant;
-        Properties props = GetPropertyAction.privilegedGetProperties();
-        language = props.getProperty("user.language", "en");
+        language = StaticProperty.USER_LANGUAGE;
         // for compatibility, check for old user.region property
-        region = props.getProperty("user.region");
-        if (region != null) {
+        region = StaticProperty.USER_REGION;
+        if (!region.isEmpty()) {
             // region can be of form country, country_variant, or _variant
             int i = region.indexOf('_');
             if (i >= 0) {
@@ -1069,30 +1069,24 @@ public final class Locale implements Cloneable, Serializable {
             }
             script = "";
         } else {
-            script = props.getProperty("user.script", "");
-            country = props.getProperty("user.country", "");
-            variant = props.getProperty("user.variant", "");
+            script = StaticProperty.USER_SCRIPT;
+            country = StaticProperty.USER_COUNTRY;
+            variant = StaticProperty.USER_VARIANT;
         }

         return getInstance(language, script, country, variant,
-                getDefaultExtensions(props.getProperty("user.extensions", ""))
+                getDefaultExtensions(StaticProperty.USER_EXTENSIONS)
                     .orElse(null));
     }

     private static Locale initDefault(Locale.Category category) {
-        Properties props = GetPropertyAction.privilegedGetProperties();
-
         Locale locale = Locale.defaultLocale;
         return getInstance(
-            props.getProperty(category.languageKey,
-                locale.getLanguage()),
-            props.getProperty(category.scriptKey,
-                locale.getScript()),
-            props.getProperty(category.countryKey,
-                locale.getCountry()),
-            props.getProperty(category.variantKey,
-                locale.getVariant()),
-            getDefaultExtensions(props.getProperty(category.extensionsKey, ""))
+            category == Category.DISPLAY ? StaticProperty.USER_LANGUAGE_DISPLAY : StaticProperty.USER_LANGUAGE_FORMAT,
+            category == Category.DISPLAY ? StaticProperty.USER_SCRIPT_DISPLAY : StaticProperty.USER_SCRIPT_FORMAT,
+            category == Category.DISPLAY ? StaticProperty.USER_COUNTRY_DISPLAY : StaticProperty.USER_COUNTRY_FORMAT,
+            category == Category.DISPLAY ? StaticProperty.USER_VARIANT_DISPLAY : StaticProperty.USER_VARIANT_FORMAT,
+            getDefaultExtensions(category == Category.DISPLAY ? StaticProperty.USER_EXTENSIONS_DISPLAY : StaticProperty.USER_EXTENSIONS_FORMAT)
                 .orElse(locale.getLocaleExtensions()));
     }

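Behavior-wise this reads as a refactor: the default locale is still assembled from the same user.* properties, now taken from StaticProperty's startup snapshot rather than from live system properties. A hedged check of the unchanged surface behavior (my illustration; the printed values are assumptions about the launch flags shown in the comment):

    import java.util.Locale;

    public class LocaleDemo {
        // Run with e.g.:  java -Duser.language=fr -Duser.country=CA LocaleDemo
        public static void main(String[] args) {
            System.out.println(Locale.getDefault());                         // fr_CA
            System.out.println(Locale.getDefault(Locale.Category.DISPLAY));  // fr_CA unless user.language.display etc. are set
            System.out.println(Locale.getDefault(Locale.Category.FORMAT));   // fr_CA unless user.language.format etc. are set
        }
    }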
@@ -1222,16 +1222,17 @@ public class ZipFile implements ZipConstants, Closeable {
             int nlen = CENNAM(cen, pos);
             int elen = CENEXT(cen, pos);
             int clen = CENCOM(cen, pos);
-            if (entryPos + nlen > cen.length - ENDHDR) {
+            long headerSize = (long)CENHDR + nlen + clen + elen;
+            // CEN header size + name length + comment length + extra length
+            // should not exceed 65,535 bytes per the PKWare APP.NOTE
+            // 4.4.10, 4.4.11, & 4.4.12. Also check that current CEN header will
+            // not exceed the length of the CEN array
+            if (headerSize > 0xFFFF || pos + headerSize > cen.length - ENDHDR) {
                 zerror("invalid CEN header (bad header size)");
             }

             if (elen > 0 && !DISABLE_ZIP64_EXTRA_VALIDATION) {
-                long extraStartingOffset = pos + CENHDR + nlen;
-                if ((int)extraStartingOffset != extraStartingOffset) {
-                    zerror("invalid CEN header (bad extra offset)");
-                }
-                checkExtraFields(pos, (int)extraStartingOffset, elen);
+                checkExtraFields(pos, entryPos + nlen, elen);
             } else if (elen == 0 && (CENSIZ(cen, pos) == ZIP64_MAGICVAL
                 || CENLEN(cen, pos) == ZIP64_MAGICVAL
                 || CENOFF(cen, pos) == ZIP64_MAGICVAL
@@ -1292,7 +1293,7 @@ public class ZipFile implements ZipConstants, Closeable {

             int tagBlockSize = get16(cen, currentOffset);
             currentOffset += Short.BYTES;
-            int tagBlockEndingOffset = currentOffset + tagBlockSize;
+            long tagBlockEndingOffset = (long)currentOffset + tagBlockSize;

             // The ending offset for this tag block should not go past the
             // offset for the end of the extra field
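Both hunks are overflow hardening against malformed central directories. The second is the easiest to see in isolation: with 32-bit arithmetic, a sum near the int range could wrap negative and slip past the subsequent bounds comparison, while the widening cast keeps it exact. A tiny demonstration of the wrap (my illustration; the values are arbitrary):

    public class OverflowDemo {
        public static void main(String[] args) {
            int currentOffset = Integer.MAX_VALUE - 10;
            int tagBlockSize = 0xFFFF;
            System.out.println(currentOffset + tagBlockSize);        // wraps negative: -2147418124
            System.out.println((long) currentOffset + tagBlockSize); // exact: 2147549172
        }
    }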
@@ -361,7 +361,7 @@ public abstract sealed class AbstractMemorySegmentImpl
     @ForceInline
     public void checkAccess(long offset, long length, boolean readOnly) {
         if (!readOnly && this.readOnly) {
-            throw new UnsupportedOperationException("Attempt to write a read-only segment");
+            throw new IllegalArgumentException("Attempt to write a read-only segment");
         }
         checkBounds(offset, length);
     }
|
|||||||
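Only the exception type changes: a write through a read-only MemorySegment now surfaces as IllegalArgumentException. A minimal sketch against the final java.lang.foreign API; on builds without this change, the catch block would need UnsupportedOperationException instead:

    import java.lang.foreign.Arena;
    import java.lang.foreign.MemorySegment;
    import java.lang.foreign.ValueLayout;

    public class ReadOnlySegmentDemo {
        public static void main(String[] args) {
            try (Arena arena = Arena.ofConfined()) {
                MemorySegment seg = arena.allocate(ValueLayout.JAVA_INT).asReadOnly();
                try {
                    seg.set(ValueLayout.JAVA_INT, 0, 42);  // write through a read-only view
                } catch (IllegalArgumentException e) {     // was UnsupportedOperationException
                    System.out.println("write rejected: " + e.getMessage());
                }
            }
        }
    }
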
@@ -57,6 +57,22 @@ public final class StaticProperty {
     private static final String OS_NAME;
     private static final String OS_ARCH;
     private static final String OS_VERSION;
+    public static final String USER_LANGUAGE;
+    public static final String USER_LANGUAGE_DISPLAY;
+    public static final String USER_LANGUAGE_FORMAT;
+    public static final String USER_SCRIPT;
+    public static final String USER_SCRIPT_DISPLAY;
+    public static final String USER_SCRIPT_FORMAT;
+    public static final String USER_COUNTRY;
+    public static final String USER_COUNTRY_DISPLAY;
+    public static final String USER_COUNTRY_FORMAT;
+    public static final String USER_VARIANT;
+    public static final String USER_VARIANT_DISPLAY;
+    public static final String USER_VARIANT_FORMAT;
+    public static final String USER_EXTENSIONS;
+    public static final String USER_EXTENSIONS_DISPLAY;
+    public static final String USER_EXTENSIONS_FORMAT;
+    public static final String USER_REGION;

     private StaticProperty() {}
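These constants capture the locale properties once, at VM startup, which is StaticProperty's purpose: later System.setProperty calls cannot leak into code that reads the cached copies. A small illustration of the mutability being fenced off, using plain System.getProperty for contrast:

    public class MutablePropsDemo {
        public static void main(String[] args) {
            String before = System.getProperty("user.language");
            System.setProperty("user.language", "zz");    // live properties are mutable
            String after = System.getProperty("user.language");
            System.out.println(before + " -> " + after);  // startup-cached copies keep 'before'
        }
    }
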
@@ -79,6 +95,22 @@ public final class StaticProperty {
         OS_NAME = getProperty(props, "os.name");
         OS_ARCH = getProperty(props, "os.arch");
         OS_VERSION = getProperty(props, "os.version");
+        USER_LANGUAGE = getProperty(props, "user.language", "en");
+        USER_LANGUAGE_DISPLAY = getProperty(props, "user.language.display", USER_LANGUAGE);
+        USER_LANGUAGE_FORMAT = getProperty(props, "user.language.format", USER_LANGUAGE);
+        USER_SCRIPT = getProperty(props, "user.script", "");
+        USER_SCRIPT_DISPLAY = getProperty(props, "user.script.display", USER_SCRIPT);
+        USER_SCRIPT_FORMAT = getProperty(props, "user.script.format", USER_SCRIPT);
+        USER_COUNTRY = getProperty(props, "user.country", "");
+        USER_COUNTRY_DISPLAY = getProperty(props, "user.country.display", USER_COUNTRY);
+        USER_COUNTRY_FORMAT = getProperty(props, "user.country.format", USER_COUNTRY);
+        USER_VARIANT = getProperty(props, "user.variant", "");
+        USER_VARIANT_DISPLAY = getProperty(props, "user.variant.display", USER_VARIANT);
+        USER_VARIANT_FORMAT = getProperty(props, "user.variant.format", USER_VARIANT);
+        USER_EXTENSIONS = getProperty(props, "user.extensions", "");
+        USER_EXTENSIONS_DISPLAY = getProperty(props, "user.extensions.display", USER_EXTENSIONS);
+        USER_EXTENSIONS_FORMAT = getProperty(props, "user.extensions.format", USER_EXTENSIONS);
+        USER_REGION = getProperty(props, "user.region", "");
     }

     private static String getProperty(Properties props, String key) {
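The initializers above rely on a three-argument getProperty(props, key, default) overload alongside the two-argument form visible at the end of the hunk. A sketch of the assumed default-chaining semantics (FallbackDemo and get are illustrative, not JDK code):

    import java.util.Properties;

    public class FallbackDemo {
        // Assumed shape of the three-arg helper: the value if set, the default otherwise.
        static String get(Properties p, String key, String def) {
            String v = p.getProperty(key);
            return v != null ? v : def;
        }

        public static void main(String[] args) {
            Properties p = new Properties();
            p.setProperty("user.language", "fr");         // only the base key is set
            String lang        = get(p, "user.language", "en");
            String langDisplay = get(p, "user.language.display", lang); // chains to base
            String langFormat  = get(p, "user.language.format", lang);
            System.out.println(lang + " " + langDisplay + " " + langFormat); // fr fr fr
        }
    }
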
Some files were not shown because too many files have changed in this diff.