Mirror of https://github.com/JetBrains/JetBrainsRuntime.git (synced 2025-12-19 07:49:41 +01:00)

Compare commits (57 commits)
| SHA1 |
|---|
| cf948548c3 |
| 7ece9e90c0 |
| 9320ef9b29 |
| 2a565ff368 |
| 493b5bd2fd |
| f573f6d233 |
| 8a0a6f8c25 |
| 3d9d353edb |
| 1b621f5527 |
| 5463c9cd9a |
| ac07355f55 |
| 4fb5c12813 |
| d5a96e3f49 |
| aadf36809c |
| a3447ec656 |
| b25ed57b76 |
| df4ed7eff7 |
| 5718039a46 |
| c51685267c |
| 7d903964fb |
| 6f4824068d |
| e1fd663f22 |
| d5214a4288 |
| 2611a49ea1 |
| b8c0b2fd8c |
| 973bcdab81 |
| 6359b4ec23 |
| ce4b257fa5 |
| b270f30d10 |
| 486594d427 |
| ce8399fd60 |
| 3c6459e1de |
| 92fd490f22 |
| d13302f8b0 |
| ce108446ca |
| 5c12a182e3 |
| 71800884f6 |
| 0c178beb69 |
| 6c13a3032f |
| 5e6bfc5eaa |
| 2c2d4d2cde |
| 0eb299af79 |
| b893a2b2f7 |
| 05f950934e |
| 701bc3bbbe |
| 9e48b90c7f |
| bad5edf146 |
| f577385fc8 |
| 86623aa41d |
| af5c49226c |
| cb7e3d263a |
| 25dc4762b4 |
| 11e4a925be |
| 354ea4c28f |
| 959a443a9e |
| 4ed38f5ad5 |
| fe4c0a2f04 |
@@ -58,7 +58,7 @@ DEMO_MANIFEST := $(SUPPORT_OUTPUTDIR)/demos/java-main-manifest.mf
 # This rule will be depended on due to the MANIFEST line in SetupBuildDemo
 # and SetupBuildJvmtiDemo.
 $(eval $(call SetupTextFileProcessing, BUILD_JAVA_MANIFEST, \
-    SOURCE_FILES := $(TOPDIR)/make/data/mainmanifest/manifest.mf, \
+    SOURCE_FILES := $(TOPDIR)/make/data/mainmanifest/manifest.mf.template, \
     OUTPUT_FILE := $(DEMO_MANIFEST), \
     REPLACEMENTS := \
         @@VERSION_SPECIFICATION@@ => $(VERSION_SPECIFICATION) ; \
@@ -33,7 +33,7 @@ include TextFileProcessing.gmk
 
 # This rule will be depended on due to the MANIFEST line
 $(eval $(call SetupTextFileProcessing, BUILD_JAVA_MANIFEST, \
-    SOURCE_FILES := $(TOPDIR)/make/data/mainmanifest/manifest.mf, \
+    SOURCE_FILES := $(TOPDIR)/make/data/mainmanifest/manifest.mf.template, \
     OUTPUT_FILE := $(SUPPORT_OUTPUTDIR)/java-main-manifest.mf, \
     REPLACEMENTS := \
         @@VERSION_SPECIFICATION@@ => $(VERSION_SPECIFICATION) ; \
@@ -69,7 +69,7 @@ ifeq ($(call isTargetOs, macosx), true)
 ))
 
 $(eval $(call SetupTextFileProcessing, BUILD_JDK_PLIST, \
-    SOURCE_FILES := $(MACOSX_PLIST_SRC)/JDK-Info.plist, \
+    SOURCE_FILES := $(MACOSX_PLIST_SRC)/JDK-Info.plist.template, \
     OUTPUT_FILE := $(JDK_MACOSX_CONTENTS_DIR)/Info.plist, \
     REPLACEMENTS := \
         @@ID@@ => $(MACOSX_BUNDLE_ID_BASE).jdk ; \
@@ -82,7 +82,7 @@ ifeq ($(call isTargetOs, macosx), true)
 ))
 
 $(eval $(call SetupTextFileProcessing, BUILD_JRE_PLIST, \
-    SOURCE_FILES := $(MACOSX_PLIST_SRC)/JRE-Info.plist, \
+    SOURCE_FILES := $(MACOSX_PLIST_SRC)/JRE-Info.plist.template, \
     OUTPUT_FILE := $(JRE_MACOSX_CONTENTS_DIR)/Info.plist, \
     REPLACEMENTS := \
         @@ID@@ => $(MACOSX_BUNDLE_ID_BASE).jre ; \
@@ -744,9 +744,16 @@ endif
 
 $(eval $(call SetupTarget, build-test-lib, \
     MAKEFILE := test/BuildTestLib, \
+    TARGET := build-test-lib, \
     DEPS := exploded-image, \
 ))
 
+$(eval $(call SetupTarget, test-image-lib, \
+    MAKEFILE := test/BuildTestLib, \
+    TARGET := test-image-lib, \
+    DEPS := build-test-lib, \
+))
+
 ifeq ($(BUILD_FAILURE_HANDLER), true)
   # Builds the failure handler jtreg extension
   $(eval $(call SetupTarget, build-test-failure-handler, \
@@ -781,7 +788,7 @@ endif
 
 $(eval $(call SetupTarget, build-microbenchmark, \
     MAKEFILE := test/BuildMicrobenchmark, \
-    DEPS := interim-langtools exploded-image, \
+    DEPS := interim-langtools exploded-image build-test-lib, \
 ))
 
 ################################################################################
@@ -1264,7 +1271,7 @@ all-docs-bundles: docs-jdk-bundles docs-javase-bundles docs-reference-bundles
 # This target builds the test image
 test-image: prepare-test-image test-image-jdk-jtreg-native \
     test-image-demos-jdk test-image-libtest-jtreg-native \
-    test-image-lib-native
+    test-image-lib test-image-lib-native
 
 ifneq ($(JVM_TEST_IMAGE_TARGETS), )
   # If JVM_TEST_IMAGE_TARGETS is externally defined, use it instead of the
@@ -448,17 +448,17 @@ AC_DEFUN_ONCE([BASIC_SETUP_OUTPUT_DIR],
   AC_SUBST(CONFIGURESUPPORT_OUTPUTDIR)
 
   # The spec.gmk file contains all variables for the make system.
-  AC_CONFIG_FILES([$OUTPUTDIR/spec.gmk:$AUTOCONF_DIR/spec.gmk.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/spec.gmk:$AUTOCONF_DIR/spec.gmk.template])
   # The bootcycle-spec.gmk file contains support for boot cycle builds.
-  AC_CONFIG_FILES([$OUTPUTDIR/bootcycle-spec.gmk:$AUTOCONF_DIR/bootcycle-spec.gmk.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/bootcycle-spec.gmk:$AUTOCONF_DIR/bootcycle-spec.gmk.template])
   # The buildjdk-spec.gmk file contains support for building a buildjdk when cross compiling.
-  AC_CONFIG_FILES([$OUTPUTDIR/buildjdk-spec.gmk:$AUTOCONF_DIR/buildjdk-spec.gmk.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/buildjdk-spec.gmk:$AUTOCONF_DIR/buildjdk-spec.gmk.template])
   # The compare.sh is used to compare the build output to other builds.
-  AC_CONFIG_FILES([$OUTPUTDIR/compare.sh:$AUTOCONF_DIR/compare.sh.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/compare.sh:$AUTOCONF_DIR/compare.sh.template])
   # The generated Makefile knows where the spec.gmk is and where the source is.
   # You can run make from the OUTPUTDIR, or from the top-level Makefile
   # which will look for generated configurations
-  AC_CONFIG_FILES([$OUTPUTDIR/Makefile:$AUTOCONF_DIR/Makefile.in])
+  AC_CONFIG_FILES([$OUTPUTDIR/Makefile:$AUTOCONF_DIR/Makefile.template])
 ])
 
 ###############################################################################
@@ -110,6 +110,15 @@ AC_DEFUN_ONCE([JDKVER_SETUP_JDK_VERSION_NUMBERS],
       CHECK_VALUE: [UTIL_CHECK_STRING_NON_EMPTY_PRINTABLE])
   AC_SUBST(COMPANY_NAME)
 
+  # Set the JDK RC Company name
+  # Otherwise uses the value set for "vendor-name".
+  UTIL_ARG_WITH(NAME: jdk-rc-company-name, TYPE: string,
+      DEFAULT: $COMPANY_NAME,
+      DESC: [Set JDK RC company name. This is used for CompanyName properties of MS Windows binaries.],
+      DEFAULT_DESC: [from branding.conf],
+      CHECK_VALUE: [UTIL_CHECK_STRING_NON_EMPTY_PRINTABLE])
+  AC_SUBST(JDK_RC_COMPANY_NAME)
+
   # The vendor URL, if any
   # Only set VENDOR_URL if '--with-vendor-url' was used and is not empty.
   # Otherwise we will use the value from "branding.conf" included above.
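As a usage sketch (the value shown is hypothetical): `bash configure --with-jdk-rc-company-name="Example Corp"` overrides the CompanyName stamped into Windows binaries, while omitting the option falls back to COMPANY_NAME from branding.conf, per the DEFAULT and DEFAULT_DESC arguments above.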
@@ -191,6 +191,7 @@ PRODUCT_NAME := @PRODUCT_NAME@
 PRODUCT_SUFFIX := @PRODUCT_SUFFIX@
 JDK_RC_PLATFORM_NAME := @JDK_RC_PLATFORM_NAME@
 JDK_RC_NAME := @JDK_RC_NAME@
+JDK_RC_COMPANY_NAME:=@JDK_RC_COMPANY_NAME@
 COMPANY_NAME := @COMPANY_NAME@
 HOTSPOT_VM_DISTRO := @HOTSPOT_VM_DISTRO@
 MACOSX_BUNDLE_NAME_BASE := @MACOSX_BUNDLE_NAME_BASE@
@@ -98,7 +98,7 @@ GLOBAL_VERSION_INFO_RESOURCE := $(TOPDIR)/src/java.base/windows/native/common/ve
 
 JDK_RCFLAGS=$(RCFLAGS) \
     -D"JDK_VERSION_STRING=$(VERSION_STRING)" \
-    -D"JDK_COMPANY=$(COMPANY_NAME)" \
+    -D"JDK_COMPANY=$(JDK_RC_COMPANY_NAME)" \
    -D"JDK_VER=$(VERSION_NUMBER_FOUR_POSITIONS)" \
    -D"JDK_COPYRIGHT=Copyright \xA9 $(COPYRIGHT_YEAR)" \
    -D"JDK_NAME=$(JDK_RC_NAME) $(VERSION_SHORT)" \
@@ -112,7 +112,7 @@ define SetupBuildLauncherBody
   $1_PLIST_FILE := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/$1/Info.plist
 
   $$(eval $$(call SetupTextFileProcessing, BUILD_PLIST_$1, \
-      SOURCE_FILES := $(TOPDIR)/make/data/bundle/cmdline-Info.plist, \
+      SOURCE_FILES := $(TOPDIR)/make/data/bundle/cmdline-Info.plist.template, \
      OUTPUT_FILE := $$($1_PLIST_FILE), \
      REPLACEMENTS := \
          @@ID@@ => $(MACOSX_BUNDLE_ID_BASE).$1 ; \
@@ -1206,7 +1206,7 @@ var getJibProfilesDependencies = function (input, common) {
 
         jcov: {
             organization: common.organization,
-            revision: "3.0-15-jdk-asm+1.0",
+            revision: "3.0-16-jdk-asm+1.0",
             ext: "zip",
             environment_name: "JCOV_HOME",
         },
@@ -48,7 +48,7 @@ $(eval $(call IncludeCustomExtension, hotspot/gensrc/GenerateSources.gmk))
 
 # Setup the hotspot launcher script for developer use
 $(eval $(call SetupTextFileProcessing, CREATE_HOTSPOT_LAUNCHER, \
-    SOURCE_FILES := $(TOPDIR)/make/scripts/hotspot.sh, \
+    SOURCE_FILES := $(TOPDIR)/make/scripts/hotspot.sh.template, \
    OUTPUT_FILE := $(JVM_OUTPUTDIR)/hotspot, \
    REPLACEMENTS := \
        @@LIBARCH@@ => $(OPENJDK_TARGET_CPU_LEGACY_LIB) ; \
@@ -245,7 +245,7 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64)+$(INCLUDE_COMPILER2)
     TOOLCHAIN := TOOLCHAIN_LINK_CXX, \
     OPTIMIZATION := HIGH, \
     CFLAGS := $(CFLAGS_JDKLIB), \
-    CXXFLAGS := $(CXXFLAGS_JDKLIB), \
+    CXXFLAGS := $(CXXFLAGS_JDKLIB) -std=c++17, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
     LIBS := $(LIBCXX), \
@@ -53,11 +53,10 @@ JMH_UNPACKED_DIR := $(MICROBENCHMARK_OUTPUT)/jmh_jars
 JMH_UNPACKED_JARS_DONE := $(JMH_UNPACKED_DIR)/_unpacked.marker
 
 # External dependencies
-JMH_COMPILE_JARS := $(JMH_CORE_JAR) $(JMH_GENERATOR_JAR)
+WHITEBOX_JAR := $(SUPPORT_OUTPUTDIR)/test/lib/wb.jar
+JMH_COMPILE_JARS := $(JMH_CORE_JAR) $(JMH_GENERATOR_JAR) $(WHITEBOX_JAR)
 JMH_RUNTIME_JARS := $(JMH_CORE_JAR) $(JMH_COMMONS_MATH_JAR) $(JMH_JOPT_SIMPLE_JAR)
 
-MICROBENCHMARK_CLASSPATH := $(call PathList, $(JMH_COMPILE_JARS))
-
 # Native dependencies
 MICROBENCHMARK_NATIVE_SRC_DIRS := $(MICROBENCHMARK_SRC)
 MICROBENCHMARK_NATIVE_OUTPUT := $(MICROBENCHMARK_OUTPUT)/native
@@ -92,24 +91,28 @@ $(eval $(call SetupJavaCompilation, BUILD_INDIFY, \
 $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \
     TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \
     SMALL_JAVA := false, \
-    CLASSPATH := $(MICROBENCHMARK_CLASSPATH), \
-    DISABLED_WARNINGS := restricted this-escape processing rawtypes cast serial preview, \
+    CLASSPATH := $(JMH_COMPILE_JARS), \
+    DISABLED_WARNINGS := restricted this-escape processing rawtypes cast \
+        serial preview, \
     SRC := $(MICROBENCHMARK_SRC), \
     BIN := $(MICROBENCHMARK_CLASSES), \
-    JAVAC_FLAGS := --add-exports java.base/sun.security.util=ALL-UNNAMED \
-        --add-exports java.base/sun.invoke.util=ALL-UNNAMED \
+    JAVAC_FLAGS := \
         --add-exports java.base/jdk.internal.classfile.impl=ALL-UNNAMED \
-        --add-exports java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED \
-        --add-exports java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED \
-        --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
-        --add-exports java.base/jdk.internal.misc=ALL-UNNAMED \
         --add-exports java.base/jdk.internal.event=ALL-UNNAMED \
         --add-exports java.base/jdk.internal.foreign=ALL-UNNAMED \
+        --add-exports java.base/jdk.internal.misc=ALL-UNNAMED \
+        --add-exports java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED \
+        --add-exports java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED \
+        --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
+        --add-exports java.base/sun.invoke.util=ALL-UNNAMED \
+        --add-exports java.base/sun.security.util=ALL-UNNAMED \
         --enable-preview \
         -processor org.openjdk.jmh.generators.BenchmarkProcessor, \
-    JAVA_FLAGS := --add-modules jdk.unsupported --limit-modules java.management \
+    JAVA_FLAGS := \
         --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
-        --enable-preview, \
+        --add-modules jdk.unsupported \
+        --enable-preview \
+        --limit-modules java.management, \
 ))
 
 $(BUILD_JDK_MICROBENCHMARK): $(JMH_COMPILE_JARS)
@@ -23,12 +23,22 @@
 # questions.
 #
 
+################################################################################
+# This file builds the Java components of testlib.
+# It also covers the test-image part, where the built files are copied to the
+# test image.
+################################################################################
+
 default: all
 
 include $(SPEC)
 include MakeBase.gmk
 include JavaCompilation.gmk
 
+################################################################################
+# Targets for building the test lib jars
+################################################################################
+
 TARGETS :=
 
 TEST_LIB_SOURCE_DIR := $(TOPDIR)/test/lib
@@ -63,8 +73,21 @@ $(eval $(call SetupJavaCompilation, BUILD_TEST_LIB_JAR, \
 
 TARGETS += $(BUILD_TEST_LIB_JAR)
 
-##########################################################################################
+build-test-lib: $(TARGETS)
 
-all: $(TARGETS)
+################################################################################
+# Targets for building test-image.
+################################################################################
 
-.PHONY: default all
+# Copy the jars to the test image.
+$(eval $(call SetupCopyFiles, COPY_LIBTEST_JARS, \
+    DEST := $(TEST_IMAGE_DIR)/lib-test, \
+    FILES := $(BUILD_WB_JAR_JAR) $(BUILD_TEST_LIB_JAR_JAR), \
+))
+#
+
+test-image-lib: $(COPY_LIBTEST_JARS)
+
+all: build-test-lib
+
+.PHONY: default all build-test-lib test-image-lib
@@ -193,4 +193,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_AARCH64_MATCHER_AARCH64_HPP
@@ -303,15 +303,19 @@ void InterpreterMacroAssembler::load_field_entry(Register cache, Register index,
 }
 
 void InterpreterMacroAssembler::load_method_entry(Register cache, Register index, int bcp_offset) {
+  assert_different_registers(cache, index);
+
   // Get index out of bytecode pointer
   get_index_at_bcp(index, bcp_offset, cache /* as tmp */, sizeof(u2));
+
+  // sizeof(ResolvedMethodEntry) is not a power of 2 on Arm, so can't use shift
   mov(cache, sizeof(ResolvedMethodEntry));
   mul(index, index, cache); // Scale the index to be the entry index * sizeof(ResolvedMethodEntry)
 
   // load constant pool cache pointer
   ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));
   // Get address of method entries array
-  ldr(cache, Address(cache, ConstantPoolCache::method_entries_offset()));
+  ldr(cache, Address(cache, in_bytes(ConstantPoolCache::method_entries_offset())));
   add(cache, cache, Array<ResolvedMethodEntry>::base_offset_in_bytes());
   add(cache, cache, index);
 }
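The new comment above is the heart of this hunk: an index can be scaled to a byte offset with a single shift only when the element size is a power of two, which sizeof(ResolvedMethodEntry) is not on Arm, hence the explicit mov/mul pair. A generic C++ sketch of the two scaling strategies (illustration only, not HotSpot code; `__builtin_ctzll` is a GCC/Clang builtin):

```cpp
#include <cstddef>
#include <cstdint>

// Scale an entry index to a byte offset. A power-of-two element size can be
// folded into a shift; anything else needs a real multiply, as in
// load_method_entry() above.
template <typename Entry>
uintptr_t entry_byte_offset(size_t index) {
  constexpr size_t size = sizeof(Entry);
  if constexpr ((size & (size - 1)) == 0) {
    return index << __builtin_ctzll(size);  // size == 2^k: single shift
  } else {
    return index * size;                    // general case: multiply
  }
}
```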
@@ -186,4 +186,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_ARM_MATCHER_ARM_HPP
@@ -370,16 +370,15 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
   if (index_size == sizeof(u4)) {
     __ load_resolved_indy_entry(Rcache, Rindex);
     __ ldrh(Rcache, Address(Rcache, in_bytes(ResolvedIndyEntry::num_parameters_offset())));
-    __ check_stack_top();
-    __ add(Rstack_top, Rstack_top, AsmOperand(Rcache, lsl, Interpreter::logStackElementSize));
   } else {
     // Pop N words from the stack
     assert(index_size == sizeof(u2), "Can only be u2");
     __ load_method_entry(Rcache, Rindex);
-    __ ldrh(Rcache, Address(Rcache, in_bytes(ResolvedIndyEntry::num_parameters_offset())));
+    __ ldrh(Rcache, Address(Rcache, in_bytes(ResolvedMethodEntry::num_parameters_offset())));
+  }
+
+  __ check_stack_top();
+  __ add(Rstack_top, Rstack_top, AsmOperand(Rcache, lsl, Interpreter::logStackElementSize));
-  }
 
   __ convert_retval_to_tos(state);
 
@@ -3666,15 +3666,15 @@ void TemplateTable::prepare_invoke(Register Rcache, Register recv) {
   // load receiver if needed (after extra argument is pushed so parameter size is correct)
   if (load_receiver) {
     __ ldrh(recv, Address(Rcache, in_bytes(ResolvedMethodEntry::num_parameters_offset())));
-    Address recv_addr = __ receiver_argument_address(Rstack_top, Rtemp, recv);
-    __ ldr(recv, recv_addr);
+    __ add(recv, Rstack_top, AsmOperand(recv, lsl, Interpreter::logStackElementSize));
+    __ ldr(recv, Address(recv, -Interpreter::stackElementSize));
     __ verify_oop(recv);
   }
 
   // load return address
   { const address table = (address) Interpreter::invoke_return_entry_table_for(code);
-    __ mov_slow(Rtemp, table);
-    __ ldr(LR, Address::indexed_ptr(Rtemp, ret_type));
+    __ mov_slow(LR, table);
+    __ ldr(LR, Address::indexed_ptr(LR, ret_type));
   }
 }
 
@@ -3744,10 +3744,13 @@ void TemplateTable::invokevirtual(int byte_no) {
 void TemplateTable::invokespecial(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
 
   const Register Rrecv = R2_tmp;
-  load_resolved_method_entry_special_or_static(R2_tmp,  // ResolvedMethodEntry*
+  const Register Rflags = R3_tmp;
+
+  load_resolved_method_entry_special_or_static(Rrecv,   // ResolvedMethodEntry*
                                                Rmethod, // Method*
-                                               R3_tmp); // Flags
+                                               Rflags); // Flags
   prepare_invoke(Rrecv, Rrecv);
   __ verify_oop(Rrecv);
   __ null_check(Rrecv, Rtemp);
@@ -3760,12 +3763,16 @@ void TemplateTable::invokespecial(int byte_no) {
 void TemplateTable::invokestatic(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
-  load_resolved_method_entry_special_or_static(R2_tmp,  // ResolvedMethodEntry*
+
+  const Register Rrecv = R2_tmp;
+  const Register Rflags = R3_tmp;
+
+  load_resolved_method_entry_special_or_static(Rrecv,   // ResolvedMethodEntry*
                                                Rmethod, // Method*
-                                               R3_tmp); // Flags
-  prepare_invoke(R2_tmp, R2_tmp);
+                                               Rflags); // Flags
+  prepare_invoke(Rrecv, Rrecv);
   // do the call
-  __ profile_call(R2_tmp);
+  __ profile_call(Rrecv);
   __ jump_from_interpreted(Rmethod);
 }
 
@@ -3788,10 +3795,10 @@ void TemplateTable::invokeinterface(int byte_no) {
   const Register Rflags = R3_tmp;
   const Register Rklass = R2_tmp; // Note! Same register with Rrecv
 
-  load_resolved_method_entry_interface(R2_tmp,  // ResolvedMethodEntry*
-                                       R1_tmp,  // Klass*
+  load_resolved_method_entry_interface(Rrecv,   // ResolvedMethodEntry*
+                                       Rinterf, // Klass*
                                        Rmethod, // Method* or itable/vtable index
-                                       R3_tmp); // Flags
+                                       Rflags); // Flags
   prepare_invoke(Rrecv, Rrecv);
 
   // First check for Object case, then private interface method,
@@ -195,4 +195,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_PPC_MATCHER_PPC_HPP
@@ -1459,6 +1459,112 @@ void C2_MacroAssembler::string_equals(Register a1, Register a2,
   BLOCK_COMMENT("} string_equals");
 }
 
+// jdk.internal.util.ArraysSupport.vectorizedHashCode
+void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register result,
+                                        Register tmp1, Register tmp2, Register tmp3,
+                                        Register tmp4, Register tmp5, Register tmp6,
+                                        BasicType eltype)
+{
+  assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, t0, t1);
+
+  const int elsize = arrays_hashcode_elsize(eltype);
+  const int chunks_end_shift = exact_log2(elsize);
+
+  switch (eltype) {
+  case T_BOOLEAN: BLOCK_COMMENT("arrays_hashcode(unsigned byte) {"); break;
+  case T_CHAR:    BLOCK_COMMENT("arrays_hashcode(char) {");          break;
+  case T_BYTE:    BLOCK_COMMENT("arrays_hashcode(byte) {");          break;
+  case T_SHORT:   BLOCK_COMMENT("arrays_hashcode(short) {");         break;
+  case T_INT:     BLOCK_COMMENT("arrays_hashcode(int) {");           break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  const int stride = 4;
+  const Register pow31_4 = tmp1;
+  const Register pow31_3 = tmp2;
+  const Register pow31_2 = tmp3;
+  const Register chunks  = tmp4;
+  const Register chunks_end = chunks;
+
+  Label DONE, TAIL, TAIL_LOOP, WIDE_LOOP;
+
+  // result has a value initially
+
+  beqz(cnt, DONE);
+
+  andi(chunks, cnt, ~(stride-1));
+  beqz(chunks, TAIL);
+
+  mv(pow31_4, 923521);           // [31^^4]
+  mv(pow31_3, 29791);            // [31^^3]
+  mv(pow31_2, 961);              // [31^^2]
+
+  slli(chunks_end, chunks, chunks_end_shift);
+  add(chunks_end, ary, chunks_end);
+  andi(cnt, cnt, stride-1);      // don't forget about tail!
+
+  bind(WIDE_LOOP);
+  mulw(result, result, pow31_4); // 31^^4 * h
+  arrays_hashcode_elload(t0,   Address(ary, 0 * elsize), eltype);
+  arrays_hashcode_elload(t1,   Address(ary, 1 * elsize), eltype);
+  arrays_hashcode_elload(tmp5, Address(ary, 2 * elsize), eltype);
+  arrays_hashcode_elload(tmp6, Address(ary, 3 * elsize), eltype);
+  mulw(t0, t0, pow31_3);         // 31^^3 * ary[i+0]
+  addw(result, result, t0);
+  mulw(t1, t1, pow31_2);         // 31^^2 * ary[i+1]
+  addw(result, result, t1);
+  slli(t0, tmp5, 5);             // optimize 31^^1 * ary[i+2]
+  subw(tmp5, t0, tmp5);          // with ary[i+2]<<5 - ary[i+2]
+  addw(result, result, tmp5);
+  addw(result, result, tmp6);    // 31^^4 * h + 31^^3 * ary[i+0] + 31^^2 * ary[i+1]
+                                 //           + 31^^1 * ary[i+2] + 31^^0 * ary[i+3]
+  addi(ary, ary, elsize * stride);
+  bne(ary, chunks_end, WIDE_LOOP);
+  beqz(cnt, DONE);
+
+  bind(TAIL);
+  slli(chunks_end, cnt, chunks_end_shift);
+  add(chunks_end, ary, chunks_end);
+
+  bind(TAIL_LOOP);
+  arrays_hashcode_elload(t0, Address(ary), eltype);
+  slli(t1, result, 5);           // optimize 31 * result
+  subw(result, t1, result);      // with result<<5 - result
+  addw(result, result, t0);
+  addi(ary, ary, elsize);
+  bne(ary, chunks_end, TAIL_LOOP);
+
+  bind(DONE);
+  BLOCK_COMMENT("} // arrays_hashcode");
+}
+
+int C2_MacroAssembler::arrays_hashcode_elsize(BasicType eltype) {
+  switch (eltype) {
+  case T_BOOLEAN: return sizeof(jboolean);
+  case T_BYTE:    return sizeof(jbyte);
+  case T_SHORT:   return sizeof(jshort);
+  case T_CHAR:    return sizeof(jchar);
+  case T_INT:     return sizeof(jint);
+  default:
+    ShouldNotReachHere();
+    return -1;
+  }
+}
+
+void C2_MacroAssembler::arrays_hashcode_elload(Register dst, Address src, BasicType eltype) {
+  switch (eltype) {
+  // T_BOOLEAN used as surrogate for unsigned byte
+  case T_BOOLEAN: lbu(dst, src); break;
+  case T_BYTE:    lb(dst, src);  break;
+  case T_SHORT:   lh(dst, src);  break;
+  case T_CHAR:    lhu(dst, src); break;
+  case T_INT:     lw(dst, src);  break;
+  default:
+    ShouldNotReachHere();
+  }
+}
+
 typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far);
 typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label,
                                                               bool is_far, bool is_unordered);
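The wide loop above evaluates four steps of Java's polynomial array hash per iteration, using h = 31^4*h + 31^3*a[i] + 31^2*a[i+1] + 31*a[i+2] + a[i+3], with 923521 = 31^4, 29791 = 31^3, 961 = 31^2, and 31*x strength-reduced to (x << 5) - x. A scalar C++ model of the same recurrence, shown only to make the unrolling readable (not the intrinsic itself; unsigned arithmetic mimics Java's wrapping int):

```cpp
#include <cstddef>
#include <cstdint>

int32_t polynomial_hash(const int32_t* a, size_t n, int32_t initial) {
  uint32_t h = static_cast<uint32_t>(initial);
  size_t i = 0;
  for (; i + 4 <= n; i += 4) {  // mirrors WIDE_LOOP: 4 elements per pass
    uint32_t x2 = static_cast<uint32_t>(a[i + 2]);
    h = 923521u * h                               // 31^4 * h
      + 29791u * static_cast<uint32_t>(a[i])      // 31^3 * a[i]
      + 961u   * static_cast<uint32_t>(a[i + 1])  // 31^2 * a[i+1]
      + ((x2 << 5) - x2)                          // 31 * a[i+2]
      + static_cast<uint32_t>(a[i + 3]);          // a[i+3]
  }
  for (; i < n; i++) {          // mirrors TAIL_LOOP: h = 31*h + a[i]
    h = (h << 5) - h + static_cast<uint32_t>(a[i]);
  }
  return static_cast<int32_t>(h);
}
```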
@@ -82,6 +82,15 @@
                      Register result, Register cnt1,
                      int elem_size);
 
+  void arrays_hashcode(Register ary, Register cnt, Register result,
+                       Register tmp1, Register tmp2,
+                       Register tmp3, Register tmp4,
+                       Register tmp5, Register tmp6,
+                       BasicType eltype);
+  // helper function for arrays_hashcode
+  int arrays_hashcode_elsize(BasicType eltype);
+  void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
+
   void string_equals(Register r1, Register r2,
                      Register result, Register cnt1,
                      int elem_size);
@@ -192,4 +192,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_RISCV_MATCHER_RISCV_HPP
@@ -10371,6 +10371,26 @@ instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
   ins_pipe(pipe_class_memory);
 %}
 
+// fast ArraysSupport.vectorizedHashCode
+instruct arrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI basic_type,
+                         iRegLNoSp tmp1, iRegLNoSp tmp2,
+                         iRegLNoSp tmp3, iRegLNoSp tmp4,
+                         iRegLNoSp tmp5, iRegLNoSp tmp6, rFlagsReg cr)
+%{
+  match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
+         USE_KILL ary, USE_KILL cnt, USE basic_type, KILL cr);
+
+  format %{ "Array HashCode array[] $ary,$cnt,$result,$basic_type -> $result // KILL all" %}
+  ins_encode %{
+    __ arrays_hashcode($ary$$Register, $cnt$$Register, $result$$Register,
+                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                       $tmp4$$Register, $tmp5$$Register, $tmp6$$Register,
+                       (BasicType)$basic_type$$constant);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
 // ============================================================================
 // Safepoint Instructions
 
@@ -315,6 +315,10 @@ void VM_Version::c2_initialize() {
     }
   }
 
+  if (FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
+    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
+  }
+
   if (!UseZicbop) {
     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
       warning("Zicbop is not available on this CPU");
@@ -184,4 +184,9 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    return false;
+  }
+
 #endif // CPU_S390_MATCHER_S390_HPP
@@ -920,6 +920,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
     case 0x11: // movups
     case 0x12: // movlps
     case 0x28: // movaps
+    case 0x29: // movaps
     case 0x2E: // ucomiss
     case 0x2F: // comiss
     case 0x54: // andps
@@ -969,7 +970,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
       assert(which == call32_operand, "jcc has no disp32 or imm");
       return ip;
     default:
-      ShouldNotReachHere();
+      fatal("not handled: 0x0F%2X", 0xFF & *(ip-1));
     }
     break;
 
@@ -248,4 +248,17 @@
     }
   }
 
+  // Is SIMD sort supported for this CPU?
+  static bool supports_simd_sort(BasicType bt) {
+    if (VM_Version::supports_avx512dq()) {
+      return true;
+    }
+    else if (VM_Version::supports_avx2() && !is_double_word_type(bt)) {
+      return true;
+    }
+    else {
+      return false;
+    }
+  }
+
 #endif // CPU_X86_MATCHER_X86_HPP
@@ -4193,22 +4193,23 @@ void StubGenerator::generate_compiler_stubs() {
         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
   }
 
-  // Load x86_64_sort library on supported hardware to enable avx512 sort and partition intrinsics
-  if (VM_Version::is_intel() && VM_Version::supports_avx512dq()) {
+  // Load x86_64_sort library on supported hardware to enable SIMD sort and partition intrinsics
+  if (VM_Version::is_intel() && (VM_Version::supports_avx512dq() || VM_Version::supports_avx2())) {
     void *libsimdsort = nullptr;
     char ebuf_[1024];
     char dll_name_simd_sort[JVM_MAXPATHLEN];
     if (os::dll_locate_lib(dll_name_simd_sort, sizeof(dll_name_simd_sort), Arguments::get_dll_dir(), "simdsort")) {
       libsimdsort = os::dll_load(dll_name_simd_sort, ebuf_, sizeof ebuf_);
     }
-    // Get addresses for avx512 sort and partition routines
+    // Get addresses for SIMD sort and partition routines
     if (libsimdsort != nullptr) {
       log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "simdsort" JNI_LIB_SUFFIX, p2i(libsimdsort));
 
-      snprintf(ebuf_, sizeof(ebuf_), "avx512_sort");
+      snprintf(ebuf_, sizeof(ebuf_), VM_Version::supports_avx512dq() ? "avx512_sort" : "avx2_sort");
       StubRoutines::_array_sort = (address)os::dll_lookup(libsimdsort, ebuf_);
 
-      snprintf(ebuf_, sizeof(ebuf_), "avx512_partition");
+      snprintf(ebuf_, sizeof(ebuf_), VM_Version::supports_avx512dq() ? "avx512_partition" : "avx2_partition");
       StubRoutines::_array_partition = (address)os::dll_lookup(libsimdsort, ebuf_);
     }
   }
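The stub generator resolves the sort and partition entry points from one simdsort library, choosing the symbol by CPU feature at startup. The same pattern in plain POSIX terms, as a hedged sketch (the real entry-point signature does not appear in this diff, so the typedef below is an assumption):

```cpp
#include <dlfcn.h>

// Hypothetical stand-in for the real simdsort entry-point signature.
typedef void (*sort_stub_t)(void* array, int elem_type, int from, int to);

sort_stub_t resolve_sort_stub(bool has_avx512dq) {
  // One library ships both code paths; the symbol name picks one.
  void* lib = dlopen("libsimdsort.so", RTLD_NOW);
  if (lib == nullptr) {
    return nullptr;  // no library: the VM keeps its portable sort
  }
  const char* sym = has_avx512dq ? "avx512_sort" : "avx2_sort";
  return reinterpret_cast<sort_stub_t>(dlsym(lib, sym));
}
```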
@@ -858,7 +858,7 @@ void VM_Version::get_processor_features() {
 
   // Check if processor has Intel Ecore
   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
-    (_model == 0x97 || _model == 0xAC || _model == 0xAF)) {
+    (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
   }
 
@@ -1130,6 +1130,7 @@ void VM_Version::get_processor_features() {
     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
   }
 
+#ifdef _LP64
   // ChaCha20 Intrinsics
   // As long as the system supports AVX as a baseline we can do a
   // SIMD-enabled block function. StubGenerator makes the determination
@@ -1145,6 +1146,13 @@ void VM_Version::get_processor_features() {
     }
     FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
   }
+#else
+  // No support currently for ChaCha20 intrinsics on 32-bit platforms
+  if (UseChaCha20Intrinsics) {
+    warning("ChaCha20 intrinsics are not available on this CPU.");
+    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
+  }
+#endif // _LP64
 
   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
   if (UseAVX >= 2) {
@@ -52,7 +52,7 @@ public:
   static void initialize() NOT_CDS_RETURN;
   static void check_system_property(const char* key, const char* value) NOT_CDS_RETURN;
   static void check_unsupported_dumping_properties() NOT_CDS_RETURN;
-  static bool check_vm_args_consistency(bool patch_mod_javabase, bool mode_flag_cmd_line) NOT_CDS_RETURN_(false);
+  static bool check_vm_args_consistency(bool patch_mod_javabase, bool mode_flag_cmd_line) NOT_CDS_RETURN_(true);
 
   // Basic CDS features
   static bool is_dumping_archive() { return is_dumping_static_archive() || is_dumping_dynamic_archive(); }
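The one-character change matters because NOT_CDS_RETURN_ supplies the entire function body when CDS is compiled out. A simplified reconstruction of the macro pair from HotSpot's utilities/macros.hpp (abridged from memory, shown only to make the (false) to (true) flip readable):

```cpp
#if INCLUDE_CDS
// With CDS compiled in, the real body lives in the .cpp file and the
// macro expands to nothing (the declaration just ends with ';').
#define NOT_CDS_RETURN_(code)
#else
// Without CDS the macro is the body, so its argument becomes the return
// value: check_vm_args_consistency() now reports success, not failure,
// in builds that exclude CDS.
#define NOT_CDS_RETURN_(code) { return code; }
#endif
```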
@@ -129,7 +129,23 @@ CDSHeapVerifier::CDSHeapVerifier() : _archived_objs(0), _problems(0)
   // This just points to an empty Map
   ADD_EXCL("jdk/internal/reflect/Reflection", "methodFilterMap"); // E
   ADD_EXCL("jdk/internal/util/StaticProperty", "FILE_ENCODING", // C
-           "JAVA_LOCALE_USE_OLD_ISO_CODES"); // C
+           "JAVA_LOCALE_USE_OLD_ISO_CODES", // C
+           "USER_LANGUAGE", // C
+           "USER_LANGUAGE_DISPLAY", // C
+           "USER_LANGUAGE_FORMAT", // C
+           "USER_SCRIPT", // C
+           "USER_SCRIPT_DISPLAY", // C
+           "USER_SCRIPT_FORMAT", // C
+           "USER_COUNTRY", // C
+           "USER_COUNTRY_DISPLAY", // C
+           "USER_COUNTRY_FORMAT", // C
+           "USER_VARIANT", // C
+           "USER_VARIANT_DISPLAY", // C
+           "USER_VARIANT_FORMAT", // C
+           "USER_EXTENSIONS", // C
+           "USER_EXTENSIONS_DISPLAY", // C
+           "USER_EXTENSIONS_FORMAT", // C
+           "USER_REGION"); // C
 
   // Integer for 0 and 1 are in java/lang/Integer$IntegerCache and are archived
   ADD_EXCL("sun/invoke/util/ValueConversions", "ONE_INT", // E
@@ -1465,7 +1465,7 @@ BitMapView FileMapRegion::ptrmap_view() {
   return bitmap_view(false);
 }
 
-bool FileMapRegion::check_region_crc() const {
+bool FileMapRegion::check_region_crc(char* base) const {
   // This function should be called after the region has been properly
   // loaded into memory via FileMapInfo::map_region() or FileMapInfo::read_region().
   // I.e., this->mapped_base() must be valid.
@@ -1474,8 +1474,8 @@ bool FileMapRegion::check_region_crc() const {
     return true;
   }
 
-  assert(mapped_base() != nullptr, "must be initialized");
-  int crc = ClassLoader::crc32(0, mapped_base(), (jint)sz);
+  assert(base != nullptr, "must be initialized");
+  int crc = ClassLoader::crc32(0, base, (jint)sz);
   if (crc != this->crc()) {
     log_warning(cds)("Checksum verification failed.");
     return false;
@@ -1760,13 +1760,13 @@ bool FileMapInfo::read_region(int i, char* base, size_t size, bool do_commit) {
     return false;
   }
 
-  r->set_mapped_from_file(false);
-  r->set_mapped_base(base);
-
-  if (VerifySharedSpaces && !r->check_region_crc()) {
+  if (VerifySharedSpaces && !r->check_region_crc(base)) {
     return false;
   }
 
+  r->set_mapped_from_file(false);
+  r->set_mapped_base(base);
+
   return true;
 }
 
@@ -1803,6 +1803,7 @@ MapArchiveResult FileMapInfo::map_region(int i, intx addr_delta, char* mapped_ba
       return MAP_ARCHIVE_OTHER_FAILURE; // oom or I/O error.
     } else {
       assert(r->mapped_base() != nullptr, "must be initialized");
+      return MAP_ARCHIVE_SUCCESS;
     }
   } else {
     // Note that this may either be a "fresh" mapping into unreserved address
@@ -1817,16 +1818,17 @@ MapArchiveResult FileMapInfo::map_region(int i, intx addr_delta, char* mapped_ba
       _memory_mapping_failed = true;
       return MAP_ARCHIVE_MMAP_FAILURE;
     }
-    r->set_mapped_from_file(true);
-    r->set_mapped_base(requested_addr);
-  }
 
-    if (VerifySharedSpaces && !r->check_region_crc()) {
+    if (VerifySharedSpaces && !r->check_region_crc(requested_addr)) {
       return MAP_ARCHIVE_OTHER_FAILURE;
     }
 
+    r->set_mapped_from_file(true);
+    r->set_mapped_base(requested_addr);
+
     return MAP_ARCHIVE_SUCCESS;
   }
+}
 
 // The return value is the location of the archive relocation bitmap.
 char* FileMapInfo::map_bitmap_region() {
@@ -1843,8 +1845,7 @@ char* FileMapInfo::map_bitmap_region() {
     return nullptr;
   }
 
-  r->set_mapped_base(bitmap_base);
-  if (VerifySharedSpaces && !r->check_region_crc()) {
+  if (VerifySharedSpaces && !r->check_region_crc(bitmap_base)) {
     log_error(cds)("relocation bitmap CRC error");
     if (!os::unmap_memory(bitmap_base, r->used_aligned())) {
       fatal("os::unmap_memory of relocation bitmap failed");
@@ -1853,6 +1854,7 @@ char* FileMapInfo::map_bitmap_region() {
   }
 
   r->set_mapped_from_file(true);
+  r->set_mapped_base(bitmap_base);
   log_info(cds)("Mapped %s region #%d at base " INTPTR_FORMAT " top " INTPTR_FORMAT " (%s)",
                 is_static() ? "static " : "dynamic",
                 MetaspaceShared::bm, p2i(r->mapped_base()), p2i(r->mapped_end()),
@@ -2128,13 +2130,14 @@ bool FileMapInfo::map_heap_region_impl() {
     return false;
   }
 
-  r->set_mapped_base(base);
-  if (VerifySharedSpaces && !r->check_region_crc()) {
+  if (VerifySharedSpaces && !r->check_region_crc(base)) {
     dealloc_heap_region();
     log_info(cds)("UseSharedSpaces: mapped heap region is corrupt");
     return false;
   }
 
+  r->set_mapped_base(base);
+
   // If the requested range is different from the range allocated by GC, then
   // the pointers need to be patched.
   address mapped_start = (address) _mapped_heap_memregion.start();
@@ -170,7 +170,7 @@ public:
   BitMapView ptrmap_view();
   bool has_ptrmap() { return _ptrmap_size_in_bits != 0; }
 
-  bool check_region_crc() const;
+  bool check_region_crc(char* base) const;
   void print(outputStream* st, int region_index);
 };
 
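Taken together, the filemap changes in this compare follow one pattern: check_region_crc() now verifies the raw base pointer handed in by the caller, and set_mapped_base()/set_mapped_from_file() run only after the checksum passes, so a corrupt region is never observable as mapped. A condensed sketch of the resulting shape (Region and its methods are hypothetical stand-ins, not the FileMapRegion API):

```cpp
struct Region {
  bool check_crc(const char* base) const;  // hypothetical stand-ins
  void set_mapped_base(char* base);
  void set_mapped_from_file(bool v);
};

bool load_region_checked(Region* r, char* base, bool verify) {
  if (verify && !r->check_crc(base)) {
    return false;                // fail before the region is published
  }
  r->set_mapped_from_file(true);
  r->set_mapped_base(base);      // publish only after the checksum passed
  return true;
}
```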
@@ -175,7 +175,7 @@ G1ConcurrentRefine::G1ConcurrentRefine(G1Policy* policy) :
 {}
 
 jint G1ConcurrentRefine::initialize() {
-  return _thread_control.initialize(this, max_num_threads());
+  return _thread_control.initialize(this, G1ConcRefinementThreads);
 }
 
 G1ConcurrentRefine* G1ConcurrentRefine::create(G1Policy* policy, jint* ecode) {
@@ -199,10 +199,6 @@ void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
   _thread_control.worker_threads_do(tc);
 }
 
-uint G1ConcurrentRefine::max_num_threads() {
-  return G1ConcRefinementThreads;
-}
-
 void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms,
                                                      size_t processed_logged_cards,
                                                      size_t predicted_thread_buffer_cards,
@@ -215,9 +215,6 @@ public:
 
   // Iterate over all concurrent refinement threads applying the given closure.
   void threads_do(ThreadClosure *tc);
-
-  // Maximum number of refinement threads.
-  static uint max_num_threads();
 };
 
 #endif // SHARE_GC_G1_G1CONCURRENTREFINE_HPP
@@ -81,7 +81,7 @@ void G1FromCardCache::print(outputStream* out) {
 #endif
 
 uint G1FromCardCache::num_par_rem_sets() {
-  return G1DirtyCardQueueSet::num_par_ids() + G1ConcurrentRefine::max_num_threads() + MAX2(ConcGCThreads, ParallelGCThreads);
+  return G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads + MAX2(ConcGCThreads, ParallelGCThreads);
 }
 
 void G1FromCardCache::clear(uint region_idx) {
@@ -91,11 +91,6 @@ class G1RemSetScanState : public CHeapObj<mtGC> {
 
   size_t _max_reserved_regions;
 
-  // Has this region that is part of the regions in the collection set been processed yet.
-  typedef bool G1RemsetIterState;
-
-  G1RemsetIterState volatile* _collection_set_iter_state;
-
   // Card table iteration claim for each heap region, from 0 (completely unscanned)
   // to (>=) HeapRegion::CardsPerRegion (completely scanned).
   uint volatile* _card_table_scan_state;
@@ -67,7 +67,7 @@ double G1RemSetSummary::rs_thread_vtime(uint thread) const {
 }
 
 G1RemSetSummary::G1RemSetSummary(bool should_update) :
-  _num_vtimes(G1ConcurrentRefine::max_num_threads()),
+  _num_vtimes(G1ConcRefinementThreads),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)) {
 
   memset(_rs_threads_vtimes, 0, sizeof(double) * _num_vtimes);
@@ -38,18 +38,16 @@ bool G1RemSetTrackingPolicy::needs_scan_for_rebuild(HeapRegion* r) const {
 }
 
 void G1RemSetTrackingPolicy::update_at_allocate(HeapRegion* r) {
-  if (r->is_young()) {
-    // Always collect remembered set for young regions.
-    r->rem_set()->set_state_complete();
-  } else if (r->is_humongous()) {
-    // Collect remembered sets for humongous regions by default to allow eager reclaim.
-    r->rem_set()->set_state_complete();
-  } else if (r->is_old()) {
+  assert(r->is_young() || r->is_humongous() || r->is_old(),
+         "Region %u with unexpected heap region type %s", r->hrm_index(), r->get_type_str());
+  if (r->is_old()) {
     // By default, do not create remembered set for new old regions.
     r->rem_set()->set_state_untracked();
-  } else {
-    guarantee(false, "Unhandled region %u with heap region type %s", r->hrm_index(), r->get_type_str());
+    return;
   }
+  // Always collect remembered set for young regions and for humongous regions.
+  // Humongous regions need that for eager reclaim.
+  r->rem_set()->set_state_complete();
 }
 
 void G1RemSetTrackingPolicy::update_at_free(HeapRegion* r) {
@@ -117,7 +117,7 @@
       "Confidence level for MMU/pause predictions") \
       range(0, 100) \
       \
-  product(intx, G1SummarizeRSetStatsPeriod, 0, DIAGNOSTIC, \
+  product(uintx, G1SummarizeRSetStatsPeriod, 0, DIAGNOSTIC, \
       "The period (in number of GCs) at which we will generate " \
       "update buffer processing info " \
      "(0 means do not periodically generate this info); " \
@@ -148,7 +148,7 @@
      "Number of entries in an SATB log buffer.") \
      constraint(G1SATBBufferSizeConstraintFunc, AtParse) \
      \
-  develop(intx, G1SATBProcessCompletedThreshold, 20, \
+  develop(uintx, G1SATBProcessCompletedThreshold, 20, \
      "Number of completed buffers that triggers log processing.") \
      range(0, max_jint) \
      \
@@ -344,17 +344,11 @@ class AdaptiveSizePolicy : public CHeapObj<mtGC> {
   AdaptiveWeightedAverage* avg_eden_live() const { return _avg_eden_live; }
   AdaptiveWeightedAverage* avg_old_live() const { return _avg_old_live; }

-  AdaptivePaddedAverage* avg_survived() const { return _avg_survived; }
-  AdaptivePaddedNoZeroDevAverage* avg_pretenured() { return _avg_pretenured; }
-
   // Methods indicating events of interest to the adaptive size policy,
   // called by GC algorithms. It is the responsibility of users of this
   // policy to call these methods at the correct times!
   virtual void minor_collection_begin();
   virtual void minor_collection_end(GCCause::Cause gc_cause);
-  virtual LinearLeastSquareFit* minor_pause_old_estimator() const {
-    return _minor_pause_old_estimator;
-  }

   LinearLeastSquareFit* minor_pause_young_estimator() {
     return _minor_pause_young_estimator;
@@ -404,10 +398,6 @@ class AdaptiveSizePolicy : public CHeapObj<mtGC> {
     _overhead_checker.set_gc_overhead_limit_exceeded(v);
   }

-  bool gc_overhead_limit_near() {
-    return _overhead_checker.gc_overhead_limit_near();
-  }
-
   void reset_gc_overhead_limit_count() {
     _overhead_checker.reset_gc_overhead_limit_count();
   }
@@ -105,13 +105,15 @@ static void commit(HelperType& helper) {
   assert(thread != nullptr, "invariant");
   if (thread->is_Java_thread()) {
     JavaThread* jt = JavaThread::cast(thread);
-    if (jt->thread_state() != _thread_in_vm) {
-      assert(jt->thread_state() == _thread_in_native, "invariant");
+    if (jt->thread_state() == _thread_in_native) {
       // For a JavaThread to take a JFR stacktrace, it must be in _thread_in_vm. Can safepoint here.
       ThreadInVMfromNative transition(jt);
       event.commit();
       return;
     }
+    // If a thread comes here still _thread_in_Java, which can happen for example
+    // when loading the disassembler library in response to traps in JIT code - all is ok.
+    // Since there is no ljf, an event will be committed without a stacktrace.
   }
   event.commit();
 }
@@ -53,8 +53,8 @@
 // * store_at: Store a value in an internal pointer relative to a base object.
 // * atomic_cmpxchg: Atomically compare-and-swap a new value at an address if previous value matched the compared value.
 // * atomic_cmpxchg_at: Atomically compare-and-swap a new value at an internal pointer address if previous value matched the compared value.
-// * atomic_xchg: Atomically swap a new value at an address if previous value matched the compared value.
-// * atomic_xchg_at: Atomically swap a new value at an internal pointer address if previous value matched the compared value.
+// * atomic_xchg: Atomically swap a new value at an address without checking the previous value.
+// * atomic_xchg_at: Atomically swap a new value at an internal pointer address without checking the previous value.
 // * arraycopy: Copy data from one heap array to another heap array. The ArrayAccess class has convenience functions for this.
 // * clone: Clone the contents of an object to a newly allocated object.
 //
@@ -83,12 +83,11 @@
 // and whether the access is performed on the heap or outside. Then the
 // appropriate BarrierSet::AccessBarrier is called to perform the access.
 //
-// The implementation of step 1-4 resides in in accessBackend.hpp, to allow selected
+// The implementation of step 1-4 resides in accessBackend.hpp, to allow selected
 // accesses to be accessible from only access.hpp, as opposed to access.inline.hpp.
 // Steps 5.a and 5.b require knowledge about the GC backends, and therefore needs to
 // include the various GC backend .inline.hpp headers. Their implementation resides in
-// access.inline.hpp. The accesses that are allowed through the access.hpp file
-// must be instantiated in access.cpp using the INSTANTIATE_HPP_ACCESS macro.
+// access.inline.hpp.

 template <DecoratorSet decorators = DECORATORS_NONE>
 class Access: public AllStatic {
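The comment fix above is purely about documented semantics: atomic_xchg stores unconditionally and returns the previous value, while atomic_cmpxchg stores only when the current value matches. A minimal standalone sketch of that difference using plain std::atomic (not the HotSpot Access API):

    #include <atomic>
    #include <cassert>

    int main() {
      std::atomic<int> v{1};

      // xchg: store unconditionally, return the previous value.
      int prev = v.exchange(2);
      assert(prev == 1 && v.load() == 2);

      // cmpxchg: store only if the current value equals 'expected'.
      int expected = 99;
      bool swapped = v.compare_exchange_strong(expected, 3);
      assert(!swapped && v.load() == 2);  // mismatch: nothing stored

      expected = 2;
      swapped = v.compare_exchange_strong(expected, 3);
      assert(swapped && v.load() == 3);   // match: store performed
      return 0;
    }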
@@ -365,10 +365,10 @@
       "Level of detail of the ideal graph printout. " \
       "System-wide value, -1=printing is disabled, " \
       "0=print nothing except IGVPrintLevel directives, " \
-      "5=all details printed. " \
+      "6=all details printed. " \
       "Level of detail of printouts can be set on a per-method level " \
       "as well by using CompileCommand=option.") \
-      range(-1, 5) \
+      range(-1, 6) \
       \
   notproduct(intx, PrintIdealGraphPort, 4444, \
       "Ideal graph printer to network port") \
@@ -1041,6 +1041,10 @@ void Compile::Init(bool aliasing) {
   Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
   set_decompile_count(0);

+#ifndef PRODUCT
+  Copy::zero_to_bytes(_igv_phase_iter, sizeof(_igv_phase_iter));
+#endif
+
   set_do_freq_based_layout(_directive->BlockLayoutByFrequencyOption);
   _loop_opts_cnt = LoopOptsCount;
   set_do_inlining(Inline);
@@ -2397,6 +2401,7 @@ void Compile::Optimize() {
   if (failing()) return;

   // Conditional Constant Propagation;
+  print_method(PHASE_BEFORE_CCP1, 2);
   PhaseCCP ccp( &igvn );
   assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
   {
@@ -2972,6 +2977,8 @@ void Compile::Code_Gen() {
     if (failing()) {
       return;
     }
+
+    print_method(PHASE_REGISTER_ALLOCATION, 2);
   }

   // Prior to register allocation we kept empty basic blocks in case the
@@ -2989,6 +2996,7 @@ void Compile::Code_Gen() {
     cfg.fixup_flow();
     cfg.remove_unreachable_blocks();
     cfg.verify_dominator_tree();
+    print_method(PHASE_BLOCK_ORDERING, 3);
   }

   // Apply peephole optimizations
@@ -2996,12 +3004,14 @@ void Compile::Code_Gen() {
     TracePhase tp("peephole", &timers[_t_peephole]);
     PhasePeephole peep( _regalloc, cfg);
     peep.do_transform();
+    print_method(PHASE_PEEPHOLE, 3);
   }

   // Do late expand if CPU requires this.
   if (Matcher::require_postalloc_expand) {
     TracePhase tp("postalloc_expand", &timers[_t_postalloc_expand]);
     cfg.postalloc_expand(_regalloc);
+    print_method(PHASE_POSTALLOC_EXPAND, 3);
   }

   // Convert Nodes to instruction bits in a buffer
@@ -5102,6 +5112,10 @@ void Compile::print_method(CompilerPhaseType cpt, int level, Node* n) {
   ResourceMark rm;
   stringStream ss;
   ss.print_raw(CompilerPhaseTypeHelper::to_description(cpt));
+  int iter = ++_igv_phase_iter[cpt];
+  if (iter > 1) {
+    ss.print(" %d", iter);
+  }
   if (n != nullptr) {
     ss.print(": %d %s ", n->_idx, NodeClassNames[n->Opcode()]);
   }
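The _igv_phase_iter change above makes repeated printouts of the same phase distinguishable in IGV by appending an iteration count to the second and later occurrences. A self-contained sketch of just that naming scheme (a hypothetical mirror, not the HotSpot code itself):

    #include <cstdio>

    enum PhaseType { PHASE_AFTER_ITER_GVN_STEP, PHASE_NUM_TYPES };
    static int igv_phase_iter[PHASE_NUM_TYPES] = {0};

    // First occurrence keeps the bare description; repeats get " 2", " 3", ...
    static void print_phase(PhaseType t, const char* desc) {
      int iter = ++igv_phase_iter[t];
      if (iter > 1) {
        std::printf("%s %d\n", desc, iter);
      } else {
        std::printf("%s\n", desc);
      }
    }

    int main() {
      print_phase(PHASE_AFTER_ITER_GVN_STEP, "After Iter GVN Step"); // "After Iter GVN Step"
      print_phase(PHASE_AFTER_ITER_GVN_STEP, "After Iter GVN Step"); // "After Iter GVN Step 2"
      return 0;
    }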
@@ -343,6 +343,7 @@ class Compile : public Phase {
   bool _print_intrinsics;        // True if we should print intrinsics for this compilation
 #ifndef PRODUCT
   uint _igv_idx;                 // Counter for IGV node identifiers
+  uint _igv_phase_iter[PHASE_NUM_TYPES]; // Counters for IGV phase iterations
   bool _trace_opto_output;
   bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
 #endif
@@ -531,6 +532,7 @@ private:

 #ifndef PRODUCT
   IdealGraphPrinter* igv_printer() { return _igv_printer; }
+  void reset_igv_phase_iter(CompilerPhaseType cpt) { _igv_phase_iter[cpt] = 0; }
 #endif

   void log_late_inline(CallGenerator* cg);
@@ -1563,6 +1563,11 @@ Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
   if (((bt == T_OBJECT) && C->do_escape_analysis()) || C->eliminate_boxing()) {
     // Improve graph before escape analysis and boxing elimination.
     record_for_igvn(ld);
+    if (ld->is_DecodeN()) {
+      // Also record the actual load (LoadN) in case ld is DecodeN
+      assert(ld->in(1)->Opcode() == Op_LoadN, "Assumption invalid: input to DecodeN is not LoadN");
+      record_for_igvn(ld->in(1));
+    }
   }
   return ld;
 }
@@ -5387,6 +5387,10 @@ bool LibraryCallKit::inline_array_partition() {
   const TypeInstPtr* elem_klass = gvn().type(elementType)->isa_instptr();
   ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
   BasicType bt = elem_type->basic_type();
+  // Disable the intrinsic if the CPU does not support SIMD sort
+  if (!Matcher::supports_simd_sort(bt)) {
+    return false;
+  }
   address stubAddr = nullptr;
   stubAddr = StubRoutines::select_array_partition_function();
   // stub not loaded
@@ -5440,6 +5444,10 @@ bool LibraryCallKit::inline_array_sort() {
   const TypeInstPtr* elem_klass = gvn().type(elementType)->isa_instptr();
   ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
   BasicType bt = elem_type->basic_type();
+  // Disable the intrinsic if the CPU does not support SIMD sort
+  if (!Matcher::supports_simd_sort(bt)) {
+    return false;
+  }
   address stubAddr = nullptr;
   stubAddr = StubRoutines::select_arraysort_function();
   //stub not loaded
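Both hunks above follow the usual C2 intrinsic pattern: returning false from an inline_* method makes the compiler skip the intrinsic and keep the plain bytecode implementation. A hedged sketch of that gating shape, with made-up names standing in for Matcher::supports_simd_sort and the stub lookup:

    #include <cstdio>

    // Hypothetical stand-ins for Matcher::supports_simd_sort and the stub
    // lookup; the names are illustrative only.
    static bool cpu_supports_simd_sort(int /*element_kind*/) { return false; }
    static const void* lookup_sort_stub(int /*element_kind*/) { return nullptr; }

    // Mirrors the bail-out shape of the hunks above: any failed precondition
    // returns false, which keeps the non-intrinsic implementation.
    static bool try_use_sort_intrinsic(int element_kind) {
      if (!cpu_supports_simd_sort(element_kind)) {
        return false;  // CPU gate, as added by the change above
      }
      if (lookup_sort_stub(element_kind) == nullptr) {
        return false;  // stub not loaded
      }
      return true;
    }

    int main() {
      std::printf("use intrinsic: %s\n", try_use_sort_intrinsic(0) ? "yes" : "no");
      return 0;
    }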
@@ -1180,6 +1180,7 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
   }
   BoolNode* bol = test->as_Bool();
   if (invar.is_invariant(bol)) {
+    C->print_method(PHASE_BEFORE_LOOP_PREDICATION_IC, 4, iff);
     // Invariant test
     new_predicate_proj = create_new_if_for_predicate(parse_predicate_proj, nullptr,
                                                      reason,
@@ -1197,6 +1198,9 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
     IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If();
     _igvn.hash_delete(new_predicate_iff);
     new_predicate_iff->set_req(1, new_predicate_bol);
+
+    C->print_method(PHASE_AFTER_LOOP_PREDICATION_IC, 4, new_predicate_proj->in(0));
+
 #ifndef PRODUCT
     if (TraceLoopPredicate) {
       tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx);
@@ -1207,6 +1211,7 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
     }
 #endif
   } else if (cl != nullptr && loop->is_range_check_if(if_success_proj, this, invar DEBUG_ONLY(COMMA parse_predicate_proj))) {
+    C->print_method(PHASE_BEFORE_LOOP_PREDICATION_RC, 4, iff);
     // Range check for counted loops
     assert(if_success_proj->is_IfTrue(), "trap must be on false projection for a range check");
     const Node* cmp = bol->in(1)->as_Cmp();
@@ -1270,6 +1275,8 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
     new_predicate_proj = add_template_assertion_predicate(iff, loop, if_success_proj, parse_predicate_proj, upper_bound_proj, scale,
                                                           offset, init, limit, stride, rng, overflow, reason);

+    C->print_method(PHASE_AFTER_LOOP_PREDICATION_RC, 4, new_predicate_proj->in(0));
+
 #ifndef PRODUCT
     if (TraceLoopOpts && !TraceLoopPredicate) {
       tty->print("Predicate RC ");
@@ -703,6 +703,9 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
   }
 #endif
   LoopNode* head = loop->_head->as_Loop();
+
+  C->print_method(PHASE_BEFORE_LOOP_PEELING, 4, head);
+
   bool counted_loop = head->is_CountedLoop();
   if (counted_loop) {
     CountedLoopNode *cl = head->as_CountedLoop();
@@ -795,6 +798,8 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
   peeled_dom_test_elim(loop,old_new);

   loop->record_for_igvn();
+
+  C->print_method(PHASE_AFTER_LOOP_PEELING, 4, new_head);
 }

 //------------------------------policy_maximally_unroll------------------------
@@ -1629,6 +1634,8 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n
   CountedLoopEndNode *main_end = main_head->loopexit();
   assert(main_end->outcnt() == 2, "1 true, 1 false path only");

+  C->print_method(PHASE_BEFORE_PRE_MAIN_POST, 4, main_head);
+
   Node *pre_header= main_head->in(LoopNode::EntryControl);
   Node *init = main_head->init_trip();
   Node *incr = main_end->incr();
@@ -1825,6 +1832,8 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n
   // finds some, but we _know_ they are all useless.
   peeled_dom_test_elim(loop,old_new);
   loop->record_for_igvn();
+
+  C->print_method(PHASE_AFTER_PRE_MAIN_POST, 4, main_head);
 }

 //------------------------------insert_vector_post_loop------------------------
@@ -2127,6 +2136,9 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
   assert(LoopUnrollLimit, "");
   CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
   CountedLoopEndNode *loop_end = loop_head->loopexit();
+
+  C->print_method(PHASE_BEFORE_LOOP_UNROLLING, 4, loop_head);
+
 #ifndef PRODUCT
   if (PrintOpto && VerifyLoopOptimizations) {
     tty->print("Unrolling ");
@@ -2374,6 +2386,8 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
     }
   }
 #endif
+
+  C->print_method(PHASE_AFTER_LOOP_UNROLLING, 4, clone_head);
 }

 //------------------------------do_maximally_unroll----------------------------
@@ -3003,6 +3017,8 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
       // stride_con and scale_con can be negative which will flip about the
       // sense of the test.

+      C->print_method(PHASE_BEFORE_RANGE_CHECK_ELIMINATION, 4, iff);
+
       // Perform the limit computations in jlong to avoid overflow
       jlong lscale_con = scale_con;
       Node* int_offset = offset;
@@ -3103,6 +3119,9 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
           --imax;
         }
       }
+
+      C->print_method(PHASE_AFTER_RANGE_CHECK_ELIMINATION, 4, cl);
+
     } // End of is IF
   }
   if (loop_entry != cl->skip_strip_mined()->in(LoopNode::EntryControl)) {
@@ -134,6 +134,8 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
   }
 #endif

+  C->print_method(PHASE_BEFORE_LOOP_UNSWITCHING, 4, head);
+
   // Need to revert back to normal loop
   if (head->is_CountedLoop() && !head->as_CountedLoop()->is_normal_loop()) {
     head->as_CountedLoop()->set_normal_loop();
@@ -200,6 +202,8 @@ void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
   }
 #endif

+  C->print_method(PHASE_AFTER_LOOP_UNSWITCHING, 4, head_clone);
+
   C->set_major_progress();
 }

@@ -1446,7 +1446,12 @@ void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
       }

       // Now split the IF
+      C->print_method(PHASE_BEFORE_SPLIT_IF, 4, iff);
+      if ((PrintOpto && VerifyLoopOptimizations) || TraceLoopOpts) {
+        tty->print_cr("Split-If");
+      }
       do_split_if(iff);
+      C->print_method(PHASE_AFTER_SPLIT_IF, 4, iff);
       return;
     }

@@ -3625,6 +3630,9 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
     }
   }
 #endif
+
+  C->print_method(PHASE_BEFORE_PARTIAL_PEELING, 4, head);
+
   VectorSet peel;
   VectorSet not_peel;
   Node_List peel_list;
@@ -3919,6 +3927,9 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
     }
   }
 #endif
+
+  C->print_method(PHASE_AFTER_PARTIAL_PEELING, 4, new_head_clone);
+
   return true;
 }

@@ -2779,7 +2779,7 @@ void Parse::do_one_bytecode() {
   }

 #ifndef PRODUCT
-  constexpr int perBytecode = 5;
+  constexpr int perBytecode = 6;
   if (C->should_print_igv(perBytecode)) {
     IdealGraphPrinter* printer = C->igv_printer();
     char buffer[256];
@@ -894,7 +894,7 @@ void PhaseIterGVN::verify_step(Node* n) {
 void PhaseIterGVN::trace_PhaseIterGVN(Node* n, Node* nn, const Type* oldtype) {
   const Type* newtype = type_or_null(n);
   if (nn != n || oldtype != newtype) {
-    C->print_method(PHASE_AFTER_ITER_GVN_STEP, 4, n);
+    C->print_method(PHASE_AFTER_ITER_GVN_STEP, 5, n);
   }
   if (TraceIterativeGVN) {
     uint wlsize = _worklist.size();
@@ -1025,6 +1025,7 @@ void PhaseIterGVN::trace_PhaseIterGVN_verbose(Node* n, int num_processed) {
 void PhaseIterGVN::optimize() {
   DEBUG_ONLY(uint num_processed = 0;)
   NOT_PRODUCT(init_verifyPhaseIterGVN();)
+  NOT_PRODUCT(C->reset_igv_phase_iter(PHASE_AFTER_ITER_GVN_STEP);)
   C->print_method(PHASE_BEFORE_ITER_GVN, 3);
   if (StressIGVN) {
     shuffle_worklist();
@@ -49,6 +49,27 @@
   flags(ITER_GVN_AFTER_VECTOR, "Iter GVN after vector box elimination") \
   flags(BEFORE_BEAUTIFY_LOOPS, "Before beautify loops") \
   flags(AFTER_BEAUTIFY_LOOPS, "After beautify loops") \
+  flags(BEFORE_LOOP_UNROLLING, "Before Loop Unrolling") \
+  flags(AFTER_LOOP_UNROLLING, "After Loop Unrolling") \
+  flags(BEFORE_SPLIT_IF, "Before Split-If") \
+  flags(AFTER_SPLIT_IF, "After Split-If") \
+  flags(BEFORE_LOOP_PREDICATION_IC, "Before Loop Predication IC") \
+  flags(AFTER_LOOP_PREDICATION_IC, "After Loop Predication IC") \
+  flags(BEFORE_LOOP_PREDICATION_RC, "Before Loop Predication RC") \
+  flags(AFTER_LOOP_PREDICATION_RC, "After Loop Predication RC") \
+  flags(BEFORE_PARTIAL_PEELING, "Before Partial Peeling") \
+  flags(AFTER_PARTIAL_PEELING, "After Partial Peeling") \
+  flags(BEFORE_LOOP_PEELING, "Before Loop Peeling") \
+  flags(AFTER_LOOP_PEELING, "After Loop Peeling") \
+  flags(BEFORE_LOOP_UNSWITCHING, "Before Loop Unswitching") \
+  flags(AFTER_LOOP_UNSWITCHING, "After Loop Unswitching") \
+  flags(BEFORE_RANGE_CHECK_ELIMINATION, "Before Range Check Elimination") \
+  flags(AFTER_RANGE_CHECK_ELIMINATION, "After Range Check Elimination") \
+  flags(BEFORE_PRE_MAIN_POST, "Before Pre/Main/Post Loops") \
+  flags(AFTER_PRE_MAIN_POST, "After Pre/Main/Post Loops") \
+  flags(SUPERWORD1_BEFORE_SCHEDULE, "Superword 1, Before Schedule") \
+  flags(SUPERWORD2_BEFORE_OUTPUT, "Superword 2, Before Output") \
+  flags(SUPERWORD3_AFTER_OUTPUT, "Superword 3, After Output") \
   flags(BEFORE_CLOOPS, "Before CountedLoop") \
   flags(AFTER_CLOOPS, "After CountedLoop") \
   flags(PHASEIDEAL_BEFORE_EA, "PhaseIdealLoop before EA") \
@@ -58,6 +79,7 @@
   flags(PHASEIDEALLOOP1, "PhaseIdealLoop 1") \
   flags(PHASEIDEALLOOP2, "PhaseIdealLoop 2") \
   flags(PHASEIDEALLOOP3, "PhaseIdealLoop 3") \
+  flags(BEFORE_CCP1, "Before PhaseCCP 1") \
   flags(CCP1, "PhaseCCP 1") \
   flags(ITER_GVN2, "Iter GVN 2") \
   flags(PHASEIDEALLOOP_ITERATIONS, "PhaseIdealLoop iterations") \
@@ -67,6 +89,10 @@
   flags(BEFORE_MATCHING, "Before matching") \
   flags(MATCHING, "After matching") \
   flags(GLOBAL_CODE_MOTION, "Global code motion") \
+  flags(REGISTER_ALLOCATION, "Register Allocation") \
+  flags(BLOCK_ORDERING, "Block Ordering") \
+  flags(PEEPHOLE, "Peephole") \
+  flags(POSTALLOC_EXPAND, "Post-Allocation Expand") \
   flags(MACH_ANALYSIS, "After mach analysis") \
   flags(FINAL_CODE, "Final Code") \
   flags(END, "End") \
@@ -591,12 +591,6 @@ void PhaseIdealLoop::handle_use( Node *use, Node *def, small_cache *cache, Node
 // Found an If getting its condition-code input from a Phi in the same block.
 // Split thru the Region.
 void PhaseIdealLoop::do_split_if(Node* iff, RegionNode** new_false_region, RegionNode** new_true_region) {
-  if (PrintOpto && VerifyLoopOptimizations) {
-    tty->print_cr("Split-if");
-  }
-  if (TraceLoopOpts) {
-    tty->print_cr("SplitIf");
-  }

   C->set_major_progress();
   RegionNode *region = iff->in(0)->as_Region();
@@ -2381,6 +2381,9 @@ void SuperWord::schedule() {
   }
 #endif

+  CountedLoopNode* cl = lpt()->_head->as_CountedLoop();
+  _phase->C->print_method(PHASE_SUPERWORD1_BEFORE_SCHEDULE, 4, cl);
+
   // (4) Use the memops_schedule to re-order the memops in all slices.
   schedule_reorder_memops(memops_schedule);
 }
@@ -2488,6 +2491,7 @@ bool SuperWord::output() {
     lpt()->dump_head();
   }
 #endif
+  _phase->C->print_method(PHASE_SUPERWORD2_BEFORE_OUTPUT, 4, cl);

   // Ensure main loop's initial value is properly aligned
   // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
@@ -2808,6 +2812,8 @@ bool SuperWord::output() {
     }
   }

+  _phase->C->print_method(PHASE_SUPERWORD3_AFTER_OUTPUT, 4, cl);
+
   return true;
 }

@@ -390,7 +390,10 @@ UNSAFE_ENTRY_SCOPED(void, Unsafe_SetMemory0(JNIEnv *env, jobject unsafe, jobject
   oop base = JNIHandles::resolve(obj);
   void* p = index_oop_from_field_offset_long(base, offset);

-  Copy::fill_to_memory_atomic(p, sz, value);
+  {
+    GuardUnsafeAccess guard(thread);
+    Copy::fill_to_memory_atomic(p, sz, value);
+  }
 } UNSAFE_END

 UNSAFE_ENTRY_SCOPED(void, Unsafe_CopyMemory0(JNIEnv *env, jobject unsafe, jobject srcObj, jlong srcOffset, jobject dstObj, jlong dstOffset, jlong size)) {
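GuardUnsafeAccess is an RAII helper; the new braces above scope it to the fill alone, so the thread is marked as being inside an Unsafe access only for that call. A minimal analogue of the pattern (hypothetical type, not the HotSpot class):

    #include <cstddef>
    #include <cstring>

    // Hypothetical RAII mark: set on entry, cleared on scope exit, mirroring
    // how GuardUnsafeAccess brackets the Copy::fill_to_memory_atomic call.
    struct ScopedUnsafeAccessMark {
      bool& flag;
      explicit ScopedUnsafeAccessMark(bool& f) : flag(f) { flag = true; }
      ~ScopedUnsafeAccessMark() { flag = false; }
    };

    void set_memory(void* p, std::size_t sz, unsigned char value, bool& in_unsafe_access) {
      {
        ScopedUnsafeAccessMark guard(in_unsafe_access); // active only for the fill
        std::memset(p, value, sz);                      // stands in for the atomic fill
      }
      // guard destroyed here: the thread is no longer marked
    }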
@@ -35,12 +35,6 @@

 class Prefetch : AllStatic {
  public:
-  enum style {
-    do_none,  // Do no prefetching
-    do_read,  // Do read prefetching
-    do_write  // Do write prefetching
-  };
-
   // Prefetch anticipating read; must not fault, semantically a no-op
   static void read(const void* loc, intx interval);

@@ -1473,6 +1473,25 @@ void SymbolTableDumper::do_symbol(Symbol** p) {
   }
 }

+// Support class used to generate HPROF_GC_CLASS_DUMP records
+
+class ClassDumper : public KlassClosure {
+ private:
+  AbstractDumpWriter* _writer;
+  AbstractDumpWriter* writer() const { return _writer; }
+
+ public:
+  ClassDumper(AbstractDumpWriter* writer) : _writer(writer) {}
+
+  void do_klass(Klass* k) {
+    if (k->is_instance_klass()) {
+      DumperSupport::dump_instance_class(writer(), k);
+    } else {
+      DumperSupport::dump_array_class(writer(), k);
+    }
+  }
+};
+
 // Support class used to generate HPROF_GC_ROOT_JNI_LOCAL records

 class JNILocalsDumper : public OopClosure {
@@ -1860,21 +1879,25 @@ vframe* ThreadDumper::get_top_frame() const {
   return nullptr;
 }

-class VM_HeapDumper;
+// Callback to dump thread-related data for unmounted virtual threads;
+// implemented by VM_HeapDumper.
+class UnmountedVThreadDumper {
+ public:
+  virtual void dump_vthread(oop vt, AbstractDumpWriter* segment_writer) = 0;
+};

-// Support class using when iterating over the heap.
+// Support class used when iterating over the heap.
 class HeapObjectDumper : public ObjectClosure {
  private:
   AbstractDumpWriter* _writer;
   AbstractDumpWriter* writer() { return _writer; }
+  UnmountedVThreadDumper* _vthread_dumper;

   DumperClassCacheTable _class_cache;

  public:
-  HeapObjectDumper(AbstractDumpWriter* writer) {
-    _writer = writer;
-  }
+  HeapObjectDumper(AbstractDumpWriter* writer, UnmountedVThreadDumper* vthread_dumper)
+    : _writer(writer), _vthread_dumper(vthread_dumper) {}

   // called for each object in the heap
   void do_object(oop o);
@@ -1895,6 +1918,9 @@ void HeapObjectDumper::do_object(oop o) {
   if (o->is_instance()) {
     // create a HPROF_GC_INSTANCE record for each object
     DumperSupport::dump_instance(writer(), o, &_class_cache);
+    if (java_lang_VirtualThread::is_instance(o) && ThreadDumper::should_dump_vthread(o)) {
+      _vthread_dumper->dump_vthread(o, writer());
+    }
   } else if (o->is_objArray()) {
     // create a HPROF_GC_OBJ_ARRAY_DUMP record for each object array
     DumperSupport::dump_object_array(writer(), objArrayOop(o));
@@ -1908,16 +1934,52 @@ void HeapObjectDumper::do_object(oop o) {
 class DumperController : public CHeapObj<mtInternal> {
  private:
   Monitor* _lock;
+  Mutex* _global_writer_lock;
+
   const uint _dumper_number;
   uint _complete_number;
+  bool _started; // VM dumper started and acquired global writer lock

  public:
   DumperController(uint number) :
-    _lock(new (std::nothrow) PaddedMonitor(Mutex::safepoint, "DumperController_lock")),
+    // _lock and _global_writer_lock are used for synchronization between GC worker threads inside safepoint,
+    // so we lock with _no_safepoint_check_flag.
+    // signal_start() acquires _lock when global writer is locked,
+    // its rank must be less than _global_writer_lock rank.
+    _lock(new (std::nothrow) PaddedMonitor(Mutex::nosafepoint - 1, "DumperController_lock")),
+    _global_writer_lock(new (std::nothrow) Mutex(Mutex::nosafepoint, "DumpWriter_lock")),
     _dumper_number(number),
-    _complete_number(0) { }
+    _complete_number(0),
+    _started(false)
+  {}

-  ~DumperController() { delete _lock; }
+  ~DumperController() {
+    delete _lock;
+    delete _global_writer_lock;
+  }
+
+  // parallel (non VM) dumpers must wait until VM dumper acquires global writer lock
+  void wait_for_start_signal() {
+    MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag);
+    while (_started == false) {
+      ml.wait();
+    }
+  }
+
+  void signal_start() {
+    MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag);
+    _started = true;
+    ml.notify_all();
+  }
+
+  void lock_global_writer() {
+    _global_writer_lock->lock_without_safepoint_check();
+  }
+
+  void unlock_global_writer() {
+    _global_writer_lock->unlock();
+  }

   void dumper_complete(DumpWriter* local_writer, DumpWriter* global_writer) {
     MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag);
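The start/signal protocol above has a simple shape: the VM dumper takes the global writer lock and broadcasts _started, and every other dumper blocks until then. A portable sketch of the same handshake with std::mutex and std::condition_variable (the real code uses HotSpot Monitor/Mutex with no-safepoint-check locking, so this is an analogy, not the implementation):

    #include <condition_variable>
    #include <mutex>

    class StartGate {
      std::mutex _m;
      std::condition_variable _cv;
      bool _started = false;

     public:
      // Non-VM dumpers block here until the VM dumper holds the writer lock.
      void wait_for_start_signal() {
        std::unique_lock<std::mutex> lk(_m);
        _cv.wait(lk, [this] { return _started; });
      }

      // Called once by the VM dumper, after acquiring the global writer lock.
      void signal_start() {
        { std::lock_guard<std::mutex> lk(_m); _started = true; }
        _cv.notify_all();
      }
    };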
@@ -1946,7 +2008,7 @@ private:
   int _dump_seq;

 private:
-  void merge_file(char* path);
+  void merge_file(const char* path);
   void merge_done();
   void set_error(const char* msg);

@@ -1958,8 +2020,28 @@ public:
     _dump_seq(dump_seq) {}

   void do_merge();
+
+  // returns path for the parallel DumpWriter (resource allocated)
+  static char* get_writer_path(const char* base_path, int seq);
 };

+char* DumpMerger::get_writer_path(const char* base_path, int seq) {
+  // approximate required buffer size
+  size_t buf_size = strlen(base_path)
+                  + 2   // ".p"
+                  + 10  // number (that's enough for 2^32 parallel dumpers)
+                  + 1;  // '\0'
+
+  char* path = NEW_RESOURCE_ARRAY(char, buf_size);
+  memset(path, 0, buf_size);
+
+  os::snprintf(path, buf_size, "%s.p%d", base_path, seq);
+
+  return path;
+}
+
 void DumpMerger::merge_done() {
   // Writes the HPROF_HEAP_DUMP_END record.
   if (!_has_error) {
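get_writer_path gives every parallel dumper a deterministic segment file name next to the requested dump: the base path plus ".p<seq>". For example, with an assumed base path of "java_pid1234.hprof", dumper 0 writes java_pid1234.hprof.p0, dumper 1 writes .p1, and DumpMerger later concatenates the segments in sequence order. A quick standalone check of the formula:

    #include <cstdio>

    int main() {
      const char* base = "java_pid1234.hprof";  // assumed example base path
      char path[64];
      for (int seq = 0; seq < 3; seq++) {
        std::snprintf(path, sizeof(path), "%s.p%d", base, seq);
        std::puts(path);  // java_pid1234.hprof.p0, .p1, .p2
      }
      return 0;
    }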
@@ -1980,8 +2062,7 @@ void DumpMerger::set_error(const char* msg) {
 // Merge segmented heap files via sendfile, it's more efficient than the
 // read+write combination, which would require transferring data to and from
 // user space.
-void DumpMerger::merge_file(char* path) {
-  assert(!SafepointSynchronize::is_at_safepoint(), "merging happens outside safepoint");
+void DumpMerger::merge_file(const char* path) {
   TraceTime timer("Merge segmented heap file directly", TRACETIME_LOG(Info, heapdump));

   int segment_fd = os::open(path, O_RDONLY, 0);
@@ -2018,8 +2099,7 @@ void DumpMerger::merge_file(char* path) {
 }
 #else
 // Generic implementation using read+write
-void DumpMerger::merge_file(char* path) {
-  assert(!SafepointSynchronize::is_at_safepoint(), "merging happens outside safepoint");
+void DumpMerger::merge_file(const char* path) {
   TraceTime timer("Merge segmented heap file", TRACETIME_LOG(Info, heapdump));

   fileStream segment_fs(path, "rb");
@@ -2044,7 +2124,6 @@ void DumpMerger::merge_file(char* path) {
 #endif

 void DumpMerger::do_merge() {
-  assert(!SafepointSynchronize::is_at_safepoint(), "merging happens outside safepoint");
   TraceTime timer("Merge heap files complete", TRACETIME_LOG(Info, heapdump));

   // Since contents in segmented heap file were already zipped, we don't need to zip
@@ -2054,10 +2133,9 @@ void DumpMerger::do_merge() {

   // Merge the content of the remaining files into base file. Regardless of whether
   // the merge process is successful or not, these segmented files will be deleted.
-  char path[JVM_MAXPATHLEN];
   for (int i = 0; i < _dump_seq; i++) {
-    memset(path, 0, JVM_MAXPATHLEN);
-    os::snprintf(path, JVM_MAXPATHLEN, "%s.p%d", _path, i);
+    ResourceMark rm;
+    const char* path = get_writer_path(_path, i);
     if (!_has_error) {
       merge_file(path);
     }
@@ -2087,7 +2165,7 @@ public:
 };

 // The VM operation that performs the heap dump
-class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
+class VM_HeapDumper : public VM_GC_Operation, public WorkerTask, public UnmountedVThreadDumper {
  private:
   static VM_HeapDumper* _global_dumper;
   static DumpWriter* _global_writer;
@@ -2107,10 +2185,15 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
   uint _num_dumper_threads;
   DumperController* _dumper_controller;
   ParallelObjectIterator* _poi;
-  // worker id of VMDumper thread.
-  static const size_t VMDumperWorkerId = 0;
+
+  // Dumper id of VMDumper thread.
+  static const int VMDumperId = 0;
   // VM dumper dumps both heap and non-heap data, other dumpers dump heap-only data.
-  static bool is_vm_dumper(uint worker_id) { return worker_id == VMDumperWorkerId; }
+  static bool is_vm_dumper(int dumper_id) { return dumper_id == VMDumperId; }
+  // the 1st dumper calling get_next_dumper_id becomes VM dumper
+  int get_next_dumper_id() {
+    return Atomic::fetch_then_add(&_dump_seq, 1);
+  }

   // accessors and setters
   static VM_HeapDumper* dumper() { assert(_global_dumper != nullptr, "Error"); return _global_dumper; }
@@ -2129,17 +2212,11 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {

   bool skip_operation() const;

-  // create dump writer for every parallel dump thread
-  DumpWriter* create_local_writer();
-
-  // writes a HPROF_LOAD_CLASS record
+  // writes a HPROF_LOAD_CLASS record to global writer
   static void do_load_class(Klass* k);

-  // writes a HPROF_GC_CLASS_DUMP record for the given class
-  static void do_class_dump(Klass* k);
-
   // HPROF_GC_ROOT_THREAD_OBJ records for platform and mounted virtual threads
-  void dump_threads();
+  void dump_threads(AbstractDumpWriter* writer);

   void add_class_serial_number(Klass* k, int serial_num) {
     _klass_map->at_put_grow(serial_num, k);
@@ -2150,7 +2227,7 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
   }

   // HPROF_TRACE and HPROF_FRAME records for platform and mounted virtual threads
-  void dump_stack_traces();
+  void dump_stack_traces(AbstractDumpWriter* writer);

  public:
   VM_HeapDumper(DumpWriter* writer, bool gc_before_heap_dump, bool oome, uint num_dump_threads) :
@@ -2168,7 +2245,7 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
     _thread_serial_num = 1;
     _frame_serial_num = 1;

-    _dump_seq = 0;
+    _dump_seq = VMDumperId;
     _num_dumper_threads = num_dump_threads;
     _dumper_controller = nullptr;
     _poi = nullptr;
@@ -2202,12 +2279,15 @@ class VM_HeapDumper : public VM_GC_Operation, public WorkerTask {
   }
   int dump_seq() { return _dump_seq; }
   bool is_parallel_dump() { return _num_dumper_threads > 1; }
-  bool can_parallel_dump(WorkerThreads* workers);
+  void prepare_parallel_dump(WorkerThreads* workers);

   VMOp_Type type() const { return VMOp_HeapDumper; }
   virtual bool doit_prologue();
   void doit();
   void work(uint worker_id);
+
+  // UnmountedVThreadDumper implementation
+  void dump_vthread(oop vt, AbstractDumpWriter* segment_writer);
 };

 VM_HeapDumper* VM_HeapDumper::_global_dumper = nullptr;
@@ -2251,21 +2331,12 @@ void VM_HeapDumper::do_load_class(Klass* k) {
     writer()->write_symbolID(name);
 }

-// writes a HPROF_GC_CLASS_DUMP record for the given class
-void VM_HeapDumper::do_class_dump(Klass* k) {
-  if (k->is_instance_klass()) {
-    DumperSupport::dump_instance_class(writer(), k);
-  } else {
-    DumperSupport::dump_array_class(writer(), k);
-  }
-}
-
 // Write a HPROF_GC_ROOT_THREAD_OBJ record for platform/carrier and mounted virtual threads.
 // Then walk the stack so that locals and JNI locals are dumped.
-void VM_HeapDumper::dump_threads() {
+void VM_HeapDumper::dump_threads(AbstractDumpWriter* writer) {
   for (int i = 0; i < _thread_dumpers_count; i++) {
-    _thread_dumpers[i]->dump_thread_obj(writer());
-    _thread_dumpers[i]->dump_stack_refs(writer());
+    _thread_dumpers[i]->dump_thread_obj(writer);
+    _thread_dumpers[i]->dump_stack_refs(writer);
   }
 }

@@ -2280,31 +2351,21 @@ bool VM_HeapDumper::doit_prologue() {
   return VM_GC_Operation::doit_prologue();
 }

-bool VM_HeapDumper::can_parallel_dump(WorkerThreads* workers) {
-  bool can_parallel = true;
+void VM_HeapDumper::prepare_parallel_dump(WorkerThreads* workers) {
   uint num_active_workers = workers != nullptr ? workers->active_workers() : 0;
   uint num_requested_dump_threads = _num_dumper_threads;
   // check if we can dump in parallel based on requested and active threads
   if (num_active_workers <= 1 || num_requested_dump_threads <= 1) {
     _num_dumper_threads = 1;
-    can_parallel = false;
   } else {
-    // check if we have extra path room to accommodate segmented heap files
-    const char* base_path = writer()->get_file_path();
-    assert(base_path != nullptr, "sanity check");
-    if ((strlen(base_path) + 7/*.p\d\d\d\d\0*/) >= JVM_MAXPATHLEN) {
-      _num_dumper_threads = 1;
-      can_parallel = false;
-    } else {
-      _num_dumper_threads = clamp(num_requested_dump_threads, 2U, num_active_workers);
-    }
+    _num_dumper_threads = clamp(num_requested_dump_threads, 2U, num_active_workers);
   }
+  _dumper_controller = new (std::nothrow) DumperController(_num_dumper_threads);
+  bool can_parallel = _num_dumper_threads > 1;
   log_info(heapdump)("Requested dump threads %u, active dump threads %u, "
                      "actual dump threads %u, parallelism %s",
                      num_requested_dump_threads, num_active_workers,
                      _num_dumper_threads, can_parallel ? "true" : "false");
-  return can_parallel;
 }

 // The VM operation that dumps the heap. The dump consists of the following
@@ -2352,11 +2413,11 @@ void VM_HeapDumper::doit() {
   set_global_writer();

   WorkerThreads* workers = ch->safepoint_workers();
-  if (!can_parallel_dump(workers)) {
-    work(VMDumperWorkerId);
+  prepare_parallel_dump(workers);
+
+  if (!is_parallel_dump()) {
+    work(VMDumperId);
   } else {
-    uint heap_only_dumper_threads = _num_dumper_threads - 1 /* VMDumper thread */;
-    _dumper_controller = new (std::nothrow) DumperController(heap_only_dumper_threads);
     ParallelObjectIterator poi(_num_dumper_threads);
     _poi = &poi;
     workers->run_task(this, _num_dumper_threads);
@@ -2368,26 +2429,19 @@ void VM_HeapDumper::doit() {
   clear_global_writer();
 }

-// prepare DumpWriter for every parallel dump thread
-DumpWriter* VM_HeapDumper::create_local_writer() {
-  char* path = NEW_RESOURCE_ARRAY(char, JVM_MAXPATHLEN);
-  memset(path, 0, JVM_MAXPATHLEN);
-
-  // generate segmented heap file path
-  const char* base_path = writer()->get_file_path();
-  // share global compressor, local DumpWriter is not responsible for its life cycle
-  AbstractCompressor* compressor = writer()->compressor();
-  int seq = Atomic::fetch_then_add(&_dump_seq, 1);
-  os::snprintf(path, JVM_MAXPATHLEN, "%s.p%d", base_path, seq);
-
-  // create corresponding writer for that
-  DumpWriter* local_writer = new DumpWriter(path, writer()->is_overwrite(), compressor);
-  return local_writer;
-}
-
 void VM_HeapDumper::work(uint worker_id) {
   // VM Dumper works on all non-heap data dumping and part of heap iteration.
-  if (is_vm_dumper(worker_id)) {
+  int dumper_id = get_next_dumper_id();
+
+  if (is_vm_dumper(dumper_id)) {
+    // lock global writer, it will be unlocked after VM Dumper finishes with non-heap data
+    _dumper_controller->lock_global_writer();
+    _dumper_controller->signal_start();
+  } else {
+    _dumper_controller->wait_for_start_signal();
+  }
+
+  if (is_vm_dumper(dumper_id)) {
     TraceTime timer("Dump non-objects", TRACETIME_LOG(Info, heapdump));
     // Write the file header - we always use 1.0.2
     const char* header = "JAVA PROFILE 1.0.2";
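The lock_global_writer()/signal_start()/wait_for_start_signal() calls added above form a start gate: the VM dumper holds the shared writer while it emits non-heap records, and the other workers block until it signals. DumperController's implementation is not part of this diff, so the following self-contained C++ sketch of that pattern, built on standard primitives, is an assumption-level stand-in only.

#include <condition_variable>
#include <mutex>

class StartGateSketch {
    std::mutex              _lock;
    std::condition_variable _cv;
    bool                    _started = false;

public:
    // Called once by the "VM dumper" worker after it acquires the global writer.
    void signal_start() {
        {
            std::lock_guard<std::mutex> g(_lock);
            _started = true;
        }
        _cv.notify_all();
    }

    // Called by every other worker before touching shared state.
    void wait_for_start_signal() {
        std::unique_lock<std::mutex> g(_lock);
        _cv.wait(g, [this] { return _started; });
    }
};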
@@ -2409,21 +2463,31 @@ void VM_HeapDumper::work(uint worker_id) {

     // write HPROF_FRAME and HPROF_TRACE records
     // this must be called after _klass_map is built when iterating the classes above.
-    dump_stack_traces();
+    dump_stack_traces(writer());

+    // unlock global writer, so parallel dumpers can dump stack traces of unmounted virtual threads
+    _dumper_controller->unlock_global_writer();
+  }

   // HPROF_HEAP_DUMP/HPROF_HEAP_DUMP_SEGMENT starts here
+
+  ResourceMark rm;
+  // share global compressor, local DumpWriter is not responsible for its life cycle
+  DumpWriter segment_writer(DumpMerger::get_writer_path(writer()->get_file_path(), dumper_id),
+                            writer()->is_overwrite(), writer()->compressor());
+  if (!segment_writer.has_error()) {
+    if (is_vm_dumper(dumper_id)) {
+      // dump some non-heap subrecords to heap dump segment
+      TraceTime timer("Dump non-objects (part 2)", TRACETIME_LOG(Info, heapdump));
       // Writes HPROF_GC_CLASS_DUMP records
-      {
-        LockedClassesDo locked_dump_class(&do_class_dump);
-        ClassLoaderDataGraph::classes_do(&locked_dump_class);
-      }
+      ClassDumper class_dumper(&segment_writer);
+      ClassLoaderDataGraph::classes_do(&class_dumper);

       // HPROF_GC_ROOT_THREAD_OBJ + frames + jni locals
-      dump_threads();
+      dump_threads(&segment_writer);

       // HPROF_GC_ROOT_JNI_GLOBAL
-      JNIGlobalsDumper jni_dumper(writer());
+      JNIGlobalsDumper jni_dumper(&segment_writer);
       JNIHandles::oops_do(&jni_dumper);
       // technically not jni roots, but global roots
       // for things like preallocated throwable backtraces
@@ -2431,8 +2495,8 @@ void VM_HeapDumper::work(uint worker_id) {
       // HPROF_GC_ROOT_STICKY_CLASS
       // These should be classes in the null class loader data, and not all classes
       // if !ClassUnloading
-      StickyClassDumper class_dumper(writer());
-      ClassLoaderData::the_null_class_loader_data()->classes_do(&class_dumper);
+      StickyClassDumper stiky_class_dumper(&segment_writer);
+      ClassLoaderData::the_null_class_loader_data()->classes_do(&stiky_class_dumper);
     }

     // Heap iteration.
@@ -2442,46 +2506,39 @@ void VM_HeapDumper::work(uint worker_id) {
     // segment is started.
     // The HPROF_GC_CLASS_DUMP and HPROF_GC_INSTANCE_DUMP are the vast bulk
     // of the heap dump.

+    TraceTime timer(is_parallel_dump() ? "Dump heap objects in parallel" : "Dump heap objects", TRACETIME_LOG(Info, heapdump));
+    HeapObjectDumper obj_dumper(&segment_writer, this);
     if (!is_parallel_dump()) {
-      assert(is_vm_dumper(worker_id), "must be");
-      // == Serial dump
-      ResourceMark rm;
-      TraceTime timer("Dump heap objects", TRACETIME_LOG(Info, heapdump));
-      HeapObjectDumper obj_dumper(writer());
       Universe::heap()->object_iterate(&obj_dumper);
-      writer()->finish_dump_segment();
-      // Writes the HPROF_HEAP_DUMP_END record because merge does not happen in serial dump
-      DumperSupport::end_of_dump(writer());
-      writer()->flush();
     } else {
       // == Parallel dump
-      ResourceMark rm;
-      TraceTime timer("Dump heap objects in parallel", TRACETIME_LOG(Info, heapdump));
-      DumpWriter* local_writer = is_vm_dumper(worker_id) ? writer() : create_local_writer();
-      if (!local_writer->has_error()) {
-        HeapObjectDumper obj_dumper(local_writer);
       _poi->object_iterate(&obj_dumper, worker_id);
-        local_writer->finish_dump_segment();
-        local_writer->flush();
-      }
-      if (is_vm_dumper(worker_id)) {
-        _dumper_controller->wait_all_dumpers_complete();
-      } else {
-        _dumper_controller->dumper_complete(local_writer, writer());
-        delete local_writer;
-        return;
-      }
     }
+
+    segment_writer.finish_dump_segment();
+    segment_writer.flush();
+  }
+
+  _dumper_controller->dumper_complete(&segment_writer, writer());
+
+  if (is_vm_dumper(dumper_id)) {
+    _dumper_controller->wait_all_dumpers_complete();
+
+    // flush global writer
+    writer()->flush();
+
     // At this point, all fragments of the heapdump have been written to separate files.
     // We need to merge them into a complete heapdump and write HPROF_HEAP_DUMP_END at that time.
   }
+}

-void VM_HeapDumper::dump_stack_traces() {
+void VM_HeapDumper::dump_stack_traces(AbstractDumpWriter* writer) {
   // write a HPROF_TRACE record without any frames to be referenced as object alloc sites
-  DumperSupport::write_header(writer(), HPROF_TRACE, 3 * sizeof(u4));
-  writer()->write_u4((u4)STACK_TRACE_ID);
-  writer()->write_u4(0); // thread number
-  writer()->write_u4(0); // frame count
+  DumperSupport::write_header(writer, HPROF_TRACE, 3 * sizeof(u4));
+  writer->write_u4((u4)STACK_TRACE_ID);
+  writer->write_u4(0); // thread number
+  writer->write_u4(0); // frame count

   // max number if every platform thread is carrier with mounted virtual thread
   _thread_dumpers = NEW_C_HEAP_ARRAY(ThreadDumper*, Threads::number_of_threads() * 2, mtInternal);
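The four writes above emit one empty HPROF_TRACE record. A byte-layout sketch follows, assuming the HPROF 1.0.2 framing of tag/timestamp/length ahead of the three u4 body fields written here; real output is serialized field by field in big-endian order, not as a packed struct.

#include <cstdint>

struct HprofEmptyTraceRecord {
    uint8_t  tag;          // HPROF_TRACE (0x05)
    uint32_t micros;       // time offset, part of the record header
    uint32_t body_length;  // 3 * sizeof(u4) == 12, as passed to write_header
    uint32_t serial_num;   // STACK_TRACE_ID, referenced by object alloc sites
    uint32_t thread_num;   // 0: not tied to any thread
    uint32_t frame_count;  // 0: no HPROF_FRAME records follow
};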
@@ -2505,7 +2562,7 @@ void VM_HeapDumper::dump_stack_traces() {
         add_oom_frame = false;
       }
       thread_dumper->init_serial_nums(&_thread_serial_num, &_frame_serial_num);
-      thread_dumper->dump_stack_traces(writer(), _klass_map);
+      thread_dumper->dump_stack_traces(writer, _klass_map);
     }

     // platform or carrier thread
@@ -2515,11 +2572,27 @@ void VM_HeapDumper::dump_stack_traces() {
       thread_dumper->add_oom_frame(_oome_constructor);
     }
     thread_dumper->init_serial_nums(&_thread_serial_num, &_frame_serial_num);
-    thread_dumper->dump_stack_traces(writer(), _klass_map);
+    thread_dumper->dump_stack_traces(writer, _klass_map);
     }
   }
 }

+void VM_HeapDumper::dump_vthread(oop vt, AbstractDumpWriter* segment_writer) {
+  // unmounted vthread has no JavaThread
+  ThreadDumper thread_dumper(ThreadDumper::ThreadType::UnmountedVirtual, nullptr, vt);
+  thread_dumper.init_serial_nums(&_thread_serial_num, &_frame_serial_num);
+
+  // write HPROF_TRACE/HPROF_FRAME records to global writer
+  _dumper_controller->lock_global_writer();
+  thread_dumper.dump_stack_traces(writer(), _klass_map);
+  _dumper_controller->unlock_global_writer();
+
+  // write HPROF_GC_ROOT_THREAD_OBJ/HPROF_GC_ROOT_JAVA_FRAME/HPROF_GC_ROOT_JNI_LOCAL subrecord
+  // to segment writer
+  thread_dumper.dump_thread_obj(segment_writer);
+  thread_dumper.dump_stack_refs(segment_writer);
+}
+
 // dump the heap to given path.
 int HeapDumper::dump(const char* path, outputStream* out, int compression, bool overwrite, uint num_dump_threads) {
   assert(path != nullptr && strlen(path) > 0, "path missing");
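dump_vthread above splits its output between two writers: trace and frame records must go through the shared global writer under the controller's lock, while the GC-root subrecords target the worker's private segment writer with no locking. A generic sketch of that contract, with the writer actions passed in as callables since the real writer types are not part of this hunk:

#include <mutex>

template <typename WriteGlobalFn, typename WriteSegmentFn>
void dump_one_vthread(std::mutex& global_writer_lock,
                      WriteGlobalFn write_trace_and_frames,
                      WriteSegmentFn write_root_subrecords) {
    {
        // serialize HPROF_TRACE / HPROF_FRAME output on the shared writer
        std::lock_guard<std::mutex> g(global_writer_lock);
        write_trace_and_frames();
    }
    // HPROF_GC_ROOT_* subrecords go to this worker's own segment: no lock
    write_root_subrecords();
}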
@@ -2561,16 +2634,14 @@ int HeapDumper::dump(const char* path, outputStream* out, int compression, bool
   // record any error that the writer may have encountered
   set_error(writer.error());

-  // For serial dump, once VM_HeapDumper completes, the whole heap dump process
-  // is done, no further phases needed. For parallel dump, the whole heap dump
-  // process is done in two phases
+  // Heap dump process is done in two phases
   //
   // Phase 1: Concurrent threads directly write heap data to multiple heap files.
   //          This is done by VM_HeapDumper, which is performed within safepoint.
   //
   // Phase 2: Merge multiple heap files into one complete heap dump file.
   //          This is done by DumpMerger, which is performed outside safepoint
+
-  if (dumper.is_parallel_dump()) {
   DumpMerger merger(path, &writer, dumper.dump_seq());
   Thread* current_thread = Thread::current();
   if (current_thread->is_AttachListener_thread()) {
@@ -2583,6 +2654,7 @@ int HeapDumper::dump(const char* path, outputStream* out, int compression, bool
     VM_HeapDumpMerge op(&merger);
     VMThread::execute(&op);
   }
+  if (writer.error() != nullptr) {
     set_error(writer.error());
   }

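A standalone sketch of the phase-2 merge described in the comments above: each dumper thread produced one segment in phase 1, and the merger concatenates them into the final file. The "<path>.p<seq>" naming follows the "%s.p%d" convention visible in the removed create_local_writer code; stream handling, compression, and the trailing HPROF_HEAP_DUMP_END record are simplified assumptions.

#include <cstdio>
#include <string>

static void merge_segments_sketch(const std::string& path, int num_segments) {
    std::FILE* out = std::fopen(path.c_str(), "ab");  // global prologue already written
    if (out == nullptr) return;
    for (int seq = 0; seq < num_segments; seq++) {
        std::string segment = path + ".p" + std::to_string(seq);
        if (std::FILE* in = std::fopen(segment.c_str(), "rb")) {
            char buf[1 << 16];
            size_t n;
            while ((n = std::fread(buf, 1, sizeof buf, in)) > 0) {
                std::fwrite(buf, 1, n, out);
            }
            std::fclose(in);
            std::remove(segment.c_str());  // segments are temporary
        }
    }
    // the real DumpMerger appends the HPROF_HEAP_DUMP_END record here
    std::fclose(out);
}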
src/java.base/linux/native/libsimdsort/avx2-32bit-qsort.hpp (new file, 367 lines)
@@ -0,0 +1,367 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef AVX2_QSORT_32BIT
#define AVX2_QSORT_32BIT

#include "avx2-emu-funcs.hpp"
#include "xss-common-qsort.h"

/*
 * Constants used in sorting 8 elements in a ymm registers. Based on Bitonic
 * sorting network (see
 * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg)
 */

// ymm                  7, 6, 5, 4, 3, 2, 1, 0
#define NETWORK_32BIT_AVX2_1 4, 5, 6, 7, 0, 1, 2, 3
#define NETWORK_32BIT_AVX2_2 0, 1, 2, 3, 4, 5, 6, 7
#define NETWORK_32BIT_AVX2_3 5, 4, 7, 6, 1, 0, 3, 2
#define NETWORK_32BIT_AVX2_4 3, 2, 1, 0, 7, 6, 5, 4

/*
 * Assumes ymm is random and performs a full sorting network defined in
 * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg
 */
template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_INLINE reg_t sort_ymm_32bit(reg_t ymm) {
    const typename vtype::opmask_t oxAA = _mm256_set_epi32(
        0xFFFFFFFF, 0, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0, 0xFFFFFFFF, 0);
    const typename vtype::opmask_t oxCC = _mm256_set_epi32(
        0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
    const typename vtype::opmask_t oxF0 = _mm256_set_epi32(
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0, 0, 0, 0);

    const typename vtype::ymmi_t rev_index = vtype::seti(NETWORK_32BIT_AVX2_2);
    ymm = cmp_merge<vtype>(
        ymm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(ymm), oxAA);
    ymm = cmp_merge<vtype>(
        ymm, vtype::permutexvar(vtype::seti(NETWORK_32BIT_AVX2_1), ymm), oxCC);
    ymm = cmp_merge<vtype>(
        ymm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(ymm), oxAA);
    ymm = cmp_merge<vtype>(ymm, vtype::permutexvar(rev_index, ymm), oxF0);
    ymm = cmp_merge<vtype>(
        ymm, vtype::permutexvar(vtype::seti(NETWORK_32BIT_AVX2_3), ymm), oxCC);
    ymm = cmp_merge<vtype>(
        ymm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(ymm), oxAA);
    return ymm;
}

struct avx2_32bit_swizzle_ops;

template <>
struct avx2_vector<int32_t> {
    using type_t = int32_t;
    using reg_t = __m256i;
    using ymmi_t = __m256i;
    using opmask_t = __m256i;
    static const uint8_t numlanes = 8;
#ifdef XSS_MINIMAL_NETWORK_SORT
    static constexpr int network_sort_threshold = numlanes;
#else
    static constexpr int network_sort_threshold = 256;
#endif
    static constexpr int partition_unroll_factor = 4;

    using swizzle_ops = avx2_32bit_swizzle_ops;

    static type_t type_max() { return X86_SIMD_SORT_MAX_INT32; }
    static type_t type_min() { return X86_SIMD_SORT_MIN_INT32; }
    static reg_t zmm_max() {
        return _mm256_set1_epi32(type_max());
    }  // TODO: this should broadcast bits as is?
    static opmask_t get_partial_loadmask(uint64_t num_to_read) {
        auto mask = ((0x1ull << num_to_read) - 0x1ull);
        return convert_int_to_avx2_mask(mask);
    }
    static ymmi_t seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
                       int v8) {
        return _mm256_set_epi32(v1, v2, v3, v4, v5, v6, v7, v8);
    }
    static opmask_t kxor_opmask(opmask_t x, opmask_t y) {
        return _mm256_xor_si256(x, y);
    }
    static opmask_t ge(reg_t x, reg_t y) {
        opmask_t equal = eq(x, y);
        opmask_t greater = _mm256_cmpgt_epi32(x, y);
        return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(equal),
                                                _mm256_castsi256_ps(greater)));
    }
    static opmask_t gt(reg_t x, reg_t y) { return _mm256_cmpgt_epi32(x, y); }
    static opmask_t eq(reg_t x, reg_t y) { return _mm256_cmpeq_epi32(x, y); }
    template <int scale>
    static reg_t mask_i64gather(reg_t src, opmask_t mask, __m256i index,
                                void const *base) {
        return _mm256_mask_i32gather_epi32(src, base, index, mask, scale);
    }
    template <int scale>
    static reg_t i64gather(__m256i index, void const *base) {
        return _mm256_i32gather_epi32((int const *)base, index, scale);
    }
    static reg_t loadu(void const *mem) {
        return _mm256_loadu_si256((reg_t const *)mem);
    }
    static reg_t max(reg_t x, reg_t y) { return _mm256_max_epi32(x, y); }
    static void mask_compressstoreu(void *mem, opmask_t mask, reg_t x) {
        return avx2_emu_mask_compressstoreu32<type_t>(mem, mask, x);
    }
    static reg_t maskz_loadu(opmask_t mask, void const *mem) {
        return _mm256_maskload_epi32((const int *)mem, mask);
    }
    static reg_t mask_loadu(reg_t x, opmask_t mask, void const *mem) {
        reg_t dst = _mm256_maskload_epi32((type_t *)mem, mask);
        return mask_mov(x, mask, dst);
    }
    static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y) {
        return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(x),
                                                    _mm256_castsi256_ps(y),
                                                    _mm256_castsi256_ps(mask)));
    }
    static void mask_storeu(void *mem, opmask_t mask, reg_t x) {
        return _mm256_maskstore_epi32((type_t *)mem, mask, x);
    }
    static reg_t min(reg_t x, reg_t y) { return _mm256_min_epi32(x, y); }
    static reg_t permutexvar(__m256i idx, reg_t ymm) {
        return _mm256_permutevar8x32_epi32(ymm, idx);
        // return avx2_emu_permutexvar_epi32(idx, ymm);
    }
    static reg_t permutevar(reg_t ymm, __m256i idx) {
        return _mm256_permutevar8x32_epi32(ymm, idx);
    }
    static reg_t reverse(reg_t ymm) {
        const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);
        return permutexvar(rev_index, ymm);
    }
    static type_t reducemax(reg_t v) {
        return avx2_emu_reduce_max32<type_t>(v);
    }
    static type_t reducemin(reg_t v) {
        return avx2_emu_reduce_min32<type_t>(v);
    }
    static reg_t set1(type_t v) { return _mm256_set1_epi32(v); }
    template <uint8_t mask>
    static reg_t shuffle(reg_t ymm) {
        return _mm256_shuffle_epi32(ymm, mask);
    }
    static void storeu(void *mem, reg_t x) {
        _mm256_storeu_si256((__m256i *)mem, x);
    }
    static reg_t sort_vec(reg_t x) {
        return sort_ymm_32bit<avx2_vector<type_t>>(x);
    }
    static reg_t cast_from(__m256i v) { return v; }
    static __m256i cast_to(reg_t v) { return v; }
    static int double_compressstore(type_t *left_addr, type_t *right_addr,
                                    opmask_t k, reg_t reg) {
        return avx2_double_compressstore32<type_t>(left_addr, right_addr, k,
                                                   reg);
    }
};

template <>
struct avx2_vector<float> {
    using type_t = float;
    using reg_t = __m256;
    using ymmi_t = __m256i;
    using opmask_t = __m256i;
    static const uint8_t numlanes = 8;
#ifdef XSS_MINIMAL_NETWORK_SORT
    static constexpr int network_sort_threshold = numlanes;
#else
    static constexpr int network_sort_threshold = 256;
#endif
    static constexpr int partition_unroll_factor = 4;

    using swizzle_ops = avx2_32bit_swizzle_ops;

    static type_t type_max() { return X86_SIMD_SORT_INFINITYF; }
    static type_t type_min() { return -X86_SIMD_SORT_INFINITYF; }
    static reg_t zmm_max() { return _mm256_set1_ps(type_max()); }

    static ymmi_t seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
                       int v8) {
        return _mm256_set_epi32(v1, v2, v3, v4, v5, v6, v7, v8);
    }

    static reg_t maskz_loadu(opmask_t mask, void const *mem) {
        return _mm256_maskload_ps((const float *)mem, mask);
    }
    static opmask_t ge(reg_t x, reg_t y) {
        return _mm256_castps_si256(_mm256_cmp_ps(x, y, _CMP_GE_OQ));
    }
    static opmask_t gt(reg_t x, reg_t y) {
        return _mm256_castps_si256(_mm256_cmp_ps(x, y, _CMP_GT_OQ));
    }
    static opmask_t eq(reg_t x, reg_t y) {
        return _mm256_castps_si256(_mm256_cmp_ps(x, y, _CMP_EQ_OQ));
    }
    static opmask_t get_partial_loadmask(uint64_t num_to_read) {
        auto mask = ((0x1ull << num_to_read) - 0x1ull);
        return convert_int_to_avx2_mask(mask);
    }
    static int32_t convert_mask_to_int(opmask_t mask) {
        return convert_avx2_mask_to_int(mask);
    }
    template <int type>
    static opmask_t fpclass(reg_t x) {
        if constexpr (type == (0x01 | 0x80)) {
            return _mm256_castps_si256(_mm256_cmp_ps(x, x, _CMP_UNORD_Q));
        } else {
            static_assert(type == (0x01 | 0x80), "should not reach here");
        }
    }
    template <int scale>
    static reg_t mask_i64gather(reg_t src, opmask_t mask, __m256i index,
                                void const *base) {
        return _mm256_mask_i32gather_ps(src, base, index,
                                        _mm256_castsi256_ps(mask), scale);
        ;
    }
    template <int scale>
    static reg_t i64gather(__m256i index, void const *base) {
        return _mm256_i32gather_ps((float *)base, index, scale);
    }
    static reg_t loadu(void const *mem) {
        return _mm256_loadu_ps((float const *)mem);
    }
    static reg_t max(reg_t x, reg_t y) { return _mm256_max_ps(x, y); }
    static void mask_compressstoreu(void *mem, opmask_t mask, reg_t x) {
        return avx2_emu_mask_compressstoreu32<type_t>(mem, mask, x);
    }
    static reg_t mask_loadu(reg_t x, opmask_t mask, void const *mem) {
        reg_t dst = _mm256_maskload_ps((type_t *)mem, mask);
        return mask_mov(x, mask, dst);
    }
    static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y) {
        return _mm256_blendv_ps(x, y, _mm256_castsi256_ps(mask));
    }
    static void mask_storeu(void *mem, opmask_t mask, reg_t x) {
        return _mm256_maskstore_ps((type_t *)mem, mask, x);
    }
    static reg_t min(reg_t x, reg_t y) { return _mm256_min_ps(x, y); }
    static reg_t permutexvar(__m256i idx, reg_t ymm) {
        return _mm256_permutevar8x32_ps(ymm, idx);
    }
    static reg_t permutevar(reg_t ymm, __m256i idx) {
        return _mm256_permutevar8x32_ps(ymm, idx);
    }
    static reg_t reverse(reg_t ymm) {
        const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2);
        return permutexvar(rev_index, ymm);
    }
    static type_t reducemax(reg_t v) {
        return avx2_emu_reduce_max32<type_t>(v);
    }
    static type_t reducemin(reg_t v) {
        return avx2_emu_reduce_min32<type_t>(v);
    }
    static reg_t set1(type_t v) { return _mm256_set1_ps(v); }
    template <uint8_t mask>
    static reg_t shuffle(reg_t ymm) {
        return _mm256_castsi256_ps(
            _mm256_shuffle_epi32(_mm256_castps_si256(ymm), mask));
    }
    static void storeu(void *mem, reg_t x) {
        _mm256_storeu_ps((float *)mem, x);
    }
    static reg_t sort_vec(reg_t x) {
        return sort_ymm_32bit<avx2_vector<type_t>>(x);
    }
    static reg_t cast_from(__m256i v) { return _mm256_castsi256_ps(v); }
    static __m256i cast_to(reg_t v) { return _mm256_castps_si256(v); }
    static int double_compressstore(type_t *left_addr, type_t *right_addr,
                                    opmask_t k, reg_t reg) {
        return avx2_double_compressstore32<type_t>(left_addr, right_addr, k,
                                                   reg);
    }
};

struct avx2_32bit_swizzle_ops {
    template <typename vtype, int scale>
    X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(
        typename vtype::reg_t reg) {
        __m256i v = vtype::cast_to(reg);

        if constexpr (scale == 2) {
            __m256 vf = _mm256_castsi256_ps(v);
            vf = _mm256_permute_ps(vf, 0b10110001);
            v = _mm256_castps_si256(vf);
        } else if constexpr (scale == 4) {
            __m256 vf = _mm256_castsi256_ps(v);
            vf = _mm256_permute_ps(vf, 0b01001110);
            v = _mm256_castps_si256(vf);
        } else if constexpr (scale == 8) {
            v = _mm256_permute2x128_si256(v, v, 0b00000001);
        } else {
            static_assert(scale == -1, "should not be reached");
        }

        return vtype::cast_from(v);
    }

    template <typename vtype, int scale>
    X86_SIMD_SORT_INLINE typename vtype::reg_t reverse_n(
        typename vtype::reg_t reg) {
        __m256i v = vtype::cast_to(reg);

        if constexpr (scale == 2) {
            return swap_n<vtype, 2>(reg);
        } else if constexpr (scale == 4) {
            constexpr uint64_t mask = 0b00011011;
            __m256 vf = _mm256_castsi256_ps(v);
            vf = _mm256_permute_ps(vf, mask);
            v = _mm256_castps_si256(vf);
        } else if constexpr (scale == 8) {
            return vtype::reverse(reg);
        } else {
            static_assert(scale == -1, "should not be reached");
        }

        return vtype::cast_from(v);
    }

    template <typename vtype, int scale>
    X86_SIMD_SORT_INLINE typename vtype::reg_t merge_n(
        typename vtype::reg_t reg, typename vtype::reg_t other) {
        __m256i v1 = vtype::cast_to(reg);
        __m256i v2 = vtype::cast_to(other);

        if constexpr (scale == 2) {
            v1 = _mm256_blend_epi32(v1, v2, 0b01010101);
        } else if constexpr (scale == 4) {
            v1 = _mm256_blend_epi32(v1, v2, 0b00110011);
        } else if constexpr (scale == 8) {
            v1 = _mm256_blend_epi32(v1, v2, 0b00001111);
        } else {
            static_assert(scale == -1, "should not be reached");
        }

        return vtype::cast_from(v1);
    }
};

#endif // AVX2_QSORT_32BIT
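The sort_ymm_32bit network above performs six cmp_merge stages over eight lanes. Below is a scalar sketch of the same classic 8-input bitonic network: each coex is one compare-exchange, and the stage shapes follow the network linked in the file's comment, though the AVX2 version expresses its lane pairs through shuffle masks rather than index pairs, so the exact pairings here are illustrative.

#include <algorithm>
#include <cstdio>

static void coex(int& a, int& b) {  // compare-exchange: a becomes min, b becomes max
    if (a > b) std::swap(a, b);
}

static void sort8_bitonic(int v[8]) {
    // stage 1: ascending/descending pairs
    coex(v[0], v[1]); coex(v[3], v[2]); coex(v[4], v[5]); coex(v[7], v[6]);
    // stage 2: merge to an ascending half and a descending half
    coex(v[0], v[2]); coex(v[1], v[3]); coex(v[6], v[4]); coex(v[7], v[5]);
    coex(v[0], v[1]); coex(v[2], v[3]); coex(v[5], v[4]); coex(v[7], v[6]);
    // stage 3: final bitonic merge, fully ascending
    coex(v[0], v[4]); coex(v[1], v[5]); coex(v[2], v[6]); coex(v[3], v[7]);
    coex(v[0], v[2]); coex(v[1], v[3]); coex(v[4], v[6]); coex(v[5], v[7]);
    coex(v[0], v[1]); coex(v[2], v[3]); coex(v[4], v[5]); coex(v[6], v[7]);
}

int main() {
    int v[8] = {5, 2, 7, 0, 6, 1, 4, 3};
    sort8_bitonic(v);
    for (int x : v) std::printf("%d ", x);  // prints: 0 1 2 3 4 5 6 7
    return 0;
}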
src/java.base/linux/native/libsimdsort/avx2-emu-funcs.hpp (new file, 183 lines)
@@ -0,0 +1,183 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef AVX2_EMU_FUNCS
#define AVX2_EMU_FUNCS

#include <array>
#include <utility>

#include "xss-common-qsort.h"

constexpr auto avx2_mask_helper_lut32 = [] {
    std::array<std::array<int32_t, 8>, 256> lut{};
    for (int64_t i = 0; i <= 0xFF; i++) {
        std::array<int32_t, 8> entry{};
        for (int j = 0; j < 8; j++) {
            if (((i >> j) & 1) == 1)
                entry[j] = 0xFFFFFFFF;
            else
                entry[j] = 0;
        }
        lut[i] = entry;
    }
    return lut;
}();

constexpr auto avx2_compressstore_lut32_gen = [] {
    std::array<std::array<std::array<int32_t, 8>, 256>, 2> lutPair{};
    auto &permLut = lutPair[0];
    auto &leftLut = lutPair[1];
    for (int64_t i = 0; i <= 0xFF; i++) {
        std::array<int32_t, 8> indices{};
        std::array<int32_t, 8> leftEntry = {0, 0, 0, 0, 0, 0, 0, 0};
        int right = 7;
        int left = 0;
        for (int j = 0; j < 8; j++) {
            bool ge = (i >> j) & 1;
            if (ge) {
                indices[right] = j;
                right--;
            } else {
                indices[left] = j;
                leftEntry[left] = 0xFFFFFFFF;
                left++;
            }
        }
        permLut[i] = indices;
        leftLut[i] = leftEntry;
    }
    return lutPair;
}();

constexpr auto avx2_compressstore_lut32_perm = avx2_compressstore_lut32_gen[0];
constexpr auto avx2_compressstore_lut32_left = avx2_compressstore_lut32_gen[1];

X86_SIMD_SORT_INLINE
__m256i convert_int_to_avx2_mask(int32_t m) {
    return _mm256_loadu_si256(
        (const __m256i *)avx2_mask_helper_lut32[m].data());
}

X86_SIMD_SORT_INLINE
int32_t convert_avx2_mask_to_int(__m256i m) {
    return _mm256_movemask_ps(_mm256_castsi256_ps(m));
}

// Emulators for intrinsics missing from AVX2 compared to AVX512
template <typename T>
T avx2_emu_reduce_max32(typename avx2_vector<T>::reg_t x) {
    using vtype = avx2_vector<T>;
    using reg_t = typename vtype::reg_t;

    reg_t inter1 =
        vtype::max(x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
    reg_t inter2 = vtype::max(
        inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
    T arr[vtype::numlanes];
    vtype::storeu(arr, inter2);
    return std::max(arr[0], arr[7]);
}

template <typename T>
T avx2_emu_reduce_min32(typename avx2_vector<T>::reg_t x) {
    using vtype = avx2_vector<T>;
    using reg_t = typename vtype::reg_t;

    reg_t inter1 =
        vtype::min(x, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(x));
    reg_t inter2 = vtype::min(
        inter1, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(inter1));
    T arr[vtype::numlanes];
    vtype::storeu(arr, inter2);
    return std::min(arr[0], arr[7]);
}

template <typename T>
void avx2_emu_mask_compressstoreu32(void *base_addr,
                                    typename avx2_vector<T>::opmask_t k,
                                    typename avx2_vector<T>::reg_t reg) {
    using vtype = avx2_vector<T>;

    T *leftStore = (T *)base_addr;

    int32_t shortMask = convert_avx2_mask_to_int(k);
    const __m256i &perm = _mm256_loadu_si256(
        (const __m256i *)avx2_compressstore_lut32_perm[shortMask].data());
    const __m256i &left = _mm256_loadu_si256(
        (const __m256i *)avx2_compressstore_lut32_left[shortMask].data());

    typename vtype::reg_t temp = vtype::permutevar(reg, perm);

    vtype::mask_storeu(leftStore, left, temp);
}

template <typename T>
int avx2_double_compressstore32(void *left_addr, void *right_addr,
                                typename avx2_vector<T>::opmask_t k,
                                typename avx2_vector<T>::reg_t reg) {
    using vtype = avx2_vector<T>;

    T *leftStore = (T *)left_addr;
    T *rightStore = (T *)right_addr;

    int32_t shortMask = convert_avx2_mask_to_int(k);
    const __m256i &perm = _mm256_loadu_si256(
        (const __m256i *)avx2_compressstore_lut32_perm[shortMask].data());

    typename vtype::reg_t temp = vtype::permutevar(reg, perm);

    vtype::storeu(leftStore, temp);
    vtype::storeu(rightStore, temp);

    return _mm_popcnt_u32(shortMask);
}

template <typename T>
typename avx2_vector<T>::reg_t avx2_emu_max(typename avx2_vector<T>::reg_t x,
                                            typename avx2_vector<T>::reg_t y) {
    using vtype = avx2_vector<T>;
    typename vtype::opmask_t nlt = vtype::gt(x, y);
    return _mm256_castpd_si256(_mm256_blendv_pd(_mm256_castsi256_pd(y),
                                                _mm256_castsi256_pd(x),
                                                _mm256_castsi256_pd(nlt)));
}

template <typename T>
typename avx2_vector<T>::reg_t avx2_emu_min(typename avx2_vector<T>::reg_t x,
                                            typename avx2_vector<T>::reg_t y) {
    using vtype = avx2_vector<T>;
    typename vtype::opmask_t nlt = vtype::gt(x, y);
    return _mm256_castpd_si256(_mm256_blendv_pd(_mm256_castsi256_pd(x),
                                                _mm256_castsi256_pd(y),
                                                _mm256_castsi256_pd(nlt)));
}

#endif
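avx2_double_compressstore32 above uses the permutation LUT to split one 8-lane register around a comparison mask and returns the popcount. A scalar model of the same contract follows: lanes with a 0 mask bit land in the left output, lanes with a 1 bit in the right output, with the caveat that the SIMD version may order lanes within each side differently.

#include <cstdint>

template <typename T>
int scalar_double_compressstore8(T* left, T* right, uint8_t mask, const T (&v)[8]) {
    int l = 0, r = 0;
    for (int j = 0; j < 8; j++) {
        if ((mask >> j) & 1) {
            right[r++] = v[j];  // "greater-or-equal" lanes
        } else {
            left[l++] = v[j];   // "less-than" lanes
        }
    }
    return r;  // == popcount(mask), matching _mm_popcnt_u32(shortMask)
}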
src/java.base/linux/native/libsimdsort/avx2-linux-qsort.cpp (new file, 66 lines)
@@ -0,0 +1,66 @@
/*
 * Copyright (c) 2023 Intel Corporation. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "simdsort-support.hpp"
#ifdef __SIMDSORT_SUPPORTED_LINUX

#pragma GCC target("avx2")
#include "avx2-32bit-qsort.hpp"
#include "classfile_constants.h"


#define DLL_PUBLIC __attribute__((visibility("default")))
#define INSERTION_SORT_THRESHOLD_32BIT 16

extern "C" {

    DLL_PUBLIC void avx2_sort(void *array, int elem_type, int32_t from_index, int32_t to_index) {
        switch (elem_type) {
            case JVM_T_INT:
                avx2_fast_sort((int32_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
                break;
            case JVM_T_FLOAT:
                avx2_fast_sort((float*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
                break;
            default:
                assert(false, "Unexpected type");
        }
    }

    DLL_PUBLIC void avx2_partition(void *array, int elem_type, int32_t from_index, int32_t to_index, int32_t *pivot_indices, int32_t index_pivot1, int32_t index_pivot2) {
        switch (elem_type) {
            case JVM_T_INT:
                avx2_fast_partition((int32_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
                break;
            case JVM_T_FLOAT:
                avx2_fast_partition((float*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
                break;
            default:
                assert(false, "Unexpected type");
        }
    }

}

#endif
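A hypothetical caller-side sketch for the exported entry points above. In the JDK these functions are resolved from the library and driven by the Java sort intrinsics; here avx2_sort is simply invoked directly. The numeric value 10 for JVM_T_INT and the half-open [from_index, to_index) range are assumptions based on classfile_constants.h and the parameter names.

#include <cstdint>
#include <cstdio>

extern "C" void avx2_sort(void* array, int elem_type, int32_t from_index, int32_t to_index);

int main() {
    int32_t data[] = {9, 3, 7, 1, 8, 2, 6, 4, 5, 0};
    avx2_sort(data, /* JVM_T_INT */ 10, 0, 10);    // sort the whole array
    for (int32_t x : data) std::printf("%d ", x);  // expected: 0 1 2 ... 9
    return 0;
}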
@@ -28,7 +28,7 @@
 #ifndef AVX512_QSORT_32BIT
 #define AVX512_QSORT_32BIT

-#include "avx512-common-qsort.h"
+#include "xss-common-qsort.h"

 /*
  * Constants used in sorting 16 elements in a ZMM registers. Based on Bitonic
@@ -43,130 +43,204 @@
 #define NETWORK_32BIT_6 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4
 #define NETWORK_32BIT_7 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8

+template <typename vtype, typename reg_t>
+X86_SIMD_SORT_INLINE reg_t sort_zmm_32bit(reg_t zmm);
+
+struct avx512_32bit_swizzle_ops;
+
 template <>
 struct zmm_vector<int32_t> {
     using type_t = int32_t;
-    using zmm_t = __m512i;
-    using ymm_t = __m256i;
+    using reg_t = __m512i;
+    using halfreg_t = __m256i;
     using opmask_t = __mmask16;
     static const uint8_t numlanes = 16;
+#ifdef XSS_MINIMAL_NETWORK_SORT
+    static constexpr int network_sort_threshold = numlanes;
+#else
+    static constexpr int network_sort_threshold = 512;
+#endif
+    static constexpr int partition_unroll_factor = 8;
+
+    using swizzle_ops = avx512_32bit_swizzle_ops;

     static type_t type_max() { return X86_SIMD_SORT_MAX_INT32; }
     static type_t type_min() { return X86_SIMD_SORT_MIN_INT32; }
-    static zmm_t zmm_max() { return _mm512_set1_epi32(type_max()); }
+    static reg_t zmm_max() { return _mm512_set1_epi32(type_max()); }

     static opmask_t knot_opmask(opmask_t x) { return _mm512_knot(x); }
-    static opmask_t ge(zmm_t x, zmm_t y) {
+    static opmask_t ge(reg_t x, reg_t y) {
         return _mm512_cmp_epi32_mask(x, y, _MM_CMPINT_NLT);
     }
-    static opmask_t gt(zmm_t x, zmm_t y) {
+    static opmask_t gt(reg_t x, reg_t y) {
         return _mm512_cmp_epi32_mask(x, y, _MM_CMPINT_GT);
     }
+    static opmask_t get_partial_loadmask(uint64_t num_to_read) {
+        return ((0x1ull << num_to_read) - 0x1ull);
+    }
     template <int scale>
-    static ymm_t i64gather(__m512i index, void const *base) {
+    static halfreg_t i64gather(__m512i index, void const *base) {
         return _mm512_i64gather_epi32(index, base, scale);
     }
-    static zmm_t merge(ymm_t y1, ymm_t y2) {
-        zmm_t z1 = _mm512_castsi256_si512(y1);
+    static reg_t merge(halfreg_t y1, halfreg_t y2) {
+        reg_t z1 = _mm512_castsi256_si512(y1);
         return _mm512_inserti32x8(z1, y2, 1);
     }
-    static zmm_t loadu(void const *mem) { return _mm512_loadu_si512(mem); }
-    static void mask_compressstoreu(void *mem, opmask_t mask, zmm_t x) {
+    static reg_t loadu(void const *mem) { return _mm512_loadu_si512(mem); }
+    static void mask_compressstoreu(void *mem, opmask_t mask, reg_t x) {
         return _mm512_mask_compressstoreu_epi32(mem, mask, x);
     }
-    static zmm_t mask_loadu(zmm_t x, opmask_t mask, void const *mem) {
+    static reg_t mask_loadu(reg_t x, opmask_t mask, void const *mem) {
         return _mm512_mask_loadu_epi32(x, mask, mem);
     }
-    static zmm_t mask_mov(zmm_t x, opmask_t mask, zmm_t y) {
+    static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y) {
         return _mm512_mask_mov_epi32(x, mask, y);
     }
-    static void mask_storeu(void *mem, opmask_t mask, zmm_t x) {
+    static void mask_storeu(void *mem, opmask_t mask, reg_t x) {
         return _mm512_mask_storeu_epi32(mem, mask, x);
     }
-    static zmm_t min(zmm_t x, zmm_t y) { return _mm512_min_epi32(x, y); }
-    static zmm_t max(zmm_t x, zmm_t y) { return _mm512_max_epi32(x, y); }
-    static zmm_t permutexvar(__m512i idx, zmm_t zmm) {
+    static reg_t min(reg_t x, reg_t y) { return _mm512_min_epi32(x, y); }
+    static reg_t max(reg_t x, reg_t y) { return _mm512_max_epi32(x, y); }
+    static reg_t permutexvar(__m512i idx, reg_t zmm) {
         return _mm512_permutexvar_epi32(idx, zmm);
     }
-    static type_t reducemax(zmm_t v) { return _mm512_reduce_max_epi32(v); }
-    static type_t reducemin(zmm_t v) { return _mm512_reduce_min_epi32(v); }
-    static zmm_t set1(type_t v) { return _mm512_set1_epi32(v); }
+    static type_t reducemax(reg_t v) { return _mm512_reduce_max_epi32(v); }
+    static type_t reducemin(reg_t v) { return _mm512_reduce_min_epi32(v); }
+    static reg_t set1(type_t v) { return _mm512_set1_epi32(v); }
     template <uint8_t mask>
-    static zmm_t shuffle(zmm_t zmm) {
+    static reg_t shuffle(reg_t zmm) {
         return _mm512_shuffle_epi32(zmm, (_MM_PERM_ENUM)mask);
     }
-    static void storeu(void *mem, zmm_t x) {
+    static void storeu(void *mem, reg_t x) {
         return _mm512_storeu_si512(mem, x);
     }

-    static ymm_t max(ymm_t x, ymm_t y) { return _mm256_max_epi32(x, y); }
-    static ymm_t min(ymm_t x, ymm_t y) { return _mm256_min_epi32(x, y); }
+    static halfreg_t max(halfreg_t x, halfreg_t y) {
+        return _mm256_max_epi32(x, y);
+    }
+    static halfreg_t min(halfreg_t x, halfreg_t y) {
+        return _mm256_min_epi32(x, y);
+    }
+    static reg_t reverse(reg_t zmm) {
+        const auto rev_index = _mm512_set_epi32(NETWORK_32BIT_5);
+        return permutexvar(rev_index, zmm);
+    }
+    static reg_t sort_vec(reg_t x) {
+        return sort_zmm_32bit<zmm_vector<type_t>>(x);
+    }
+    static reg_t cast_from(__m512i v) { return v; }
+    static __m512i cast_to(reg_t v) { return v; }
+    static int double_compressstore(type_t *left_addr, type_t *right_addr,
+                                    opmask_t k, reg_t reg) {
+        return avx512_double_compressstore<zmm_vector<type_t>>(
+            left_addr, right_addr, k, reg);
+    }
 };
 template <>
 struct zmm_vector<float> {
     using type_t = float;
-    using zmm_t = __m512;
-    using ymm_t = __m256;
+    using reg_t = __m512;
+    using halfreg_t = __m256;
     using opmask_t = __mmask16;
     static const uint8_t numlanes = 16;
+#ifdef XSS_MINIMAL_NETWORK_SORT
+    static constexpr int network_sort_threshold = numlanes;
+#else
+    static constexpr int network_sort_threshold = 512;
+#endif
+    static constexpr int partition_unroll_factor = 8;
+
+    using swizzle_ops = avx512_32bit_swizzle_ops;

     static type_t type_max() { return X86_SIMD_SORT_INFINITYF; }
     static type_t type_min() { return -X86_SIMD_SORT_INFINITYF; }
-    static zmm_t zmm_max() { return _mm512_set1_ps(type_max()); }
+    static reg_t zmm_max() { return _mm512_set1_ps(type_max()); }

     static opmask_t knot_opmask(opmask_t x) { return _mm512_knot(x); }
-    static opmask_t ge(zmm_t x, zmm_t y) {
+    static opmask_t ge(reg_t x, reg_t y) {
         return _mm512_cmp_ps_mask(x, y, _CMP_GE_OQ);
     }
-    static opmask_t gt(zmm_t x, zmm_t y) {
+    static opmask_t gt(reg_t x, reg_t y) {
         return _mm512_cmp_ps_mask(x, y, _CMP_GT_OQ);
     }
+    static opmask_t get_partial_loadmask(uint64_t num_to_read) {
+        return ((0x1ull << num_to_read) - 0x1ull);
+    }
+    static int32_t convert_mask_to_int(opmask_t mask) { return mask; }
+    template <int type>
+    static opmask_t fpclass(reg_t x) {
+        return _mm512_fpclass_ps_mask(x, type);
+    }
     template <int scale>
-    static ymm_t i64gather(__m512i index, void const *base) {
+    static halfreg_t i64gather(__m512i index, void const *base) {
         return _mm512_i64gather_ps(index, base, scale);
     }
-    static zmm_t merge(ymm_t y1, ymm_t y2) {
-        zmm_t z1 = _mm512_castsi512_ps(
+    static reg_t merge(halfreg_t y1, halfreg_t y2) {
+        reg_t z1 = _mm512_castsi512_ps(
             _mm512_castsi256_si512(_mm256_castps_si256(y1)));
         return _mm512_insertf32x8(z1, y2, 1);
     }
-    static zmm_t loadu(void const *mem) { return _mm512_loadu_ps(mem); }
-    static zmm_t max(zmm_t x, zmm_t y) { return _mm512_max_ps(x, y); }
-    static void mask_compressstoreu(void *mem, opmask_t mask, zmm_t x) {
+    static reg_t loadu(void const *mem) { return _mm512_loadu_ps(mem); }
+    static reg_t max(reg_t x, reg_t y) { return _mm512_max_ps(x, y); }
+    static void mask_compressstoreu(void *mem, opmask_t mask, reg_t x) {
         return _mm512_mask_compressstoreu_ps(mem, mask, x);
     }
-    static zmm_t mask_loadu(zmm_t x, opmask_t mask, void const *mem) {
+    static reg_t maskz_loadu(opmask_t mask, void const *mem) {
+        return _mm512_maskz_loadu_ps(mask, mem);
+    }
+    static reg_t mask_loadu(reg_t x, opmask_t mask, void const *mem) {
         return _mm512_mask_loadu_ps(x, mask, mem);
     }
-    static zmm_t mask_mov(zmm_t x, opmask_t mask, zmm_t y) {
+    static reg_t mask_mov(reg_t x, opmask_t mask, reg_t y) {
         return _mm512_mask_mov_ps(x, mask, y);
     }
-    static void mask_storeu(void *mem, opmask_t mask, zmm_t x) {
+    static void mask_storeu(void *mem, opmask_t mask, reg_t x) {
         return _mm512_mask_storeu_ps(mem, mask, x);
     }
-    static zmm_t min(zmm_t x, zmm_t y) { return _mm512_min_ps(x, y); }
-    static zmm_t permutexvar(__m512i idx, zmm_t zmm) {
+    static reg_t min(reg_t x, reg_t y) { return _mm512_min_ps(x, y); }
+    static reg_t permutexvar(__m512i idx, reg_t zmm) {
         return _mm512_permutexvar_ps(idx, zmm);
     }
-    static type_t reducemax(zmm_t v) { return _mm512_reduce_max_ps(v); }
-    static type_t reducemin(zmm_t v) { return _mm512_reduce_min_ps(v); }
-    static zmm_t set1(type_t v) { return _mm512_set1_ps(v); }
+    static type_t reducemax(reg_t v) { return _mm512_reduce_max_ps(v); }
+    static type_t reducemin(reg_t v) { return _mm512_reduce_min_ps(v); }
+    static reg_t set1(type_t v) { return _mm512_set1_ps(v); }
     template <uint8_t mask>
-    static zmm_t shuffle(zmm_t zmm) {
+    static reg_t shuffle(reg_t zmm) {
         return _mm512_shuffle_ps(zmm, zmm, (_MM_PERM_ENUM)mask);
     }
-    static void storeu(void *mem, zmm_t x) { return _mm512_storeu_ps(mem, x); }
+    static void storeu(void *mem, reg_t x) { return _mm512_storeu_ps(mem, x); }

-    static ymm_t max(ymm_t x, ymm_t y) { return _mm256_max_ps(x, y); }
-    static ymm_t min(ymm_t x, ymm_t y) { return _mm256_min_ps(x, y); }
+    static halfreg_t max(halfreg_t x, halfreg_t y) {
+        return _mm256_max_ps(x, y);
+    }
+    static halfreg_t min(halfreg_t x, halfreg_t y) {
+        return _mm256_min_ps(x, y);
+    }
+    static reg_t reverse(reg_t zmm) {
+        const auto rev_index = _mm512_set_epi32(NETWORK_32BIT_5);
+        return permutexvar(rev_index, zmm);
+    }
+    static reg_t sort_vec(reg_t x) {
+        return sort_zmm_32bit<zmm_vector<type_t>>(x);
+    }
+    static reg_t cast_from(__m512i v) { return _mm512_castsi512_ps(v); }
+    static __m512i cast_to(reg_t v) { return _mm512_castps_si512(v); }
+    static int double_compressstore(type_t *left_addr, type_t *right_addr,
+                                    opmask_t k, reg_t reg) {
+        return avx512_double_compressstore<zmm_vector<type_t>>(
+            left_addr, right_addr, k, reg);
+    }
 };

 /*
  * Assumes zmm is random and performs a full sorting network defined in
  * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg
  */
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE zmm_t sort_zmm_32bit(zmm_t zmm) {
+template <typename vtype, typename reg_t = typename vtype::reg_t>
+X86_SIMD_SORT_INLINE reg_t sort_zmm_32bit(reg_t zmm) {
     zmm = cmp_merge<vtype>(
         zmm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(zmm), 0xAAAA);
     zmm = cmp_merge<vtype>(
@@ -193,249 +267,71 @@ X86_SIMD_SORT_INLINE zmm_t sort_zmm_32bit(zmm_t zmm) {
     return zmm;
 }

-// Assumes zmm is bitonic and performs a recursive half cleaner
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE zmm_t bitonic_merge_zmm_32bit(zmm_t zmm) {
-    // 1) half_cleaner[16]: compare 1-9, 2-10, 3-11 etc ..
-    zmm = cmp_merge<vtype>(
-        zmm, vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_7), zmm),
-        0xFF00);
-    // 2) half_cleaner[8]: compare 1-5, 2-6, 3-7 etc ..
-    zmm = cmp_merge<vtype>(
-        zmm, vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_6), zmm),
-        0xF0F0);
-    // 3) half_cleaner[4]
-    zmm = cmp_merge<vtype>(
-        zmm, vtype::template shuffle<SHUFFLE_MASK(1, 0, 3, 2)>(zmm), 0xCCCC);
-    // 3) half_cleaner[1]
-    zmm = cmp_merge<vtype>(
-        zmm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(zmm), 0xAAAA);
-    return zmm;
-}
-
-// Assumes zmm1 and zmm2 are sorted and performs a recursive half cleaner
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE void bitonic_merge_two_zmm_32bit(zmm_t *zmm1,
-                                                      zmm_t *zmm2) {
-    // 1) First step of a merging network: coex of zmm1 and zmm2 reversed
-    *zmm2 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), *zmm2);
-    zmm_t zmm3 = vtype::min(*zmm1, *zmm2);
-    zmm_t zmm4 = vtype::max(*zmm1, *zmm2);
-    // 2) Recursive half cleaner for each
-    *zmm1 = bitonic_merge_zmm_32bit<vtype>(zmm3);
-    *zmm2 = bitonic_merge_zmm_32bit<vtype>(zmm4);
-}
-
-// Assumes [zmm0, zmm1] and [zmm2, zmm3] are sorted and performs a recursive
-// half cleaner
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE void bitonic_merge_four_zmm_32bit(zmm_t *zmm) {
-    zmm_t zmm2r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[2]);
-    zmm_t zmm3r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[3]);
-    zmm_t zmm_t1 = vtype::min(zmm[0], zmm3r);
-    zmm_t zmm_t2 = vtype::min(zmm[1], zmm2r);
-    zmm_t zmm_t3 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[1], zmm2r));
-    zmm_t zmm_t4 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[0], zmm3r));
-    zmm_t zmm0 = vtype::min(zmm_t1, zmm_t2);
-    zmm_t zmm1 = vtype::max(zmm_t1, zmm_t2);
-    zmm_t zmm2 = vtype::min(zmm_t3, zmm_t4);
-    zmm_t zmm3 = vtype::max(zmm_t3, zmm_t4);
-    zmm[0] = bitonic_merge_zmm_32bit<vtype>(zmm0);
-    zmm[1] = bitonic_merge_zmm_32bit<vtype>(zmm1);
-    zmm[2] = bitonic_merge_zmm_32bit<vtype>(zmm2);
-    zmm[3] = bitonic_merge_zmm_32bit<vtype>(zmm3);
-}
-
-template <typename vtype, typename zmm_t = typename vtype::zmm_t>
-X86_SIMD_SORT_INLINE void bitonic_merge_eight_zmm_32bit(zmm_t *zmm) {
-    zmm_t zmm4r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[4]);
-    zmm_t zmm5r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[5]);
-    zmm_t zmm6r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[6]);
-    zmm_t zmm7r = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5), zmm[7]);
-    zmm_t zmm_t1 = vtype::min(zmm[0], zmm7r);
-    zmm_t zmm_t2 = vtype::min(zmm[1], zmm6r);
-    zmm_t zmm_t3 = vtype::min(zmm[2], zmm5r);
-    zmm_t zmm_t4 = vtype::min(zmm[3], zmm4r);
-    zmm_t zmm_t5 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[3], zmm4r));
-    zmm_t zmm_t6 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[2], zmm5r));
-    zmm_t zmm_t7 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[1], zmm6r));
-    zmm_t zmm_t8 = vtype::permutexvar(_mm512_set_epi32(NETWORK_32BIT_5),
-                                      vtype::max(zmm[0], zmm7r));
-    COEX<vtype>(zmm_t1, zmm_t3);
-    COEX<vtype>(zmm_t2, zmm_t4);
-    COEX<vtype>(zmm_t5, zmm_t7);
-    COEX<vtype>(zmm_t6, zmm_t8);
-    COEX<vtype>(zmm_t1, zmm_t2);
-    COEX<vtype>(zmm_t3, zmm_t4);
-    COEX<vtype>(zmm_t5, zmm_t6);
-    COEX<vtype>(zmm_t7, zmm_t8);
-    zmm[0] = bitonic_merge_zmm_32bit<vtype>(zmm_t1);
-    zmm[1] = bitonic_merge_zmm_32bit<vtype>(zmm_t2);
-    zmm[2] = bitonic_merge_zmm_32bit<vtype>(zmm_t3);
-    zmm[3] = bitonic_merge_zmm_32bit<vtype>(zmm_t4);
-    zmm[4] = bitonic_merge_zmm_32bit<vtype>(zmm_t5);
-    zmm[5] = bitonic_merge_zmm_32bit<vtype>(zmm_t6);
-    zmm[6] = bitonic_merge_zmm_32bit<vtype>(zmm_t7);
-    zmm[7] = bitonic_merge_zmm_32bit<vtype>(zmm_t8);
-}
-
-template <typename vtype, typename type_t>
-X86_SIMD_SORT_INLINE void sort_16_32bit(type_t *arr, int32_t N) {
+struct avx512_32bit_swizzle_ops {
+    template <typename vtype, int scale>
+    X86_SIMD_SORT_INLINE typename vtype::reg_t swap_n(
+        typename vtype::reg_t reg) {
+        __m512i v = vtype::cast_to(reg);
+
+        if constexpr (scale == 2) {
+            v = _mm512_shuffle_epi32(v, (_MM_PERM_ENUM)0b10110001);
+        } else if constexpr (scale == 4) {
+            v = _mm512_shuffle_epi32(v, (_MM_PERM_ENUM)0b01001110);
+        } else if constexpr (scale == 8) {
+            v = _mm512_shuffle_i64x2(v, v, 0b10110001);
+        } else if constexpr (scale == 16) {
+            v = _mm512_shuffle_i64x2(v, v, 0b01001110);
+        } else {
+            static_assert(scale == -1, "should not be reached");
+        }
+
+        return vtype::cast_from(v);
+    }
+
+    template <typename vtype, int scale>
+    X86_SIMD_SORT_INLINE typename vtype::reg_t reverse_n(
+        typename vtype::reg_t reg) {
+        __m512i v = vtype::cast_to(reg);
+
+        if constexpr (scale == 2) {
+            return swap_n<vtype, 2>(reg);
+        } else if constexpr (scale == 4) {
+            __m512i mask = _mm512_set_epi32(12, 13, 14, 15, 8, 9, 10, 11, 4, 5,
+                                            6, 7, 0, 1, 2, 3);
+            v = _mm512_permutexvar_epi32(mask, v);
+        } else if constexpr (scale == 8) {
+            __m512i mask = _mm512_set_epi32(8, 9, 10, 11, 12, 13, 14, 15, 0, 1,
+                                            2, 3, 4, 5, 6, 7);
+            v = _mm512_permutexvar_epi32(mask, v);
+        } else if constexpr (scale == 16) {
+            return vtype::reverse(reg);
+        } else {
+            static_assert(scale == -1, "should not be reached");
+        }
+
+        return vtype::cast_from(v);
+    }
+
+    template <typename vtype, int scale>
+    X86_SIMD_SORT_INLINE typename vtype::reg_t merge_n(
|
||||||
typename vtype::opmask_t load_mask = (0x0001 << N) - 0x0001;
|
typename vtype::reg_t reg, typename vtype::reg_t other) {
|
||||||
typename vtype::zmm_t zmm =
|
__m512i v1 = vtype::cast_to(reg);
|
||||||
vtype::mask_loadu(vtype::zmm_max(), load_mask, arr);
|
__m512i v2 = vtype::cast_to(other);
|
||||||
vtype::mask_storeu(arr, load_mask, sort_zmm_32bit<vtype>(zmm));
|
|
||||||
|
if constexpr (scale == 2) {
|
||||||
|
v1 = _mm512_mask_blend_epi32(0b0101010101010101, v1, v2);
|
||||||
|
} else if constexpr (scale == 4) {
|
||||||
|
v1 = _mm512_mask_blend_epi32(0b0011001100110011, v1, v2);
|
||||||
|
} else if constexpr (scale == 8) {
|
||||||
|
v1 = _mm512_mask_blend_epi32(0b0000111100001111, v1, v2);
|
||||||
|
} else if constexpr (scale == 16) {
|
||||||
|
v1 = _mm512_mask_blend_epi32(0b0000000011111111, v1, v2);
|
||||||
|
} else {
|
||||||
|
static_assert(scale == -1, "should not be reached");
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename vtype, typename type_t>
|
return vtype::cast_from(v1);
|
||||||
X86_SIMD_SORT_INLINE void sort_32_32bit(type_t *arr, int32_t N) {
|
|
||||||
if (N <= 16) {
|
|
||||||
sort_16_32bit<vtype>(arr, N);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
using zmm_t = typename vtype::zmm_t;
|
|
||||||
zmm_t zmm1 = vtype::loadu(arr);
|
|
||||||
typename vtype::opmask_t load_mask = (0x0001 << (N - 16)) - 0x0001;
|
|
||||||
zmm_t zmm2 = vtype::mask_loadu(vtype::zmm_max(), load_mask, arr + 16);
|
|
||||||
zmm1 = sort_zmm_32bit<vtype>(zmm1);
|
|
||||||
zmm2 = sort_zmm_32bit<vtype>(zmm2);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm1, &zmm2);
|
|
||||||
vtype::storeu(arr, zmm1);
|
|
||||||
vtype::mask_storeu(arr + 16, load_mask, zmm2);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename vtype, typename type_t>
|
|
||||||
X86_SIMD_SORT_INLINE void sort_64_32bit(type_t *arr, int32_t N) {
|
|
||||||
if (N <= 32) {
|
|
||||||
sort_32_32bit<vtype>(arr, N);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
using zmm_t = typename vtype::zmm_t;
|
|
||||||
using opmask_t = typename vtype::opmask_t;
|
|
||||||
zmm_t zmm[4];
|
|
||||||
zmm[0] = vtype::loadu(arr);
|
|
||||||
zmm[1] = vtype::loadu(arr + 16);
|
|
||||||
opmask_t load_mask1 = 0xFFFF, load_mask2 = 0xFFFF;
|
|
||||||
uint64_t combined_mask = (0x1ull << (N - 32)) - 0x1ull;
|
|
||||||
load_mask1 &= combined_mask & 0xFFFF;
|
|
||||||
load_mask2 &= (combined_mask >> 16) & 0xFFFF;
|
|
||||||
zmm[2] = vtype::mask_loadu(vtype::zmm_max(), load_mask1, arr + 32);
|
|
||||||
zmm[3] = vtype::mask_loadu(vtype::zmm_max(), load_mask2, arr + 48);
|
|
||||||
zmm[0] = sort_zmm_32bit<vtype>(zmm[0]);
|
|
||||||
zmm[1] = sort_zmm_32bit<vtype>(zmm[1]);
|
|
||||||
zmm[2] = sort_zmm_32bit<vtype>(zmm[2]);
|
|
||||||
zmm[3] = sort_zmm_32bit<vtype>(zmm[3]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[0], &zmm[1]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[2], &zmm[3]);
|
|
||||||
bitonic_merge_four_zmm_32bit<vtype>(zmm);
|
|
||||||
vtype::storeu(arr, zmm[0]);
|
|
||||||
vtype::storeu(arr + 16, zmm[1]);
|
|
||||||
vtype::mask_storeu(arr + 32, load_mask1, zmm[2]);
|
|
||||||
vtype::mask_storeu(arr + 48, load_mask2, zmm[3]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename vtype, typename type_t>
|
|
||||||
X86_SIMD_SORT_INLINE void sort_128_32bit(type_t *arr, int32_t N) {
|
|
||||||
if (N <= 64) {
|
|
||||||
sort_64_32bit<vtype>(arr, N);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
using zmm_t = typename vtype::zmm_t;
|
|
||||||
using opmask_t = typename vtype::opmask_t;
|
|
||||||
zmm_t zmm[8];
|
|
||||||
zmm[0] = vtype::loadu(arr);
|
|
||||||
zmm[1] = vtype::loadu(arr + 16);
|
|
||||||
zmm[2] = vtype::loadu(arr + 32);
|
|
||||||
zmm[3] = vtype::loadu(arr + 48);
|
|
||||||
zmm[0] = sort_zmm_32bit<vtype>(zmm[0]);
|
|
||||||
zmm[1] = sort_zmm_32bit<vtype>(zmm[1]);
|
|
||||||
zmm[2] = sort_zmm_32bit<vtype>(zmm[2]);
|
|
||||||
zmm[3] = sort_zmm_32bit<vtype>(zmm[3]);
|
|
||||||
opmask_t load_mask1 = 0xFFFF, load_mask2 = 0xFFFF;
|
|
||||||
opmask_t load_mask3 = 0xFFFF, load_mask4 = 0xFFFF;
|
|
||||||
if (N != 128) {
|
|
||||||
uint64_t combined_mask = (0x1ull << (N - 64)) - 0x1ull;
|
|
||||||
load_mask1 &= combined_mask & 0xFFFF;
|
|
||||||
load_mask2 &= (combined_mask >> 16) & 0xFFFF;
|
|
||||||
load_mask3 &= (combined_mask >> 32) & 0xFFFF;
|
|
||||||
load_mask4 &= (combined_mask >> 48) & 0xFFFF;
|
|
||||||
}
|
|
||||||
zmm[4] = vtype::mask_loadu(vtype::zmm_max(), load_mask1, arr + 64);
|
|
||||||
zmm[5] = vtype::mask_loadu(vtype::zmm_max(), load_mask2, arr + 80);
|
|
||||||
zmm[6] = vtype::mask_loadu(vtype::zmm_max(), load_mask3, arr + 96);
|
|
||||||
zmm[7] = vtype::mask_loadu(vtype::zmm_max(), load_mask4, arr + 112);
|
|
||||||
zmm[4] = sort_zmm_32bit<vtype>(zmm[4]);
|
|
||||||
zmm[5] = sort_zmm_32bit<vtype>(zmm[5]);
|
|
||||||
zmm[6] = sort_zmm_32bit<vtype>(zmm[6]);
|
|
||||||
zmm[7] = sort_zmm_32bit<vtype>(zmm[7]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[0], &zmm[1]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[2], &zmm[3]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[4], &zmm[5]);
|
|
||||||
bitonic_merge_two_zmm_32bit<vtype>(&zmm[6], &zmm[7]);
|
|
||||||
bitonic_merge_four_zmm_32bit<vtype>(zmm);
|
|
||||||
bitonic_merge_four_zmm_32bit<vtype>(zmm + 4);
|
|
||||||
bitonic_merge_eight_zmm_32bit<vtype>(zmm);
|
|
||||||
vtype::storeu(arr, zmm[0]);
|
|
||||||
vtype::storeu(arr + 16, zmm[1]);
|
|
||||||
vtype::storeu(arr + 32, zmm[2]);
|
|
||||||
vtype::storeu(arr + 48, zmm[3]);
|
|
||||||
vtype::mask_storeu(arr + 64, load_mask1, zmm[4]);
|
|
||||||
vtype::mask_storeu(arr + 80, load_mask2, zmm[5]);
|
|
||||||
vtype::mask_storeu(arr + 96, load_mask3, zmm[6]);
|
|
||||||
vtype::mask_storeu(arr + 112, load_mask4, zmm[7]);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename vtype, typename type_t>
|
|
||||||
static void qsort_32bit_(type_t *arr, int64_t left, int64_t right,
|
|
||||||
int64_t max_iters) {
|
|
||||||
/*
|
|
||||||
* Resort to std::sort if quicksort isnt making any progress
|
|
||||||
*/
|
|
||||||
if (max_iters <= 0) {
|
|
||||||
std::sort(arr + left, arr + right + 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Base case: use bitonic networks to sort arrays <= 128
|
|
||||||
*/
|
|
||||||
if (right + 1 - left <= 128) {
|
|
||||||
sort_128_32bit<vtype>(arr + left, (int32_t)(right + 1 - left));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
type_t pivot = get_pivot_scalar<type_t>(arr, left, right);
|
|
||||||
type_t smallest = vtype::type_max();
|
|
||||||
type_t biggest = vtype::type_min();
|
|
||||||
int64_t pivot_index = partition_avx512_unrolled<vtype, 2>(
|
|
||||||
arr, left, right + 1, pivot, &smallest, &biggest, false);
|
|
||||||
if (pivot != smallest)
|
|
||||||
qsort_32bit_<vtype>(arr, left, pivot_index - 1, max_iters - 1);
|
|
||||||
if (pivot != biggest)
|
|
||||||
qsort_32bit_<vtype>(arr, pivot_index, right, max_iters - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void inline avx512_qsort<int32_t>(int32_t *arr, int64_t fromIndex, int64_t toIndex) {
|
|
||||||
int64_t arrsize = toIndex - fromIndex;
|
|
||||||
if (arrsize > 1) {
|
|
||||||
qsort_32bit_<zmm_vector<int32_t>, int32_t>(arr, fromIndex, toIndex - 1,
|
|
||||||
2 * (int64_t)log2(arrsize));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void inline avx512_qsort<float>(float *arr, int64_t fromIndex, int64_t toIndex) {
|
|
||||||
int64_t arrsize = toIndex - fromIndex;
|
|
||||||
if (arrsize > 1) {
|
|
||||||
qsort_32bit_<zmm_vector<float>, float>(arr, fromIndex, toIndex - 1,
|
|
||||||
2 * (int64_t)log2(arrsize));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
#endif // AVX512_QSORT_32BIT
|
#endif // AVX512_QSORT_32BIT
|
||||||
|
|||||||
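The network code above is easier to check against a scalar model: each cmp_merge call is one compare-exchange layer of the bitonic network, keeping the per-lane minimum where the mask bit is 0 and the maximum where it is 1. A minimal standalone sketch of that layer (plain C++, no AVX-512; the 4-lane example and all names are illustrative, not part of the sources):

#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>

// Scalar model of one cmp_merge layer: pair each lane with its shuffled
// partner and keep min or max according to the corresponding mask bit.
template <size_t N>
std::array<int32_t, N> cmp_merge_model(std::array<int32_t, N> v,
                                       std::array<int, N> partner,
                                       uint32_t mask) {
    std::array<int32_t, N> out{};
    for (size_t i = 0; i < N; ++i) {
        int32_t a = v[i], b = v[partner[i]];
        // mask bit 1 -> lane receives max, bit 0 -> lane receives min
        out[i] = ((mask >> i) & 1) ? std::max(a, b) : std::min(a, b);
    }
    return out;
}

int main() {
    // 4-lane analogue of the first layer above: swap neighbors, mask 0b1010
    // (the 16-lane version uses 0xAAAA with SHUFFLE_MASK(2, 3, 0, 1)).
    std::array<int32_t, 4> v{3, 1, 4, 2};
    std::array<int, 4> partner{1, 0, 3, 2};
    for (int32_t x : cmp_merge_model(v, partner, 0b1010))
        std::cout << x << ' ';  // prints: 1 3 2 4
}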
@@ -1,212 +0,0 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef AVX512_64BIT_COMMON
#define AVX512_64BIT_COMMON
#include "avx512-common-qsort.h"

/*
 * Constants used in sorting 8 elements in a ZMM register. Based on Bitonic
 * sorting network (see
 * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg)
 */
// ZMM                  7, 6, 5, 4, 3, 2, 1, 0
#define NETWORK_64BIT_1 4, 5, 6, 7, 0, 1, 2, 3
#define NETWORK_64BIT_2 0, 1, 2, 3, 4, 5, 6, 7
#define NETWORK_64BIT_3 5, 4, 7, 6, 1, 0, 3, 2
#define NETWORK_64BIT_4 3, 2, 1, 0, 7, 6, 5, 4

template <>
struct zmm_vector<int64_t> {
    using type_t = int64_t;
    using zmm_t = __m512i;
    using zmmi_t = __m512i;
    using ymm_t = __m512i;
    using opmask_t = __mmask8;
    static const uint8_t numlanes = 8;

    static type_t type_max() { return X86_SIMD_SORT_MAX_INT64; }
    static type_t type_min() { return X86_SIMD_SORT_MIN_INT64; }
    static zmm_t zmm_max() {
        return _mm512_set1_epi64(type_max());
    }  // TODO: this should broadcast bits as is?

    static zmmi_t seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
                       int v8) {
        return _mm512_set_epi64(v1, v2, v3, v4, v5, v6, v7, v8);
    }
    static opmask_t kxor_opmask(opmask_t x, opmask_t y) {
        return _kxor_mask8(x, y);
    }
    static opmask_t knot_opmask(opmask_t x) { return _knot_mask8(x); }
    static opmask_t le(zmm_t x, zmm_t y) {
        return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_LE);
    }
    static opmask_t ge(zmm_t x, zmm_t y) {
        return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_NLT);
    }
    static opmask_t gt(zmm_t x, zmm_t y) {
        return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_GT);
    }
    static opmask_t eq(zmm_t x, zmm_t y) {
        return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_EQ);
    }
    template <int scale>
    static zmm_t mask_i64gather(zmm_t src, opmask_t mask, __m512i index,
                                void const *base) {
        return _mm512_mask_i64gather_epi64(src, mask, index, base, scale);
    }
    template <int scale>
    static zmm_t i64gather(__m512i index, void const *base) {
        return _mm512_i64gather_epi64(index, base, scale);
    }
    static zmm_t loadu(void const *mem) { return _mm512_loadu_si512(mem); }
    static zmm_t max(zmm_t x, zmm_t y) { return _mm512_max_epi64(x, y); }
    static void mask_compressstoreu(void *mem, opmask_t mask, zmm_t x) {
        return _mm512_mask_compressstoreu_epi64(mem, mask, x);
    }
    static zmm_t maskz_loadu(opmask_t mask, void const *mem) {
        return _mm512_maskz_loadu_epi64(mask, mem);
    }
    static zmm_t mask_loadu(zmm_t x, opmask_t mask, void const *mem) {
        return _mm512_mask_loadu_epi64(x, mask, mem);
    }
    static zmm_t mask_mov(zmm_t x, opmask_t mask, zmm_t y) {
        return _mm512_mask_mov_epi64(x, mask, y);
    }
    static void mask_storeu(void *mem, opmask_t mask, zmm_t x) {
        return _mm512_mask_storeu_epi64(mem, mask, x);
    }
    static zmm_t min(zmm_t x, zmm_t y) { return _mm512_min_epi64(x, y); }
    static zmm_t permutexvar(__m512i idx, zmm_t zmm) {
        return _mm512_permutexvar_epi64(idx, zmm);
    }
    static type_t reducemax(zmm_t v) { return _mm512_reduce_max_epi64(v); }
    static type_t reducemin(zmm_t v) { return _mm512_reduce_min_epi64(v); }
    static zmm_t set1(type_t v) { return _mm512_set1_epi64(v); }
    template <uint8_t mask>
    static zmm_t shuffle(zmm_t zmm) {
        __m512d temp = _mm512_castsi512_pd(zmm);
        return _mm512_castpd_si512(
            _mm512_shuffle_pd(temp, temp, (_MM_PERM_ENUM)mask));
    }
    static void storeu(void *mem, zmm_t x) { _mm512_storeu_si512(mem, x); }
};
template <>
struct zmm_vector<double> {
    using type_t = double;
    using zmm_t = __m512d;
    using zmmi_t = __m512i;
    using ymm_t = __m512d;
    using opmask_t = __mmask8;
    static const uint8_t numlanes = 8;

    static type_t type_max() { return X86_SIMD_SORT_INFINITY; }
    static type_t type_min() { return -X86_SIMD_SORT_INFINITY; }
    static zmm_t zmm_max() { return _mm512_set1_pd(type_max()); }

    static zmmi_t seti(int v1, int v2, int v3, int v4, int v5, int v6, int v7,
                       int v8) {
        return _mm512_set_epi64(v1, v2, v3, v4, v5, v6, v7, v8);
    }

    static zmm_t maskz_loadu(opmask_t mask, void const *mem) {
        return _mm512_maskz_loadu_pd(mask, mem);
    }
    static opmask_t knot_opmask(opmask_t x) { return _knot_mask8(x); }
    static opmask_t ge(zmm_t x, zmm_t y) {
        return _mm512_cmp_pd_mask(x, y, _CMP_GE_OQ);
    }
    static opmask_t gt(zmm_t x, zmm_t y) {
        return _mm512_cmp_pd_mask(x, y, _CMP_GT_OQ);
    }
    static opmask_t eq(zmm_t x, zmm_t y) {
        return _mm512_cmp_pd_mask(x, y, _CMP_EQ_OQ);
    }
    template <int type>
    static opmask_t fpclass(zmm_t x) {
        return _mm512_fpclass_pd_mask(x, type);
    }
    template <int scale>
    static zmm_t mask_i64gather(zmm_t src, opmask_t mask, __m512i index,
                                void const *base) {
        return _mm512_mask_i64gather_pd(src, mask, index, base, scale);
    }
    template <int scale>
    static zmm_t i64gather(__m512i index, void const *base) {
        return _mm512_i64gather_pd(index, base, scale);
    }
    static zmm_t loadu(void const *mem) { return _mm512_loadu_pd(mem); }
    static zmm_t max(zmm_t x, zmm_t y) { return _mm512_max_pd(x, y); }
    static void mask_compressstoreu(void *mem, opmask_t mask, zmm_t x) {
        return _mm512_mask_compressstoreu_pd(mem, mask, x);
    }
    static zmm_t mask_loadu(zmm_t x, opmask_t mask, void const *mem) {
        return _mm512_mask_loadu_pd(x, mask, mem);
    }
    static zmm_t mask_mov(zmm_t x, opmask_t mask, zmm_t y) {
        return _mm512_mask_mov_pd(x, mask, y);
    }
    static void mask_storeu(void *mem, opmask_t mask, zmm_t x) {
        return _mm512_mask_storeu_pd(mem, mask, x);
    }
    static zmm_t min(zmm_t x, zmm_t y) { return _mm512_min_pd(x, y); }
    static zmm_t permutexvar(__m512i idx, zmm_t zmm) {
        return _mm512_permutexvar_pd(idx, zmm);
    }
    static type_t reducemax(zmm_t v) { return _mm512_reduce_max_pd(v); }
    static type_t reducemin(zmm_t v) { return _mm512_reduce_min_pd(v); }
    static zmm_t set1(type_t v) { return _mm512_set1_pd(v); }
    template <uint8_t mask>
    static zmm_t shuffle(zmm_t zmm) {
        return _mm512_shuffle_pd(zmm, zmm, (_MM_PERM_ENUM)mask);
    }
    static void storeu(void *mem, zmm_t x) { _mm512_storeu_pd(mem, x); }
};

/*
 * Assumes zmm is random and performs a full sorting network defined in
 * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg
 */
template <typename vtype, typename zmm_t = typename vtype::zmm_t>
X86_SIMD_SORT_INLINE zmm_t sort_zmm_64bit(zmm_t zmm) {
    const typename vtype::zmmi_t rev_index = vtype::seti(NETWORK_64BIT_2);
    zmm = cmp_merge<vtype>(
        zmm, vtype::template shuffle<SHUFFLE_MASK(1, 1, 1, 1)>(zmm), 0xAA);
    zmm = cmp_merge<vtype>(
        zmm, vtype::permutexvar(vtype::seti(NETWORK_64BIT_1), zmm), 0xCC);
    zmm = cmp_merge<vtype>(
        zmm, vtype::template shuffle<SHUFFLE_MASK(1, 1, 1, 1)>(zmm), 0xAA);
    zmm = cmp_merge<vtype>(zmm, vtype::permutexvar(rev_index, zmm), 0xF0);
    zmm = cmp_merge<vtype>(
        zmm, vtype::permutexvar(vtype::seti(NETWORK_64BIT_3), zmm), 0xCC);
    zmm = cmp_merge<vtype>(
        zmm, vtype::template shuffle<SHUFFLE_MASK(1, 1, 1, 1)>(zmm), 0xAA);
    return zmm;
}

#endif
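For reference, the NETWORK_64BIT_* constants above are index vectors for _mm512_permutexvar_epi64, whose effect is out[i] = in[idx[i]]; note that _mm512_set_epi64 takes its arguments highest lane first. A small scalar model (illustrative only, not part of the sources):

#include <array>
#include <cstdint>
#include <iostream>

// Scalar model of _mm512_permutexvar_epi64: out[i] = in[idx[i]].
std::array<int64_t, 8> permutexvar_model(std::array<int, 8> idx,
                                         std::array<int64_t, 8> in) {
    std::array<int64_t, 8> out{};
    for (int i = 0; i < 8; ++i) out[i] = in[idx[i]];
    return out;
}

int main() {
    // NETWORK_64BIT_2 is (0,1,2,3,4,5,6,7) in set_epi64 (high-to-low) order,
    // so the per-lane index vector is {7,6,5,4,3,2,1,0}: a full reversal.
    std::array<int, 8> rev{7, 6, 5, 4, 3, 2, 1, 0};
    std::array<int64_t, 8> v{10, 11, 12, 13, 14, 15, 16, 17};
    for (int64_t x : permutexvar_model(rev, v)) std::cout << x << ' ';
    // prints: 17 16 15 14 13 12 11 10
}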
File diff suppressed because it is too large
@@ -1,483 +0,0 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)
#ifndef AVX512_QSORT_COMMON
#define AVX512_QSORT_COMMON

/*
 * Quicksort using AVX-512. The ideas and code are based on these two research
 * papers [1] and [2]. On a high level, the idea is to vectorize quicksort
 * partitioning using AVX-512 compressstore instructions. If the array size is
 * < 128, then use Bitonic sorting network implemented on 512-bit registers.
 * The precise network definitions depend on the dtype and are defined in
 * separate files: avx512-16bit-qsort.hpp, avx512-32bit-qsort.hpp and
 * avx512-64bit-qsort.hpp. Article [4] is a good resource for bitonic sorting
 * network. The core implementations of the vectorized qsort functions
 * avx512_qsort<T>(T*, int64_t) are modified versions of avx2 quicksort
 * presented in the paper [2] and source code associated with that paper [3].
 *
 * [1] Fast and Robust Vectorized In-Place Sorting of Primitive Types
 *     https://drops.dagstuhl.de/opus/volltexte/2021/13775/
 *
 * [2] A Novel Hybrid Quicksort Algorithm Vectorized using AVX-512 on Intel
 *     Skylake https://arxiv.org/pdf/1704.08579.pdf
 *
 * [3] https://github.com/simd-sorting/fast-and-robust: SPDX-License-Identifier:
 *     MIT
 *
 * [4] http://mitp-content-server.mit.edu:18180/books/content/sectbyfn?collid=books_pres_0&fn=Chapter%2027.pdf&id=8030
 *
 */

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>

/*
Workaround for the bug in GCC12 (that was fixed in GCC 12.3.1).
More details are available at: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wuninitialized"
#include <immintrin.h>
#pragma GCC diagnostic pop

#define X86_SIMD_SORT_INFINITY std::numeric_limits<double>::infinity()
#define X86_SIMD_SORT_INFINITYF std::numeric_limits<float>::infinity()
#define X86_SIMD_SORT_INFINITYH 0x7c00
#define X86_SIMD_SORT_NEGINFINITYH 0xfc00
#define X86_SIMD_SORT_MAX_UINT16 std::numeric_limits<uint16_t>::max()
#define X86_SIMD_SORT_MAX_INT16 std::numeric_limits<int16_t>::max()
#define X86_SIMD_SORT_MIN_INT16 std::numeric_limits<int16_t>::min()
#define X86_SIMD_SORT_MAX_UINT32 std::numeric_limits<uint32_t>::max()
#define X86_SIMD_SORT_MAX_INT32 std::numeric_limits<int32_t>::max()
#define X86_SIMD_SORT_MIN_INT32 std::numeric_limits<int32_t>::min()
#define X86_SIMD_SORT_MAX_UINT64 std::numeric_limits<uint64_t>::max()
#define X86_SIMD_SORT_MAX_INT64 std::numeric_limits<int64_t>::max()
#define X86_SIMD_SORT_MIN_INT64 std::numeric_limits<int64_t>::min()
#define ZMM_MAX_DOUBLE _mm512_set1_pd(X86_SIMD_SORT_INFINITY)
#define ZMM_MAX_UINT64 _mm512_set1_epi64(X86_SIMD_SORT_MAX_UINT64)
#define ZMM_MAX_INT64 _mm512_set1_epi64(X86_SIMD_SORT_MAX_INT64)
#define ZMM_MAX_FLOAT _mm512_set1_ps(X86_SIMD_SORT_INFINITYF)
#define ZMM_MAX_UINT _mm512_set1_epi32(X86_SIMD_SORT_MAX_UINT32)
#define ZMM_MAX_INT _mm512_set1_epi32(X86_SIMD_SORT_MAX_INT32)
#define ZMM_MAX_HALF _mm512_set1_epi16(X86_SIMD_SORT_INFINITYH)
#define YMM_MAX_HALF _mm256_set1_epi16(X86_SIMD_SORT_INFINITYH)
#define ZMM_MAX_UINT16 _mm512_set1_epi16(X86_SIMD_SORT_MAX_UINT16)
#define ZMM_MAX_INT16 _mm512_set1_epi16(X86_SIMD_SORT_MAX_INT16)
#define SHUFFLE_MASK(a, b, c, d) (a << 6) | (b << 4) | (c << 2) | d

#ifdef _MSC_VER
#define X86_SIMD_SORT_INLINE static inline
#define X86_SIMD_SORT_FINLINE static __forceinline
#elif defined(__CYGWIN__)
/*
 * Force inline in cygwin to work around a compiler bug. See
 * https://github.com/numpy/numpy/pull/22315#issuecomment-1267757584
 */
#define X86_SIMD_SORT_INLINE static __attribute__((always_inline))
#define X86_SIMD_SORT_FINLINE static __attribute__((always_inline))
#elif defined(__GNUC__)
#define X86_SIMD_SORT_INLINE static inline
#define X86_SIMD_SORT_FINLINE static __attribute__((always_inline))
#else
#define X86_SIMD_SORT_INLINE static
#define X86_SIMD_SORT_FINLINE static
#endif

#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)

template <typename type>
struct zmm_vector;

template <typename type>
struct ymm_vector;

// Regular quicksort routines:
template <typename T>
void avx512_qsort(T *arr, int64_t arrsize);

template <typename T>
void inline avx512_qsort(T *arr, int64_t from_index, int64_t to_index);

template <typename T>
bool is_a_nan(T elem) {
    return std::isnan(elem);
}

template <typename T>
X86_SIMD_SORT_INLINE T get_pivot_scalar(T *arr, const int64_t left, const int64_t right) {
    // median of 8 equally spaced elements
    int64_t NUM_ELEMENTS = 8;
    int64_t MID = NUM_ELEMENTS / 2;
    int64_t size = (right - left) / NUM_ELEMENTS;
    T temp[NUM_ELEMENTS];
    for (int64_t i = 0; i < NUM_ELEMENTS; i++) temp[i] = arr[left + (i * size)];
    std::sort(temp, temp + NUM_ELEMENTS);
    return temp[MID];
}

template <typename vtype, typename T = typename vtype::type_t>
bool comparison_func_ge(const T &a, const T &b) {
    return a < b;
}

template <typename vtype, typename T = typename vtype::type_t>
bool comparison_func_gt(const T &a, const T &b) {
    return a <= b;
}

/*
 * COEX == Compare and Exchange two registers by swapping min and max values
 */
template <typename vtype, typename mm_t>
static void COEX(mm_t &a, mm_t &b) {
    mm_t temp = a;
    a = vtype::min(a, b);
    b = vtype::max(temp, b);
}
template <typename vtype, typename zmm_t = typename vtype::zmm_t,
          typename opmask_t = typename vtype::opmask_t>
static inline zmm_t cmp_merge(zmm_t in1, zmm_t in2, opmask_t mask) {
    zmm_t min = vtype::min(in2, in1);
    zmm_t max = vtype::max(in2, in1);
    return vtype::mask_mov(min, mask, max);  // 0 -> min, 1 -> max
}
/*
 * Partition one ZMM register based on the pivot and return the
 * number of elements that are greater than or equal to the pivot.
 */
template <typename vtype, typename type_t, typename zmm_t>
static inline int32_t partition_vec(type_t *arr, int64_t left, int64_t right,
                                    const zmm_t curr_vec, const zmm_t pivot_vec,
                                    zmm_t *smallest_vec, zmm_t *biggest_vec,
                                    bool use_gt) {
    /* which elements are larger than or equal to the pivot */
    typename vtype::opmask_t mask;
    if (use_gt) mask = vtype::gt(curr_vec, pivot_vec);
    else mask = vtype::ge(curr_vec, pivot_vec);
    int32_t amount_ge_pivot = _mm_popcnt_u32((int32_t)mask);
    vtype::mask_compressstoreu(arr + left, vtype::knot_opmask(mask),
                               curr_vec);
    vtype::mask_compressstoreu(arr + right - amount_ge_pivot, mask,
                               curr_vec);
    *smallest_vec = vtype::min(curr_vec, *smallest_vec);
    *biggest_vec = vtype::max(curr_vec, *biggest_vec);
    return amount_ge_pivot;
}
/*
 * Partition an array based on the pivot and return the index of the
 * first element that is greater than or equal to the pivot.
 */
template <typename vtype, typename type_t>
static inline int64_t partition_avx512(type_t *arr, int64_t left, int64_t right,
                                       type_t pivot, type_t *smallest,
                                       type_t *biggest, bool use_gt) {
    auto comparison_func = use_gt ? comparison_func_gt<vtype> : comparison_func_ge<vtype>;
    /* make array length divisible by vtype::numlanes, shortening the array */
    for (int32_t i = (right - left) % vtype::numlanes; i > 0; --i) {
        *smallest = std::min(*smallest, arr[left], comparison_func);
        *biggest = std::max(*biggest, arr[left], comparison_func);
        if (!comparison_func(arr[left], pivot)) {
            std::swap(arr[left], arr[--right]);
        } else {
            ++left;
        }
    }

    if (left == right)
        return left; /* less than vtype::numlanes elements in the array */

    using zmm_t = typename vtype::zmm_t;
    zmm_t pivot_vec = vtype::set1(pivot);
    zmm_t min_vec = vtype::set1(*smallest);
    zmm_t max_vec = vtype::set1(*biggest);

    if (right - left == vtype::numlanes) {
        zmm_t vec = vtype::loadu(arr + left);
        int32_t amount_ge_pivot =
            partition_vec<vtype>(arr, left, left + vtype::numlanes, vec,
                                 pivot_vec, &min_vec, &max_vec, use_gt);
        *smallest = vtype::reducemin(min_vec);
        *biggest = vtype::reducemax(max_vec);
        return left + (vtype::numlanes - amount_ge_pivot);
    }

    // first and last vtype::numlanes values are partitioned at the end
    zmm_t vec_left = vtype::loadu(arr + left);
    zmm_t vec_right = vtype::loadu(arr + (right - vtype::numlanes));
    // store points of the vectors
    int64_t r_store = right - vtype::numlanes;
    int64_t l_store = left;
    // indices for loading the elements
    left += vtype::numlanes;
    right -= vtype::numlanes;
    while (right - left != 0) {
        zmm_t curr_vec;
        /*
         * if fewer elements are stored on the right side of the array,
         * then next elements are loaded from the right side,
         * otherwise from the left side
         */
        if ((r_store + vtype::numlanes) - right < left - l_store) {
            right -= vtype::numlanes;
            curr_vec = vtype::loadu(arr + right);
        } else {
            curr_vec = vtype::loadu(arr + left);
            left += vtype::numlanes;
        }
        // partition the current vector and save it on both sides of the array
        int32_t amount_ge_pivot =
            partition_vec<vtype>(arr, l_store, r_store + vtype::numlanes,
                                 curr_vec, pivot_vec, &min_vec, &max_vec, use_gt);
        r_store -= amount_ge_pivot;
        l_store += (vtype::numlanes - amount_ge_pivot);
    }

    /* partition and save vec_left and vec_right */
    int32_t amount_ge_pivot =
        partition_vec<vtype>(arr, l_store, r_store + vtype::numlanes, vec_left,
                             pivot_vec, &min_vec, &max_vec, use_gt);
    l_store += (vtype::numlanes - amount_ge_pivot);
    amount_ge_pivot =
        partition_vec<vtype>(arr, l_store, l_store + vtype::numlanes, vec_right,
                             pivot_vec, &min_vec, &max_vec, use_gt);
    l_store += (vtype::numlanes - amount_ge_pivot);
    *smallest = vtype::reducemin(min_vec);
    *biggest = vtype::reducemax(max_vec);
    return l_store;
}

template <typename vtype, int num_unroll,
          typename type_t = typename vtype::type_t>
static inline int64_t partition_avx512_unrolled(type_t *arr, int64_t left,
                                                int64_t right, type_t pivot,
                                                type_t *smallest,
                                                type_t *biggest, bool use_gt) {
    if (right - left <= 2 * num_unroll * vtype::numlanes) {
        return partition_avx512<vtype>(arr, left, right, pivot, smallest,
                                       biggest, use_gt);
    }

    auto comparison_func = use_gt ? comparison_func_gt<vtype> : comparison_func_ge<vtype>;
    /* make array length divisible by 8*vtype::numlanes, shortening the array */
    for (int32_t i = ((right - left) % (num_unroll * vtype::numlanes)); i > 0;
         --i) {
        *smallest = std::min(*smallest, arr[left], comparison_func);
        *biggest = std::max(*biggest, arr[left], comparison_func);
        if (!comparison_func(arr[left], pivot)) {
            std::swap(arr[left], arr[--right]);
        } else {
            ++left;
        }
    }

    if (left == right)
        return left; /* less than vtype::numlanes elements in the array */

    using zmm_t = typename vtype::zmm_t;
    zmm_t pivot_vec = vtype::set1(pivot);
    zmm_t min_vec = vtype::set1(*smallest);
    zmm_t max_vec = vtype::set1(*biggest);

    // We will now have at least 16 registers worth of data to process:
    // left and right vtype::numlanes values are partitioned at the end
    zmm_t vec_left[num_unroll], vec_right[num_unroll];
#pragma GCC unroll 8
    for (int ii = 0; ii < num_unroll; ++ii) {
        vec_left[ii] = vtype::loadu(arr + left + vtype::numlanes * ii);
        vec_right[ii] =
            vtype::loadu(arr + (right - vtype::numlanes * (num_unroll - ii)));
    }
    // store points of the vectors
    int64_t r_store = right - vtype::numlanes;
    int64_t l_store = left;
    // indices for loading the elements
    left += num_unroll * vtype::numlanes;
    right -= num_unroll * vtype::numlanes;
    while (right - left != 0) {
        zmm_t curr_vec[num_unroll];
        /*
         * if fewer elements are stored on the right side of the array,
         * then next elements are loaded from the right side,
         * otherwise from the left side
         */
        if ((r_store + vtype::numlanes) - right < left - l_store) {
            right -= num_unroll * vtype::numlanes;
#pragma GCC unroll 8
            for (int ii = 0; ii < num_unroll; ++ii) {
                curr_vec[ii] = vtype::loadu(arr + right + ii * vtype::numlanes);
            }
        } else {
#pragma GCC unroll 8
            for (int ii = 0; ii < num_unroll; ++ii) {
                curr_vec[ii] = vtype::loadu(arr + left + ii * vtype::numlanes);
            }
            left += num_unroll * vtype::numlanes;
        }
        // partition the current vector and save it on both sides of the array
#pragma GCC unroll 8
        for (int ii = 0; ii < num_unroll; ++ii) {
            int32_t amount_ge_pivot = partition_vec<vtype>(
                arr, l_store, r_store + vtype::numlanes, curr_vec[ii],
                pivot_vec, &min_vec, &max_vec, use_gt);
            l_store += (vtype::numlanes - amount_ge_pivot);
            r_store -= amount_ge_pivot;
        }
    }

    /* partition and save vec_left[8] and vec_right[8] */
#pragma GCC unroll 8
    for (int ii = 0; ii < num_unroll; ++ii) {
        int32_t amount_ge_pivot =
            partition_vec<vtype>(arr, l_store, r_store + vtype::numlanes,
                                 vec_left[ii], pivot_vec, &min_vec, &max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        r_store -= amount_ge_pivot;
    }
#pragma GCC unroll 8
    for (int ii = 0; ii < num_unroll; ++ii) {
        int32_t amount_ge_pivot =
            partition_vec<vtype>(arr, l_store, r_store + vtype::numlanes,
                                 vec_right[ii], pivot_vec, &min_vec, &max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        r_store -= amount_ge_pivot;
    }
    *smallest = vtype::reducemin(min_vec);
    *biggest = vtype::reducemax(max_vec);
    return l_store;
}

// to_index (exclusive)
template <typename vtype, typename type_t>
static int64_t vectorized_partition(type_t *arr, int64_t from_index, int64_t to_index, type_t pivot, bool use_gt) {
    type_t smallest = vtype::type_max();
    type_t biggest = vtype::type_min();
    int64_t pivot_index = partition_avx512_unrolled<vtype, 2>(
        arr, from_index, to_index, pivot, &smallest, &biggest, use_gt);
    return pivot_index;
}

// partitioning functions
template <typename T>
void avx512_dual_pivot_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2){
    const T pivot1 = arr[index_pivot1];
    const T pivot2 = arr[index_pivot2];

    const int64_t low = from_index;
    const int64_t high = to_index;
    const int64_t start = low + 1;
    const int64_t end = high - 1;

    std::swap(arr[index_pivot1], arr[low]);
    std::swap(arr[index_pivot2], arr[end]);

    const int64_t pivot_index2 = vectorized_partition<zmm_vector<T>, T>(arr, start, end, pivot2, true); // use_gt = true
    std::swap(arr[end], arr[pivot_index2]);
    int64_t upper = pivot_index2;

    // if all other elements are greater than pivot2 (and pivot1), no need to do further partitioning
    if (upper == start) {
        pivot_indices[0] = low;
        pivot_indices[1] = upper;
        return;
    }

    const int64_t pivot_index1 = vectorized_partition<zmm_vector<T>, T>(arr, start, upper, pivot1, false); // use_ge (use_gt = false)
    int64_t lower = pivot_index1 - 1;
    std::swap(arr[low], arr[lower]);

    pivot_indices[0] = lower;
    pivot_indices[1] = upper;
}

template <typename T>
void avx512_single_pivot_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot){
    const T pivot = arr[index_pivot];

    const int64_t low = from_index;
    const int64_t high = to_index;
    const int64_t end = high - 1;

    const int64_t pivot_index1 = vectorized_partition<zmm_vector<T>, T>(arr, low, high, pivot, false); // use_gt = false (use_ge)
    int64_t lower = pivot_index1;

    const int64_t pivot_index2 = vectorized_partition<zmm_vector<T>, T>(arr, pivot_index1, high, pivot, true); // use_gt = true
    int64_t upper = pivot_index2;

    pivot_indices[0] = lower;
    pivot_indices[1] = upper;
}

template <typename T>
void inline avx512_fast_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2) {
    if (index_pivot1 != index_pivot2) {
        avx512_dual_pivot_partition<T>(arr, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
    }
    else {
        avx512_single_pivot_partition<T>(arr, from_index, to_index, pivot_indices, index_pivot1);
    }
}

template <typename T>
void inline insertion_sort(T *arr, int32_t from_index, int32_t to_index) {
    for (int i, k = from_index; ++k < to_index; ) {
        T ai = arr[i = k];

        if (ai < arr[i - 1]) {
            while (--i >= from_index && ai < arr[i]) {
                arr[i + 1] = arr[i];
            }
            arr[i + 1] = ai;
        }
    }
}

template <typename T>
void inline avx512_fast_sort(T *arr, int64_t from_index, int64_t to_index, const int32_t INS_SORT_THRESHOLD) {
    int32_t size = to_index - from_index;

    if (size <= INS_SORT_THRESHOLD) {
        insertion_sort<T>(arr, from_index, to_index);
    }
    else {
        avx512_qsort<T>(arr, from_index, to_index);
    }
}

#endif // AVX512_QSORT_COMMON
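The partition kernel above leans entirely on the two mask_compressstoreu calls: lanes below the pivot are packed contiguously at the left store cursor, and lanes greater than or equal to it just below the right cursor. A scalar sketch of one such block step (illustrative only, not part of the sources):

#include <cstdint>
#include <iostream>
#include <vector>

// Scalar model of partition_vec: given one block of lane values, write the
// ones below the pivot at the left store cursor and the rest just below the
// right store cursor, mirroring the two mask_compressstoreu calls.
int32_t partition_block_model(std::vector<int32_t> &arr, int64_t left,
                              int64_t right, const std::vector<int32_t> &block,
                              int32_t pivot) {
    std::vector<int32_t> lo, hi;
    for (int32_t v : block) (v >= pivot ? hi : lo).push_back(v);
    for (size_t i = 0; i < lo.size(); ++i) arr[left + i] = lo[i];
    for (size_t i = 0; i < hi.size(); ++i) arr[right - hi.size() + i] = hi[i];
    return (int32_t)hi.size();  // amount_ge_pivot
}

int main() {
    std::vector<int32_t> arr(8, 0);
    std::vector<int32_t> block{5, 1, 9, 3, 7, 2, 8, 4};
    int32_t ge = partition_block_model(arr, 0, 8, block, 5);
    std::cout << "amount_ge_pivot = " << ge << '\n';  // 4 (5, 9, 7, 8)
    for (int32_t v : arr) std::cout << v << ' ';      // 1 3 2 4 5 9 7 8
}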
@@ -21,12 +21,15 @@
 * questions.
 *
 */
#include "simdsort-support.hpp"
#ifdef __SIMDSORT_SUPPORTED_LINUX

#pragma GCC target("avx512dq", "avx512f")
#include "avx512-32bit-qsort.hpp"
#include "avx512-64bit-qsort.hpp"
#include "classfile_constants.h"


#define DLL_PUBLIC __attribute__((visibility("default")))
#define INSERTION_SORT_THRESHOLD_32BIT 16
#define INSERTION_SORT_THRESHOLD_64BIT 20
@@ -36,35 +39,41 @@ extern "C" {
DLL_PUBLIC void avx512_sort(void *array, int elem_type, int32_t from_index, int32_t to_index) {
    switch(elem_type) {
        case JVM_T_INT:
            avx512_fast_sort<int32_t>((int32_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
            avx512_fast_sort((int32_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
            break;
        case JVM_T_LONG:
            avx512_fast_sort<int64_t>((int64_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_64BIT);
            avx512_fast_sort((int64_t*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_64BIT);
            break;
        case JVM_T_FLOAT:
            avx512_fast_sort<float>((float*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
            avx512_fast_sort((float*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_32BIT);
            break;
        case JVM_T_DOUBLE:
            avx512_fast_sort<double>((double*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_64BIT);
            avx512_fast_sort((double*)array, from_index, to_index, INSERTION_SORT_THRESHOLD_64BIT);
            break;
        default:
            assert(false, "Unexpected type");
    }
}

DLL_PUBLIC void avx512_partition(void *array, int elem_type, int32_t from_index, int32_t to_index, int32_t *pivot_indices, int32_t index_pivot1, int32_t index_pivot2) {
    switch(elem_type) {
        case JVM_T_INT:
            avx512_fast_partition<int32_t>((int32_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            avx512_fast_partition((int32_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            break;
        case JVM_T_LONG:
            avx512_fast_partition<int64_t>((int64_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            avx512_fast_partition((int64_t*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            break;
        case JVM_T_FLOAT:
            avx512_fast_partition<float>((float*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            avx512_fast_partition((float*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            break;
        case JVM_T_DOUBLE:
            avx512_fast_partition<double>((double*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            avx512_fast_partition((double*)array, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
            break;
        default:
            assert(false, "Unexpected type");
    }
}

}

#endif
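For reference, a hypothetical standalone caller of the exported entry point might look as follows. It is a sketch only: the array contents are made up, and it assumes linking against libsimdsort; JVM_T_INT comes from classfile_constants.h and to_index is exclusive.

// Hypothetical caller, not part of the diff.
#include <cstdint>
#include <iostream>
#include "classfile_constants.h"  // defines JVM_T_INT etc.

extern "C" void avx512_sort(void *array, int elem_type, int32_t from_index,
                            int32_t to_index);

int main() {
    int32_t data[] = {42, 7, 19, 3, 25, 11, 8, 30};
    avx512_sort(data, JVM_T_INT, 0, 8);  // sorts data[0, 8)
    for (int32_t v : data) std::cout << v << ' ';  // 3 7 8 11 19 25 30 42
}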
src/java.base/linux/native/libsimdsort/simdsort-support.hpp (new file, 39 lines)
@@ -0,0 +1,39 @@
/*
 * Copyright (c) 2023 Intel Corporation. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SIMDSORT_SUPPORT_HPP
#define SIMDSORT_SUPPORT_HPP
#include <stdio.h>
#include <stdlib.h>

#undef assert
#define assert(cond, msg) { if (!(cond)) { fprintf(stderr, "assert fails %s %d: %s\n", __FILE__, __LINE__, msg); abort(); }}


// GCC >= 7.5 is needed to build AVX2 portions of libsimdsort using C++17 features
#if defined(_LP64) && (defined(__GNUC__) && ((__GNUC__ > 7) || ((__GNUC__ == 7) && (__GNUC_MINOR__ >= 5))))
#define __SIMDSORT_SUPPORTED_LINUX
#endif

#endif //SIMDSORT_SUPPORT_HPP
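Note that the assert defined above takes a message argument, unlike the standard single-argument form, and aborts after printing the file and line. A minimal usage sketch (illustrative only; the condition and message are made up):

#include "simdsort-support.hpp"

int main() {
    int lanes = 16;
    // Prints "assert fails <file> <line>: ..." and aborts if the check fails.
    assert(lanes == 16, "expected 16 lanes per ZMM register for 32-bit keys");
    return 0;
}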
src/java.base/linux/native/libsimdsort/xss-common-includes.h (new file, 101 lines)
@@ -0,0 +1,101 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef XSS_COMMON_INCLUDES
#define XSS_COMMON_INCLUDES
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
/*
Workaround for the bug in GCC12 (that was fixed in GCC 12.3.1).
More details are available at:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wuninitialized"
#include <immintrin.h>
#pragma GCC diagnostic pop
#include <limits>
#include <vector>

#define X86_SIMD_SORT_INFINITY std::numeric_limits<double>::infinity()
#define X86_SIMD_SORT_INFINITYF std::numeric_limits<float>::infinity()
#define X86_SIMD_SORT_INFINITYH 0x7c00
#define X86_SIMD_SORT_NEGINFINITYH 0xfc00
#define X86_SIMD_SORT_MAX_UINT16 std::numeric_limits<uint16_t>::max()
#define X86_SIMD_SORT_MAX_INT16 std::numeric_limits<int16_t>::max()
#define X86_SIMD_SORT_MIN_INT16 std::numeric_limits<int16_t>::min()
#define X86_SIMD_SORT_MAX_UINT32 std::numeric_limits<uint32_t>::max()
#define X86_SIMD_SORT_MAX_INT32 std::numeric_limits<int32_t>::max()
#define X86_SIMD_SORT_MIN_INT32 std::numeric_limits<int32_t>::min()
#define X86_SIMD_SORT_MAX_UINT64 std::numeric_limits<uint64_t>::max()
#define X86_SIMD_SORT_MAX_INT64 std::numeric_limits<int64_t>::max()
#define X86_SIMD_SORT_MIN_INT64 std::numeric_limits<int64_t>::min()
#define ZMM_MAX_DOUBLE _mm512_set1_pd(X86_SIMD_SORT_INFINITY)
#define ZMM_MAX_UINT64 _mm512_set1_epi64(X86_SIMD_SORT_MAX_UINT64)
#define ZMM_MAX_INT64 _mm512_set1_epi64(X86_SIMD_SORT_MAX_INT64)
#define ZMM_MAX_FLOAT _mm512_set1_ps(X86_SIMD_SORT_INFINITYF)
#define ZMM_MAX_UINT _mm512_set1_epi32(X86_SIMD_SORT_MAX_UINT32)
#define ZMM_MAX_INT _mm512_set1_epi32(X86_SIMD_SORT_MAX_INT32)
#define ZMM_MAX_HALF _mm512_set1_epi16(X86_SIMD_SORT_INFINITYH)
#define YMM_MAX_HALF _mm256_set1_epi16(X86_SIMD_SORT_INFINITYH)
#define ZMM_MAX_UINT16 _mm512_set1_epi16(X86_SIMD_SORT_MAX_UINT16)
#define ZMM_MAX_INT16 _mm512_set1_epi16(X86_SIMD_SORT_MAX_INT16)
#define SHUFFLE_MASK(a, b, c, d) (a << 6) | (b << 4) | (c << 2) | d

#define PRAGMA(x) _Pragma(#x)
#define UNUSED(x) (void)(x)

/* Compiler specific macros */
#if defined(__GNUC__)
#define X86_SIMD_SORT_INLINE static inline
#define X86_SIMD_SORT_FINLINE static inline __attribute__((always_inline))
#else
#define X86_SIMD_SORT_INLINE static
#define X86_SIMD_SORT_FINLINE static
#endif

#if __GNUC__ >= 8
#define X86_SIMD_SORT_UNROLL_LOOP(num) PRAGMA(GCC unroll num)
#else
#define X86_SIMD_SORT_UNROLL_LOOP(num)
#endif

typedef size_t arrsize_t;

template <typename type>
struct zmm_vector;

template <typename type>
struct ymm_vector;

template <typename type>
struct avx2_vector;

#endif // XSS_COMMON_INCLUDES
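The unroll macro above expands to _Pragma("GCC unroll N") on GCC 8 and newer and to nothing otherwise, so the same loop compiles everywhere. A small usage sketch (illustrative; assumes the header above is on the include path):

#include <cstddef>
#include "xss-common-includes.h"

// On GCC >= 8 the pragma asks the compiler to unroll the loop by 4;
// elsewhere the macro vanishes and the loop is compiled as written.
float sum4(const float *p, size_t n) {
    float acc = 0.0f;
    X86_SIMD_SORT_UNROLL_LOOP(4)
    for (size_t i = 0; i < n; ++i) {
        acc += p[i];
    }
    return acc;
}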
src/java.base/linux/native/libsimdsort/xss-common-qsort.h (new file, 528 lines)
@@ -0,0 +1,528 @@
|
/*
|
||||||
|
* Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
|
||||||
|
* Copyright (c) 2021 Serge Sans Paille. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)
|
||||||
|
|
||||||
|
#ifndef XSS_COMMON_QSORT
|
||||||
|
#define XSS_COMMON_QSORT
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Quicksort using AVX-512. The ideas and code are based on these two research
|
||||||
|
* papers [1] and [2]. On a high level, the idea is to vectorize quicksort
|
||||||
|
* partitioning using AVX-512 compressstore instructions. If the array size is
|
||||||
|
* < 128, then use Bitonic sorting network implemented on 512-bit registers.
|
||||||
|
* The precise network definitions depend on the dtype and are defined in
|
||||||
|
* separate files: avx512-16bit-qsort.hpp, avx512-32bit-qsort.hpp and
|
||||||
|
* avx512-64bit-qsort.hpp. Article [4] is a good resource for bitonic sorting
|
||||||
|
* network. The core implementations of the vectorized qsort functions
|
||||||
|
* avx512_qsort<T>(T*, arrsize_t) are modified versions of avx2 quicksort
|
||||||
|
* presented in the paper [2] and source code associated with that paper [3].
|
||||||
|
*
|
||||||
|
* [1] Fast and Robust Vectorized In-Place Sorting of Primitive Types
|
||||||
|
* https://drops.dagstuhl.de/opus/volltexte/2021/13775/
|
||||||
|
*
|
||||||
|
* [2] A Novel Hybrid Quicksort Algorithm Vectorized using AVX-512 on Intel
|
||||||
|
* Skylake https://arxiv.org/pdf/1704.08579.pdf
|
||||||
|
*
|
||||||
|
* [3] https://github.com/simd-sorting/fast-and-robust: SPDX-License-Identifier:
|
||||||
|
* MIT
|
||||||
|
*
|
||||||
|
* [4] http://mitp-content-server.mit.edu:18180/books/content/sectbyfn?collid=books_pres_0&fn=Chapter%2027.pdf&id=8030
|
||||||
|
*
|
||||||
|
*/

#include "xss-common-includes.h"
#include "xss-pivot-selection.hpp"
#include "xss-network-qsort.hpp"


template <typename T>
bool is_a_nan(T elem) {
    return std::isnan(elem);
}

template <typename T>
X86_SIMD_SORT_INLINE T get_pivot_scalar(T *arr, const int64_t left, const int64_t right) {
    // median of 8 equally spaced elements
    int64_t NUM_ELEMENTS = 8;
    int64_t MID = NUM_ELEMENTS / 2;
    int64_t size = (right - left) / NUM_ELEMENTS;
    T temp[NUM_ELEMENTS];
    for (int64_t i = 0; i < NUM_ELEMENTS; i++) temp[i] = arr[left + (i * size)];
    std::sort(temp, temp + NUM_ELEMENTS);
    return temp[MID];
}

template <typename vtype, typename T = typename vtype::type_t>
bool comparison_func_ge(const T &a, const T &b) {
    return a < b;
}

template <typename vtype, typename T = typename vtype::type_t>
bool comparison_func_gt(const T &a, const T &b) {
    return a <= b;
}

/*
 * COEX == Compare and Exchange two registers by swapping min and max values
 */
template <typename vtype, typename mm_t>
X86_SIMD_SORT_INLINE void COEX(mm_t &a, mm_t &b) {
    mm_t temp = a;
    a = vtype::min(a, b);
    b = vtype::max(temp, b);
}

template <typename vtype, typename reg_t = typename vtype::reg_t,
          typename opmask_t = typename vtype::opmask_t>
X86_SIMD_SORT_INLINE reg_t cmp_merge(reg_t in1, reg_t in2, opmask_t mask) {
    reg_t min = vtype::min(in2, in1);
    reg_t max = vtype::max(in2, in1);
    return vtype::mask_mov(min, mask, max); // 0 -> min, 1 -> max
}

template <typename vtype, typename type_t, typename reg_t>
int avx512_double_compressstore(type_t *left_addr, type_t *right_addr,
                                typename vtype::opmask_t k, reg_t reg) {
    int amount_ge_pivot = _mm_popcnt_u32((int)k);

    vtype::mask_compressstoreu(left_addr, vtype::knot_opmask(k), reg);
    vtype::mask_compressstoreu(right_addr + vtype::numlanes - amount_ge_pivot,
                               k, reg);

    return amount_ge_pivot;
}
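The double compress-store above is the heart of the vectorized partition: one popcount tells how many lanes are >= the pivot, and two masked compress-stores scatter a vector's lanes to the two ends of the destination in a single pass. A minimal standalone sketch of the same idea for 16 int32 lanes (a hypothetical demo, assuming a CPU and compiler with AVX-512F, e.g. built with -mavx512f; not code from this header):

#include <immintrin.h>
#include <cstdio>

int main() {
    int src[16] = {9, 1, 8, 2, 7, 3, 6, 4, 5, 0, 15, 11, 14, 12, 13, 10};
    int dst[16];
    __m512i v   = _mm512_loadu_si512(src);
    __m512i piv = _mm512_set1_epi32(8);
    __mmask16 ge = _mm512_cmpge_epi32_mask(v, piv);          // lanes >= pivot
    int n_ge = _mm_popcnt_u32(ge);                           // how many go right
    _mm512_mask_compressstoreu_epi32(dst, (__mmask16)~ge, v);          // < pivot, packed left
    _mm512_mask_compressstoreu_epi32(dst + 16 - n_ge, ge, v);          // >= pivot, packed right
    // prints 1 2 7 3 6 4 5 0  9 8 15 11 14 12 13 10 (order within halves preserved)
    for (int i = 0; i < 16; i++) printf("%d ", dst[i]);
    printf("\n");
}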

// Generic function dispatches to AVX2 or AVX512 code
template <typename vtype, typename type_t,
          typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_INLINE arrsize_t partition_vec(type_t *l_store, type_t *r_store,
                                             const reg_t curr_vec,
                                             const reg_t pivot_vec,
                                             reg_t &smallest_vec,
                                             reg_t &biggest_vec, bool use_gt) {
    //typename vtype::opmask_t ge_mask = vtype::ge(curr_vec, pivot_vec);
    typename vtype::opmask_t mask;
    if (use_gt) mask = vtype::gt(curr_vec, pivot_vec);
    else mask = vtype::ge(curr_vec, pivot_vec);

    int amount_ge_pivot =
        vtype::double_compressstore(l_store, r_store, mask, curr_vec);

    smallest_vec = vtype::min(curr_vec, smallest_vec);
    biggest_vec = vtype::max(curr_vec, biggest_vec);

    return amount_ge_pivot;
}

/*
 * Partition an array based on the pivot and return the index of the
 * first element that is greater than or equal to the pivot.
 */
template <typename vtype, typename type_t>
X86_SIMD_SORT_INLINE arrsize_t partition_avx512(type_t *arr, arrsize_t left,
                                                arrsize_t right, type_t pivot,
                                                type_t *smallest,
                                                type_t *biggest,
                                                bool use_gt) {
    auto comparison_func = use_gt ? comparison_func_gt<vtype> : comparison_func_ge<vtype>;
    /* make array length divisible by vtype::numlanes, shortening the array */
    for (int32_t i = (right - left) % vtype::numlanes; i > 0; --i) {
        *smallest = std::min(*smallest, arr[left], comparison_func);
        *biggest = std::max(*biggest, arr[left], comparison_func);
        if (!comparison_func(arr[left], pivot)) {
            std::swap(arr[left], arr[--right]);
        } else {
            ++left;
        }
    }

    if (left == right)
        return left; /* less than vtype::numlanes elements in the array */

    using reg_t = typename vtype::reg_t;
    reg_t pivot_vec = vtype::set1(pivot);
    reg_t min_vec = vtype::set1(*smallest);
    reg_t max_vec = vtype::set1(*biggest);

    if (right - left == vtype::numlanes) {
        reg_t vec = vtype::loadu(arr + left);
        arrsize_t unpartitioned = right - left - vtype::numlanes;
        arrsize_t l_store = left;

        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 vec, pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        *smallest = vtype::reducemin(min_vec);
        *biggest = vtype::reducemax(max_vec);
        return l_store;
    }

    // first and last vtype::numlanes values are partitioned at the end
    reg_t vec_left = vtype::loadu(arr + left);
    reg_t vec_right = vtype::loadu(arr + (right - vtype::numlanes));
    // store points of the vectors
    arrsize_t unpartitioned = right - left - vtype::numlanes;
    arrsize_t l_store = left;
    // indices for loading the elements
    left += vtype::numlanes;
    right -= vtype::numlanes;
    while (right - left != 0) {
        reg_t curr_vec;
        /*
         * if fewer elements are stored on the right side of the array,
         * then the next elements are loaded from the right side,
         * otherwise from the left side
         */
        if ((l_store + unpartitioned + vtype::numlanes) - right <
            left - l_store) {
            right -= vtype::numlanes;
            curr_vec = vtype::loadu(arr + right);
        } else {
            curr_vec = vtype::loadu(arr + left);
            left += vtype::numlanes;
        }
        // partition the current vector and save it on both sides of the array
        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 curr_vec, pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        unpartitioned -= vtype::numlanes;
    }

    /* partition and save vec_left and vec_right */
    arrsize_t amount_ge_pivot =
        partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                             vec_left, pivot_vec, min_vec, max_vec, use_gt);
    l_store += (vtype::numlanes - amount_ge_pivot);
    unpartitioned -= vtype::numlanes;

    amount_ge_pivot =
        partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                             vec_right, pivot_vec, min_vec, max_vec, use_gt);
    l_store += (vtype::numlanes - amount_ge_pivot);
    unpartitioned -= vtype::numlanes;

    *smallest = vtype::reducemin(min_vec);
    *biggest = vtype::reducemax(max_vec);
    return l_store;
}
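Behaviourally, `partition_avx512` is a (much faster) stand-in for a scalar partition pass: with `use_gt == false`, the returned index is that of the first element >= the pivot, and `*smallest`/`*biggest` report the min and max seen, which the caller uses to skip degenerate recursion. A scalar reference for that contract using the standard library (illustrative only, not the vectorized algorithm):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> v{5, 9, 1, 7, 3, 8, 2, 6};
    int pivot = 5;
    // use_gt == false: elements < pivot end up left of the returned index,
    // elements >= pivot right of it (relative order is not preserved).
    auto mid = std::partition(v.begin(), v.end(),
                              [&](int x) { return x < pivot; });
    printf("first element >= pivot is at index %td\n", mid - v.begin());
}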

template <typename vtype, int num_unroll,
          typename type_t = typename vtype::type_t>
X86_SIMD_SORT_INLINE arrsize_t
partition_avx512_unrolled(type_t *arr, arrsize_t left, arrsize_t right,
                          type_t pivot, type_t *smallest, type_t *biggest, bool use_gt) {
    if constexpr (num_unroll == 0) {
        return partition_avx512<vtype>(arr, left, right, pivot, smallest,
                                       biggest, use_gt);
    }

    /* Use regular partition_avx512 for smaller arrays */
    if (right - left < 3 * num_unroll * vtype::numlanes) {
        return partition_avx512<vtype>(arr, left, right, pivot, smallest,
                                       biggest, use_gt);
    }

    auto comparison_func = use_gt ? comparison_func_gt<vtype> : comparison_func_ge<vtype>;
    /* make array length divisible by vtype::numlanes, shortening the array */
    for (int32_t i = ((right - left) % (vtype::numlanes)); i > 0; --i) {
        *smallest = std::min(*smallest, arr[left], comparison_func);
        *biggest = std::max(*biggest, arr[left], comparison_func);
        if (!comparison_func(arr[left], pivot)) {
            std::swap(arr[left], arr[--right]);
        } else {
            ++left;
        }
    }

    arrsize_t unpartitioned = right - left - vtype::numlanes;
    arrsize_t l_store = left;

    using reg_t = typename vtype::reg_t;
    reg_t pivot_vec = vtype::set1(pivot);
    reg_t min_vec = vtype::set1(*smallest);
    reg_t max_vec = vtype::set1(*biggest);

    /* Calculate and load more registers to make the rest of the array a
     * multiple of num_unroll. These registers will be partitioned at the very
     * end. */
    int vecsToPartition = ((right - left) / vtype::numlanes) % num_unroll;
    reg_t vec_align[num_unroll];
    for (int i = 0; i < vecsToPartition; i++) {
        vec_align[i] = vtype::loadu(arr + left + i * vtype::numlanes);
    }
    left += vecsToPartition * vtype::numlanes;

    /* We will now have at least 3*num_unroll registers worth of data to
     * process. Load left and right vtype::numlanes*num_unroll values into
     * registers to make space for the in-place partition. The vec_left and
     * vec_right registers are partitioned at the end */
    reg_t vec_left[num_unroll], vec_right[num_unroll];
    X86_SIMD_SORT_UNROLL_LOOP(8)
    for (int ii = 0; ii < num_unroll; ++ii) {
        vec_left[ii] = vtype::loadu(arr + left + vtype::numlanes * ii);
        vec_right[ii] =
            vtype::loadu(arr + (right - vtype::numlanes * (num_unroll - ii)));
    }
    /* indices for loading the elements */
    left += num_unroll * vtype::numlanes;
    right -= num_unroll * vtype::numlanes;
    while (right - left != 0) {
        reg_t curr_vec[num_unroll];
        /*
         * if fewer elements are stored on the right side of the array,
         * then the next elements are loaded from the right side,
         * otherwise from the left side
         */
        if ((l_store + unpartitioned + vtype::numlanes) - right <
            left - l_store) {
            right -= num_unroll * vtype::numlanes;
            X86_SIMD_SORT_UNROLL_LOOP(8)
            for (int ii = 0; ii < num_unroll; ++ii) {
                curr_vec[ii] = vtype::loadu(arr + right + ii * vtype::numlanes);
                /*
                 * error: '_mm_prefetch' needs target feature mmx on clang-cl
                 */
#if !(defined(_MSC_VER) && defined(__clang__))
                _mm_prefetch((char *)(arr + right + ii * vtype::numlanes -
                                      num_unroll * vtype::numlanes),
                             _MM_HINT_T0);
#endif
            }
        } else {
            X86_SIMD_SORT_UNROLL_LOOP(8)
            for (int ii = 0; ii < num_unroll; ++ii) {
                curr_vec[ii] = vtype::loadu(arr + left + ii * vtype::numlanes);
                /*
                 * error: '_mm_prefetch' needs target feature mmx on clang-cl
                 */
#if !(defined(_MSC_VER) && defined(__clang__))
                _mm_prefetch((char *)(arr + left + ii * vtype::numlanes +
                                      num_unroll * vtype::numlanes),
                             _MM_HINT_T0);
#endif
            }
            left += num_unroll * vtype::numlanes;
        }
        /* partition the current vectors and save them on both sides of the
         * array */
        X86_SIMD_SORT_UNROLL_LOOP(8)
        for (int ii = 0; ii < num_unroll; ++ii) {
            arrsize_t amount_ge_pivot = partition_vec<vtype>(
                arr + l_store, arr + l_store + unpartitioned, curr_vec[ii],
                pivot_vec, min_vec, max_vec, use_gt);
            l_store += (vtype::numlanes - amount_ge_pivot);
            unpartitioned -= vtype::numlanes;
        }
    }

    /* partition and save vec_left[num_unroll] and vec_right[num_unroll] */
    X86_SIMD_SORT_UNROLL_LOOP(8)
    for (int ii = 0; ii < num_unroll; ++ii) {
        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 vec_left[ii], pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        unpartitioned -= vtype::numlanes;
    }
    X86_SIMD_SORT_UNROLL_LOOP(8)
    for (int ii = 0; ii < num_unroll; ++ii) {
        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 vec_right[ii], pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        unpartitioned -= vtype::numlanes;
    }

    /* partition and save vec_align[vecsToPartition] */
    X86_SIMD_SORT_UNROLL_LOOP(8)
    for (int ii = 0; ii < vecsToPartition; ++ii) {
        arrsize_t amount_ge_pivot =
            partition_vec<vtype>(arr + l_store, arr + l_store + unpartitioned,
                                 vec_align[ii], pivot_vec, min_vec, max_vec, use_gt);
        l_store += (vtype::numlanes - amount_ge_pivot);
        unpartitioned -= vtype::numlanes;
    }

    *smallest = vtype::reducemin(min_vec);
    *biggest = vtype::reducemax(max_vec);
    return l_store;
}

template <typename vtype, int maxN>
void sort_n(typename vtype::type_t *arr, int N);

template <typename vtype, typename type_t>
static void qsort_(type_t *arr, arrsize_t left, arrsize_t right,
                   arrsize_t max_iters) {
    /*
     * Resort to std::sort if quicksort isn't making any progress
     */
    if (max_iters <= 0) {
        std::sort(arr + left, arr + right + 1, comparison_func_ge<vtype>);
        return;
    }
    /*
     * Base case: use bitonic networks to sort arrays <=
     * vtype::network_sort_threshold
     */
    if (right + 1 - left <= vtype::network_sort_threshold) {
        sort_n<vtype, vtype::network_sort_threshold>(
            arr + left, (int32_t)(right + 1 - left));
        return;
    }

    type_t pivot = get_pivot_blocks<vtype, type_t>(arr, left, right);
    type_t smallest = vtype::type_max();
    type_t biggest = vtype::type_min();

    arrsize_t pivot_index =
        partition_avx512_unrolled<vtype, vtype::partition_unroll_factor>(
            arr, left, right + 1, pivot, &smallest, &biggest, false);

    if (pivot != smallest)
        qsort_<vtype>(arr, left, pivot_index - 1, max_iters - 1);
    if (pivot != biggest) qsort_<vtype>(arr, pivot_index, right, max_iters - 1);
}
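`qsort_` follows the classic introsort recipe: recurse while progress is being made, and fall back to `std::sort` once the depth budget `max_iters` (seeded with `2 * log2(n)` by `simd_fast_sort` below) is exhausted, which caps the worst case at O(n log n). A scalar sketch of the same pattern (assumed helper names, illustrative only):

#include <algorithm>
#include <cmath>
#include <cstddef>

template <typename T>
void intro_qsort(T *a, std::ptrdiff_t lo, std::ptrdiff_t hi, int depth) {
    if (hi - lo <= 16) {              // small base case (a sorting network in the real code)
        std::sort(a + lo, a + hi);
        return;
    }
    if (depth <= 0) {                 // no progress guarantee left: safe fallback
        std::sort(a + lo, a + hi);
        return;
    }
    T pivot = a[lo + (hi - lo) / 2];
    T *mid = std::partition(a + lo, a + hi,
                            [&](const T &x) { return x < pivot; });
    std::ptrdiff_t m = mid - a;
    if (m > lo) intro_qsort(a, lo, m, depth - 1);
    if (m < hi) intro_qsort(a, m, hi, depth - 1);
}
// usage: intro_qsort(arr, 0, n, 2 * (int)std::log2((double)n));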

// Hooks for OpenJDK sort
// to_index (exclusive)
template <typename vtype, typename type_t>
static int64_t vectorized_partition(type_t *arr, int64_t from_index, int64_t to_index, type_t pivot, bool use_gt) {
    type_t smallest = vtype::type_max();
    type_t biggest = vtype::type_min();
    int64_t pivot_index = partition_avx512_unrolled<vtype, 2>(
        arr, from_index, to_index, pivot, &smallest, &biggest, use_gt);
    return pivot_index;
}

// partitioning functions
template <typename vtype, typename T>
X86_SIMD_SORT_INLINE void simd_dual_pivot_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2) {
    const T pivot1 = arr[index_pivot1];
    const T pivot2 = arr[index_pivot2];

    const int64_t low = from_index;
    const int64_t high = to_index;
    const int64_t start = low + 1;
    const int64_t end = high - 1;

    std::swap(arr[index_pivot1], arr[low]);
    std::swap(arr[index_pivot2], arr[end]);

    const int64_t pivot_index2 = vectorized_partition<vtype, T>(arr, start, end, pivot2, true); // use_gt = true
    std::swap(arr[end], arr[pivot_index2]);
    int64_t upper = pivot_index2;

    // if all other elements are greater than pivot2 (and pivot1), no need to do further partitioning
    if (upper == start) {
        pivot_indices[0] = low;
        pivot_indices[1] = upper;
        return;
    }

    const int64_t pivot_index1 = vectorized_partition<vtype, T>(arr, start, upper, pivot1, false); // use_ge (use_gt = false)
    int64_t lower = pivot_index1 - 1;
    std::swap(arr[low], arr[lower]);

    pivot_indices[0] = lower;
    pivot_indices[1] = upper;
}

template <typename vtype, typename T>
X86_SIMD_SORT_INLINE void simd_single_pivot_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot) {
    const T pivot = arr[index_pivot];

    const int64_t low = from_index;
    const int64_t high = to_index;
    const int64_t end = high - 1;

    const int64_t pivot_index1 = vectorized_partition<vtype, T>(arr, low, high, pivot, false); // use_gt = false (use_ge)
    int64_t lower = pivot_index1;

    const int64_t pivot_index2 = vectorized_partition<vtype, T>(arr, pivot_index1, high, pivot, true); // use_gt = true
    int64_t upper = pivot_index2;

    pivot_indices[0] = lower;
    pivot_indices[1] = upper;
}

template <typename vtype, typename T>
X86_SIMD_SORT_INLINE void simd_fast_partition(T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2) {
    if (index_pivot1 != index_pivot2) {
        simd_dual_pivot_partition<vtype, T>(arr, from_index, to_index, pivot_indices, index_pivot1, index_pivot2);
    }
    else {
        simd_single_pivot_partition<vtype, T>(arr, from_index, to_index, pivot_indices, index_pivot1);
    }
}
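The two-pass structure above realizes a dual-pivot contract: roughly, `[from, pivot_indices[0])` holds elements below pivot1, `[pivot_indices[0], pivot_indices[1])` the middle band, and `[pivot_indices[1], to)` elements above pivot2. A scalar model of that contract (illustrative only, not the vectorized code):

#include <algorithm>
#include <cstdio>
#include <vector>

// Scalar model: idx[0] = first element >= p1, idx[1] = first element > p2.
static void dual_pivot_model(std::vector<int> &v, int p1, int p2, long idx[2]) {
    auto m2 = std::partition(v.begin(), v.end(),
                             [&](int x) { return x <= p2; }); // > p2 to the back
    auto m1 = std::partition(v.begin(), m2,
                             [&](int x) { return x < p1; });  // < p1 to the front
    idx[0] = m1 - v.begin();
    idx[1] = m2 - v.begin();
}

int main() {
    std::vector<int> v{9, 4, 1, 7, 5, 2, 8, 3, 6};
    long idx[2];
    dual_pivot_model(v, 3, 7, idx);   // bands: < 3, [3, 7], > 7
    printf("lower = %ld, upper = %ld\n", idx[0], idx[1]);
}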

template <typename T>
X86_SIMD_SORT_INLINE void insertion_sort(T *arr, int32_t from_index, int32_t to_index) {
    for (int i, k = from_index; ++k < to_index; ) {
        T ai = arr[i = k];
        if (ai < arr[i - 1]) {
            while (--i >= from_index && ai < arr[i]) {
                arr[i + 1] = arr[i];
            }
            arr[i + 1] = ai;
        }
    }
}

template <typename vtype, typename T>
X86_SIMD_SORT_INLINE void simd_fast_sort(T *arr, arrsize_t from_index, arrsize_t to_index, const arrsize_t INS_SORT_THRESHOLD)
{
    arrsize_t arrsize = to_index - from_index;
    if (arrsize <= INS_SORT_THRESHOLD) {
        insertion_sort<T>(arr, from_index, to_index);
    } else {
        qsort_<vtype, T>(arr, from_index, to_index - 1, 2 * (arrsize_t)log2(arrsize));
    }
}

#define DEFINE_METHODS(ISA, VTYPE) \
    template <typename T> \
    X86_SIMD_SORT_INLINE void ISA##_fast_sort( \
        T *arr, arrsize_t from_index, arrsize_t to_index, const arrsize_t INS_SORT_THRESHOLD) \
    { \
        simd_fast_sort<VTYPE, T>(arr, from_index, to_index, INS_SORT_THRESHOLD); \
    } \
    template <typename T> \
    X86_SIMD_SORT_INLINE void ISA##_fast_partition( \
        T *arr, int64_t from_index, int64_t to_index, int32_t *pivot_indices, int64_t index_pivot1, int64_t index_pivot2) \
    { \
        simd_fast_partition<VTYPE, T>(arr, from_index, to_index, pivot_indices, index_pivot1, index_pivot2); \
    }

DEFINE_METHODS(avx2, avx2_vector<T>)
DEFINE_METHODS(avx512, zmm_vector<T>)

#endif // XSS_COMMON_QSORT
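`DEFINE_METHODS` stamps out the JDK-facing entry points: `avx2_fast_sort`/`avx2_fast_partition` and `avx512_fast_sort`/`avx512_fast_partition`. A hypothetical call site, assuming these headers are included and the CPU supports AVX-512 (the threshold value is illustrative, not the one the JDK uses):

#include <cstdint>
// assumes xss-common-qsort.h and a dtype-specific vector header are included

void sort_ints_avx512(int32_t *buf, int64_t n) {
    // runs of <= 64 elements go to insertion sort, larger runs to the
    // vectorized quicksort (threshold chosen arbitrarily for this sketch)
    avx512_fast_sort<int32_t>(buf, /*from_index=*/0, /*to_index=*/n,
                              /*INS_SORT_THRESHOLD=*/64);
}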

209 src/java.base/linux/native/libsimdsort/xss-network-qsort.hpp (new file)
@@ -0,0 +1,209 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

#ifndef XSS_NETWORK_QSORT
#define XSS_NETWORK_QSORT

#include "xss-common-qsort.h"
#include "xss-optimal-networks.hpp"

template <typename vtype, int numVecs, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void bitonic_sort_n_vec(reg_t *regs) {
    if constexpr (numVecs == 1) {
        UNUSED(regs);
        return;
    } else if constexpr (numVecs == 2) {
        COEX<vtype>(regs[0], regs[1]);
    } else if constexpr (numVecs == 4) {
        optimal_sort_4<vtype>(regs);
    } else if constexpr (numVecs == 8) {
        optimal_sort_8<vtype>(regs);
    } else if constexpr (numVecs == 16) {
        optimal_sort_16<vtype>(regs);
    } else if constexpr (numVecs == 32) {
        optimal_sort_32<vtype>(regs);
    } else {
        static_assert(numVecs == -1, "should not reach here");
    }
}

/*
 * Swizzle ops explained:
 *
 * swap_n<scale>: swap neighbouring blocks of size <scale/2> within a block of
 * size <scale>:
 *     reg i = [7,6,5,4,3,2,1,0]
 *     swap_n<2>: = [[6,7],[4,5],[2,3],[0,1]]
 *     swap_n<4>: = [[5,4,7,6],[1,0,3,2]]
 *     swap_n<8>: = [[3,2,1,0,7,6,5,4]]
 *
 * reverse_n<scale>: reverse elements within a block of size <scale>:
 *     reg i = [7,6,5,4,3,2,1,0]
 *     rev_n<2>: = [[6,7],[4,5],[2,3],[0,1]]
 *     rev_n<4>: = [[4,5,6,7],[0,1,2,3]]
 *     rev_n<8>: = [[0,1,2,3,4,5,6,7]]
 *
 * merge_n<scale>: merge blocks of <scale/2> elements from two regs:
 *     reg b,a = [a,a,a,a,a,a,a,a], [b,b,b,b,b,b,b,b]
 *     merge_n<2> = [a,b,a,b,a,b,a,b]
 *     merge_n<4> = [a,a,b,b,a,a,b,b]
 *     merge_n<8> = [a,a,a,a,b,b,b,b]
 */
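The lane index patterns in the comment above are easier to see in a scalar model. For power-of-two `scale`, `swap_n` is an XOR of the lane index with `scale/2`, `reverse_n` an XOR with `scale-1`, and `merge_n` selects blocks of `scale/2` lanes alternately from its two inputs. A hypothetical reference model (not the intrinsic-based swizzle_ops; note it prints lane 0 first, while the comment lists lane 7 first):

#include <array>
#include <cstdio>

constexpr int N = 8; // model an 8-lane register

template <int scale>
std::array<int, N> swap_n(std::array<int, N> v) {
    std::array<int, N> r{};
    for (int i = 0; i < N; i++) r[i] = v[i ^ (scale / 2)]; // swap half-blocks
    return r;
}

template <int scale>
std::array<int, N> reverse_n(std::array<int, N> v) {
    std::array<int, N> r{};
    for (int i = 0; i < N; i++) r[i] = v[i ^ (scale - 1)]; // reverse inside block
    return r;
}

template <int scale>
std::array<int, N> merge_n(std::array<int, N> a, std::array<int, N> b) {
    std::array<int, N> r{};
    for (int i = 0; i < N; i++) r[i] = (i & (scale / 2)) ? a[i] : b[i];
    return r;
}

int main() {
    std::array<int, N> v{0, 1, 2, 3, 4, 5, 6, 7};
    auto s = swap_n<4>(v);    // 2 3 0 1 6 7 4 5
    auto r = reverse_n<4>(v); // 3 2 1 0 7 6 5 4
    for (int x : s) printf("%d ", x);
    printf("\n");
    for (int x : r) printf("%d ", x);
    printf("\n");
}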

template <typename vtype, int numVecs, int scale, bool first = true>
X86_SIMD_SORT_FINLINE void internal_merge_n_vec(typename vtype::reg_t *reg) {
    using reg_t = typename vtype::reg_t;
    using swizzle = typename vtype::swizzle_ops;
    if constexpr (scale <= 1) {
        UNUSED(reg);
        return;
    } else {
        if constexpr (first) {
            // Use reverse then merge
            X86_SIMD_SORT_UNROLL_LOOP(64)
            for (int i = 0; i < numVecs; i++) {
                reg_t &v = reg[i];
                reg_t rev = swizzle::template reverse_n<vtype, scale>(v);
                COEX<vtype>(rev, v);
                v = swizzle::template merge_n<vtype, scale>(v, rev);
            }
        } else {
            // Use swap then merge
            X86_SIMD_SORT_UNROLL_LOOP(64)
            for (int i = 0; i < numVecs; i++) {
                reg_t &v = reg[i];
                reg_t swap = swizzle::template swap_n<vtype, scale>(v);
                COEX<vtype>(swap, v);
                v = swizzle::template merge_n<vtype, scale>(v, swap);
            }
        }
        internal_merge_n_vec<vtype, numVecs, scale / 2, false>(reg);
    }
}

template <typename vtype, int numVecs, int scale,
          typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void merge_substep_n_vec(reg_t *regs) {
    using swizzle = typename vtype::swizzle_ops;
    if constexpr (numVecs <= 1) {
        UNUSED(regs);
        return;
    }

    // Reverse upper half of vectors
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = numVecs / 2; i < numVecs; i++) {
        regs[i] = swizzle::template reverse_n<vtype, scale>(regs[i]);
    }
    // Do compare exchanges
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = 0; i < numVecs / 2; i++) {
        COEX<vtype>(regs[i], regs[numVecs - 1 - i]);
    }

    merge_substep_n_vec<vtype, numVecs / 2, scale>(regs);
    merge_substep_n_vec<vtype, numVecs / 2, scale>(regs + numVecs / 2);
}

template <typename vtype, int numVecs, int scale,
          typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void merge_step_n_vec(reg_t *regs) {
    // Do cross vector merges
    merge_substep_n_vec<vtype, numVecs, scale>(regs);

    // Do internal vector merges
    internal_merge_n_vec<vtype, numVecs, scale>(regs);
}

template <typename vtype, int numVecs, int numPer = 2,
          typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void merge_n_vec(reg_t *regs) {
    if constexpr (numPer > vtype::numlanes) {
        UNUSED(regs);
        return;
    } else {
        merge_step_n_vec<vtype, numVecs, numPer>(regs);
        merge_n_vec<vtype, numVecs, numPer * 2>(regs);
    }
}

template <typename vtype, int numVecs, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_INLINE void sort_n_vec(typename vtype::type_t *arr, int N) {
    static_assert(numVecs > 0, "numVecs should be > 0");
    if constexpr (numVecs > 1) {
        if (N * 2 <= numVecs * vtype::numlanes) {
            sort_n_vec<vtype, numVecs / 2>(arr, N);
            return;
        }
    }

    reg_t vecs[numVecs];

    // Generate masks for loading and storing
    typename vtype::opmask_t ioMasks[numVecs - numVecs / 2];
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = numVecs / 2, j = 0; i < numVecs; i++, j++) {
        uint64_t num_to_read =
            std::min((uint64_t)std::max(0, N - i * vtype::numlanes),
                     (uint64_t)vtype::numlanes);
        ioMasks[j] = vtype::get_partial_loadmask(num_to_read);
    }

    // Unmasked part of the load
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = 0; i < numVecs / 2; i++) {
        vecs[i] = vtype::loadu(arr + i * vtype::numlanes);
    }
    // Masked part of the load
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = numVecs / 2, j = 0; i < numVecs; i++, j++) {
        vecs[i] = vtype::mask_loadu(vtype::zmm_max(), ioMasks[j],
                                    arr + i * vtype::numlanes);
    }

    /* Run the initial sorting network to sort the columns of the
     * [numVecs x num_lanes] matrix
     */
    bitonic_sort_n_vec<vtype, numVecs>(vecs);

    // Merge the vectors using bitonic merging networks
    merge_n_vec<vtype, numVecs>(vecs);

    // Unmasked part of the store
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = 0; i < numVecs / 2; i++) {
        vtype::storeu(arr + i * vtype::numlanes, vecs[i]);
    }
    // Masked part of the store
    X86_SIMD_SORT_UNROLL_LOOP(64)
    for (int i = numVecs / 2, j = 0; i < numVecs; i++, j++) {
        vtype::mask_storeu(arr + i * vtype::numlanes, ioMasks[j], vecs[i]);
    }
}
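`sort_n_vec` handles a tail that is not a multiple of the lane count by building per-vector load masks: lanes past `N` are filled with `zmm_max()` padding, which sorts to the end and is never stored back. The mask math reduces to setting the low `num_to_read` bits; a standalone sketch of the assumed semantics of `get_partial_loadmask`, shown for a 16-lane vector:

#include <cstdint>
#include <cstdio>

// Assumed semantics: a mask with the low num_to_read bits set.
static uint16_t partial_loadmask16(uint64_t num_to_read) {
    return (uint16_t)((1u << num_to_read) - 1);
}

int main() {
    printf("%#x\n", partial_loadmask16(5));  // 0x1f: read 5 of 16 lanes
    printf("%#x\n", partial_loadmask16(16)); // 0xffff: full vector
}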

template <typename vtype, int maxN>
X86_SIMD_SORT_INLINE void sort_n(typename vtype::type_t *arr, int N) {
    constexpr int numVecs = maxN / vtype::numlanes;
    constexpr bool isMultiple = (maxN == (vtype::numlanes * numVecs));
    constexpr bool powerOfTwo = (numVecs != 0 && !(numVecs & (numVecs - 1)));
    static_assert(powerOfTwo == true && isMultiple == true,
                  "maxN must be vtype::numlanes times a power of 2");

    sort_n_vec<vtype, numVecs>(arr, N);
}
#endif

342 src/java.base/linux/native/libsimdsort/xss-optimal-networks.hpp (new file)
@@ -0,0 +1,342 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort).
// All of these source files are generated from the optimal networks described in
// https://bertdobbelaere.github.io/sorting_networks.html

template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void optimal_sort_4(reg_t *vecs) {
    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);

    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[3]);

    COEX<vtype>(vecs[1], vecs[2]);
}

template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void optimal_sort_8(reg_t *vecs) {
    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);
    COEX<vtype>(vecs[4], vecs[6]);
    COEX<vtype>(vecs[5], vecs[7]);

    COEX<vtype>(vecs[0], vecs[4]);
    COEX<vtype>(vecs[1], vecs[5]);
    COEX<vtype>(vecs[2], vecs[6]);
    COEX<vtype>(vecs[3], vecs[7]);

    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[3]);
    COEX<vtype>(vecs[4], vecs[5]);
    COEX<vtype>(vecs[6], vecs[7]);

    COEX<vtype>(vecs[2], vecs[4]);
    COEX<vtype>(vecs[3], vecs[5]);

    COEX<vtype>(vecs[1], vecs[4]);
    COEX<vtype>(vecs[3], vecs[6]);

    COEX<vtype>(vecs[1], vecs[2]);
    COEX<vtype>(vecs[3], vecs[4]);
    COEX<vtype>(vecs[5], vecs[6]);
}

template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void optimal_sort_16(reg_t *vecs) {
    COEX<vtype>(vecs[0], vecs[13]);
    COEX<vtype>(vecs[1], vecs[12]);
    COEX<vtype>(vecs[2], vecs[15]);
    COEX<vtype>(vecs[3], vecs[14]);
    COEX<vtype>(vecs[4], vecs[8]);
    COEX<vtype>(vecs[5], vecs[6]);
    COEX<vtype>(vecs[7], vecs[11]);
    COEX<vtype>(vecs[9], vecs[10]);

    COEX<vtype>(vecs[0], vecs[5]);
    COEX<vtype>(vecs[1], vecs[7]);
    COEX<vtype>(vecs[2], vecs[9]);
    COEX<vtype>(vecs[3], vecs[4]);
    COEX<vtype>(vecs[6], vecs[13]);
    COEX<vtype>(vecs[8], vecs[14]);
    COEX<vtype>(vecs[10], vecs[15]);
    COEX<vtype>(vecs[11], vecs[12]);

    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[3]);
    COEX<vtype>(vecs[4], vecs[5]);
    COEX<vtype>(vecs[6], vecs[8]);
    COEX<vtype>(vecs[7], vecs[9]);
    COEX<vtype>(vecs[10], vecs[11]);
    COEX<vtype>(vecs[12], vecs[13]);
    COEX<vtype>(vecs[14], vecs[15]);

    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);
    COEX<vtype>(vecs[4], vecs[10]);
    COEX<vtype>(vecs[5], vecs[11]);
    COEX<vtype>(vecs[6], vecs[7]);
    COEX<vtype>(vecs[8], vecs[9]);
    COEX<vtype>(vecs[12], vecs[14]);
    COEX<vtype>(vecs[13], vecs[15]);

    COEX<vtype>(vecs[1], vecs[2]);
    COEX<vtype>(vecs[3], vecs[12]);
    COEX<vtype>(vecs[4], vecs[6]);
    COEX<vtype>(vecs[5], vecs[7]);
    COEX<vtype>(vecs[8], vecs[10]);
    COEX<vtype>(vecs[9], vecs[11]);
    COEX<vtype>(vecs[13], vecs[14]);

    COEX<vtype>(vecs[1], vecs[4]);
    COEX<vtype>(vecs[2], vecs[6]);
    COEX<vtype>(vecs[5], vecs[8]);
    COEX<vtype>(vecs[7], vecs[10]);
    COEX<vtype>(vecs[9], vecs[13]);
    COEX<vtype>(vecs[11], vecs[14]);

    COEX<vtype>(vecs[2], vecs[4]);
    COEX<vtype>(vecs[3], vecs[6]);
    COEX<vtype>(vecs[9], vecs[12]);
    COEX<vtype>(vecs[11], vecs[13]);

    COEX<vtype>(vecs[3], vecs[5]);
    COEX<vtype>(vecs[6], vecs[8]);
    COEX<vtype>(vecs[7], vecs[9]);
    COEX<vtype>(vecs[10], vecs[12]);

    COEX<vtype>(vecs[3], vecs[4]);
    COEX<vtype>(vecs[5], vecs[6]);
    COEX<vtype>(vecs[7], vecs[8]);
    COEX<vtype>(vecs[9], vecs[10]);
    COEX<vtype>(vecs[11], vecs[12]);

    COEX<vtype>(vecs[6], vecs[7]);
    COEX<vtype>(vecs[8], vecs[9]);
}

template <typename vtype, typename reg_t = typename vtype::reg_t>
X86_SIMD_SORT_FINLINE void optimal_sort_32(reg_t *vecs) {
    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[3]);
    COEX<vtype>(vecs[4], vecs[5]);
    COEX<vtype>(vecs[6], vecs[7]);
    COEX<vtype>(vecs[8], vecs[9]);
    COEX<vtype>(vecs[10], vecs[11]);
    COEX<vtype>(vecs[12], vecs[13]);
    COEX<vtype>(vecs[14], vecs[15]);
    COEX<vtype>(vecs[16], vecs[17]);
    COEX<vtype>(vecs[18], vecs[19]);
    COEX<vtype>(vecs[20], vecs[21]);
    COEX<vtype>(vecs[22], vecs[23]);
    COEX<vtype>(vecs[24], vecs[25]);
    COEX<vtype>(vecs[26], vecs[27]);
    COEX<vtype>(vecs[28], vecs[29]);
    COEX<vtype>(vecs[30], vecs[31]);

    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);
    COEX<vtype>(vecs[4], vecs[6]);
    COEX<vtype>(vecs[5], vecs[7]);
    COEX<vtype>(vecs[8], vecs[10]);
    COEX<vtype>(vecs[9], vecs[11]);
    COEX<vtype>(vecs[12], vecs[14]);
    COEX<vtype>(vecs[13], vecs[15]);
    COEX<vtype>(vecs[16], vecs[18]);
    COEX<vtype>(vecs[17], vecs[19]);
    COEX<vtype>(vecs[20], vecs[22]);
    COEX<vtype>(vecs[21], vecs[23]);
    COEX<vtype>(vecs[24], vecs[26]);
    COEX<vtype>(vecs[25], vecs[27]);
    COEX<vtype>(vecs[28], vecs[30]);
    COEX<vtype>(vecs[29], vecs[31]);

    COEX<vtype>(vecs[0], vecs[4]);
    COEX<vtype>(vecs[1], vecs[5]);
    COEX<vtype>(vecs[2], vecs[6]);
    COEX<vtype>(vecs[3], vecs[7]);
    COEX<vtype>(vecs[8], vecs[12]);
    COEX<vtype>(vecs[9], vecs[13]);
    COEX<vtype>(vecs[10], vecs[14]);
    COEX<vtype>(vecs[11], vecs[15]);
    COEX<vtype>(vecs[16], vecs[20]);
    COEX<vtype>(vecs[17], vecs[21]);
    COEX<vtype>(vecs[18], vecs[22]);
    COEX<vtype>(vecs[19], vecs[23]);
    COEX<vtype>(vecs[24], vecs[28]);
    COEX<vtype>(vecs[25], vecs[29]);
    COEX<vtype>(vecs[26], vecs[30]);
    COEX<vtype>(vecs[27], vecs[31]);

    COEX<vtype>(vecs[0], vecs[8]);
    COEX<vtype>(vecs[1], vecs[9]);
    COEX<vtype>(vecs[2], vecs[10]);
    COEX<vtype>(vecs[3], vecs[11]);
    COEX<vtype>(vecs[4], vecs[12]);
    COEX<vtype>(vecs[5], vecs[13]);
    COEX<vtype>(vecs[6], vecs[14]);
    COEX<vtype>(vecs[7], vecs[15]);
    COEX<vtype>(vecs[16], vecs[24]);
    COEX<vtype>(vecs[17], vecs[25]);
    COEX<vtype>(vecs[18], vecs[26]);
    COEX<vtype>(vecs[19], vecs[27]);
    COEX<vtype>(vecs[20], vecs[28]);
    COEX<vtype>(vecs[21], vecs[29]);
    COEX<vtype>(vecs[22], vecs[30]);
    COEX<vtype>(vecs[23], vecs[31]);

    COEX<vtype>(vecs[0], vecs[16]);
    COEX<vtype>(vecs[1], vecs[8]);
    COEX<vtype>(vecs[2], vecs[4]);
    COEX<vtype>(vecs[3], vecs[12]);
    COEX<vtype>(vecs[5], vecs[10]);
    COEX<vtype>(vecs[6], vecs[9]);
    COEX<vtype>(vecs[7], vecs[14]);
    COEX<vtype>(vecs[11], vecs[13]);
    COEX<vtype>(vecs[15], vecs[31]);
    COEX<vtype>(vecs[17], vecs[24]);
    COEX<vtype>(vecs[18], vecs[20]);
    COEX<vtype>(vecs[19], vecs[28]);
    COEX<vtype>(vecs[21], vecs[26]);
    COEX<vtype>(vecs[22], vecs[25]);
    COEX<vtype>(vecs[23], vecs[30]);
    COEX<vtype>(vecs[27], vecs[29]);

    COEX<vtype>(vecs[1], vecs[2]);
    COEX<vtype>(vecs[3], vecs[5]);
    COEX<vtype>(vecs[4], vecs[8]);
    COEX<vtype>(vecs[6], vecs[22]);
    COEX<vtype>(vecs[7], vecs[11]);
    COEX<vtype>(vecs[9], vecs[25]);
    COEX<vtype>(vecs[10], vecs[12]);
    COEX<vtype>(vecs[13], vecs[14]);
    COEX<vtype>(vecs[17], vecs[18]);
    COEX<vtype>(vecs[19], vecs[21]);
    COEX<vtype>(vecs[20], vecs[24]);
    COEX<vtype>(vecs[23], vecs[27]);
    COEX<vtype>(vecs[26], vecs[28]);
    COEX<vtype>(vecs[29], vecs[30]);

    COEX<vtype>(vecs[1], vecs[17]);
    COEX<vtype>(vecs[2], vecs[18]);
    COEX<vtype>(vecs[3], vecs[19]);
    COEX<vtype>(vecs[4], vecs[20]);
    COEX<vtype>(vecs[5], vecs[10]);
    COEX<vtype>(vecs[7], vecs[23]);
    COEX<vtype>(vecs[8], vecs[24]);
    COEX<vtype>(vecs[11], vecs[27]);
    COEX<vtype>(vecs[12], vecs[28]);
    COEX<vtype>(vecs[13], vecs[29]);
    COEX<vtype>(vecs[14], vecs[30]);
    COEX<vtype>(vecs[21], vecs[26]);

    COEX<vtype>(vecs[3], vecs[17]);
    COEX<vtype>(vecs[4], vecs[16]);
    COEX<vtype>(vecs[5], vecs[21]);
    COEX<vtype>(vecs[6], vecs[18]);
    COEX<vtype>(vecs[7], vecs[9]);
    COEX<vtype>(vecs[8], vecs[20]);
    COEX<vtype>(vecs[10], vecs[26]);
    COEX<vtype>(vecs[11], vecs[23]);
    COEX<vtype>(vecs[13], vecs[25]);
    COEX<vtype>(vecs[14], vecs[28]);
    COEX<vtype>(vecs[15], vecs[27]);
    COEX<vtype>(vecs[22], vecs[24]);

    COEX<vtype>(vecs[1], vecs[4]);
    COEX<vtype>(vecs[3], vecs[8]);
    COEX<vtype>(vecs[5], vecs[16]);
    COEX<vtype>(vecs[7], vecs[17]);
    COEX<vtype>(vecs[9], vecs[21]);
    COEX<vtype>(vecs[10], vecs[22]);
    COEX<vtype>(vecs[11], vecs[19]);
    COEX<vtype>(vecs[12], vecs[20]);
    COEX<vtype>(vecs[14], vecs[24]);
    COEX<vtype>(vecs[15], vecs[26]);
    COEX<vtype>(vecs[23], vecs[28]);
    COEX<vtype>(vecs[27], vecs[30]);

    COEX<vtype>(vecs[2], vecs[5]);
    COEX<vtype>(vecs[7], vecs[8]);
    COEX<vtype>(vecs[9], vecs[18]);
    COEX<vtype>(vecs[11], vecs[17]);
    COEX<vtype>(vecs[12], vecs[16]);
    COEX<vtype>(vecs[13], vecs[22]);
    COEX<vtype>(vecs[14], vecs[20]);
    COEX<vtype>(vecs[15], vecs[19]);
    COEX<vtype>(vecs[23], vecs[24]);
    COEX<vtype>(vecs[26], vecs[29]);

    COEX<vtype>(vecs[2], vecs[4]);
    COEX<vtype>(vecs[6], vecs[12]);
    COEX<vtype>(vecs[9], vecs[16]);
    COEX<vtype>(vecs[10], vecs[11]);
    COEX<vtype>(vecs[13], vecs[17]);
    COEX<vtype>(vecs[14], vecs[18]);
    COEX<vtype>(vecs[15], vecs[22]);
    COEX<vtype>(vecs[19], vecs[25]);
    COEX<vtype>(vecs[20], vecs[21]);
    COEX<vtype>(vecs[27], vecs[29]);

    COEX<vtype>(vecs[5], vecs[6]);
    COEX<vtype>(vecs[8], vecs[12]);
    COEX<vtype>(vecs[9], vecs[10]);
    COEX<vtype>(vecs[11], vecs[13]);
    COEX<vtype>(vecs[14], vecs[16]);
    COEX<vtype>(vecs[15], vecs[17]);
    COEX<vtype>(vecs[18], vecs[20]);
    COEX<vtype>(vecs[19], vecs[23]);
    COEX<vtype>(vecs[21], vecs[22]);
    COEX<vtype>(vecs[25], vecs[26]);

    COEX<vtype>(vecs[3], vecs[5]);
    COEX<vtype>(vecs[6], vecs[7]);
    COEX<vtype>(vecs[8], vecs[9]);
    COEX<vtype>(vecs[10], vecs[12]);
    COEX<vtype>(vecs[11], vecs[14]);
    COEX<vtype>(vecs[13], vecs[16]);
    COEX<vtype>(vecs[15], vecs[18]);
    COEX<vtype>(vecs[17], vecs[20]);
    COEX<vtype>(vecs[19], vecs[21]);
    COEX<vtype>(vecs[22], vecs[23]);
    COEX<vtype>(vecs[24], vecs[25]);
    COEX<vtype>(vecs[26], vecs[28]);

    COEX<vtype>(vecs[3], vecs[4]);
    COEX<vtype>(vecs[5], vecs[6]);
    COEX<vtype>(vecs[7], vecs[8]);
    COEX<vtype>(vecs[9], vecs[10]);
    COEX<vtype>(vecs[11], vecs[12]);
    COEX<vtype>(vecs[13], vecs[14]);
    COEX<vtype>(vecs[15], vecs[16]);
    COEX<vtype>(vecs[17], vecs[18]);
    COEX<vtype>(vecs[19], vecs[20]);
    COEX<vtype>(vecs[21], vecs[22]);
    COEX<vtype>(vecs[23], vecs[24]);
    COEX<vtype>(vecs[25], vecs[26]);
    COEX<vtype>(vecs[27], vecs[28]);
}
@@ -0,0 +1,88 @@
/*
 * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
 * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)

template <typename vtype, typename mm_t>
X86_SIMD_SORT_INLINE void COEX(mm_t &a, mm_t &b);

template <typename vtype, typename type_t>
X86_SIMD_SORT_INLINE type_t get_pivot(type_t *arr, const arrsize_t left,
                                      const arrsize_t right) {
    using reg_t = typename vtype::reg_t;
    type_t samples[vtype::numlanes];
    arrsize_t delta = (right - left) / vtype::numlanes;
    for (int i = 0; i < vtype::numlanes; i++) {
        samples[i] = arr[left + i * delta];
    }
    reg_t rand_vec = vtype::loadu(samples);
    reg_t sort = vtype::sort_vec(rand_vec);

    return ((type_t *)&sort)[vtype::numlanes / 2];
}

template <typename vtype, typename type_t>
X86_SIMD_SORT_INLINE type_t get_pivot_blocks(type_t *arr, const arrsize_t left,
                                             const arrsize_t right) {
    if (right - left <= 1024) {
        return get_pivot<vtype>(arr, left, right);
    }

    using reg_t = typename vtype::reg_t;
    constexpr int numVecs = 5;

    arrsize_t width = (right - vtype::numlanes) - left;
    arrsize_t delta = width / numVecs;

    reg_t vecs[numVecs];
    // Load data
    for (int i = 0; i < numVecs; i++) {
        vecs[i] = vtype::loadu(arr + left + delta * i);
    }

    // Implement sorting network (from
    // https://bertdobbelaere.github.io/sorting_networks.html)
    COEX<vtype>(vecs[0], vecs[3]);
    COEX<vtype>(vecs[1], vecs[4]);

    COEX<vtype>(vecs[0], vecs[2]);
    COEX<vtype>(vecs[1], vecs[3]);

    COEX<vtype>(vecs[0], vecs[1]);
    COEX<vtype>(vecs[2], vecs[4]);

    COEX<vtype>(vecs[1], vecs[2]);
    COEX<vtype>(vecs[3], vecs[4]);

    COEX<vtype>(vecs[2], vecs[3]);

    // Calculate median of the middle vector
    reg_t &vec = vecs[numVecs / 2];
    vec = vtype::sort_vec(vec);

    type_t data[vtype::numlanes];
    vtype::storeu(data, vec);
    return data[vtype::numlanes / 2];
}
@@ -42,15 +42,10 @@ import static java.lang.String.LATIN1;
|
|||||||
|
|
||||||
final class StringUTF16 {
|
final class StringUTF16 {
|
||||||
|
|
||||||
|
// Return a new byte array for a UTF16-coded string for len chars
|
||||||
|
// Throw an exception if out of range
|
||||||
public static byte[] newBytesFor(int len) {
|
public static byte[] newBytesFor(int len) {
|
||||||
if (len < 0) {
|
return new byte[newBytesLength(len)];
|
||||||
throw new NegativeArraySizeException();
|
|
||||||
}
|
|
||||||
if (len > MAX_LENGTH) {
|
|
||||||
throw new OutOfMemoryError("UTF16 String size is " + len +
|
|
||||||
", should be less than " + MAX_LENGTH);
|
|
||||||
}
|
|
||||||
return new byte[len << 1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the size of a UTF16-coded string
|
// Check the size of a UTF16-coded string
|
||||||
@@ -59,7 +54,7 @@ final class StringUTF16 {
|
|||||||
if (len < 0) {
|
if (len < 0) {
|
||||||
throw new NegativeArraySizeException();
|
throw new NegativeArraySizeException();
|
||||||
}
|
}
|
||||||
if (len > MAX_LENGTH) {
|
if (len >= MAX_LENGTH) {
|
||||||
throw new OutOfMemoryError("UTF16 String size is " + len +
|
throw new OutOfMemoryError("UTF16 String size is " + len +
|
||||||
", should be less than " + MAX_LENGTH);
|
", should be less than " + MAX_LENGTH);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1647,7 +1647,7 @@ public class Thread implements Runnable {
|
|||||||
* interrupt the wait.
|
* interrupt the wait.
|
||||||
* For more information, see
|
* For more information, see
|
||||||
* <a href="{@docRoot}/java.base/java/lang/doc-files/threadPrimitiveDeprecation.html">Why
|
* <a href="{@docRoot}/java.base/java/lang/doc-files/threadPrimitiveDeprecation.html">Why
|
||||||
* are Thread.stop, Thread.suspend and Thread.resume Deprecated?</a>.
|
* is Thread.stop deprecated and the ability to stop a thread removed?</a>.
|
||||||
*/
|
*/
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
@Deprecated(since="1.2", forRemoval=true)
|
||||||
public final void stop() {
|
public final void stop() {
|
||||||
@@ -1788,44 +1788,6 @@ public class Thread implements Runnable {
|
|||||||
return eetop != 0;
|
return eetop != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @throws UnsupportedOperationException always
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to suspend a thread.
|
|
||||||
* It was inherently deadlock-prone. If the target thread held a lock on
|
|
||||||
* a monitor protecting a critical system resource when it was suspended,
|
|
||||||
* no thread could access the resource until the target thread was resumed.
|
|
||||||
* If the thread intending to resume the target thread attempted to lock
|
|
||||||
* the monitor prior to calling {@code resume}, deadlock would result.
|
|
||||||
* Such deadlocks typically manifested themselves as "frozen" processes.
|
|
||||||
* For more information, see
|
|
||||||
* <a href="{@docRoot}/java.base/java/lang/doc-files/threadPrimitiveDeprecation.html">Why
|
|
||||||
* are Thread.stop, Thread.suspend and Thread.resume Deprecated?</a>.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void suspend() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @throws UnsupportedOperationException always
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to resume a thread
|
|
||||||
* suspended with {@link #suspend()}. Suspending a thread was
|
|
||||||
* inherently deadlock-prone.
|
|
||||||
* For more information, see
|
|
||||||
* <a href="{@docRoot}/java.base/java/lang/doc-files/threadPrimitiveDeprecation.html">Why
|
|
||||||
* are Thread.stop, Thread.suspend and Thread.resume Deprecated?</a>.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void resume() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Changes the priority of this thread.
|
* Changes the priority of this thread.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1995, 2022, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1995, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@@ -554,17 +554,6 @@ public class ThreadGroup implements Thread.UncaughtExceptionHandler {
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to stop all threads in
|
|
||||||
* the thread group. It was inherently unsafe.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void stop() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupts all {@linkplain Thread#isAlive() live} platform threads in
|
* Interrupts all {@linkplain Thread#isAlive() live} platform threads in
|
||||||
* this thread group and its subgroups.
|
* this thread group and its subgroups.
|
||||||
@@ -587,28 +576,6 @@ public class ThreadGroup implements Thread.UncaughtExceptionHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to suspend all threads
|
|
||||||
* in the thread group.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void suspend() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Throws {@code UnsupportedOperationException}.
|
|
||||||
*
|
|
||||||
* @deprecated This method was originally specified to resume all threads
|
|
||||||
* in the thread group.
|
|
||||||
*/
|
|
||||||
@Deprecated(since="1.2", forRemoval=true)
|
|
||||||
public final void resume() {
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Does nothing.
|
* Does nothing.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -202,7 +202,7 @@ public sealed interface ModuleAttribute
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the module flags
|
* Sets the module version
|
||||||
* @param version the module version
|
* @param version the module version
|
||||||
* @return this builder
|
* @return this builder
|
||||||
*/
|
*/
|
||||||
@@ -1,6 +1,6 @@
 <!doctype html>
 <!--
-Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
 DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

 This code is free software; you can redistribute it and/or modify it
@@ -158,173 +158,5 @@ wouldn't respond to <code>Thread.stop</code> either.</em> Such
 cases include deliberate denial-of-service attacks, and I/O
 operations for which thread.stop and thread.interrupt do not work
 properly.</p>
-<hr>
-<h2>Why are <code>Thread.suspend</code> and
-<code>Thread.resume</code> deprecated and the ability to suspend or
-resume a thread removed?</h2>
-<p><code>Thread.suspend</code> was inherently deadlock-prone. If the
-target thread held a lock on a monitor protecting a critical
-system resource when it is suspended, no thread could access the
-resource until the target thread was resumed. If the thread intending
-to resume the target thread attempted to lock the monitor prior
-to calling <code>resume</code>, deadlock resulted. Such deadlocks
-typically manifest themselves as "frozen" processes.</p>
-<hr>
-<h2>What should I use instead of <code>Thread.suspend</code> and
-<code>Thread.resume</code>?</h2>
-<p>As with <code>Thread.stop</code>, the prudent approach is to
-have the "target thread" poll a variable indicating the desired
-state of the thread (active or suspended). When the desired state
-is suspended, the thread waits using <code>Object.wait</code>. When
-the thread is resumed, the target thread is notified using
-<code>Object.notify</code>.</p>
-<p>For example, suppose your applet contains the following
-mousePressed event handler, which toggles the state of a thread
-called <code>blinker</code>:</p>
-<pre>
-    private boolean threadSuspended;
-
-    public void mousePressed(MouseEvent e) {
-        e.consume();
-
-        if (threadSuspended)
-            blinker.resume();
-        else
-            blinker.suspend(); // DEADLOCK-PRONE!
-
-        threadSuspended = !threadSuspended;
-    }
-</pre>
-You can avoid the use of <code>Thread.suspend</code> and
-<code>Thread.resume</code> by replacing the event handler above
-with:
-<pre>
-    public synchronized void mousePressed(MouseEvent e) {
-        e.consume();
-
-        threadSuspended = !threadSuspended;
-
-        if (!threadSuspended)
-            notify();
-    }
-</pre>
-and adding the following code to the "run loop":
-<pre>
-    synchronized(this) {
-        while (threadSuspended)
-            wait();
-    }
-</pre>
-The <code>wait</code> method throws the
-<code>InterruptedException</code>, so it must be inside a <code>try
-... catch</code> clause. It's fine to put it in the same clause as
-the <code>sleep</code>. The check should follow (rather than
-precede) the <code>sleep</code> so the window is immediately
-repainted when the thread is "resumed." The resulting
-<code>run</code> method follows:
-<pre>
-    public void run() {
-        while (true) {
-            try {
-                Thread.sleep(interval);
-
-                synchronized(this) {
-                    while (threadSuspended)
-                        wait();
-                }
-            } catch (InterruptedException e){
-            }
-            repaint();
-        }
-    }
-</pre>
-Note that the <code>notify</code> in the <code>mousePressed</code>
-method and the <code>wait</code> in the <code>run</code> method are
-inside <code>synchronized</code> blocks. This is required by the
-language, and ensures that <code>wait</code> and
-<code>notify</code> are properly serialized. In practical terms,
-this eliminates race conditions that could cause the "suspended"
-thread to miss a <code>notify</code> and remain suspended
-indefinitely.
-<p>While the cost of synchronization in Java is decreasing as the
-platform matures, it will never be free. A simple trick can be used
-to remove the synchronization that we've added to each iteration of
-the "run loop." The synchronized block that was added is replaced
-by a slightly more complex piece of code that enters a synchronized
-block only if the thread has actually been suspended:</p>
-<pre>
-    if (threadSuspended) {
-        synchronized(this) {
-            while (threadSuspended)
-                wait();
-        }
-    }
-</pre>
-<p>In the absence of explicit synchronization,
-<code>threadSuspended</code> must be made <code>volatile</code> to ensure
-prompt communication of the suspend-request.</p>
-The resulting <code>run</code> method is:
-<pre>
-    private volatile boolean threadSuspended;
-
-    public void run() {
-        while (true) {
-            try {
-                Thread.sleep(interval);
-
-                if (threadSuspended) {
-                    synchronized(this) {
-                        while (threadSuspended)
-                            wait();
-                    }
-                }
-            } catch (InterruptedException e){
-            }
-            repaint();
-        }
-    }
-</pre>
-<hr>
-<h2>Can I combine the two techniques to produce a thread that may
-be safely "stopped" or "suspended"?</h2>
-Yes, it's reasonably straightforward. The one subtlety is that the
-target thread may already be suspended at the time that another
-thread tries to stop it. If the <code>stop</code> method merely sets
-the state variable (<code>blinker</code>) to null, the target thread
-will remain suspended (waiting on the monitor), rather than exiting
-gracefully as it should. If the applet is restarted, multiple
-threads could end up waiting on the monitor at the same time,
-resulting in erratic behavior.
-<p>To rectify this situation, the <code>stop</code> method must ensure
-that the target thread resumes immediately if it is suspended. Once
-the target thread resumes, it must recognize immediately that it
-has been stopped, and exit gracefully. Here's how the resulting
-<code>run</code> and <code>stop</code> methods look:</p>
-<pre>
-    public void run() {
-        Thread thisThread = Thread.currentThread();
-        while (blinker == thisThread) {
-            try {
-                Thread.sleep(interval);
-
-                synchronized(this) {
-                    while (threadSuspended && blinker==thisThread)
-                        wait();
-                }
-            } catch (InterruptedException e){
-            }
-            repaint();
-        }
-    }
-
-    public synchronized void stop() {
-        blinker = null;
-        notify();
-    }
-</pre>
-If the <code>stop</code> method calls <code>Thread.interrupt</code>, as
-described above, it needn't call <code>notify</code> as well, but it
-still must be synchronized. This ensures that the target thread
-won't miss an interrupt due to a race condition.
 </body>
 </html>
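The FAQ deleted above documented the wait/notify replacement for suspension. The same cooperative idea can be stated more compactly with java.util.concurrent.locks.LockSupport; the Pauser class and its method names below are hypothetical, a minimal sketch rather than anything from this change:

    import java.util.concurrent.locks.LockSupport;

    // Cooperative suspend/resume without Thread.suspend()/resume():
    // the worker parks itself when asked, so it never blocks while
    // holding an application monitor, avoiding the classic deadlock.
    public class Pauser {
        private volatile boolean suspended;
        private volatile Thread worker;

        public void runLoop() {
            worker = Thread.currentThread();
            while (!Thread.currentThread().isInterrupted()) {
                while (suspended) {
                    LockSupport.park(this); // re-checked in a loop; spurious wakeups are harmless
                }
                // ... one unit of work ...
            }
        }

        public void suspendWorker() { suspended = true; }

        public void resumeWorker() {
            suspended = false;
            LockSupport.unpark(worker); // no-op if the worker is not currently parked
        }
    }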
@@ -631,6 +631,9 @@ public sealed interface MemoryLayout
  * <li>The accessed memory segment must be
  * {@link MemorySegment#isAccessibleBy(Thread) accessible} from the thread
  * performing the access operation, or a {@link WrongThreadException} is thrown.</li>
+ * <li>For write operations, the accessed memory segment must not be
+ * {@link MemorySegment#isReadOnly() read only}, or an
+ * {@link IllegalArgumentException} is thrown.</li>
  * <li>The {@linkplain MemorySegment#scope() scope} associated with the accessed
  * segment must be {@linkplain MemorySegment.Scope#isAlive() alive}, or an
  * {@link IllegalStateException} is thrown.</li>
@@ -869,7 +869,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * this segment is not {@linkplain Scope#isAlive() alive}
      * @throws WrongThreadException if this method is called from a thread {@code T},
      * such that {@code isAccessibleBy(T) == false}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     MemorySegment fill(byte value);
@@ -894,7 +894,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * {@code src} is not {@linkplain Scope#isAlive() alive}
      * @throws WrongThreadException if this method is called from a thread {@code T},
      * such that {@code src.isAccessibleBy(T) == false}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      * @return this segment
      */
@@ -1269,6 +1269,8 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * this segment is not {@linkplain Scope#isAlive() alive}
      * @throws WrongThreadException if this method is called from a thread {@code T},
      * such that {@code isAccessibleBy(T) == false}
+     * @throws IllegalArgumentException if this segment is
+     * {@linkplain #isReadOnly() read-only}
      */
     void setString(long offset, String str);

@@ -1306,6 +1308,8 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * such that {@code isAccessibleBy(T) == false}
      * @throws IllegalArgumentException if {@code charset} is not a
      * {@linkplain StandardCharsets standard charset}
+     * @throws IllegalArgumentException if this segment is
+     * {@linkplain #isReadOnly() read-only}
      */
     void setString(long offset, String str, Charset charset);

@@ -1493,7 +1497,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - bytes}
      * @throws IndexOutOfBoundsException if either {@code srcOffset},
      * {@code dstOffset} or {@code bytes} are {@code < 0}
-     * @throws UnsupportedOperationException if {@code dstSegment} is
+     * @throws IllegalArgumentException if {@code dstSegment} is
      * {@linkplain #isReadOnly() read-only}
      */
     @ForceInline
@@ -1552,7 +1556,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * {@code dstSegment} is not {@linkplain Scope#isAlive() alive}
      * @throws WrongThreadException if this method is called from a thread {@code T},
      * such that {@code dstSegment.isAccessibleBy(T) == false}
-     * @throws UnsupportedOperationException if {@code dstSegment} is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if {@code dstSegment} is {@linkplain #isReadOnly() read-only}
      * @throws IndexOutOfBoundsException if {@code elementCount * srcLayout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code elementCount * dtsLayout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code srcOffset > srcSegment.byteSize() - (elementCount * srcLayout.byteSize())}
@@ -1605,7 +1609,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfByte layout, long offset, byte value);
@@ -1643,7 +1647,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfBoolean layout, long offset, boolean value);
@@ -1681,7 +1685,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfChar layout, long offset, char value);
@@ -1719,7 +1723,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfShort layout, long offset, short value);
@@ -1757,7 +1761,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfInt layout, long offset, int value);
@@ -1795,7 +1799,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfFloat layout, long offset, float value);
@@ -1833,7 +1837,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfLong layout, long offset, long value);
@@ -1871,7 +1875,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the provided layout
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is
+     * @throws IllegalArgumentException if this segment is
      * {@linkplain #isReadOnly() read-only}
      */
     void set(ValueLayout.OfDouble layout, long offset, double value);
@@ -1921,8 +1925,10 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IndexOutOfBoundsException if {@code offset > byteSize() - layout.byteSize()}
      * @throws UnsupportedOperationException if this segment is
      * {@linkplain #isReadOnly() read-only}
-     * @throws UnsupportedOperationException if {@code value} is not a
+     * @throws IllegalArgumentException if {@code value} is not a
      * {@linkplain #isNative() native} segment
+     * @throws IllegalArgumentException if this segment is
+     * {@linkplain #isReadOnly() read-only}
      */
     void set(AddressLayout layout, long offset, MemorySegment value);

@@ -2055,7 +2061,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfByte layout, long index, byte value);

@@ -2078,7 +2084,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
     */
     void setAtIndex(ValueLayout.OfBoolean layout, long index, boolean value);

@@ -2101,7 +2107,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfShort layout, long index, short value);

@@ -2146,7 +2152,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfInt layout, long index, int value);

@@ -2191,7 +2197,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfFloat layout, long index, float value);

@@ -2236,7 +2242,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfLong layout, long index, long value);

@@ -2281,7 +2287,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IllegalArgumentException if {@code layout.byteAlignment() > layout.byteSize()}
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
-     * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if this segment is {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(ValueLayout.OfDouble layout, long index, double value);

@@ -2336,7 +2342,9 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code index * layout.byteSize() > byteSize() - layout.byteSize()}
      * @throws UnsupportedOperationException if this segment is {@linkplain #isReadOnly() read-only}
-     * @throws UnsupportedOperationException if {@code value} is not a {@linkplain #isNative() native} segment
+     * @throws IllegalArgumentException if {@code value} is not a {@linkplain #isNative() native} segment
+     * @throws IllegalArgumentException if this segment is
+     * {@linkplain #isReadOnly() read-only}
      */
     void setAtIndex(AddressLayout layout, long index, MemorySegment value);

@@ -2460,7 +2468,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
      * <a href="MemorySegment.html#segment-alignment">incompatible with the alignment constraint</a>
      * in the source element layout
      * @throws IllegalArgumentException if {@code dstLayout.byteAlignment() > dstLayout.byteSize()}
-     * @throws UnsupportedOperationException if {@code dstSegment} is {@linkplain #isReadOnly() read-only}
+     * @throws IllegalArgumentException if {@code dstSegment} is {@linkplain #isReadOnly() read-only}
      * @throws IndexOutOfBoundsException if {@code elementCount * dstLayout.byteSize()} overflows
      * @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - (elementCount * dstLayout.byteSize())}
      * @throws IndexOutOfBoundsException if {@code srcIndex > srcArray.length - elementCount}
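Taken together, these javadoc hunks respecify read-only violations from UnsupportedOperationException to IllegalArgumentException. A minimal sketch of the observable difference, assuming a JDK that includes this change (my illustration, not part of the diff):

    import java.lang.foreign.Arena;
    import java.lang.foreign.MemorySegment;
    import java.lang.foreign.ValueLayout;

    public class ReadOnlyWriteDemo {
        public static void main(String[] args) {
            try (Arena arena = Arena.ofConfined()) {
                MemorySegment segment = arena.allocate(ValueLayout.JAVA_INT).asReadOnly();
                try {
                    segment.set(ValueLayout.JAVA_INT, 0, 42); // write to a read-only segment
                } catch (IllegalArgumentException e) {        // was UnsupportedOperationException
                    System.out.println("rejected: " + e.getMessage());
                }
            }
        }
    }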
@@ -350,7 +350,7 @@ public interface SegmentAllocator {
      *
      * @param layout the layout of the block of memory to be allocated
      * @param value the value to be set in the newly allocated memory segment
-     * @throws UnsupportedOperationException if {@code value} is not
+     * @throws IllegalArgumentException if {@code value} is not
      * a {@linkplain MemorySegment#isNative() native} segment
      */
     default MemorySegment allocateFrom(AddressLayout layout, MemorySegment value) {
@@ -670,9 +670,11 @@ public interface SegmentAllocator {
      *
      * @param segment the segment from which the returned allocator should slice from
      * @return a new slicing allocator
+     * @throws IllegalArgumentException if the {@code segment} is
+     * {@linkplain MemorySegment#isReadOnly() read-only}
      */
     static SegmentAllocator slicingAllocator(MemorySegment segment) {
-        Objects.requireNonNull(segment);
+        assertWritable(segment);
         return new SlicingAllocator(segment);
     }

@@ -700,9 +702,19 @@ public interface SegmentAllocator {
      * @param segment the memory segment to be recycled by the returned allocator
      * @return an allocator that recycles an existing segment upon each new
      * allocation request
+     * @throws IllegalArgumentException if the {@code segment} is
+     * {@linkplain MemorySegment#isReadOnly() read-only}
      */
     static SegmentAllocator prefixAllocator(MemorySegment segment) {
-        return (AbstractMemorySegmentImpl)Objects.requireNonNull(segment);
+        assertWritable(segment);
+        return (AbstractMemorySegmentImpl)segment;
+    }
+
+    private static void assertWritable(MemorySegment segment) {
+        // Implicit null check
+        if (segment.isReadOnly()) {
+            throw new IllegalArgumentException("read-only segment");
+        }
     }

     @ForceInline
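A short usage sketch of the new fail-fast behavior (my illustration, not from the patch): both factories now reject a read-only backing segment at construction time rather than on the first allocation.

    import java.lang.foreign.Arena;
    import java.lang.foreign.MemorySegment;
    import java.lang.foreign.SegmentAllocator;

    public class SlicingAllocatorDemo {
        public static void main(String[] args) {
            try (Arena arena = Arena.ofConfined()) {
                MemorySegment readOnly = arena.allocate(64).asReadOnly();
                try {
                    SegmentAllocator.slicingAllocator(readOnly);
                } catch (IllegalArgumentException e) {
                    System.out.println("rejected eagerly: " + e.getMessage()); // "read-only segment"
                }
            }
        }
    }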
@@ -1841,7 +1841,7 @@ public class MethodHandles {
      * <a href="MethodHandles.Lookup.html#secmgr">refuses access</a>
      * @throws NullPointerException if {@code bytes} is {@code null}
      * @since 9
-     * @see Lookup#privateLookupIn
+     * @see MethodHandles#privateLookupIn
      * @see Lookup#dropLookupMode
      * @see ClassLoader#defineClass(String,byte[],int,int,ProtectionDomain)
      */
@@ -29,6 +29,7 @@ package java.nio;

 import java.lang.foreign.MemorySegment;
 import java.util.Objects;
+import jdk.internal.util.ArraysSupport;

 /**
 #if[rw]
@@ -705,6 +706,9 @@ class Heap$Type$Buffer$RW$
                         addr, segment)));
     }

+    public int hashCode() {
+        return ArraysSupport.vectorizedHashCode(hb, ix(position()), remaining(), 1, ArraysSupport.T_BYTE);
+    }

 #end[byte]

@@ -733,6 +737,9 @@ class Heap$Type$Buffer$RW$
                 offset, segment);
     }

+    public int hashCode() {
+        return ArraysSupport.vectorizedHashCode(hb, ix(position()), remaining(), 1, ArraysSupport.T_CHAR);
+    }
 #end[char]


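The template change above overrides hashCode() for heap byte and char buffers with the intrinsified ArraysSupport.vectorizedHashCode, presumably for speed; the contract that equal buffers hash equally must still hold. A quick check, my illustration rather than part of the patch:

    import java.nio.ByteBuffer;

    public class BufferHashDemo {
        public static void main(String[] args) {
            ByteBuffer a = ByteBuffer.wrap(new byte[] {1, 2, 3, 4});
            ByteBuffer b = ByteBuffer.allocate(4).put(new byte[] {1, 2, 3, 4}).flip();
            // equals() compares the remaining elements, so equal buffers must hash equally
            System.out.println(a.equals(b) + " " + (a.hashCode() == b.hashCode())); // true true
        }
    }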
@@ -88,7 +88,6 @@ import java.util.Arrays;
  * <p>
  * Below is an example of constructing a ChoiceFormat with arrays to format
  * and parse values:
- * <blockquote>
  * {@snippet lang=java :
  * double[] limits = {1,2,3,4,5,6,7};
  * String[] dayOfWeekNames = {"Sun","Mon","Tue","Wed","Thur","Fri","Sat"};
@@ -100,34 +99,27 @@ import java.util.Arrays;
  *       + form.parse(form.format(i),status));
  * }
  * }
- * </blockquote>
+ *
+ * <p>
  * For more sophisticated patterns, {@code ChoiceFormat} can be used with
  * {@link MessageFormat} to produce accurate forms for singular and plural:
- * <blockquote>
  * {@snippet lang=java :
- * double[] filelimits = {0,1,2};
- * String[] filepart = {"are no files","is one file","are {2} files"};
- * ChoiceFormat fileform = new ChoiceFormat(filelimits, filepart);
- * Format[] testFormats = {fileform, null, NumberFormat.getInstance()};
- * MessageFormat pattform = new MessageFormat("There {0} on {1}");
- * pattform.setFormats(testFormats);
- * Object[] testArgs = {null, "ADisk", null};
- * for (int i = 0; i < 4; ++i) {
- *     testArgs[0] = Integer.valueOf(i);
- *     testArgs[2] = testArgs[0];
- *     System.out.println(pattform.format(testArgs));
- * }
+ * MessageFormat msgFmt = new MessageFormat("The disk \"{0}\" contains {1}.");
+ * double[] fileLimits = {0,1,2};
+ * String[] filePart = {"no files","one file","{1,number} files"};
+ * ChoiceFormat fileChoices = new ChoiceFormat(fileLimits, filePart);
+ * msgFmt.setFormatByArgumentIndex(1, fileChoices);
+ * Object[] args = {"MyDisk", 1273};
+ * System.out.println(msgFmt.format(args));
  * }
- * }
- * </blockquote>
- * Would output the following:
- * <blockquote>
- * <pre>{@code
- * There are no files on ADisk
- * There is one file on ADisk
- * There are 2 files on ADisk
- * There are 3 files on ADisk
- * }</pre>
- * </blockquote>
+ * The output with different values for {@code fileCount}:
+ * <blockquote><pre>
+ * The disk "MyDisk" contains no files.
+ * The disk "MyDisk" contains one file.
+ * The disk "MyDisk" contains 1,273 files.
+ * </pre></blockquote>
+ * See {@link MessageFormat##pattern_caveats MessageFormat} for caveats regarding
+ * {@code MessageFormat} patterns within a {@code ChoiceFormat} pattern.
  *
  * <h2><a id="patterns">Patterns</a></h2>
  * A {@code ChoiceFormat} pattern has the following syntax:
@@ -194,7 +186,6 @@ import java.util.Arrays;
  * {@code new ChoiceFormat("1# ''one'' ").format(1)} returns {@code " 'one' "}.
  *
  * <p>Below is an example of constructing a ChoiceFormat with a pattern:
- * <blockquote>
  * {@snippet lang=java :
  * ChoiceFormat fmt = new ChoiceFormat(
  *     "-1#is negative| 0#is zero or fraction | 1#is one |1.0<is 1+ |2#is two |2<is more than 2.");
@@ -210,7 +201,6 @@ import java.util.Arrays;
  * System.out.println(fmt.format(Double.NaN)); // outputs "is negative"
  * System.out.println(fmt.format(Double.POSITIVE_INFINITY)); // outputs "is more than 2."
  * }
- * </blockquote>
  *
  * <h2><a id="synchronization">Synchronization</a></h2>
  *
@@ -231,7 +231,6 @@ import java.util.Objects;
  * <p>
  * The first example uses the static method {@code MessageFormat.format},
  * which internally creates a {@code MessageFormat} for one-time use:
- * <blockquote>
  * {@snippet lang=java :
  * int planet = 7;
  * String event = "a disturbance in the Force";
@@ -240,7 +239,6 @@ import java.util.Objects;
  *     "At {1,time} on {1,date}, there was {2} on planet {0,number,integer}.",
  *     planet, new Date(), event);
  * }
- * </blockquote>
  * The output is:
  * <blockquote><pre>
  * At 12:30 PM on Jul 3, 2053, there was a disturbance in the Force on planet 7.
@@ -249,7 +247,6 @@ import java.util.Objects;
  * <p>
  * The following example creates a {@code MessageFormat} instance that
  * can be used repeatedly:
- * <blockquote>
  * {@snippet lang=java :
  * int fileCount = 1273;
  * String diskName = "MyDisk";
@@ -260,7 +257,6 @@ import java.util.Objects;
  *
  * System.out.println(form.format(testArgs));
  * }
- * </blockquote>
  * The output with different values for {@code fileCount}:
  * <blockquote><pre>
  * The disk "MyDisk" contains 0 file(s).
@@ -269,23 +265,17 @@ import java.util.Objects;
  * </pre></blockquote>
  *
  * <p>
- * For more sophisticated patterns, you can use a {@code ChoiceFormat}
- * to produce correct forms for singular and plural:
- * <blockquote>
+ * For more sophisticated patterns, {@link ChoiceFormat} can be used with
+ * {@code MessageFormat} to produce accurate forms for singular and plural:
  * {@snippet lang=java :
- * MessageFormat form = new MessageFormat("The disk \"{1}\" contains {0}.");
- * double[] filelimits = {0,1,2};
- * String[] filepart = {"no files","one file","{0,number} files"};
- * ChoiceFormat fileform = new ChoiceFormat(filelimits, filepart);
- * form.setFormatByArgumentIndex(0, fileform);
- *
- * int fileCount = 1273;
- * String diskName = "MyDisk";
- * Object[] testArgs = {Long.valueOf(fileCount), diskName};
- *
- * System.out.println(form.format(testArgs));
+ * MessageFormat msgFmt = new MessageFormat("The disk \"{0}\" contains {1}.");
+ * double[] fileLimits = {0,1,2};
+ * String[] filePart = {"no files","one file","{1,number} files"};
+ * ChoiceFormat fileChoices = new ChoiceFormat(fileLimits, filePart);
+ * msgFmt.setFormatByArgumentIndex(1, fileChoices);
+ * Object[] args = {"MyDisk", 1273};
+ * System.out.println(msgFmt.format(args));
  * }
- * </blockquote>
  * The output with different values for {@code fileCount}:
  * <blockquote><pre>
  * The disk "MyDisk" contains no files.
@@ -297,24 +287,26 @@ import java.util.Objects;
  * You can create the {@code ChoiceFormat} programmatically, as in the
  * above example, or by using a pattern. See {@link ChoiceFormat}
  * for more information.
- * <blockquote>
  * {@snippet lang=java :
- * form.applyPattern(
- *    "There {0,choice,0#are no files|1#is one file|1<are {0,number,integer} files}.");
+ * msgFmt.applyPattern(
+ *    "There {0,choice,0#are no files|1#is one file|1<are {1,number,integer} files}.");
  * }
- * </blockquote>
  *
  * <p>
- * <strong>Note:</strong> As we see above, the string produced
- * by a {@code ChoiceFormat} in {@code MessageFormat} is treated as special;
- * occurrences of '{' are used to indicate subformats, and cause recursion.
+ * <strong id="pattern_caveats">Notes:</strong> As seen in the previous snippet,
+ * the string produced by a {@code ChoiceFormat} in {@code MessageFormat} is
+ * treated as special; occurrences of '{' are used to indicate subformats, and
+ * cause recursion. If a {@code FormatElement} is defined in the {@code ChoiceFormat}
+ * pattern, it will only be formatted according to the {@code FormatType} and
+ * {@code FormatStyle} pattern provided. The associated subformats of the
+ * top level {@code MessageFormat} will not be applied to the {@code FormatElement}
+ * defined in the {@code ChoiceFormat} pattern.
  * If you create both a {@code MessageFormat} and {@code ChoiceFormat}
  * programmatically (instead of using the string patterns), then be careful not to
  * produce a format that recurses on itself, which will cause an infinite loop.
  * <p>
  * When a single argument is parsed more than once in the string, the last match
  * will be the final result of the parsing. For example,
- * <blockquote>
  * {@snippet lang=java :
  * MessageFormat mf = new MessageFormat("{0,number,#.##}, {0,number,#.#}");
  * Object[] objs = {Double.valueOf(3.1415)};
@@ -323,20 +315,17 @@ import java.util.Objects;
  * objs = mf.parse(result, new ParsePosition(0));
  * // objs now equals {Double.valueOf(3.1)}
  * }
- * </blockquote>
  *
  * <p>
  * Likewise, parsing with a {@code MessageFormat} object using patterns containing
  * multiple occurrences of the same argument would return the last match. For
  * example,
- * <blockquote>
  * {@snippet lang=java :
  * MessageFormat mf = new MessageFormat("{0}, {0}, {0}");
  * String forParsing = "x, y, z";
  * Object[] objs = mf.parse(forParsing, new ParsePosition(0));
  * // objs now equals {new String("z")}
  * }
- * </blockquote>
  *
  * <h3><a id="synchronization">Synchronization</a></h3>
  *
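A standalone, compilable version of the rewritten snippet; this is my assembly of the diff's own example into a main method, and the loop over several counts is my addition:

    import java.text.ChoiceFormat;
    import java.text.MessageFormat;

    public class FileCountDemo {
        public static void main(String[] args) {
            MessageFormat msgFmt = new MessageFormat("The disk \"{0}\" contains {1}.");
            double[] fileLimits = {0, 1, 2};
            String[] filePart = {"no files", "one file", "{1,number} files"};
            ChoiceFormat fileChoices = new ChoiceFormat(fileLimits, filePart);
            msgFmt.setFormatByArgumentIndex(1, fileChoices);
            for (Object count : new Object[] {0, 1, 1273}) {
                // "{1,number} files" contains '{', so it recurses into the MessageFormat
                System.out.println(msgFmt.format(new Object[] {"MyDisk", count}));
            }
        }
    }

Printed in order: "no files", "one file", and "1,273 files", matching the output block the new javadoc shows.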
@@ -50,6 +50,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.spi.LocaleNameProvider;
 import java.util.stream.Stream;

+import jdk.internal.util.StaticProperty;
 import jdk.internal.vm.annotation.Stable;

 import sun.security.action.GetPropertyAction;
@@ -1053,11 +1054,10 @@ public final class Locale implements Cloneable, Serializable {

     private static Locale initDefault() {
         String language, region, script, country, variant;
-        Properties props = GetPropertyAction.privilegedGetProperties();
-        language = props.getProperty("user.language", "en");
+        language = StaticProperty.USER_LANGUAGE;
         // for compatibility, check for old user.region property
-        region = props.getProperty("user.region");
-        if (region != null) {
+        region = StaticProperty.USER_REGION;
+        if (!region.isEmpty()) {
             // region can be of form country, country_variant, or _variant
             int i = region.indexOf('_');
             if (i >= 0) {
@@ -1069,30 +1069,24 @@ public final class Locale implements Cloneable, Serializable {
             }
             script = "";
         } else {
-            script = props.getProperty("user.script", "");
-            country = props.getProperty("user.country", "");
-            variant = props.getProperty("user.variant", "");
+            script = StaticProperty.USER_SCRIPT;
+            country = StaticProperty.USER_COUNTRY;
+            variant = StaticProperty.USER_VARIANT;
         }

         return getInstance(language, script, country, variant,
-                getDefaultExtensions(props.getProperty("user.extensions", ""))
+                getDefaultExtensions(StaticProperty.USER_EXTENSIONS)
                     .orElse(null));
     }

     private static Locale initDefault(Locale.Category category) {
-        Properties props = GetPropertyAction.privilegedGetProperties();
-
         Locale locale = Locale.defaultLocale;
         return getInstance(
-            props.getProperty(category.languageKey,
-                locale.getLanguage()),
-            props.getProperty(category.scriptKey,
-                locale.getScript()),
-            props.getProperty(category.countryKey,
-                locale.getCountry()),
-            props.getProperty(category.variantKey,
-                locale.getVariant()),
-            getDefaultExtensions(props.getProperty(category.extensionsKey, ""))
+            category == Category.DISPLAY ? StaticProperty.USER_LANGUAGE_DISPLAY : StaticProperty.USER_LANGUAGE_FORMAT,
+            category == Category.DISPLAY ? StaticProperty.USER_SCRIPT_DISPLAY : StaticProperty.USER_SCRIPT_FORMAT,
+            category == Category.DISPLAY ? StaticProperty.USER_COUNTRY_DISPLAY : StaticProperty.USER_COUNTRY_FORMAT,
+            category == Category.DISPLAY ? StaticProperty.USER_VARIANT_DISPLAY : StaticProperty.USER_VARIANT_FORMAT,
+            getDefaultExtensions(category == Category.DISPLAY ? StaticProperty.USER_EXTENSIONS_DISPLAY : StaticProperty.USER_EXTENSIONS_FORMAT)
                 .orElse(locale.getLocaleExtensions()));
     }

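Behavior-wise this reads as a refactor: the default locale is still assembled from the same user.* properties, now taken from StaticProperty's startup snapshot rather than from live system properties. A hedged check of the unchanged surface behavior (my illustration; the printed values are assumptions about the launch flags shown in the comment):

    import java.util.Locale;

    public class LocaleDemo {
        // Run with e.g.:  java -Duser.language=fr -Duser.country=CA LocaleDemo
        public static void main(String[] args) {
            System.out.println(Locale.getDefault());                         // fr_CA
            System.out.println(Locale.getDefault(Locale.Category.DISPLAY));  // fr_CA unless user.language.display etc. are set
            System.out.println(Locale.getDefault(Locale.Category.FORMAT));   // fr_CA unless user.language.format etc. are set
        }
    }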
@@ -1222,16 +1222,17 @@ public class ZipFile implements ZipConstants, Closeable {
             int nlen = CENNAM(cen, pos);
             int elen = CENEXT(cen, pos);
             int clen = CENCOM(cen, pos);
-            if (entryPos + nlen > cen.length - ENDHDR) {
+            long headerSize = (long)CENHDR + nlen + clen + elen;
+            // CEN header size + name length + comment length + extra length
+            // should not exceed 65,535 bytes per the PKWare APP.NOTE
+            // 4.4.10, 4.4.11, & 4.4.12. Also check that current CEN header will
+            // not exceed the length of the CEN array
+            if (headerSize > 0xFFFF || pos + headerSize > cen.length - ENDHDR) {
                 zerror("invalid CEN header (bad header size)");
             }

             if (elen > 0 && !DISABLE_ZIP64_EXTRA_VALIDATION) {
-                long extraStartingOffset = pos + CENHDR + nlen;
-                if ((int)extraStartingOffset != extraStartingOffset) {
-                    zerror("invalid CEN header (bad extra offset)");
-                }
-                checkExtraFields(pos, (int)extraStartingOffset, elen);
+                checkExtraFields(pos, entryPos + nlen, elen);
             } else if (elen == 0 && (CENSIZ(cen, pos) == ZIP64_MAGICVAL
                 || CENLEN(cen, pos) == ZIP64_MAGICVAL
                 || CENOFF(cen, pos) == ZIP64_MAGICVAL
@@ -1292,7 +1293,7 @@ public class ZipFile implements ZipConstants, Closeable {

             int tagBlockSize = get16(cen, currentOffset);
             currentOffset += Short.BYTES;
-            int tagBlockEndingOffset = currentOffset + tagBlockSize;
+            long tagBlockEndingOffset = (long)currentOffset + tagBlockSize;

             // The ending offset for this tag block should not go past the
             // offset for the end of the extra field
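Both hunks are overflow hardening against malformed central directories. The second is the easiest to see in isolation: with 32-bit arithmetic, a sum near the int range could wrap negative and slip past the subsequent bounds comparison, while the widening cast keeps it exact. A tiny demonstration of the wrap (my illustration; the values are arbitrary):

    public class OverflowDemo {
        public static void main(String[] args) {
            int currentOffset = Integer.MAX_VALUE - 10;
            int tagBlockSize = 0xFFFF;
            System.out.println(currentOffset + tagBlockSize);        // wraps negative: -2147418124
            System.out.println((long) currentOffset + tagBlockSize); // exact: 2147549172
        }
    }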
@@ -361,7 +361,7 @@ public abstract sealed class AbstractMemorySegmentImpl
     @ForceInline
     public void checkAccess(long offset, long length, boolean readOnly) {
         if (!readOnly && this.readOnly) {
-            throw new UnsupportedOperationException("Attempt to write a read-only segment");
+            throw new IllegalArgumentException("Attempt to write a read-only segment");
         }
         checkBounds(offset, length);
     }
|
|||||||
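Only the exception type changes: a write through a read-only MemorySegment now surfaces as IllegalArgumentException. A minimal sketch against the final java.lang.foreign API; on builds without this change, the catch block would need UnsupportedOperationException instead:

    import java.lang.foreign.Arena;
    import java.lang.foreign.MemorySegment;
    import java.lang.foreign.ValueLayout;

    public class ReadOnlySegmentDemo {
        public static void main(String[] args) {
            try (Arena arena = Arena.ofConfined()) {
                MemorySegment seg = arena.allocate(ValueLayout.JAVA_INT).asReadOnly();
                try {
                    seg.set(ValueLayout.JAVA_INT, 0, 42);  // write through a read-only view
                } catch (IllegalArgumentException e) {     // was UnsupportedOperationException
                    System.out.println("write rejected: " + e.getMessage());
                }
            }
        }
    }
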
@@ -57,6 +57,22 @@ public final class StaticProperty {
     private static final String OS_NAME;
     private static final String OS_ARCH;
     private static final String OS_VERSION;
+    public static final String USER_LANGUAGE;
+    public static final String USER_LANGUAGE_DISPLAY;
+    public static final String USER_LANGUAGE_FORMAT;
+    public static final String USER_SCRIPT;
+    public static final String USER_SCRIPT_DISPLAY;
+    public static final String USER_SCRIPT_FORMAT;
+    public static final String USER_COUNTRY;
+    public static final String USER_COUNTRY_DISPLAY;
+    public static final String USER_COUNTRY_FORMAT;
+    public static final String USER_VARIANT;
+    public static final String USER_VARIANT_DISPLAY;
+    public static final String USER_VARIANT_FORMAT;
+    public static final String USER_EXTENSIONS;
+    public static final String USER_EXTENSIONS_DISPLAY;
+    public static final String USER_EXTENSIONS_FORMAT;
+    public static final String USER_REGION;

     private StaticProperty() {}
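These constants capture the locale properties once, at VM startup, which is StaticProperty's purpose: later System.setProperty calls cannot leak into code that reads the cached copies. A small illustration of the mutability being fenced off, using plain System.getProperty for contrast:

    public class MutablePropsDemo {
        public static void main(String[] args) {
            String before = System.getProperty("user.language");
            System.setProperty("user.language", "zz");    // live properties are mutable
            String after = System.getProperty("user.language");
            System.out.println(before + " -> " + after);  // startup-cached copies keep 'before'
        }
    }
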
@@ -79,6 +95,22 @@ public final class StaticProperty {
         OS_NAME = getProperty(props, "os.name");
         OS_ARCH = getProperty(props, "os.arch");
         OS_VERSION = getProperty(props, "os.version");
+        USER_LANGUAGE = getProperty(props, "user.language", "en");
+        USER_LANGUAGE_DISPLAY = getProperty(props, "user.language.display", USER_LANGUAGE);
+        USER_LANGUAGE_FORMAT = getProperty(props, "user.language.format", USER_LANGUAGE);
+        USER_SCRIPT = getProperty(props, "user.script", "");
+        USER_SCRIPT_DISPLAY = getProperty(props, "user.script.display", USER_SCRIPT);
+        USER_SCRIPT_FORMAT = getProperty(props, "user.script.format", USER_SCRIPT);
+        USER_COUNTRY = getProperty(props, "user.country", "");
+        USER_COUNTRY_DISPLAY = getProperty(props, "user.country.display", USER_COUNTRY);
+        USER_COUNTRY_FORMAT = getProperty(props, "user.country.format", USER_COUNTRY);
+        USER_VARIANT = getProperty(props, "user.variant", "");
+        USER_VARIANT_DISPLAY = getProperty(props, "user.variant.display", USER_VARIANT);
+        USER_VARIANT_FORMAT = getProperty(props, "user.variant.format", USER_VARIANT);
+        USER_EXTENSIONS = getProperty(props, "user.extensions", "");
+        USER_EXTENSIONS_DISPLAY = getProperty(props, "user.extensions.display", USER_EXTENSIONS);
+        USER_EXTENSIONS_FORMAT = getProperty(props, "user.extensions.format", USER_EXTENSIONS);
+        USER_REGION = getProperty(props, "user.region", "");
     }

     private static String getProperty(Properties props, String key) {
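The initializers above rely on a three-argument getProperty(props, key, default) overload alongside the two-argument form visible at the end of the hunk. A sketch of the assumed default-chaining semantics (FallbackDemo and get are illustrative, not JDK code):

    import java.util.Properties;

    public class FallbackDemo {
        // Assumed shape of the three-arg helper: the value if set, the default otherwise.
        static String get(Properties p, String key, String def) {
            String v = p.getProperty(key);
            return v != null ? v : def;
        }

        public static void main(String[] args) {
            Properties p = new Properties();
            p.setProperty("user.language", "fr");         // only the base key is set
            String lang        = get(p, "user.language", "en");
            String langDisplay = get(p, "user.language.display", lang); // chains to base
            String langFormat  = get(p, "user.language.format", lang);
            System.out.println(lang + " " + langDisplay + " " + langFormat); // fr fr fr
        }
    }
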
Some files were not shown because too many files have changed in this diff.