8365675: Add String Unicode Case-Folding Support

Reviewed-by: rriggs, naoto, ihse
2025-12-06 01:19:28 +01:00 · 2025-12-02 19:47:18 +00:00
parent 618732ffc0
commit b97ed667db
13 changed files with 1245 additions and 212 deletions
--- a/make/ToolsJdk.gmk
+++ b/make/ToolsJdk.gmk
@@ -79,7 +79,7 @@ TOOL_GENERATEEXTRAPROPERTIES = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_too
    build.tools.generateextraproperties.GenerateExtraProperties

 TOOL_GENERATECASEFOLDING = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
-    build.tools.generatecharacter.CaseFolding
+    build.tools.generatecharacter.GenerateCaseFolding

 TOOL_MAKEZIPREPRODUCIBLE = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
    build.tools.makezipreproducible.MakeZipReproducible
--- a/make/jdk/src/classes/build/tools/generatecharacter/CaseFolding.java
+++ b/make/jdk/src/classes/build/tools/generatecharacter/CaseFolding.java
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package build.tools.generatecharacter;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.nio.file.StandardOpenOption;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-public class CaseFolding {
-
-    public static void main(String[] args) throws Throwable {
-        if (args.length != 3) {
-            System.err.println("Usage: java CaseFolding TemplateFile CaseFolding.txt CaseFolding.java");
-            System.exit(1);
-        }
-        var templateFile = Paths.get(args[0]);
-        var caseFoldingTxt = Paths.get(args[1]);
-        var genSrcFile = Paths.get(args[2]);
-        var supportedTypes = "^.*; [CTS]; .*$";
-        var caseFoldingEntries = Files.lines(caseFoldingTxt)
-            .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
-            .map(line -> {
-                String[] cols = line.split("; ");
-                return new String[] {cols[0], cols[1], cols[2]};
-            })
-            .filter(cols -> {
-                //  the folding case doesn't map back to the original char.
-                var cp1 = Integer.parseInt(cols[0], 16);
-                var cp2 = Integer.parseInt(cols[2], 16);
-                return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
-            })
-            .map(cols -> String.format("        entry(0x%s, 0x%s)", cols[0], cols[2]))
-            .collect(Collectors.joining(",\n", "", ""));
-
-        // hack, hack, hack! the logic does not pick 0131. just add manually to support 'I's.
-        // 0049; T; 0131; # LATIN CAPITAL LETTER I
-        final String T_0x0131_0x49 = String.format("        entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);
-
-        // Generate .java file
-        Files.write(
-            genSrcFile,
-            Files.lines(templateFile)
-                .map(line -> line.contains("%%%Entries") ? T_0x0131_0x49 + caseFoldingEntries : line)
-                .collect(Collectors.toList()),
-            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
-    }
-}
--- a/make/jdk/src/classes/build/tools/generatecharacter/GenerateCaseFolding.java
+++ b/make/jdk/src/classes/build/tools/generatecharacter/GenerateCaseFolding.java
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package build.tools.generatecharacter;
+
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+public class GenerateCaseFolding {
+
+    public static void main(String[] args) throws Throwable {
+        if (args.length != 3) {
+            System.err.println("Usage: java GenerateCaseFolding TemplateFile CaseFolding.txt CaseFolding.java");
+            System.exit(1);
+        }
+        var templateFile = Paths.get(args[0]);
+        var caseFoldingTxt = Paths.get(args[1]);
+        var genSrcFile = Paths.get(args[2]);
+
+        // java.lang
+        var supportedTypes = "^.*; [CF]; .*$";  // full/1:M case folding
+        String[][] caseFoldings = Files.lines(caseFoldingTxt)
+                .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
+                .map(line -> {
+                    var fields = line.split("; ");
+                    var cp = fields[0];
+                    fields = fields[2].trim().split(" ");
+                    var folding = new String[fields.length + 1];
+                    folding[0] = cp;
+                    System.arraycopy(fields, 0, folding, 1, fields.length);
+                    return folding;
+                })
+                .toArray(size -> new String[size][]);
+
+        // util.regex
+        var expandedSupportedTypes = "^.*; [CTS]; .*$";
+        var expanded_caseFoldingEntries = Files.lines(caseFoldingTxt)
+                .filter(line -> !line.startsWith("#") && line.matches(expandedSupportedTypes))
+                .map(line -> {
+                    String[] cols = line.split("; ");
+                    return new String[]{cols[0], cols[1], cols[2]};
+                })
+                .filter(cols -> {
+                    // the folding case doesn't map back to the original char.
+                    var cp1 = Integer.parseInt(cols[0], 16);
+                    var cp2 = Integer.parseInt(cols[2], 16);
+                    return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
+                })
+                .map(cols -> String.format("        entry(0x%s, 0x%s)", cols[0], cols[2]))
+                .collect(Collectors.joining(",\n", "", ""));
+
+        // hack, hack, hack! the logic does not pick 0131. just add manually to support 'I's.
+        // 0049; T; 0131; # LATIN CAPITAL LETTER I
+        final String T_0x0131_0x49 = String.format("        entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);
+
+        Files.write(
+                genSrcFile,
+                Files.lines(templateFile)
+                        .map(line -> line.contains("%%%Entries") ? genFoldingEntries(caseFoldings) : line)
+                        .map(line -> line.contains("%%%Expanded_Case_Map_Entries") ? T_0x0131_0x49 + expanded_caseFoldingEntries : line)
+                        .collect(Collectors.toList()),
+                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
+    }
+
+    private static long foldingToLong(String[] folding) {
+        int cp = Integer.parseInt(folding[0], 16);
+        long value = (long)Integer.parseInt(folding[1], 16);
+        if (!Character.isSupplementaryCodePoint(cp) && folding.length != 2) {
+            var shift = 16;
+            for (int j = 2; j < folding.length; j++) {
+                value |= (long)Integer.parseInt(folding[j], 16) << shift;
+                shift <<= 1;
+            }
+            value = value | (long) (folding.length - 1) << 48;
+        }
+        return value;
+    }
+
+    private static String genFoldingEntries(String[][] foldings) {
+        StringBuilder sb = new StringBuilder();
+        sb.append("    private static final int[] CASE_FOLDING_CPS = {\n");
+        int width = 10;
+        for (int i = 0; i < foldings.length; i++) {
+            if (i % width == 0)
+                sb.append("        ");
+            sb.append(String.format("0X%s", foldings[i][0]));
+            if (i < foldings.length - 1)
+                sb.append(", ");
+            if (i % width == width - 1 || i == foldings.length - 1)
+                sb.append("\n");
+        }
+        sb.append("    };\n\n");
+
+        sb.append("    private static final long[] CASE_FOLDING_VALUES = {\n");
+        width = 6;
+        for (int i = 0; i < foldings.length; i++) {
+            if (i % width == 0)
+                sb.append("        "); // indent
+            sb.append(String.format("0x%013xL", foldingToLong(foldings[i])));
+            if (i < foldings.length - 1)
+                sb.append(", ");
+            if (i % width == width - 1 || i == foldings.length - 1) {
+                sb.append("\n");
+            }
+        }
+        sb.append("    };\n");
+        return sb.toString();
+    }
+}
--- a/make/modules/java.base/gensrc/GensrcCharacterData.gmk
+++ b/make/modules/java.base/gensrc/GensrcCharacterData.gmk
@@ -72,5 +72,22 @@ TARGETS += $(GENSRC_CHARACTERDATA)

 ################################################################################

+
+GENSRC_STRINGCASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/lang/CaseFolding.java
+
+STRINGCASEFOLDING_TEMPLATE := $(MODULE_SRC)/share/classes/jdk/internal/lang/CaseFolding.java.template
+CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
+
+$(GENSRC_STRINGCASEFOLDING): $(BUILD_TOOLS_JDK) $(STRINGCASEFOLDING_TEMPLATE) $(CASEFOLDINGTXT)
+	$(call LogInfo, Generating $@)
+	$(call MakeTargetDir)
+	$(TOOL_GENERATECASEFOLDING) \
+	    $(STRINGCASEFOLDING_TEMPLATE) \
+	    $(CASEFOLDINGTXT) \
+	    $(GENSRC_STRINGCASEFOLDING)
+
+TARGETS += $(GENSRC_STRINGCASEFOLDING)
+
+
 endif # include guard
 include MakeIncludeEnd.gmk
--- a/make/modules/java.base/gensrc/GensrcRegex.gmk
+++ b/make/modules/java.base/gensrc/GensrcRegex.gmk
@@ -50,22 +50,5 @@ TARGETS += $(GENSRC_INDICCONJUNCTBREAK)

 ################################################################################

-GENSRC_CASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/CaseFolding.java
-
-CASEFOLDINGTEMP := $(MODULE_SRC)/share/classes/jdk/internal/util/regex/CaseFolding.java.template
-CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
-
-$(GENSRC_CASEFOLDING): $(BUILD_TOOLS_JDK) $(CASEFOLDINGTEMP) $(CASEFOLDINGTXT)
-	$(call LogInfo, Generating $@)
-	$(call MakeTargetDir)
-	$(TOOL_GENERATECASEFOLDING) \
-	    $(CASEFOLDINGTEMP) \
-	    $(CASEFOLDINGTXT) \
-	    $(GENSRC_CASEFOLDING)
-
-TARGETS += $(GENSRC_CASEFOLDING)
-
-################################################################################
-
 endif # include guard
 include MakeIncludeEnd.gmk
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -117,9 +117,38 @@ import sun.nio.cs.UTF_8;
 * Unicode code points (i.e., characters), in addition to those for
 * dealing with Unicode code units (i.e., {@code char} values).
 *
- * <p>Unless otherwise noted, methods for comparing Strings do not take locale
- * into account.  The {@link java.text.Collator} class provides methods for
- * finer-grain, locale-sensitive String comparison.
+ * <p><b>String comparison and case-insensitive matching</b>
+ *
+ * <p>There are several related ways to compare {@code String} values; choose
+ * the one whose semantics fit your purpose:
+ *
+ * <ul>
+ *   <li><b>Exact content equality</b> — {@link #equals(Object)} checks that two
+ *       strings contain the identical char sequence of UTF-16 code units. This is
+ *       a strict, case-sensitive comparison suitable for exact matching, hashing
+ *       and any situation that requires bit-for-bit stability.</li>
+ *
+ *   <li><b>Simple case-insensitive equality</b> — {@link #equalsIgnoreCase(String)}
+ *       (and the corresponding {@link #compareToIgnoreCase(String)} and {@link #CASE_INSENSITIVE_ORDER})
+ *       performs a per-code-point, locale-independent comparison using
+ *       {@link Character#toUpperCase(int)} and {@link Character#toLowerCase(int)}.
+ *       It is convenient for many common case-insensitive checks.</li>
+ *
+ *   <li><b>Unicode case-folded equivalence</b> — {@link #equalsFoldCase(String)}
+ *       (and the corresponding {@link #compareToFoldCase(String)} and {@link #UNICODE_CASEFOLD_ORDER})
+ *       implement the Unicode <em>{@index "full case folding"}</em> rules defined in
+ *       <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">Unicode CaseFolding.txt</a>.
+ *       Case folding is locale-independent and language-neutral and may map a single code
+ *       point to multiple code points (1:M mappings). For example, the German sharp
+ *       s ({@code U+00DF}) is folded to the sequence {@code "ss"}.
+ *       Use these methods when you need Unicode-compliant
+ *       <a href="https://www.unicode.org/versions/latest/core-spec/chapter-5/#G21790">
+ *       caseless matching</a>, searching, or ordering.</li>
+ * </ul>
+ *
+ * <p>Unless otherwise noted, methods for comparing Strings do not take locale into
+ * account. The {@link java.text.Collator} class provides methods for finer-grain,
+ * locale-sensitive String comparison.
 *
 * @implNote The implementation of the string concatenation operator is left to
 * the discretion of a Java compiler, as long as the compiler ultimately conforms
@@ -2179,6 +2208,7 @@ public final class String
     *          false} otherwise
     *
     * @see  #equals(Object)
+     * @see  #equalsFoldCase(String)
     * @see  #codePoints()
     */
    public boolean equalsIgnoreCase(String anotherString) {
@@ -2188,6 +2218,57 @@ public final class String
                && regionMatches(true, 0, anotherString, 0, length());
    }

+    /**
+     * Compares this {@code String} to another {@code String} for equality,
+     * using <em>{@index "Unicode case folding"}</em>. Two strings are considered equal
+     * by this method if their case-folded forms are identical.
+     * <p>
+     * Case folding is defined by the Unicode Standard in
+     * <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">CaseFolding.txt</a>,
+     * including 1:M mappings. For example, {@code "Fuß".equalsFoldCase("FUSS")}
+     * returns {@code true}, since the character {@code U+00DF} (sharp s) folds
+     * to {@code "ss"}.
+     * <p>
+     * Case folding is locale-independent and language-neutral, unlike
+     * locale-sensitive transformations such as {@link #toLowerCase()} or
+     * {@link #toUpperCase()}. It is intended for caseless matching,
+     * searching, and indexing.
+     *
+     * @apiNote
+     * This method is the Unicode-compliant alternative to
+     * {@link #equalsIgnoreCase(String)}. It implements full case folding as
+     * defined by the Unicode Standard, which may differ from the simpler
+     * per-character mapping performed by {@code equalsIgnoreCase}.
+     * For example:
+     * {@snippet lang=java :
+     * String a = "Fuß";
+     * String b = "FUSS";
+     * boolean equalsFoldCase = a.equalsFoldCase(b);       // returns true
+     * boolean equalsIgnoreCase = a.equalsIgnoreCase(b);   // returns false
+     * }
+     *
+     * @param  anotherString
+     *         The {@code String} to compare this {@code String} against
+     *
+     * @return  {@code true} if the given object is not {@code null} and represents
+     *          the same sequence of characters as this string under Unicode case
+     *          folding; {@code false} otherwise.
+     *
+     * @spec    https://www.unicode.org/versions/latest/core-spec/chapter-5/#G21790 Unicode Caseless Matching
+     * @see     #compareToFoldCase(String)
+     * @see     #equalsIgnoreCase(String)
+     * @since   26
+     */
+    public boolean equalsFoldCase(String anotherString) {
+        if (this == anotherString) {
+            return true;
+        }
+        if (anotherString == null) {
+            return false;
+        }
+        return UNICODE_CASEFOLD_ORDER.compare(this, anotherString) == 0;
+    }
+
    /**
     * Compares two strings lexicographically.
     * The comparison is based on the Unicode value of each character in
@@ -2303,12 +2384,86 @@ public final class String
     *          than this String, ignoring case considerations.
     * @see     java.text.Collator
     * @see     #codePoints()
+     * @see     #compareToFoldCase(String)
     * @since   1.2
     */
    public int compareToIgnoreCase(String str) {
        return CASE_INSENSITIVE_ORDER.compare(this, str);
    }

+    /**
+     * A Comparator that orders {@code String} objects as by
+     * {@link #compareToFoldCase(String) compareToFoldCase()}.
+     *
+     * @see     #compareToFoldCase(String)
+     * @since   26
+     */
+    public static final Comparator<String> UNICODE_CASEFOLD_ORDER
+            = new FoldCaseComparator();
+
+    private static class FoldCaseComparator implements Comparator<String> {
+
+        @Override
+        public int compare(String s1, String s2) {
+            byte[] v1 = s1.value;
+            byte[] v2 = s2.value;
+            if (s1.coder == s2.coder()) {
+                return s1.coder == LATIN1 ? StringLatin1.compareToFC(v1, v2)
+                                          : StringUTF16.compareToFC(v1, v2);
+            }
+            return s1.coder == LATIN1 ? StringLatin1.compareToFC_UTF16(v1, v2)
+                                      : StringUTF16.compareToFC_Latin1(v1, v2);
+        }
+    }
+
+    /**
+     * Compares two strings lexicographically using <em>{@index "Unicode case folding"}</em>.
+     * This method returns an integer whose sign is that of calling {@code compareTo}
+     * on the Unicode case-folded versions of the strings. Unicode case folding
+     * eliminates differences in case according to the Unicode Standard, using the
+     * mappings defined in
+     * <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">CaseFolding.txt</a>,
+     * including 1:M mappings, such as {@code "ß"} → {@code "ss"}.
+     * <p>
+     * Case folding is a locale-independent, language-neutral form of case mapping,
+     * primarily intended for caseless matching. Unlike {@link #compareToIgnoreCase(String)},
+     * which applies a simpler locale-insensitive uppercase mapping, this method
+     * follows the Unicode <em>{@index "full"}</em> case folding, providing stable and
+     * consistent results across all environments.
+     * <p>
+     * Note that this method does <em>not</em> take locale into account, and may
+     * produce results that differ from locale-sensitive ordering. Use
+     * {@link java.text.Collator} for locale-sensitive comparison.
+     *
+     * @apiNote
+     * This method is the Unicode-compliant alternative to
+     * {@link #compareToIgnoreCase(String)}. It implements the
+     * <em>{@index "full case folding"}</em> as defined by the Unicode Standard, which
+     * may differ from the simpler per-character mapping performed by
+     * {@code compareToIgnoreCase}.
+     * For example:
+     * {@snippet lang=java :
+     * String a = "Fuß";
+     * String b = "FUSS";
+     * int cmpFoldCase = a.compareToFoldCase(b);     // returns 0
+     * int cmpIgnoreCase = a.compareToIgnoreCase(b); // returns > 0
+     * }
+     *
+     * @param   str   the {@code String} to be compared.
+     * @return  a negative integer, zero, or a positive integer as the specified
+     *          String is greater than, equal to, or less than this String,
+     *          ignoring case considerations by case folding.
+     *
+     * @spec    https://www.unicode.org/versions/latest/core-spec/chapter-5/#G21790 Unicode Caseless Matching
+     * @see     java.text.Collator
+     * @see     #compareToIgnoreCase(String)
+     * @see     #equalsFoldCase(String)
+     * @since   26
+     */
+    public int compareToFoldCase(String str) {
+        return UNICODE_CASEFOLD_ORDER.compare(this, str);
+    }
+
    /**
     * Tests if two string regions are equal.
     * <p>
--- a/src/java.base/share/classes/java/lang/StringLatin1.java
+++ b/src/java.base/share/classes/java/lang/StringLatin1.java
@@ -32,6 +32,8 @@ import java.util.function.Consumer;
 import java.util.function.IntConsumer;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
+
+import jdk.internal.lang.CaseFolding;
 import jdk.internal.util.ArraysSupport;
 import jdk.internal.vm.annotation.IntrinsicCandidate;

@@ -179,6 +181,128 @@ final class StringLatin1 {
        return len1 - len2;
    }

+    private static int compareToFC0(byte[] value, int off, int last, byte[] other, int ooff, int olast) {
+        int k1 = off, k2 = ooff;
+        boolean lo1 = false, lo2 = false;  // true if we have a leftover 's' from u+00df -> ss
+        while ((k1 < last || lo1) && (k2 < olast || lo2)) {
+            int c1, c2;
+            if (lo1) {
+                c1 = 0x73; // leftover 's'
+                lo1 = false;
+            } else {
+                c1 = getChar(value, k1++);
+                if (c1 == 0xdf) {
+                    c1 = 0x73;
+                    lo1 = true;
+                }
+            }
+            if (lo2) {
+                c2 = 0x73; // 's'
+                lo2 = false;
+            } else {
+                c2 = getChar(other, k2++);
+                if (c2 == 0xdf) {
+                    c2 = 0x73;
+                    lo2 = true;
+                }
+            }
+            if (!CharacterDataLatin1.equalsIgnoreCase((byte)c1, (byte)c2)) {
+                return Character.toLowerCase(c1) - Character.toLowerCase(c2);
+            }
+        }
+        if (k1 < last || lo1) {
+            return 1;
+        }
+        if (k2 < olast || lo2) {
+            return -1;
+        }
+        return 0;
+    }
+
+    static int compareToFC(byte[] value, byte[] other) {
+        int len = value.length;
+        int olen = other.length;
+        int lim = Math.min(len, olen);
+        for (int k = 0; k < lim; k++) {
+            byte b1 = value[k];
+            byte b2 = other[k];
+            if (!CharacterDataLatin1.equalsIgnoreCase(b1, b2)) {
+                int c1 = b1 & 0xff;
+                int c2 = b2 & 0xff;
+                if (c1 == 0xdf || c2 == 0xdf) {  // 0xdf is the only 1:M in latin1 range
+                    return compareToFC0(value, k, len, other, k, olen);
+                }
+                return Character.toLowerCase(c1) - Character.toLowerCase(c2);
+            }
+        }
+        return len - olen;
+    }
+
+    private static int compareToFC0_UTF16(byte[] value, int off, int last, byte[] other, int ooff, int olast) {
+        int f1 = 0, f2 = 0;
+        int k1 = off, k2 = ooff;
+        while ((k1 < last || f1 != 0) && (k2 < olast || f2 != 0)) {
+            int c1, c2;
+            if (f1 != 0) {
+                c1 = (f1 & 0xffff); f1 >>>= 16;
+            } else {
+                c1 = getChar(value, k1++);
+                var f = CaseFolding.fold(c1);
+                if (CaseFolding.isSingleCodePoint(f)) {
+                    c1 = (int)(f & 0xfffff);
+                } else {
+                    c1 = (int)f & 0xffff;
+                    f1 = (int)(f >>> 16);
+                }
+            }
+            if (f2 != 0) {
+                c2 = f2 & 0xffff; f2 >>>= 16;
+            } else {
+                c2 = StringUTF16.codePointAt(other, k2, olast, true);
+                k2 += Character.charCount(c2);
+                var f = CaseFolding.fold(c2);
+                if (CaseFolding.isSingleCodePoint(f)) {
+                    c2 = (int)(f & 0xfffff);
+                } else {
+                    c2 = (int)(f & 0xffff);
+                    f2 = (int)(f >>> 16);
+                }
+            }
+            if (c1 != c2) {
+                return c1 - c2;
+            }
+        }
+        if (k1 < last || f1 != 0) {
+            return 1;
+        }
+        if (k2 < olast || f2 != 0) {
+            return -1;
+        }
+        return 0;
+    }
+
+    // latin1 vs utf16
+    static int compareToFC_UTF16(byte[] value, byte[] other) {
+        int last = length(value);
+        int olast = StringUTF16.length(other);
+        int lim = Math.min(last, olast);
+        for (int k = 0; k < lim; k++) {
+            int cp1 = getChar(value, k);
+            int cp2 = StringUTF16.codePointAt(other, k, olast, true);
+            if (cp1 != cp2) {
+                long cf1 = CaseFolding.fold(cp1);
+                long cf2 = CaseFolding.fold(cp2);
+                if (cf1 != cf2) {
+                    if (!CaseFolding.isSingleCodePoint(cf1) || !CaseFolding.isSingleCodePoint(cf2)) {
+                        return compareToFC0_UTF16(value, k, last, other, k, olast);
+                    }
+                    return (int)(cf1 - cf2);
+                }
+            }
+        }
+        return last - olast;
+    }
+
    static int hashCode(byte[] value) {
        return ArraysSupport.hashCodeOfUnsigned(value, 0, value.length, 0);
    }
--- a/src/java.base/share/classes/java/lang/StringUTF16.java
+++ b/src/java.base/share/classes/java/lang/StringUTF16.java
@@ -34,6 +34,7 @@ import java.util.function.IntConsumer;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;

+import jdk.internal.lang.CaseFolding;
 import jdk.internal.misc.Unsafe;
 import jdk.internal.util.ArraysSupport;
 import jdk.internal.vm.annotation.ForceInline;
@@ -93,7 +94,7 @@ final class StringUTF16 {
        return value.length >> 1;
    }

-    private static int codePointAt(byte[] value, int index, int end, boolean checked) {
+    static int codePointAt(byte[] value, int index, int end, boolean checked) {
        assert index < end;
        if (checked) {
            checkIndex(index, value);
@@ -592,6 +593,77 @@ final class StringUTF16 {
        return -StringLatin1.compareToCI_UTF16(other, value);
    }

+    public static int compareToFC_Latin1(byte[] value, byte[] other) {
+        return -StringLatin1.compareToFC_UTF16(other, value);
+    }
+
+    private static int compareToFC0(byte[] value, int off, int last, byte[] other, int ooff, int olast) {
+        int f1 = 0, f2 = 0;
+        int k1 = off, k2 = ooff;
+        while ((k1 < last || f1 != 0) && (k2 < olast || f2 != 0)) {
+            int c1, c2;
+            if (f1 != 0) {
+                c1 = f1 & 0xffff; f1 >>>= 16;
+            } else {
+                c1 = StringUTF16.codePointAt(value, k1, last, true);
+                k1 += Character.charCount(c1);
+                var f = CaseFolding.fold(c1);
+                if (CaseFolding.isSingleCodePoint(f)) {
+                    c1 = (int)(f & 0xfffff);
+                } else {
+                    c1 = (int)(f & 0xffff);
+                    f1 = (int)(f >> 16);
+                }
+            }
+            if (f2 != 0) {
+                c2 = f2 & 0xffff; f2 >>>= 16;
+            } else {
+                c2 = StringUTF16.codePointAt(other, k2, olast, true);
+                k2 += Character.charCount(c2);
+                var f = CaseFolding.fold(c2);
+                if (CaseFolding.isSingleCodePoint(f)) {
+                    c2 = (int)(f & 0xfffff);
+                } else {
+                    c2 = (int)(f & 0xffff);
+                    f2 = (int)(f >>> 16);
+                }
+            }
+            if (c1 != c2) {
+                return c1 - c2;
+            }
+        }
+        if (k1 < last || f1 != 0) {
+            return 1;
+        }
+        if (k2 < olast || f2 != 0) {
+            return -1;
+        }
+        return 0;
+    }
+
+    public static int compareToFC(byte[] value, byte[] other) {
+        int tlast = length(value);
+        int olast = length(other);
+        int lim = Math.min(tlast, olast);
+        int k = 0;
+        while (k < lim) {
+            int cp1 = codePointAt(value, k, tlast, true);
+            int cp2 = codePointAt(other, k, olast, true);
+            if (cp1 != cp2) {
+                long cf1 = CaseFolding.fold(cp1);
+                long cf2 = CaseFolding.fold(cp2);
+                if (cf1 != cf2) {
+                    if (!CaseFolding.isSingleCodePoint(cf1) || !CaseFolding.isSingleCodePoint(cf2)) {
+                        return compareToFC0(value, k, tlast, other, k, olast);
+                    }
+                    return (int) cf1 - (int) cf2;
+                }
+            }
+            k += Character.charCount(cp1);
+        }
+        return tlast - olast;
+    }
+
    static int hashCode(byte[] value) {
        return ArraysSupport.hashCodeOfUTF16(value, 0, value.length >> 1, 0);
    }
--- a/src/java.base/share/classes/java/util/regex/Pattern.java
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java
@@ -43,8 +43,8 @@ import java.util.function.Predicate;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;

+import jdk.internal.lang.CaseFolding;
 import jdk.internal.util.ArraysSupport;
-import jdk.internal.util.regex.CaseFolding;
 import jdk.internal.util.regex.Grapheme;

 /**
--- a/src/java.base/share/classes/jdk/internal/lang/CaseFolding.java.template
+++ b/src/java.base/share/classes/jdk/internal/lang/CaseFolding.java.template
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jdk.internal.lang;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static java.util.Map.entry;
+
+/**
+ * Utility class that handles Unicode case folding properties defined in
+ * CaseFolding.txt, including 1:M full case folding.
+ */
+public final class CaseFolding {
+
+    private CaseFolding()  {}
+
+   /**
+    * Tests whether the specified code point has a folding mapping entry defined.
+    *
+    * @param  cp
+    *         the Unicode code point to test
+    * @return {@code true} if the given code point has a case folding mapping entry
+    *         defined in the generated case folding table, {@code false} otherwise
+    */
+    public static boolean isDefined(int cp) {
+         return getDefined(cp) != -1;  // getDefined returns -1 on a hash miss
+    }
+
+   /**
+    * Returns the case-folded form of the specified code point according
+    * to the Unicode case folding mappings.
+    * <p>
+    * If the code point has no case folding mapping defined, this method returns
+    * the original code point.
+    *
+    * Possible combinations of the returned case-folding form as a long value
+    *
+    *  +---+---------+--------+---------+--------+--------+
+    *  | 1:1 mapping |  0000  |   0000  |  000x  |  xxxx  |  0041 => 0061 or 1E921 => 1E943
+    *  +---+---------+--------+---------+--------+--------+
+    *  | 1:2 mapping |  0002  |   0000  |  xxxx  |  xxxx  |  FB02 => 0066 006C
+    *  +---+---------+--------+---------+--------+--------+
+    *  | 1:3 mapping |  0003  |   xxxx  |  xxxx  |  xxxx  |  FB03 => 0066 0066 0069
+    *  +---+---------+--------+---------+--------+--------+
+    *
+    * @param  cp
+    *         the Unicode code point to fold
+    * @return a long value representing the case-folded form of the input
+    *         code point, encoded as shown in the table above
+    */
+    public static long fold(int cp) {
+        var fold = getDefined(cp);
+        return fold == -1 ? cp : fold;  // -1 is getDefined's miss marker: an unmapped code point folds to itself
+    }
+
+    public static boolean isSingleCodePoint(long fold) {  // true when the value returned by fold() encodes a 1:1 mapping
+        return (fold >> 48) == 0;  // bits 48-63 hold the 1:M marker (0002 or 0003 in the fold() table); zero means a single code point
+    }
+
+   /**
+    * Returns an expansion set to "close" a given regex Unicode character class range for case-sensitive
+    * matching, according to the
+    * <a href="https://www.unicode.org/reports/tr18/#Simple_Loose_Matches">Simple Loose Matches</a>
+    * rule defined in Unicode Technical Standard #18: Unicode Regular Expressions.
+    * <p>
+    * To conform with Level 1 of UTS #18, specifically RL1.5: Simple Loose Matches, simple case folding must
+    * be applied to literals and (optionally) to character classes. When applied to character classes, each
+    * character class is expected to be closed under simple case folding. See the standard for the
+    * detailed explanation and example of "closed".
+    * <p>
+    * RL1.5 states: To meet this requirement, an implementation that supports case-sensitive matching should
+    * <ol>
+    * <li>Provide at least the simple, default Unicode case-insensitive matching, and</li>
+    * <li>Specify which character properties or constructs are closed under the matching.</li>
+    * </ol>
+    * <p>
+    * In the {@code  Pattern} implementation, 5 types of constructs may be case-sensitive when matching:
+    * back-refs, string slice (sequences), single, family(char-property) and class range. Single and
+    * family may appear independently or within a class.
+    * <p>
+    * For loose/case-insensitive matching, the back-refs, slices and singles apply {@code toUpperCase} and
+    * {@code toLowerCase} to both the pattern and the input string. This effectively 'closes' the class for
+    * matching.
+    * <p>
+    * The family/char-properties are not "closed" and should remain unchanged. This is acceptable per RL1.5,
+    * if their behavior is clearly specified.
+    * <p>
+    * This method addresses that requirement for the "range" construct within a character class by computing
+    * the additional characters that should be included to close the range under simple case folding:
+    * <p>
+    * For each character in the input range {@code [start, end]} (inclusive), if the character has a simple
+    * case folding mapping in Unicode's CaseFolding.txt, the mapping is not a round-trip map, and the mapped
+    * character is not already in the range, then that mapped character (typically lowercase) is added to
+    * the expansion set.
+    * <p>
+    * This allows regex character class "range" implementation to use the returned expansion set to support
+    * additional case-insensitive matching, without duplicating characters already covered by the existing
+    * regex range implementation. The expectation is the matching is done using both the uppercase and
+    * lowercase forms of the input character, for example
+    *
+    * <pre>{@code
+    *
+    *     ch -> inRange(lower, Character.toUpperCase(ch), upper) ||
+    *           inRange(lower, Character.toLowerCase(ch), upper) ||
+    *           additionalClosingCharacters.contains(Character.toUpperCase(ch)) ||
+    *           additionalClosingCharacters.contains(Character.toLowerCase(ch))
+    * }</pre>
+    *
+    * @param start the starting code point of the character range
+    * @param end the ending code point of the character range
+    * @return an {@code int[]} containing all the simple case equivalents of characters in the range, excluding
+    *         those already in the range
+    * @spec https://www.unicode.org/reports/tr18/#Simple_Loose_Matches
+    */
+    public static int[] getClassRangeClosingCharacters(int start, int end) {
+        int[] expanded = new int[expanded_case_cps.length];  // sized for the worst case; trimmed to 'off' on return
+        int off = 0;
+        for (int cp : expanded_case_cps) {
+            if (cp >= start && cp <= end) {
+                int folding = expanded_case_map.get(cp);
+                if (folding < start || folding > end) {  // keep only mappings that fall outside [start, end]
+                    expanded[off++] = folding;
+                }
+            }
+        }
+        return Arrays.copyOf(expanded, off);
+    }
+
+    private static final Map<Integer, Integer> expanded_case_map = Map.ofEntries(
+%%%Expanded_Case_Map_Entries
+    );
+
+    private static final int[] expanded_case_cps = expanded_case_map.keySet()
+      .stream()
+      .mapToInt(Integer::intValue)
+      .toArray();
+
+    private static final int HASH_CP = 0;
+    private static final int HASH_INDEX = 1;
+    private static final int HASH_NEXT = 2;
+
+    private static int[][] hashKeys(int[] keys) {
+        var hashes = new int[keys.length << 1][3];  // columns: HASH_CP, HASH_INDEX, HASH_NEXT; rows from keys.length up are overflow slots
+        var off = keys.length;  // next free overflow slot
+        for (int i = 0; i < keys.length; i++) {
+            var cp = keys[i];
+            var hash = cp % keys.length;
+            while (hashes[hash][HASH_CP] != 0) {  // a stored cp of 0 marks an unused slot
+                var next = hashes[hash][HASH_NEXT];
+                if (next == 0) {
+                    hashes[hash][HASH_NEXT] = off;  // end of the chain: link a fresh overflow slot
+                    hash = off++;
+                    break;
+                } else {
+                    hash = next;
+                }
+            }
+            hashes[hash][HASH_CP] = cp;
+            hashes[hash][HASH_INDEX] = i;  // position of cp in keys
+        }
+        return Arrays.copyOf(hashes, off);  // drop the overflow slots that were never used
+    }
+
+    private static long getDefined(int cp) {  // returns the encoded folding of cp, or -1 when cp has no entry
+        var hashes = CASE_FOLDING_HASHES;
+        var length = CASE_FOLDING_CPS.length;  // hashed based on total defined.
+        var hash = cp % length;
+        while (hashes[hash][HASH_CP] != cp) {  // walk the collision chain that starts at the home slot
+            var next = hashes[hash][HASH_NEXT];
+            if (next == 0) {
+                return -1;   // hash miss
+            }
+            hash = next;
+        }
+        var index = hashes[hash][HASH_INDEX];
+        return CASE_FOLDING_CPS[index] == cp ? CASE_FOLDING_VALUES[index] : -1;  // re-check the key: an unused slot stores cp 0 and would otherwise match cp == 0
+    }
+
+%%%Entries
+
+    private static final int[][] CASE_FOLDING_HASHES = hashKeys(CASE_FOLDING_CPS);
+}
--- a/src/java.base/share/classes/jdk/internal/util/regex/CaseFolding.java.template
+++ b/src/java.base/share/classes/jdk/internal/util/regex/CaseFolding.java.template
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package jdk.internal.util.regex;
-
-import java.util.Arrays;
-import java.util.Map;
-import java.util.Objects;
-
-import static java.util.Map.entry;
-
-public final class CaseFolding {
-
-    private static final Map<Integer, Integer> expanded_case_map = Map.ofEntries(
-%%%Entries
-    );
-
-    private static final int[] expanded_case_cps = expanded_case_map.keySet()
-      .stream()
-      .mapToInt(Integer::intValue)
-      .toArray();
-
-    private CaseFolding()  {}
-
-    /**
-     * Returns an expansion set to "close" a given regex Unicode character class range for case-sensitive
-     * matching, according to the
-     * <a href="https://www.unicode.org/reports/tr18/#Simple_Loose_Matches">Simple Loose Matches</a>
-     * rule defined in Unicode Technical Standard #18: Unicode Regular Expressions.
-     * <p>
-     * To conform with Level 1 of UTS #18, specifically RL1.5: Simple Loose Matches, simple case folding must
-     * be applied to literals and (optionally) to character classes. When applied to character classes, each
-     * character class is expected to be closed under simple case folding. See the standard for the
-     * detailed explanation and example of "closed".
-     * <p>
-     * RL1.5 states: To meet this requirement, an implementation that supports case-sensitive matching should
-     * <ol>
-     * <li>Provide at least the simple, default Unicode case-insensitive matching, and</li>
-     * <li>Specify which character properties or constructs are closed under the matching.</li>
-     * </ol>
-     * <p>
-     * In the {@code  Pattern} implementation, 5 types of constructs maybe case-sensitive when matching:
-     * back-refs, string slice (sequences), single, family(char-property) and class range. Single and
-     * family may appears independently or within a class.
-     * <p>
-     * For loose/case-insensitive matching, the back-refs, slices and singles apply {code toUpperCase} and
-     * {@code toLowerCase} to both the pattern and the input string. This effectively 'close' the class for
-     * matching.
-     * <p>
-     * The family/char-properties are not "closed" and should remain unchanged. This is acceptable per RL1.5,
-     * if their behavior is clearly specified.
-     * <p>
-     * This method addresses that requirement for the "range" construct within in character class by computing
-     * the additional characters that should be included to close the range under simple case folding:
-     * <p>
-     * For each character in the input range {@code [start, end]} (inclusive), if the character has a simple
-     * case folding mapping in Unicode's CaseFolding.txt, the mapping is not a round-trip map, and the mapped
-     * character is not already in the range, then that mapped character (typically lowercase) is added to
-     * the expansion set.
-     * <p>
-     * This allows regex character class "range" implementation to use the returned expansion set to support
-     * additional case-insensitive matching, without duplicating characters already covered by the existing
-     * regex range implementation. The expectation is the matching is done using both the uppercase and
-     * lowercase forms of the input character, for example
-     *
-     * <pre>{@code
-     *
-     *     ch -> inRange(lower, Character.toUpperCase(ch), upper) ||
-     *           inRange(lower, Character.toLower(ch), upper) ||
-     *           additionalClosingCharacters.contains(Character.toUpperCase(ch)) ||
-     *           additionalClosingCharacters.contains(Character.toUpperCase(ch))
-     * }</pre>
-     *
-     * <p>
-     * @spec https://www.unicode.org/reports/tr18/#Simple_Loose_Matches
-     * @param start the starting code point of the character range
-     * @param end the ending code point of the character range
-     * @return a {@code int[]} containing the all simple case equivalents of characters in the range, excluding
-     *         those already in the range
-     */
-    public static int[] getClassRangeClosingCharacters(int start, int end) {
-        int[] expanded = new int[expanded_case_cps.length];
-        int off = 0;
-        for (int cp : expanded_case_cps) {
-            if (cp >= start && cp <= end) {
-                int folding = expanded_case_map.get(cp);
-                if (folding < start || folding > end) {
-                    expanded[off++] = folding;
-                }
-            }
-        }
-        return Arrays.copyOf(expanded, off);
-    }
-}
--- a/test/jdk/java/lang/String/UnicodeCaseFoldingTest.java
+++ b/test/jdk/java/lang/String/UnicodeCaseFoldingTest.java
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @summary tests unicode case-folding based String comparison and equality
+ * @bug 4397357
+ * @library /lib/testlibrary/java/lang
+ * @modules java.base/jdk.internal.lang:+open
+ * @run junit/othervm
+ * UnicodeCaseFoldingTest
+ */
+
+import java.nio.file.Files;
+import java.util.stream.Stream;
+import java.util.stream.Collectors;
+import java.util.ArrayList;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import jdk.internal.lang.CaseFolding;
+
+public class UnicodeCaseFoldingTest {
+
+    @Test
+    void testAllCommnFullCodePointsListedInCaseFoldinigTxt() throws Throwable {
+        var filter = "^.*; [CF]; .*$";  // C=common, F=full, for full case folding
+        var results = Files.readAllLines(UCDFiles.CASEFOLDING).stream()  // readAllLines closes the file; an unclosed Files.lines stream leaks the handle
+                .filter(line -> !line.startsWith("#") && line.matches(filter))
+                .map(line -> {
+                    var fields = line.split("; ");
+                    var cp = Integer.parseInt(fields[0], 16);
+                    fields = fields[2].trim().split(" ");
+                    var folding = new int[fields.length];
+                    for (int i = 0; i < folding.length; i++) {
+                        folding[i] = Integer.parseInt(fields[i], 16);
+                    }
+                    var source = new String(Character.toChars(cp));
+                    var expected = new String(folding, 0, folding.length);
+                    // (1) Verify the folding result matches expected
+                    assertEquals(expected, foldCase(source), "CaseFolding.fold(): ");
+
+                    // (2) Verify compareToFoldCase() result
+                    assertEquals(0, source.compareToFoldCase(expected), "source.compareToFoldCase(expected)");
+                    assertEquals(0, expected.compareToFoldCase(source), "expected.compareToFoldCase(source)");
+
+                    // (3) Verify equalsFoldCase() result
+                    assertEquals(true, source.equalsFoldCase(expected), "source.equalsFoldCase(expected)");
+                    assertEquals(true, expected.equalsFoldCase(source), "expected.equalsFoldCase(source)");
+                    return null;  // a mismatch throws from the assertions above, so nothing is ever collected
+                })
+                .filter(error -> error != null)
+                .toArray();
+        assertEquals(0, results.length);
+    }
+
+    @Test
+    void testAllSimpleCodePointsListedInCaseFoldinigTxt() throws Throwable {
+        // S=simple, for simple case folding. The simple case foldings should still match
+        var filter = "^.*; [S]; .*$";
+        var results = Files.readAllLines(UCDFiles.CASEFOLDING).stream()  // readAllLines closes the file; an unclosed Files.lines stream leaks the handle
+                .filter(line -> !line.startsWith("#") && line.matches(filter))
+                .map(line -> {
+                    var fields = line.split("; ");
+                    var cp = Integer.parseInt(fields[0], 16);
+                    fields = fields[2].trim().split(" ");
+                    var folding = new int[fields.length];
+                    for (int i = 0; i < folding.length; i++) {
+                        folding[i] = Integer.parseInt(fields[i], 16);
+                    }
+                    var source = new String(Character.toChars(cp));
+                    var expected = new String(folding, 0, folding.length);
+
+                    // (1) Verify compareToFoldCase() result
+                    assertEquals(0, source.compareToFoldCase(expected), "source.compareToFoldCase(expected)");
+                    assertEquals(0, expected.compareToFoldCase(source), "expected.compareToFoldCase(source)");
+
+                    // (2) Verify equalsFoldCase() result
+                    assertEquals(true, source.equalsFoldCase(expected), "source.equalsFoldCase(expected)");
+                    assertEquals(true, expected.equalsFoldCase(source), "expected.equalsFoldCase(source)");
+                    return null;  // a mismatch throws from the assertions above, so nothing is ever collected
+                })
+                .filter(error -> error != null)
+                .toArray();
+        assertEquals(0, results.length);
+    }
+
+    @Test
+    public void testAllCodePointsFoldToThemselvesIfNotListed() throws Exception {
+        // Collect all code points that appear in CaseFolding.txt
+        var listed = Files.readAllLines(UCDFiles.CASEFOLDING).stream()  // readAllLines closes the file; an unclosed Files.lines stream leaks the handle
+                .filter(line -> !line.startsWith("#") && line.matches("^.*; [CF]; .*$"))
+                .map(line -> Integer.parseInt(line.split("; ")[0], 16))
+                .collect(Collectors.toSet());
+
+        var failures = new ArrayList<String>();
+
+        // Scan BMP + Supplementary Plane 1 (U+0000..U+1FFFF)
+        for (int cp = Character.MIN_CODE_POINT; cp <= 0x1FFFF; cp++) {
+            if (!Character.isDefined(cp)) {
+                continue;     // skip undefined
+            }
+            if (cp >= Character.MIN_SURROGATE && cp <= Character.MAX_SURROGATE) {  // compare as int: a (char) cast would also skip U+1D800..U+1DFFF
+                continue; // skip surrogate code units
+            }
+            if (listed.contains(cp)) {
+                continue;          // already tested separately
+            }
+            String s = new String(Character.toChars(cp));
+            String folded = foldCase(s);
+            if (!s.equals(folded)) {
+                failures.add(String.format("Unexpected folding: U+%04X '%s' → '%s'", cp, s, folded));
+            }
+        }
+
+        assertEquals(0, failures.size(),
+                () -> "Some unlisted code points folded unexpectedly:\n"
+                        + String.join("\n", failures));
+    }
+
+    @ParameterizedTest(name = "CaseFold \"{0}\" → \"{1}\"")
+    @MethodSource("caseFoldTestCases")
+    void testIndividualCaseFolding(String input, String expected) {
+        assertEquals(expected, foldCase(input));  // foldCase is this test's own helper built on CaseFolding.fold
+    }
+
+    static Stream<Arguments> caseFoldTestCases() {  // (input, expected full case folding) pairs for testIndividualCaseFolding
+        return Stream.of(
+                // ASCII simple cases
+                Arguments.of("ABC", "abc"),
+                Arguments.of("already", "already"),
+                Arguments.of("MiXeD123", "mixed123"),
+                // --- Latin-1 to non-Latin-1 fold ---
+                Arguments.of("aBc\u00B5Efg", "abc\u03BCefg"), // "µ" → "μ"
+                Arguments.of("test\u00B5\ud801\udc00X", "test\u03bc\ud801\udc28x"),
+                // German Eszett
+                Arguments.of("Stra\u00DFe", "strasse"), // "Straße"
+                Arguments.of("\u1E9E", "ss"), // "ẞ"  capital sharp S
+                // Turkish dotted I / dotless i
+                Arguments.of("I", "i"),
+                Arguments.of("\u0130", "i\u0307"), // capital dotted I → "i + dot above"
+                Arguments.of("\u0069\u0307", "i\u0307"), // small i + dot above remains
+                Arguments.of("\u0131", "\u0131"), // "ı" (dotless i stays dotless)
+
+                // --- Greek special cases ---
+                Arguments.of("\u039F\u03A3", "\u03BF\u03C3"), // "ΟΣ" → "οσ"  final sigma always folds to normal sigma
+                Arguments.of("\u1F88", "\u1F00\u03B9"), // "ᾈ" → "ἀι"    Alpha with psili + ypogegrammeni
+                Arguments.of("\u039C\u03AC\u03CA\u03BF\u03C2", "\u03BC\u03AC\u03CA\u03BF\u03C3"), // "Μάϊος" → "μάϊοσ"
+                Arguments.of("\u1F08", "\u1F00"), //  Ἀ (Capital Alpha with psili) → ἀ
+
+                // Supplementary Plane characters
+                Arguments.of("\uD801\uDC00", "\uD801\uDC28"), // Deseret Capital Letter Long I → Small
+                Arguments.of("\uD801\uDC01", "\uD801\uDC29"), // Deseret Capital Letter Long E → Small
+
+                // Supplementary inside ASCII
+                Arguments.of("abc\uD801\uDC00def", "abc\uD801\uDC28def"),
+                // Ligatures and compatibility folds
+                Arguments.of("\uFB00", "ff"), // ﬀ → ff
+                Arguments.of("\uFB03", "ffi"), // ﬃ → ffi
+                Arguments.of("\u212A", "k"), // Kelvin sign → k
+
+                Arguments.of("abc\uFB00def", "abcffdef"), // ﬀ → ff
+                Arguments.of("abc\uFB03def", "abcffidef"), // ﬃ → ffi
+                Arguments.of("abc\u212Adef", "abckdef"), // Kelvin sign → k
+
+                // --- Fullwidth ---
+                Arguments.of("\uFF21\uFF22\uFF23", "\uFF41\uFF42\uFF43"), // "ＡＢＣ" → "ａｂｃ"
+
+                // --- Armenian ---
+                Arguments.of("\u0531", "\u0561"), // "Ա" → "ա"
+
+                // --- Cherokee ---
+                Arguments.of("\u13A0", "\u13A0"), // Capital Cherokee A folds to itself
+                Arguments.of("\uAB70", "\u13A0") // Small Cherokee A folds to Capital Cherokee A
+        );
+    }
+
+    static Stream<Arguments> caseFoldEqualProvider() {  // string pairs that testcompareToFoldCaseEquals expects to be fold-equal
+        return Stream.of(
+                Arguments.of("abc", "ABC"),
+                Arguments.of("aBcDe", "AbCdE"),
+                Arguments.of("\u00C0\u00E7", "\u00E0\u00C7"), // Àç vs àÇ
+                Arguments.of("straße", "STRASSE"), // ß → ss
+                Arguments.of("\uD83C\uDDE6", "\uD83C\uDDE6"), // 🇦 vs 🇦
+                Arguments.of("\u1E9E", "ss"), // ẞ (capital sharp S)
+                Arguments.of("\u03A3", "\u03C3"), // Σ vs σ (Greek Sigma)
+                Arguments.of("\u03C3", "\u03C2"), // σ vs ς (Greek sigma/final sigma)
+                Arguments.of("\u212B", "\u00E5"), // Å (Angstrom sign) vs å
+                Arguments.of("\uFB00", "ff"), // ﬀ (ligature)
+                Arguments.of("\u01C5", "\u01C5"), // ǅ (Latin capital D with small z with caron)
+                Arguments.of("Caf\u00E9", "CAF\u00C9"), // Café vs CAFÉ
+                Arguments.of("\u03BA\u03B1\u03BB\u03B7\u03BC\u03AD\u03C1\u03B1", "\u039A\u0391\u039B\u0397\u039C\u0388\u03A1\u0391"), // καλημέρα vs ΚΑΛΗΜΕΡΑ
+                Arguments.of("\u4E2D\u56FD", "\u4E2D\u56FD"), // 中国
+                Arguments.of("\u03B1", "\u0391"), // α vs Α (Greek alpha)
+                Arguments.of("\u212B", "\u00C5"), // Å (Angstrom sign, U+212B) vs Å (U+00C5)
+                // from StringCompareToIgnoreCase
+                Arguments.of("\u0100\u0102\u0104\u0106\u0108", "\u0100\u0102\u0104\u0106\u0109"), // ĀĂĄĆĈ vs ĀĂĄĆĉ
+                Arguments.of("\u0101\u0103\u0105\u0107\u0109", "\u0100\u0102\u0104\u0106\u0109"), // āăąćĉ vs ĀĂĄĆĉ
+                Arguments.of("\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc04",
+                        "\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc2c"), // 𐐀𐐁𐐂𐐃𐐄 vs 𐐀𐐁𐐂𐐃𐐬
+                Arguments.of("\ud801\udc28\ud801\udc29\ud801\udc2a\ud801\udc2b\ud801\udc2c",
+                        "\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc2c") // 𐐨𐐩𐐪𐐫𐐬 vs 𐐀𐐁𐐂𐐃𐐬
+        );
+    }
+
+    @ParameterizedTest
+    @MethodSource("caseFoldEqualProvider")
+    void testcompareToFoldCaseEquals(String s1, String s2) {
+        assertEquals(0, s1.compareToFoldCase(s2));  // checked in both directions
+        assertEquals(0, s2.compareToFoldCase(s1));
+        assertEquals(true, s1.equalsFoldCase(s2));
+        assertEquals(true, s2.equalsFoldCase(s1));
+        assertEquals(foldCase(s1), foldCase(s2));  // cross-check against the test's own folding helper
+    }
+
+    static Stream<Arguments> caseFoldOrderingProvider() {  // (s1, s2, expected sign of s1.compareToFoldCase(s2))
+        return Stream.of(
+                Arguments.of("asa", "aß", -1), // ß → ss → "asa" < "ass"
+                Arguments.of("aß", "asa", +1),
+                Arguments.of("a\u00DF", "ass", 0), // aß vs ass
+                Arguments.of("\uFB03", "ffi", 0), // ﬃ (ligature)
+                Arguments.of("\u00C5", "Z", 1), // Å vs Z
+                Arguments.of("A", "\u00C0", -1), // A vs À
+                Arguments.of("\u03A9", "\u03C9", 0), // Ω vs ω
+                Arguments.of("\u03C2", "\u03C3", 0), // ς vs σ
+                Arguments.of("\uD835\uDD23", "R", 1), // 𝔣 (U+1D523, mathematical fraktur small f) vs R
+                Arguments.of("\uFF26", "E", 1), // Ｆ (full-width F) vs E
+                Arguments.of("\u00C9clair", "Eclair", 1), // Éclair vs Eclair
+                Arguments.of("\u03bc\u00df", "\u00b5s", 1), // μß vs µs
+                Arguments.of("\u00b5s", "\u03bc\u00df", -1) // µs vs μß
+        );
+    }
+
+    @ParameterizedTest
+    @MethodSource("caseFoldOrderingProvider")
+    void testcompareToFoldCaseOrdering(String s1, String s2, int expectedSign) {
+        int cmp = s1.compareToFoldCase(s2);
+        assertEquals(expectedSign, Integer.signum(cmp));  // only the sign is checked, not the magnitude
+    }
+
+    static Stream<Arguments> roundTripProvider() {  // inputs for testCaseFoldRoundTrip
+        return Stream.of(
+                Arguments.of("abc"),
+                Arguments.of("ABC"),
+                Arguments.of("straße"),
+                Arguments.of("Àç"),
+                Arguments.of("aß"),
+                Arguments.of("\uFB02uff"), // ﬂuff (ligature in "fluff")
+                Arguments.of("\u00C9COLE") // ÉCOLE
+        );
+    }
+
+    @ParameterizedTest
+    @MethodSource("roundTripProvider")
+    void testCaseFoldRoundTrip(String s) {
+        String folded = foldCase(s);  // folded form produced by the test's own helper
+        assertEquals(0, s.compareToFoldCase(folded));  // a string must be fold-equal to its own folded form
+        assertEquals(0, folded.compareToFoldCase(s));
+        assertEquals(true, s.equalsFoldCase(folded));
+        assertEquals(true, folded.equalsFoldCase(s));
+    }
+
+    // helper to test the integrity of folding mapping: decodes the long returned by CaseFolding.fold into code points
+    private static int[] longToFolding(long value) {
+        int len = (int) (value >>> 48);  // top 16 bits: code point count of a 1:M folding, 0 for a 1:1 mapping
+        if (len == 0) {
+            return new int[]{(int) (value & 0x1FFFFF)};  // 21 bits: code points go up to U+10FFFF, a 20-bit mask truncates plane 16
+        } else {
+            var folding = new int[len];
+            for (int i = 0; i < len; i++) {
+                folding[i] = (int) (value & 0xFFFF);  // 1:M foldings are stored as 16-bit units, first one in the lowest bits
+                value >>= 16;
+            }
+            return folding;
+        }
+    }
+
+    private static String foldCase(String s) {  // folds s code point by code point using CaseFolding.fold
+        int first;
+        int len = s.length();
+        int cpCnt = 1;
+        for (first = 0; first < len; first += cpCnt) {  // locate the first code point that has a folding entry
+            int cp = s.codePointAt(first);
+            if (CaseFolding.isDefined(cp)) {
+                break;
+            }
+            cpCnt = Character.charCount(cp);
+        }
+        if (first == len) {
+            return s;  // nothing to fold: return the input itself
+        }
+        StringBuilder sb = new StringBuilder(len);
+        sb.append(s, 0, first);  // the prefix before the first foldable code point is copied unchanged
+        for (int i = first; i < len; i += cpCnt) {
+            int cp = s.codePointAt(i);
+            int[] folded = longToFolding(CaseFolding.fold(cp));
+            for (int f : folded) {
+                sb.appendCodePoint(f);
+            }
+            cpCnt = Character.charCount(cp);  // advance by the width of the source code point, not of its folding
+        }
+        return sb.toString();
+    }
+}
--- a/test/micro/org/openjdk/bench/java/lang/StringCompareToFoldCase.java
+++ b/test/micro/org/openjdk/bench/java/lang/StringCompareToFoldCase.java
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.java.lang;
+
+import org.openjdk.jmh.annotations.*;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Naive exploration of {@code String::compareToFoldCase} and
+ * {@code String::equalsFoldCase} performance, measured next to
+ * {@code compareToIgnoreCase} and {@code equalsIgnoreCase} on the same inputs.
+ *
+ * <p>Benchmark name suffixes: none = {@code compareToIgnoreCase},
+ * {@code FC} = {@code compareToFoldCase}, {@code EQ} = {@code equalsIgnoreCase},
+ * {@code EQFC} = {@code equalsFoldCase}.
+ *
+ * <p>The input fields are left non-final, following the usual JMH advice for
+ * benchmark inputs held in {@code @State} objects.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 1)
+@Fork(3)
+public class StringCompareToFoldCase {
+
+    // ASCII letters: all upper case, mixed case, all lower case.
+    private String asciiUpper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    private String asciiUpperLower = "ABCDEFGHIJKLMNOpqrstuvwxyz";
+    private String asciiLower = "abcdefghijklmnopqrstuvwxyz";
+
+    // Inputs containing U+00DF (sharp s), whose full case folding is "ss".
+    private String asciiWithDF = "abcdßßßßßßßßßßßßßßßßWXYZ";
+    private String asciiWithDFSS = "abcdssssssssssssssssßßßßßßßßWXYZ";
+
+    // An ASCII-only string paired with one that ends in U+0391, a character
+    // outside Latin-1.
+    private String asciiLatin1 = "ABCDEFGHIJKLMNOpqrstuvwxyz0";
+    private String asciiLatin1UTF16 = "abcdefghijklmnopqrstuvwxyz\u0391";
+
+    // Greek letters (each five-letter sequence appears twice).
+    private String greekUpper = "\u0391\u0392\u0393\u0394\u0395\u0391\u0392\u0393\u0394\u0395"; // ΑΒΓΔΕ
+    private String greekUpperLower = "\u0391\u0392\u0393\u0394\u0395\u0391\u0392\u0393\u0394\u03B5"; // ΑΒΓΔε
+    private String greekLower = "\u03B1\u03B2\u03B3\u03B4\u03B5\u03B1\u03B2\u03B3\u03B4\u03B5"; // αβγδε
+
+    // Supplementary code points written as surrogate pairs:
+    // U+10400..U+10404 (upper) and U+10428..U+1042C (lower).
+    public String supUpper = "\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc04";
+    public String supUpperLower = "\ud801\udc00\ud801\udc01\ud801\udc02\ud801\udc03\ud801\udc2c";
+    public String supLower = "\ud801\udc28\ud801\udc29\ud801\udc2a\ud801\udc2b\ud801\udc2c";
+
+    // ---- String.compareToIgnoreCase ----
+
+    @Benchmark
+    public int asciiUpperLower() {
+        return asciiUpper.compareToIgnoreCase(asciiUpperLower);
+    }
+
+    @Benchmark
+    public int asciiLower() {
+        return asciiUpper.compareToIgnoreCase(asciiLower);
+    }
+
+    @Benchmark
+    public int greekUpperLower() {
+        return greekUpper.compareToIgnoreCase(greekUpperLower);
+    }
+
+    @Benchmark
+    public int greekLower() {
+        return greekUpper.compareToIgnoreCase(greekLower);
+    }
+
+    @Benchmark
+    public int latin1UTF16() {
+        return asciiLatin1.compareToIgnoreCase(asciiLatin1UTF16);
+    }
+
+    @Benchmark
+    public int supUpperLower() {
+        return supUpper.compareToIgnoreCase(supUpperLower);
+    }
+
+    @Benchmark
+    public int supLower() {
+        return supUpper.compareToIgnoreCase(supLower);
+    }
+
+    // ---- String.compareToFoldCase ----
+
+    @Benchmark
+    public int asciiUpperLowerFC() {
+        return asciiUpper.compareToFoldCase(asciiUpperLower);
+    }
+
+    @Benchmark
+    public int asciiLowerFC() {
+        return asciiUpper.compareToFoldCase(asciiLower);
+    }
+
+    @Benchmark
+    public int asciiWithDFFC() {
+        return asciiWithDF.compareToFoldCase(asciiWithDFSS);
+    }
+
+    @Benchmark
+    public int greekUpperLowerFC() {
+        return greekUpper.compareToFoldCase(greekUpperLower);
+    }
+
+    @Benchmark
+    public int greekLowerFC() {
+        return greekUpper.compareToFoldCase(greekLower);
+    }
+
+    @Benchmark
+    public int latin1UTF16FC() {
+        return asciiLatin1.compareToFoldCase(asciiLatin1UTF16);
+    }
+
+    @Benchmark
+    public int supUpperLowerFC() {
+        return supUpper.compareToFoldCase(supUpperLower);
+    }
+
+    @Benchmark
+    public int supLowerFC() {
+        return supUpper.compareToFoldCase(supLower);
+    }
+
+    // ---- String.equalsIgnoreCase ----
+
+    @Benchmark
+    public boolean asciiUpperLowerEQ() {
+        return asciiUpper.equalsIgnoreCase(asciiUpperLower);
+    }
+
+    @Benchmark
+    public boolean asciiLowerEQ() {
+        return asciiUpper.equalsIgnoreCase(asciiLower);
+    }
+
+    @Benchmark
+    public boolean greekUpperLowerEQ() {
+        return greekUpper.equalsIgnoreCase(greekUpperLower);
+    }
+
+    @Benchmark
+    public boolean greekLowerEQ() {
+        return greekUpper.equalsIgnoreCase(greekLower);
+    }
+
+    @Benchmark
+    public boolean latin1UTF16EQ() {
+        return asciiLatin1.equalsIgnoreCase(asciiLatin1UTF16);
+    }
+
+    @Benchmark
+    public boolean supUpperLowerEQ() {
+        return supUpper.equalsIgnoreCase(supUpperLower);
+    }
+
+    @Benchmark
+    public boolean supLowerEQ() {
+        return supUpper.equalsIgnoreCase(supLower);
+    }
+
+    // ---- String.equalsFoldCase ----
+
+    @Benchmark
+    public boolean asciiUpperLowerEQFC() {
+        return asciiUpper.equalsFoldCase(asciiUpperLower);
+    }
+
+    @Benchmark
+    public boolean asciiLowerEQFC() {
+        return asciiUpper.equalsFoldCase(asciiLower);
+    }
+
+    @Benchmark
+    public boolean greekUpperLowerEQFC() {
+        return greekUpper.equalsFoldCase(greekUpperLower);
+    }
+
+    @Benchmark
+    public boolean greekLowerEQFC() {
+        return greekUpper.equalsFoldCase(greekLower);
+    }
+
+    @Benchmark
+    public boolean latin1UTF16EQFC() {
+        return asciiLatin1.equalsFoldCase(asciiLatin1UTF16);
+    }
+
+    @Benchmark
+    public boolean supUpperLowerEQFC() {
+        return supUpper.equalsFoldCase(supUpperLower);
+    }
+
+    @Benchmark
+    public boolean supLowerEQFC() {
+        return supUpper.equalsFoldCase(supLower);
+    }
+}