8316594: C2 SuperWord: wrong result with hand unrolled loops

Reviewed-by: kvn, thartmann
This commit is contained in:
Emanuel Peter
2023-10-05 06:46:44 +00:00
parent c6c69b579c
commit 3630af26e6
3 changed files with 107 additions and 8 deletions

View File

@@ -2568,15 +2568,12 @@ bool SuperWord::output() {
// Set the memory dependency of the LoadVector as early as possible.
// Walk up the memory chain, and ignore any StoreVector that provably
// does not have any memory dependency.
VPointer p1(n->as_Mem(), phase(), lpt(), nullptr, false);
while (mem->is_StoreVector()) {
VPointer p2(mem->as_Mem(), phase(), lpt(), nullptr, false);
if (p1.not_equal(p2)) {
// Either Less or Greater -> provably no overlap between the two memory regions.
mem = mem->in(MemNode::Memory);
} else {
// No proof that there is no overlap. Stop here.
VPointer p_store(mem->as_Mem(), phase(), lpt(), nullptr, false);
if (p_store.overlap_possible_with_any_in(p)) {
break;
} else {
mem = mem->in(MemNode::Memory);
}
}
Node* adr = first->in(MemNode::Address);

View File

@@ -119,6 +119,20 @@ class VPointer : public ArenaObj {
}
}
// Compares this pointer against a VPointer built for every memory node in
// the list p. Returns true as soon as one entry cannot be shown to be
// not_equal to this pointer, i.e. an overlap cannot be ruled out. Returns
// false when every entry is not_equal (including when p has no entries).
bool overlap_possible_with_any_in(Node_List* p) {
  uint idx = 0;
  while (idx < p->size()) {
    VPointer candidate(p->at(idx)->as_Mem(), phase(), lpt(), nullptr, false);
    // A not_equal result (Less or Greater) is the only outcome that
    // guarantees the two memory regions can never overlap; anything
    // else has to be treated as a possible overlap.
    const bool provably_disjoint = not_equal(candidate);
    if (!provably_disjoint) {
      return true;
    }
    idx++;
  }
  return false;
}
// Convenience forwarders: each one evaluates cmp(q) against the other
// pointer q and passes that result to the same-named overload that
// interprets a comparison result.
bool not_equal(VPointer& q) { return not_equal(cmp(q)); }
bool equal(VPointer& q) { return equal(cmp(q)); }
bool comparable(VPointer& q) { return comparable(cmp(q)); }

View File

@@ -25,27 +25,35 @@
/**
* @test
* @requires vm.compiler2.enabled
* @bug 8316679
* @requires vm.cpu.features ~= ".*avx2.*"
* @bug 8316679 8316594
* @summary In SuperWord::output, LoadVector can be moved before StoreVector, but only if it is proven to be safe.
* @key randomness
* @modules java.base/jdk.internal.misc
* @library /test/lib
* @run main/othervm -XX:CompileCommand=compileonly,compiler.loopopts.superword.TestMovingLoadBeforeStore::test*
* -Xbatch -XX:LoopUnrollLimit=100
* -XX:+UnlockDiagnosticVMOptions -XX:+StressLCM
* --add-modules java.base --add-exports java.base/jdk.internal.misc=ALL-UNNAMED
* compiler.loopopts.superword.TestMovingLoadBeforeStore
*/
package compiler.loopopts.superword;
import java.util.Random;
import jdk.test.lib.Utils;
import jdk.internal.misc.Unsafe;
public class TestMovingLoadBeforeStore {
static int RANGE = 1024*64;
static int NINE = 9;
private static final Random random = Utils.getRandomInstance();
static Unsafe UNSAFE = Unsafe.getUnsafe();
public static void main(String[] strArr) {
byte a[] = new byte[RANGE];
byte b[] = new byte[RANGE];
for (int i = 0; i < 100; i++) {
for (int j = 0; j < a.length; j++) {
a[j] = (byte)random.nextInt();
@@ -56,6 +64,30 @@ public class TestMovingLoadBeforeStore {
test1(a_res, a_res, i % 2);
verify("a in test1", a_ref, a_res, a);
}
for (int i = 0; i < 100; i++) {
for (int j = 0; j < a.length; j++) {
a[j] = (byte)random.nextInt();
b[j] = (byte)random.nextInt();
}
byte[] a_ref = a.clone();
byte[] b_ref = b.clone();
byte[] a_res = a.clone();
byte[] b_res = b.clone();
ref2(a_ref, b_ref);
test2(a_res, b_res);
verify("a in test2", a_ref, a_res, a);
verify("b in test2", b_ref, b_res, b);
}
for (int i = 0; i < 100; i++) {
for (int j = 0; j < a.length; j++) {
a[j] = (byte)random.nextInt();
}
byte[] a_ref = a.clone();
byte[] a_res = a.clone();
ref3(a_ref);
test3(a_res);
verify("a in test3", a_ref, a_res, a);
}
}
static void verify(String name, byte[] ref, byte[] res, byte[] orig) {
@@ -96,4 +128,60 @@ public class TestMovingLoadBeforeStore {
b[inv + i + 3]++;
}
}
// Kernel under test: a hand-unrolled loop that increments four consecutive
// bytes of a and four consecutive bytes of b per iteration.
// Because i advances by 1 while each iteration touches offsets +0..+3,
// the byte ranges of neighbouring iterations overlap, so loads in one
// iteration depend on stores of the previous ones.
// The a accesses use a literal base (47) while the b accesses use the
// non-final static field NINE as base.
// The method name matches the compileonly pattern "test*" from the @run
// line, so this is the copy that gets compiled; its result is compared
// against ref2 in main.
static void test2(byte[] a, byte[] b) {
for (int i = 46; i < 6000; i++) {
// Four adjacent read-modify-write accesses on a.
a[47 + i + 0]++;
a[47 + i + 1]++;
a[47 + i + 2]++;
a[47 + i + 3]++;
// Four adjacent read-modify-write accesses on b.
b[NINE + i + 0]++;
b[NINE + i + 1]++;
b[NINE + i + 2]++;
b[NINE + i + 3]++;
}
}
// Reference copy of test2: the body is statement-for-statement identical
// and must be kept that way, since main verifies test2's output against
// the arrays produced here.
// NOTE(review): the name does not match the compileonly pattern "test*"
// in the @run line, so this copy is presumably never JIT-compiled and
// serves as the trusted result - confirm.
static void ref2(byte[] a, byte[] b) {
for (int i = 46; i < 6000; i++) {
// Same overlapping +0..+3 increments on a as in test2.
a[47 + i + 0]++;
a[47 + i + 1]++;
a[47 + i + 2]++;
a[47 + i + 3]++;
// Same overlapping +0..+3 increments on b as in test2.
b[NINE + i + 0]++;
b[NINE + i + 1]++;
b[NINE + i + 2]++;
b[NINE + i + 3]++;
}
}
// Kernel under test: a hand-unrolled loop that performs nine unaligned
// int read-increment-write operations per iteration on the byte array a,
// at byte offsets adr + 0*4 .. adr + 8*4.
// adr advances by one byte per iteration while each access spans an int,
// so the accessed regions of neighbouring iterations overlap.
// The method name matches the compileonly pattern "test*" from the @run
// line; its result is compared against ref3 in main.
static void test3(byte[] a) {
for (int i = 51; i < 6000; i++) {
// Byte offset into a: array base offset plus 42 plus the loop index.
int adr = UNSAFE.ARRAY_BYTE_BASE_OFFSET + 42 + i;
UNSAFE.putIntUnaligned(a, adr + 0*4, UNSAFE.getIntUnaligned(a, adr + 0*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 1*4, UNSAFE.getIntUnaligned(a, adr + 1*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 2*4, UNSAFE.getIntUnaligned(a, adr + 2*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 3*4, UNSAFE.getIntUnaligned(a, adr + 3*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 4*4, UNSAFE.getIntUnaligned(a, adr + 4*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 5*4, UNSAFE.getIntUnaligned(a, adr + 5*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 6*4, UNSAFE.getIntUnaligned(a, adr + 6*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 7*4, UNSAFE.getIntUnaligned(a, adr + 7*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 8*4, UNSAFE.getIntUnaligned(a, adr + 8*4) + 1);
}
}
// Reference copy of test3: the body is statement-for-statement identical
// and must be kept that way, since main verifies test3's output against
// the array produced here.
// NOTE(review): the name does not match the compileonly pattern "test*"
// in the @run line, so this copy is presumably never JIT-compiled and
// serves as the trusted result - confirm.
static void ref3(byte[] a) {
for (int i = 51; i < 6000; i++) {
// Byte offset into a: array base offset plus 42 plus the loop index.
int adr = UNSAFE.ARRAY_BYTE_BASE_OFFSET + 42 + i;
UNSAFE.putIntUnaligned(a, adr + 0*4, UNSAFE.getIntUnaligned(a, adr + 0*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 1*4, UNSAFE.getIntUnaligned(a, adr + 1*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 2*4, UNSAFE.getIntUnaligned(a, adr + 2*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 3*4, UNSAFE.getIntUnaligned(a, adr + 3*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 4*4, UNSAFE.getIntUnaligned(a, adr + 4*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 5*4, UNSAFE.getIntUnaligned(a, adr + 5*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 6*4, UNSAFE.getIntUnaligned(a, adr + 6*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 7*4, UNSAFE.getIntUnaligned(a, adr + 7*4) + 1);
UNSAFE.putIntUnaligned(a, adr + 8*4, UNSAFE.getIntUnaligned(a, adr + 8*4) + 1);
}
}
}