Mirror of https://github.com/JetBrains/JetBrainsRuntime.git (synced 2025-12-06 09:29:38 +01:00)
8304042: C2 SuperWord: schedule must remove packs with cyclic dependencies
Reviewed-by: kvn, thartmann
(cherry picked from commit 83a924a100)
Committed by: Vitaly Provodin
Parent: 6328ec9c38
Commit: 82da6a91b9
@@ -895,7 +895,7 @@ class PhaseIdealLoop : public PhaseTransform {
public:
// Set/get control node out. Set lower bit to distinguish from IdealLoopTree
// Returns true if "n" is a data node, false if it's a control node.
bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; }
bool has_ctrl(const Node* n) const { return ((intptr_t)_nodes[n->_idx]) & 1; }

private:
// clear out dead code after build_loop_late
@@ -972,7 +972,7 @@ public:

PhaseIterGVN &igvn() const { return _igvn; }

bool has_node( Node* n ) const {
bool has_node(const Node* n) const {
guarantee(n != nullptr, "No Node.");
return _nodes[n->_idx] != nullptr;
}
@@ -1003,8 +1003,7 @@ public:
// location of all Nodes in the subsumed block, we lazily do it. As we
// pull such a subsumed block out of the array, we write back the final
// correct block.
Node *get_ctrl( Node *i ) {

Node* get_ctrl(const Node* i) {
assert(has_node(i), "");
Node *n = get_ctrl_no_update(i);
_nodes.map( i->_idx, (Node*)((intptr_t)n + 1) );
@@ -1024,12 +1023,12 @@ public:
}
}

Node *get_ctrl_no_update_helper(Node *i) const {
Node* get_ctrl_no_update_helper(const Node* i) const {
assert(has_ctrl(i), "should be control, not loop");
return (Node*)(((intptr_t)_nodes[i->_idx]) & ~1);
}

Node *get_ctrl_no_update(Node *i) const {
Node* get_ctrl_no_update(const Node* i) const {
assert( has_ctrl(i), "" );
Node *n = get_ctrl_no_update_helper(i);
if (!n->in(0)) {

@@ -531,6 +531,8 @@ bool SuperWord::SLP_extract() {

DEBUG_ONLY(verify_packs();)

remove_cycles();

schedule();

// Record eventual count of vector packs for checks in post loop vectorization
@@ -2336,6 +2338,230 @@ void SuperWord::verify_packs() {
}
#endif

// The PacksetGraph combines the DepPreds graph with the packset. In the PackSet
// graph, we have two kinds of nodes:
// (1) pack-node: Represents all nodes of some pack p in a single node, which
// shall later become a vector node.
// (2) scalar-node: Represents a node that is not in any pack.
// For any edge (n1, n2) in DepPreds, we add an edge to the PacksetGraph for the
// PacksetGraph nodes corresponding to n1 and n2.
// We work from the DepPreds graph, because it gives us all the data-dependencies,
// as well as more refined memory-dependencies than the C2 graph. DepPreds does
// not have cycles. But packing nodes can introduce cyclic dependencies. Example:
//
//                          +--------+
//  A -> X                  |        v
//  Pack [A,B] and [X,Y]  [A,B]    [X,Y]
//  Y -> B                  ^        |
//                          +--------+
//
class PacksetGraph {
private:
// pid: packset graph node id.
GrowableArray<int> _pid; // bb_idx(n) -> pid
GrowableArray<GrowableArray<int>> _out; // out-edges
GrowableArray<int> _incnt; // number of (implicit) in-edges
int _max_pid = 0;

SuperWord* _slp;
public:
PacksetGraph(SuperWord* slp)
: _pid(8, 0, /* default */ 0), _slp(slp) {
}
// Get pid, if there is a packset node that n belongs to. Else return 0.
int get_pid_or_zero(const Node* n) const {
if (!_slp->in_bb(n)) {
return 0;
}
int idx = _slp->bb_idx(n);
if (idx >= _pid.length()) {
return 0;
} else {
return _pid.at(idx);
}
}
int get_pid(const Node* n) {
int poz = get_pid_or_zero(n);
assert(poz != 0, "pid should not be zero");
return poz;
}
void set_pid(const Node* n, int pid) {
assert(n != nullptr && pid > 0, "sane inputs");
assert(_slp->in_bb(n), "must be");
int idx = _slp->bb_idx(n);
_pid.at_put_grow(idx, pid);
}
int new_pid() {
_incnt.push(0);
_out.push(GrowableArray<int>());
return ++_max_pid;
}
int incnt(int pid) { return _incnt.at(pid - 1); }
void incnt_set(int pid, int cnt) { return _incnt.at_put(pid - 1, cnt); }
GrowableArray<int>& out(int pid) { return _out.at(pid - 1); }

// Create nodes (from packs and scalar-nodes), and add edges, based on DepPreds.
void build() {
const GrowableArray<Node_List*> &packset = _slp->packset();
const GrowableArray<Node*> &block = _slp->block();
const DepGraph &dg = _slp->dg();
// Map nodes in packsets
for (int i = 0; i < packset.length(); i++) {
Node_List* p = packset.at(i);
int pid = new_pid();
for (uint k = 0; k < p->size(); k++) {
Node* n = p->at(k);
set_pid(n, pid);
}
}

int max_pid_packset = _max_pid;

// Map nodes not in packset
for (int i = 0; i < block.length(); i++) {
Node* n = block.at(i);
if (n->is_Phi() || n->is_CFG()) {
continue; // ignore control flow
}
int pid = get_pid_or_zero(n);
if (pid == 0) {
pid = new_pid();
set_pid(n, pid);
}
}

// Map edges for packset nodes
VectorSet set;
for (int i = 0; i < packset.length(); i++) {
Node_List* p = packset.at(i);
set.clear();
int pid = get_pid(p->at(0));
for (uint k = 0; k < p->size(); k++) {
Node* n = p->at(k);
assert(pid == get_pid(n), "all nodes in pack have same pid");
for (DepPreds preds(n, dg); !preds.done(); preds.next()) {
Node* pred = preds.current();
int pred_pid = get_pid_or_zero(pred);
if (pred_pid == pid && n->is_reduction()) {
continue; // reduction -> self-cycle is not a cyclic dependency
}
// Only add edges once, and only for mapped nodes (in block)
if (pred_pid > 0 && !set.test_set(pred_pid)) {
incnt_set(pid, incnt(pid) + 1); // increment
out(pred_pid).push(pid);
}
}
}
}

// Map edges for nodes not in packset
for (int i = 0; i < block.length(); i++) {
Node* n = block.at(i);
int pid = get_pid_or_zero(n); // zero for Phi or CFG
if (pid <= max_pid_packset) {
continue; // Only scalar-nodes
}
for (DepPreds preds(n, dg); !preds.done(); preds.next()) {
Node* pred = preds.current();
int pred_pid = get_pid_or_zero(pred);
// Only add edges for mapped nodes (in block)
if (pred_pid > 0) {
incnt_set(pid, incnt(pid) + 1); // increment
out(pred_pid).push(pid);
}
}
}
}
// Schedule the graph to worklist. Returns true iff all nodes were scheduled.
// This implies that we return true iff the PacksetGraph is acyclic.
// We schedule with topological sort: schedule any node that has zero incnt.
// Then remove that node, which decrements the incnt of all its uses (outputs).
bool schedule() {
GrowableArray<int> worklist;
// Directly schedule all nodes without precedence
for (int pid = 1; pid <= _max_pid; pid++) {
if (incnt(pid) == 0) {
worklist.push(pid);
}
}
// Continue scheduling via topological sort
for (int i = 0; i < worklist.length(); i++) {
int pid = worklist.at(i);
for (int j = 0; j < out(pid).length(); j++){
int pid_use = out(pid).at(j);
int incnt_use = incnt(pid_use) - 1;
incnt_set(pid_use, incnt_use);
// Did use lose its last input?
if (incnt_use == 0) {
worklist.push(pid_use);
}
}
}
// Was every pid scheduled?
return worklist.length() == _max_pid;
}
// Print the PacksetGraph.
// print_nodes = true: print all C2 nodes belonging to the PacksetGraph node.
// print_zero_incnt = false: do not print nodes that have no in-edges (any more).
void print(bool print_nodes, bool print_zero_incnt) {
const GrowableArray<Node*> &block = _slp->block();
tty->print_cr("PacksetGraph");
for (int pid = 1; pid <= _max_pid; pid++) {
if (incnt(pid) == 0 && !print_zero_incnt) {
continue;
}
tty->print("Node %d. incnt %d [", pid, incnt(pid));
for (int j = 0; j < out(pid).length(); j++) {
tty->print("%d ", out(pid).at(j));
}
tty->print_cr("]");
#ifndef PRODUCT
if (print_nodes) {
for (int i = 0; i < block.length(); i++) {
Node* n = block.at(i);
if (get_pid_or_zero(n) == pid) {
tty->print(" ");
n->dump();
}
}
}
#endif
}
}
};
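[Editor's note, not part of the patch] The cycle-detection idea above can be illustrated outside of HotSpot. The following is a minimal standalone C++ sketch, an illustration only: it uses std:: containers and made-up node ids rather than C2's GrowableArray and Node indices. It applies the same worklist scheme as PacksetGraph::schedule(): contracting the packs [A,B] and [X,Y] from the comment above into one node each turns the acyclic scalar edges A -> X and Y -> B into a 2-cycle, and Kahn's topological sort then fails to schedule every node.

#include <cstdio>
#include <utility>
#include <vector>

// Kahn's algorithm: returns true iff every node can be scheduled,
// i.e. iff the graph is acyclic (same scheme as PacksetGraph::schedule above).
static bool schedule_all(int num_nodes, const std::vector<std::pair<int,int>>& edges) {
  std::vector<std::vector<int>> out(num_nodes);   // out-edges
  std::vector<int> incnt(num_nodes, 0);           // number of in-edges
  for (const auto& e : edges) {
    out[e.first].push_back(e.second);
    incnt[e.second]++;
  }
  std::vector<int> worklist;
  for (int n = 0; n < num_nodes; n++) {
    if (incnt[n] == 0) worklist.push_back(n);     // nodes without precedence
  }
  for (size_t i = 0; i < worklist.size(); i++) {
    for (int use : out[worklist[i]]) {
      if (--incnt[use] == 0) worklist.push_back(use);  // use lost its last input
    }
  }
  return worklist.size() == (size_t)num_nodes;    // was every node scheduled?
}

int main() {
  // Scalar view: A=0, B=1, X=2, Y=3 with edges A -> X and Y -> B: acyclic.
  printf("scalar view acyclic: %d\n", schedule_all(4, {{0, 2}, {3, 1}}));
  // Pack view: [A,B]=0, [X,Y]=1: the two scalar edges collapse into a 2-cycle.
  printf("pack view acyclic:   %d\n", schedule_all(2, {{0, 1}, {1, 0}}));
  return 0;
}

In remove_cycles() below, the analogous failure of PacksetGraph::schedule() is what triggers clearing the whole packset.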

//------------------------------remove_cycles---------------------------
// We now know that we only have independent packs, see verify_packs.
// This is a necessary but not a sufficient condition for an acyclic
// graph (DAG) after scheduling. Thus, we must check if the packs have
// introduced a cycle. The SuperWord paper mentions the need for this
// in "3.7 Scheduling".
// Approach: given all nodes from the _block, we create a new graph.
// The nodes that are not in a pack are their own nodes (scalar-node)
// in that new graph. Every pack is also a node (pack-node). We then
// add the edges according to DepPreds: a scalar-node has all edges
// to its node's DepPreds. A pack-node has all edges from every pack
// member to all their DepPreds.
void SuperWord::remove_cycles() {
if (_packset.length() == 0) {
return; // empty packset
}
ResourceMark rm;

PacksetGraph graph(this);

graph.build();

if (!graph.schedule()) {
if (TraceSuperWord) {
tty->print_cr("remove_cycles found cycle in PacksetGraph:");
graph.print(true, false);
tty->print_cr("removing all packs from packset.");
}
_packset.clear();
}
}
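[Editor's note, not part of the patch] The graph contraction that build() performs can be sketched in the same standalone style. Again this is only an illustration with made-up ids, and it is simplified: it drops every in-group edge, whereas the real code only tolerates such self-edges for reductions. Each scalar node is mapped to a group id (its pack, or a singleton group), and every DepPreds-style edge is translated into an edge between group ids.

#include <cstdio>
#include <set>
#include <utility>
#include <vector>

// Contract a scalar dependence graph according to a node -> group id map.
// Edges within one group are dropped; duplicate group edges are collapsed.
static std::set<std::pair<int,int>> contract(const std::vector<std::pair<int,int>>& scalar_edges,
                                             const std::vector<int>& group_of) {
  std::set<std::pair<int,int>> group_edges;
  for (const auto& e : scalar_edges) {
    int g_from = group_of[e.first];
    int g_to   = group_of[e.second];
    if (g_from != g_to) {                 // keep only cross-group dependencies
      group_edges.insert({g_from, g_to});
    }
  }
  return group_edges;
}

int main() {
  // Nodes A=0, B=1, X=2, Y=3; packs [A,B] -> group 0 and [X,Y] -> group 1.
  std::vector<int> group_of = {0, 0, 1, 1};
  // Scalar dependencies from the example above: A -> X and Y -> B.
  std::vector<std::pair<int,int>> scalar_edges = {{0, 2}, {3, 1}};
  for (const auto& e : contract(scalar_edges, group_of)) {
    printf("group %d -> group %d\n", e.first, e.second);  // prints 0 -> 1 and 1 -> 0: a cycle
  }
  return 0;
}

Feeding the contracted edges into a topological sort, as in the sketch after the PacksetGraph class above, then reports the cycle, and the packset is cleared.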

//------------------------------schedule---------------------------
// Adjust the memory graph for the packed operations
void SuperWord::schedule() {
@@ -4920,7 +5146,7 @@ void DepEdge::print() {
// Iterator over predecessor edges in the dependence graph.

//------------------------------DepPreds---------------------------
DepPreds::DepPreds(Node* n, DepGraph& dg) {
DepPreds::DepPreds(Node* n, const DepGraph& dg) {
_n = n;
_done = false;
if (_n->is_Store() || _n->is_Load()) {

@@ -131,7 +131,7 @@ class DepGraph {
DepMem* tail() { return _tail; }

// Return dependence node corresponding to an ideal node
DepMem* dep(Node* node) { return _map.at(node->_idx); }
DepMem* dep(Node* node) const { return _map.at(node->_idx); }

// Make a new dependence graph node for an ideal node.
DepMem* make_node(Node* node);
@@ -161,7 +161,7 @@ private:
bool _done;

public:
DepPreds(Node* n, DepGraph& dg);
DepPreds(Node* n, const DepGraph& dg);
Node* current() { return _current; }
bool done() { return _done; }
void next();
@@ -349,6 +349,10 @@ class SuperWord : public ResourceObj {
#endif
bool do_vector_loop() { return _do_vector_loop; }
bool do_reserve_copy() { return _do_reserve_copy; }

const GrowableArray<Node_List*>& packset() const { return _packset; }
const GrowableArray<Node*>& block() const { return _block; }
const DepGraph& dg() const { return _dg; }
private:
IdealLoopTree* _lpt; // Current loop tree node
CountedLoopNode* _lp; // Current CountedLoopNode
@@ -412,12 +416,14 @@ class SuperWord : public ResourceObj {
MemNode* align_to_ref() { return _align_to_ref; }
void set_align_to_ref(MemNode* m) { _align_to_ref = m; }

Node* ctrl(Node* n) const { return _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; }
const Node* ctrl(const Node* n) const { return _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; }

// block accessors
bool in_bb(Node* n) { return n != nullptr && n->outcnt() > 0 && ctrl(n) == _bb; }
int bb_idx(Node* n) { assert(in_bb(n), "must be"); return _bb_idx.at(n->_idx); }
void set_bb_idx(Node* n, int i) { _bb_idx.at_put_grow(n->_idx, i); }
public:
bool in_bb(const Node* n) const { return n != nullptr && n->outcnt() > 0 && ctrl(n) == _bb; }
int bb_idx(const Node* n) const { assert(in_bb(n), "must be"); return _bb_idx.at(n->_idx); }
private:
void set_bb_idx(Node* n, int i) { _bb_idx.at_put_grow(n->_idx, i); }

// visited set accessors
void visited_clear() { _visited.clear(); }
@@ -554,6 +560,8 @@ class SuperWord : public ResourceObj {
void merge_packs_to_cmove();
// Verify that for every pack, all nodes are mutually independent
DEBUG_ONLY(void verify_packs();)
// Remove cycles in packset.
void remove_cycles();
// Adjust the memory graph for the packed operations
void schedule();
// Remove "current" from its current position in the memory graph and insert

@@ -724,6 +724,16 @@ public class IRNode {
beforeMatchingNameRegex(MUL_VI, "MulVI");
}

public static final String MUL_VF = PREFIX + "MUL_VF" + POSTFIX;
static {
beforeMatchingNameRegex(MUL_VF, "MulVF");
}

public static final String MUL_VD = PREFIX + "MUL_VD" + POSTFIX;
static {
beforeMatchingNameRegex(MUL_VD, "MulVD");
}

public static final String MUL_REDUCTION_VD = PREFIX + "MUL_REDUCTION_VD" + POSTFIX;
static {
superWordNodes(MUL_REDUCTION_VD, "MulReductionVD");

@@ -0,0 +1,482 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

/*
* @test
* @bug 8304042
* @summary Test some examples with independent packs with cyclic dependency
* between the packs.
* @requires vm.compiler2.enabled
* @requires vm.bits == 64
* @requires vm.cpu.features ~= ".*avx2.*" | vm.cpu.features ~= ".*asimd.*"
* @modules java.base/jdk.internal.misc
* @library /test/lib /
* @run driver compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency
*/

package compiler.loopopts.superword;

import jdk.internal.misc.Unsafe;
import jdk.test.lib.Asserts;
import compiler.lib.ir_framework.*;

public class TestIndependentPacksWithCyclicDependency {
static final int RANGE = 1024;
static final int ITER = 10_000;
static Unsafe unsafe = Unsafe.getUnsafe();

int[] goldI0 = new int[RANGE];
float[] goldF0 = new float[RANGE];
int[] goldI1 = new int[RANGE];
float[] goldF1 = new float[RANGE];
int[] goldI2 = new int[RANGE];
float[] goldF2 = new float[RANGE];
int[] goldI3 = new int[RANGE];
float[] goldF3 = new float[RANGE];
int[] goldI4 = new int[RANGE];
float[] goldF4 = new float[RANGE];
int[] goldI5 = new int[RANGE];
float[] goldF5 = new float[RANGE];
int[] goldI6 = new int[RANGE];
float[] goldF6 = new float[RANGE];
long[] goldL6 = new long[RANGE];
int[] goldI7 = new int[RANGE];
float[] goldF7 = new float[RANGE];
long[] goldL7 = new long[RANGE];
int[] goldI8 = new int[RANGE];
float[] goldF8 = new float[RANGE];
long[] goldL8 = new long[RANGE];
int[] goldI9 = new int[RANGE];
float[] goldF9 = new float[RANGE];
long[] goldL9 = new long[RANGE];
int[] goldI10 = new int[RANGE];
float[] goldF10 = new float[RANGE];
long[] goldL10 = new long[RANGE];

public static void main(String args[]) {
TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
"-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::test*",
"-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::verify",
"-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency::init",
"-XX:LoopUnrollLimit=1000");
}

TestIndependentPacksWithCyclicDependency() {
// compute the gold standard in interpreter mode
init(goldI0, goldF0);
test0(goldI0, goldI0, goldF0, goldF0);
init(goldI1, goldF1);
test1(goldI1, goldI1, goldF1, goldF1);
init(goldI2, goldF2);
test2(goldI2, goldI2, goldF2, goldF2);
init(goldI3, goldF3);
test3(goldI3, goldI3, goldF3, goldF3);
init(goldI4, goldF4);
test4(goldI4, goldI4, goldF4, goldF4);
// init(goldI5, goldF5);
// test5(goldI5, goldI5, goldF5, goldF5);
init(goldI6, goldF6, goldL6);
test6(goldI6, goldI6, goldF6, goldF6, goldL6, goldL6);
init(goldI7, goldF7, goldL7);
test7(goldI7, goldI7, goldF7, goldF7, goldL7, goldL7);
init(goldI8, goldF8, goldL8);
test8(goldI8, goldI8, goldF8, goldF8, goldL8, goldL8);
init(goldI9, goldF9, goldL9);
test9(goldI9, goldI9, goldF9, goldF9, goldL9, goldL9);
init(goldI10, goldF10, goldL10);
test10(goldI10, goldI10, goldF10, goldF10, goldL10, goldL10);
}

@Run(test = "test0")
@Warmup(100)
public void runTest0() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
init(dataI, dataF);
test0(dataI, dataI, dataF, dataF);
verify("test0", dataI, goldI0);
verify("test0", dataF, goldF0);
}

@Test
@IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static void test0(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
for (int i = 0; i < RANGE; i+=2) {
// Hand-unrolled 2x. Int and Float slice are completely separate.
dataIb[i+0] = dataIa[i+0] + 3;
dataIb[i+1] = dataIa[i+1] + 3;
dataFb[i+0] = dataFa[i+0] * 1.3f;
dataFb[i+1] = dataFa[i+1] * 1.3f;
}
}

@Run(test = "test1")
@Warmup(100)
public void runTest1() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
init(dataI, dataF);
test1(dataI, dataI, dataF, dataF);
verify("test1", dataI, goldI1);
verify("test1", dataF, goldF1);
}

@Test
@IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0", IRNode.VECTOR_CAST_F2X, "> 0", IRNode.VECTOR_CAST_I2X, "> 0"},
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
static void test1(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
for (int i = 0; i < RANGE; i+=2) {
// Hand-unrolled 2x. Converts int to float and float to int. StoreF -> LoadF dependency.
dataFa[i+0] = dataIa[i+0] + 3;
dataFa[i+1] = dataIa[i+1] + 3;
dataIb[i+0] = (int)(dataFb[i+0] * 1.3f);
dataIb[i+1] = (int)(dataFb[i+1] * 1.3f);
}
}

@Run(test = "test2")
public void runTest2() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
init(dataI, dataF);
test2(dataI, dataI, dataF, dataF);
verify("test2", dataI, goldI2);
verify("test2", dataF, goldF2);
}

@Test
@IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
for (int i = 0; i < RANGE; i+=2) {
// int and float arrays are two slices. But we pretend both are of type int.
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1);
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1);
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0);
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4);
}
}

@Run(test = "test3")
@Warmup(100)
public void runTest3() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
init(dataI, dataF);
test3(dataI, dataI, dataF, dataF);
verify("test3", dataI, goldI3);
verify("test3", dataF, goldF3);
}

@Test
@IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VF, "> 0"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static void test3(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
for (int i = 0; i < RANGE; i+=2) {
// Inversion of orders. But because we operate on separate slices, this should
// safely vectorize. It should detect that each line is independent, so it can
// reorder them.
dataIb[i+0] = dataIa[i+0] + 3;
dataFb[i+1] = dataFa[i+1] * 1.3f;
dataFb[i+0] = dataFa[i+0] * 1.3f;
dataIb[i+1] = dataIa[i+1] + 3;
}
}

@Run(test = "test4")
@Warmup(100)
public void runTest4() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
init(dataI, dataF);
test4(dataI, dataI, dataF, dataF);
verify("test4", dataI, goldI4);
verify("test4", dataF, goldF4);
}

@Test
static void test4(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
for (int i = 0; i < RANGE; i+=2) {
// same as test1, except that reordering leads to different semantics
// [A,B] and [X,Y] are both packs that are internally independent
// But we have dependencies A -> X (StoreF -> LoadF)
// and Y -> B (StoreI -> LoadI)
// Hence the two packs have a cyclic dependency, we cannot schedule
// one before the other.
dataFa[i+0] = dataIa[i+0] + 3; // A
dataIb[i+0] = (int)(dataFb[i+0] * 1.3f); // X
dataIb[i+1] = (int)(dataFb[i+1] * 1.3f); // Y
dataFa[i+1] = dataIa[i+1] + 3; // B
}
}

// TODO uncomment after fixing JDK-8304720
//
// @Run(test = "test5")
// public void runTest5() {
// int[] dataI = new int[RANGE];
// float[] dataF = new float[RANGE];
// init(dataI, dataF);
// test5(dataI, dataI, dataF, dataF);
// verify("test5", dataI, goldI5);
// verify("test5", dataF, goldF5);
// }
//
// @Test
// static void test5(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
// for (int i = 0; i < RANGE; i+=2) {
// // same as test2, except that reordering leads to different semantics
// // explanation analogue to test4
// unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1); // A
// dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0); // X
// dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4); // Y
// unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1); // B
// }
// }

@Run(test = "test6")
public void runTest6() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
long[] dataL = new long[RANGE];
init(dataI, dataF, dataL);
test6(dataI, dataI, dataF, dataF, dataL, dataL);
verify("test6", dataI, goldI6);
verify("test6", dataF, goldF6);
verify("test6", dataL, goldL6);
}

@Test
@IR(counts = {IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.ADD_VF, "> 0"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static void test6(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// Chain of parallelizable op and conversion
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
}
}

@Run(test = "test7")
public void runTest7() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
long[] dataL = new long[RANGE];
init(dataI, dataF, dataL);
test7(dataI, dataI, dataF, dataF, dataL, dataL);
verify("test7", dataI, goldI7);
verify("test7", dataF, goldF7);
verify("test7", dataL, goldL7);
}

@Test
static void test7(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// Cycle involving 3 memory slices
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // moved down
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
}
}


@Run(test = "test8")
public void runTest8() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
long[] dataL = new long[RANGE];
init(dataI, dataF, dataL);
test8(dataI, dataI, dataF, dataF, dataL, dataL);
verify("test8", dataI, goldI8);
verify("test8", dataF, goldF8);
verify("test8", dataL, goldL8);
}

@Test
static void test8(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// 2-cycle, with more ops after
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
// more stuff after
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
}
}

@Run(test = "test9")
public void runTest9() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
long[] dataL = new long[RANGE];
init(dataI, dataF, dataL);
test9(dataI, dataI, dataF, dataF, dataL, dataL);
verify("test9", dataI, goldI9);
verify("test9", dataF, goldF9);
verify("test9", dataL, goldL9);
}

@Test
static void test9(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// 2-cycle, with more stuff before
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
// 2-cycle
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
}
}

@Run(test = "test10")
public void runTest10() {
int[] dataI = new int[RANGE];
float[] dataF = new float[RANGE];
long[] dataL = new long[RANGE];
init(dataI, dataF, dataL);
test10(dataI, dataI, dataF, dataF, dataL, dataL);
verify("test10", dataI, goldI10);
verify("test10", dataF, goldF10);
verify("test10", dataL, goldL10);
}

@Test
static void test10(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// This creates the following graph before SuperWord:
//
// A -> R -> U
// S -> V -> B
//
// SuperWord analyzes the graph, and sees that [A,B] and [U,V]
// are adjacent, isomorphic and independent packs. However,
// [R,S] are not isomorphic (R mul, S add).
// So it vectorizes [A,B] and [U,V]; this gives us this graph:
//
// -> R
// [A,B] -> [U,V] -+
// ^ -> S |
// | |
// +------------------+
//
// The cycle thus does not only go via packs, but also scalar ops.
//
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3; // A
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45; // R: constant mismatch
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43; // S
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f; // U
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f; // V
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // B: moved down
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
}
}

static void init(int[] dataI, float[] dataF) {
for (int i = 0; i < RANGE; i++) {
dataI[i] = i + 1;
dataF[i] = i + 0.1f;
}
}

static void init(int[] dataI, float[] dataF, long[] dataL) {
for (int i = 0; i < RANGE; i++) {
dataI[i] = i + 1;
dataF[i] = i + 0.1f;
dataL[i] = i + 1;
}
}

static void verify(String name, int[] data, int[] gold) {
for (int i = 0; i < RANGE; i++) {
if (data[i] != gold[i]) {
throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]);
}
}
}

static void verify(String name, float[] data, float[] gold) {
for (int i = 0; i < RANGE; i++) {
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
if (datav != goldv) {
throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
}
}
}

static void verify(String name, long[] data, long[] gold) {
for (int i = 0; i < RANGE; i++) {
if (data[i] != gold[i]) {
throw new RuntimeException(" Invalid " + name + " result: dataL[" + i + "]: " + data[i] + " != " + gold[i]);
}
}
}
}

@@ -0,0 +1,123 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

/*
* @test
* @bug 8304042
* @summary Test some examples with independent packs with cyclic dependency
* between the packs.
* Before fix, this hit: "assert(!is_visited) failed: visit only once"
* @requires vm.compiler2.enabled
* @requires vm.bits == 64
* @requires vm.cpu.features ~= ".*avx2.*" | vm.cpu.features ~= ".*asimd.*"
* @modules java.base/jdk.internal.misc
* @library /test/lib /
* @run main/othervm -XX:LoopUnrollLimit=250
* -XX:CompileCommand=compileonly,compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency2::test
* compiler.loopopts.superword.TestIndependentPacksWithCyclicDependency2
*/

package compiler.loopopts.superword;

import jdk.test.lib.Asserts;
import jdk.internal.misc.Unsafe;

public class TestIndependentPacksWithCyclicDependency2 {
static final int RANGE = 1024;
static final int ITER = 10_000;

static Unsafe unsafe = Unsafe.getUnsafe();

static void init(int[] dataI, float[] dataF, long[] dataL) {
for (int i = 0; i < RANGE; i++) {
dataI[i] = i + 1;
dataF[i] = i + 0.1f;
dataL[i] = (long)(i + 1);
}
}

static void test(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb,
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// For explanation, see test 10 in TestIndependentPacksWithCyclicDependency.java
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // moved down
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
}
}

static void verify(String name, int[] data, int[] gold) {
for (int i = 0; i < RANGE; i++) {
if (data[i] != gold[i]) {
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
}
}
}

static void verify(String name, float[] data, float[] gold) {
for (int i = 0; i < RANGE; i++) {
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
if (datav != goldv) {
throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
}
}
}

static void verify(String name, long[] data, long[] gold) {
for (int i = 0; i < RANGE; i++) {
if (data[i] != gold[i]) {
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
}
}
}

public static void main(String[] args) {
int[] dataI = new int[RANGE];
int[] goldI = new int[RANGE];
float[] dataF = new float[RANGE];
float[] goldF = new float[RANGE];
long[] dataL = new long[RANGE];
long[] goldL = new long[RANGE];
init(goldI, goldF, goldL);
test(goldI, goldI, goldF, goldF, goldL, goldL);
for (int i = 0; i < ITER; i++) {
init(dataI, dataF, dataL);
test(dataI, dataI, dataF, dataF, dataL, dataL);
}
verify("test", dataI, goldI);
verify("test", dataF, goldF);
verify("test", dataL, goldL);
}
}