8306302: C2 Superword fix: use VectorMaskCmp and VectorBlend instead of CMoveVF/D

Reviewed-by: fgao, jbhateja
This commit is contained in:
Emanuel Peter
2023-05-24 07:00:27 +00:00
parent 2836c34b64
commit beb75e651f
13 changed files with 820 additions and 583 deletions

View File

@@ -5992,49 +5992,6 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
ins_pipe(pipe_slow);
%}
// ------------------------- Vector conditional move --------------------------
// Vector FP conditional move implemented with NEON instructions.
// Matches CMoveVF/CMoveVD in the shape (Binary copnd cond) (Binary src1 src2).
instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
// Chosen when SVE is off, or when NEON is preferred for this vector length and
// the condition constant (n->in(1)->in(2), i.e. the cond operand) is not BoolTest::ne.
predicate(UseSVE == 0 ||
(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
n->in(1)->in(2)->get_int() != BoolTest::ne));
match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
// dst is handed to the compare helper before bsl reads src1/src2.
// NOTE(review): presumably the compare writes its mask into dst, which is why
// dst is TEMP_DEF - confirm against neon_compare.
effect(TEMP_DEF dst);
format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
ins_encode %{
// cond carries a BoolTest::mask value; translate it to an assembler condition.
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
// Only 8-byte and 16-byte vectors are handled by this rule.
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
// Step 1: element-wise compare of src1 with src2 (16-byte vectors use the Q form).
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
// Step 2: bsl on dst with src2 and src1 as operands.
// NOTE(review): presumably bsl keeps src2 bits where dst bits are set and
// src1 bits elsewhere - confirm against the Arm architecture manual.
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src2$$FloatRegister, $src1$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
// Vector FP conditional move implemented with SVE instructions.
// Matches CMoveVF/CMoveVD in the shape (Binary copnd cond) (Binary src1 src2).
instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{
  // Chosen when NEON is not used for this vector length, or when SVE is
  // available and the condition constant (n->in(1)->in(2)) is BoolTest::ne.
  predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) ||
            (UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne));
  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
  // pgtmp is a scratch predicate register that receives the compare result.
  effect(TEMP pgtmp);
  format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
  ins_encode %{
    assert(UseSVE > 0, "must be sve");
    // cond carries a BoolTest::mask value; translate it to an assembler condition.
    Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // Step 1: compare src1 with src2, passing ptrue as the predicate argument;
    // the outcome lands in pgtmp.
    __ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
                   $src2$$FloatRegister, condition);
    // Step 2: per-element select between src2 and src1 controlled by pgtmp.
    __ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
               $pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}
// ------------------------------ Vector round ---------------------------------
// vector Math.round

View File

@@ -4258,49 +4258,6 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
ins_pipe(pipe_slow);
%}
// ------------------------- Vector conditional move --------------------------
// Vector FP conditional move implemented with NEON instructions.
// Matches CMoveVF/CMoveVD in the shape (Binary copnd cond) (Binary src1 src2).
instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
// Chosen when SVE is off, or when NEON is preferred for this vector length and
// the condition constant (n->in(1)->in(2), i.e. the cond operand) is not BoolTest::ne.
predicate(UseSVE == 0 ||
(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
n->in(1)->in(2)->get_int() != BoolTest::ne));
match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
// dst is handed to the compare helper before bsl reads src1/src2.
// NOTE(review): presumably the compare writes its mask into dst, which is why
// dst is TEMP_DEF - confirm against neon_compare.
effect(TEMP_DEF dst);
format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
ins_encode %{
// cond carries a BoolTest::mask value; translate it to an assembler condition.
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
BasicType bt = Matcher::vector_element_basic_type(this);
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
// Only 8-byte and 16-byte vectors are handled by this rule.
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
// Step 1: element-wise compare of src1 with src2 (16-byte vectors use the Q form).
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
// Step 2: bsl on dst with src2 and src1 as operands.
// NOTE(review): presumably bsl keeps src2 bits where dst bits are set and
// src1 bits elsewhere - confirm against the Arm architecture manual.
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
$src2$$FloatRegister, $src1$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
// Vector FP conditional move implemented with SVE instructions.
// Matches CMoveVF/CMoveVD in the shape (Binary copnd cond) (Binary src1 src2).
instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{
  // Chosen when NEON is not used for this vector length, or when SVE is
  // available and the condition constant (n->in(1)->in(2)) is BoolTest::ne.
  predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) ||
            (UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne));
  match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
  match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
  // pgtmp is a scratch predicate register that receives the compare result.
  effect(TEMP pgtmp);
  format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
  ins_encode %{
    assert(UseSVE > 0, "must be sve");
    // cond carries a BoolTest::mask value; translate it to an assembler condition.
    Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // Step 1: compare src1 with src2, passing ptrue as the predicate argument;
    // the outcome lands in pgtmp.
    __ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
                   $src2$$FloatRegister, condition);
    // Step 2: per-element select between src2 and src1 controlled by pgtmp.
    __ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
               $pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}
// ------------------------------ Vector round ---------------------------------
// vector Math.round

View File

@@ -1504,12 +1504,6 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_CMoveVF:
case Op_CMoveVD:
if (UseAVX < 1) { // enabled for AVX only
return false;
}
break;
case Op_StrIndexOf:
if (!UseSSE42Intrinsics) {
return false;
@@ -1740,11 +1734,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false; // 512bit vpmullq, vandpd and vxorpd are not available
}
break;
case Op_CMoveVF:
if (vlen != 8) {
return false; // implementation limitation (only vcmov8F_reg is present)
}
break;
case Op_RotateRightV:
case Op_RotateLeftV:
if (bt != T_INT && bt != T_LONG) {
@@ -1772,11 +1761,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false;
}
break;
case Op_CMoveVD:
if (vlen != 4) {
return false; // implementation limitation (only vcmov4D_reg is present)
}
break;
case Op_MaxV:
case Op_MinV:
if (UseSSE < 4 && is_integral_type(bt)) {
@@ -2947,29 +2931,6 @@ operand legVecZ() %{
interface(REG_INTER);
%}
// Comparison Code for FP conditional move
// Condition operand for the vector FP conditional-move rules. It matches any
// Bool node whose test is neither overflow nor no_overflow. The encodings
// listed below are what a rule reads back through $copnd$$cmpcode.
// NOTE(review): the hex values look like the AVX VCMPPS/VCMPPD comparison
// predicate immediates - confirm against the Intel SDM.
operand cmpOp_vcmppd() %{
match(Bool);
predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
n->as_Bool()->_test._test != BoolTest::no_overflow);
format %{ "" %}
interface(COND_INTER) %{
equal (0x0, "eq");
less (0x1, "lt");
less_equal (0x2, "le");
not_equal (0xC, "ne");
greater_equal(0xD, "ge");
greater (0xE, "gt");
//TODO adlc fails to compile this operand without the next two lines; it reports:
// x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
// equal' for overflow.
// The two entries below exist only to satisfy adlc; the predicate above
// keeps overflow/no_overflow Bool nodes from ever matching this operand.
overflow (0x20, "o"); // not really supported by the instruction
no_overflow (0x21, "no"); // not really supported by the instruction
%}
%}
// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
// ============================================================================
@@ -5983,42 +5944,6 @@ instruct vmulD_mem(vec dst, vec src, memory mem) %{
ins_pipe( pipe_slow );
%}
// Vector float conditional move (CMoveVF) for vectors of exactly 8 elements,
// emitted as a vector compare followed by a variable blend.
instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
predicate(Matcher::vector_length(n) == 8);
match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
// dst is passed to the compare before the blend reads src1/src2, hence TEMP.
effect(TEMP dst, USE src1, USE src2);
// NOTE(review): the mnemonics printed here (cmpps/blendvps) differ from the
// vcmpps/vblendvps calls made in the encoding below.
format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
"blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
%}
ins_encode %{
assert(UseAVX > 0, "required");
// The encoding is fixed to 256 bits; the predicate limits this rule to 8 floats.
int vlen_enc = Assembler::AVX_256bit;
// Comparison predicate comes from the cmpOp_vcmppd operand encoding.
int cond = (Assembler::Condition)($copnd$$cmpcode);
// Step 1: compare src1 with src2; dst is the first register argument.
__ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc);
// Step 2: blend src1/src2 with dst passed again as the last register argument.
// NOTE(review): presumably that last register is the selection mask - confirm
// the vblendvps operand order in the assembler.
__ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Vector double conditional move (CMoveVD) for vectors of exactly 4 elements,
// emitted as a vector compare followed by a variable blend.
instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
predicate(Matcher::vector_length(n) == 4);
match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
// dst is passed to the compare before the blend reads src1/src2, hence TEMP.
effect(TEMP dst, USE src1, USE src2);
// NOTE(review): the first mnemonic printed here (cmppd) differs from the
// vcmppd call made in the encoding below.
format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
"vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
%}
ins_encode %{
assert(UseAVX > 0, "required");
// The encoding is fixed to 256 bits; the predicate limits this rule to 4 doubles.
int vlen_enc = Assembler::AVX_256bit;
// Comparison predicate comes from the cmpOp_vcmppd operand encoding.
int cond = (Assembler::Condition)($copnd$$cmpcode);
// Step 1: compare src1 with src2; dst is the first register argument.
__ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc);
// Step 2: blend src1/src2 with dst passed again as the last register argument.
// NOTE(review): presumably that last register is the selection mask - confirm
// the vblendvpd operand order in the assembler.
__ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- DIV --------------------------------------
// Floats vector div

View File

@@ -4203,7 +4203,6 @@ bool MatchRule::is_vector() const {
"AddVB","AddVS","AddVI","AddVL","AddVF","AddVD",
"SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
"CMoveVD", "CMoveVF",
"DivVF","DivVD",
"AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD",
"NegVF","NegVD","NegVI","NegVL",

View File

@@ -81,9 +81,7 @@ macro(CompressBitsV)
macro(ExpandBitsV)
macro(ConstraintCast)
macro(CMoveD)
macro(CMoveVD)
macro(CMoveF)
macro(CMoveVF)
macro(CMoveI)
macro(CMoveL)
macro(CMoveP)

View File

@@ -2385,20 +2385,6 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
n->del_req(3);
break;
}
case Op_CMoveVF:
case Op_CMoveVD: {
// Restructure into a binary tree for Matching:
// CMoveVF (Binary bool mask) (Binary src1 src2)
Node* in_cc = n->in(1);
assert(in_cc->is_Con(), "The condition input of cmove vector node must be a constant.");
Node* bol = new BoolNode(in_cc, (BoolTest::mask)in_cc->get_int());
Node* pair1 = new BinaryNode(bol, in_cc);
n->set_req(1, pair1);
Node* pair2 = new BinaryNode(n->in(2), n->in(3));
n->set_req(2, pair2);
n->del_req(3);
break;
}
case Op_MacroLogicV: {
Node* pair1 = new BinaryNode(n->in(1), n->in(2));
Node* pair2 = new BinaryNode(n->in(3), n->in(4));

View File

@@ -60,7 +60,6 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
_mem_slice_tail(arena(), 8, 0, nullptr), // memory slice tails
_node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
_clone_map(phase->C->clone_map()), // map of nodes created in cloning
_cmovev_kit(_arena, this), // map to facilitate CMoveV creation
_align_to_ref(nullptr), // memory reference to align vectors to
_disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
_dg(_arena), // dependence graph
@@ -619,9 +618,6 @@ bool SuperWord::SLP_extract() {
combine_packs();
construct_my_pack_map();
if (UseVectorCmov) {
merge_packs_to_cmove();
}
filter_packs();
@@ -1580,18 +1576,6 @@ void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
//------------------------------data_size---------------------------
int SuperWord::data_size(Node* s) {
Node* use = nullptr; //test if the node is a candidate for CMoveV optimization, then return the size of CMov
if (UseVectorCmov) {
use = _cmovev_kit.is_Bool_candidate(s);
if (use != nullptr) {
return data_size(use);
}
use = _cmovev_kit.is_Cmp_candidate(s);
if (use != nullptr) {
return data_size(use);
}
}
int bsize = type2aelembytes(velt_basic_type(s));
assert(bsize != 0, "valid size");
return bsize;
@@ -2052,213 +2036,6 @@ void SuperWord::filter_packs() {
#endif
}
//------------------------------merge_packs_to_cmove---------------------------
// Merge qualified CMove into new vector-nodes
// We want to catch this pattern and subsume Cmp and Bool into CMove
//
// Sub Con
// / | /
// / | / /
// / | / /
// / | / /
// / / /
// / / | /
// v / | /
// Cmp | /
// | | /
// v | /
// Bool | /
// \ | /
// \ | /
// \ | /
// \ | /
// \ v /
// CMove
//
void SuperWord::merge_packs_to_cmove() {
for (int i = _packset.length() - 1; i >= 0; i--) {
Node_List* pk = _packset.at(i);
if (_cmovev_kit.can_merge_cmove_pack(pk)) {
_cmovev_kit.make_cmove_pack(pk);
}
}
#ifndef PRODUCT
if (TraceSuperWord) {
tty->print_cr("\nSuperWord::merge_packs_to_cmove(): After merge");
print_packset();
tty->cr();
}
#endif
}
// Returns the CMove that consumes def when def is a Bool without control
// input and with exactly one use, and that use is a CMove of the same
// generation. Returns nullptr in every other case.
Node* CMoveKit::is_Bool_candidate(Node* def) const {
  const bool eligible = def->is_Bool() && def->in(0) == nullptr && def->outcnt() == 1;
  if (!eligible) {
    return nullptr;
  }

  Node* consumer = nullptr;
  for (DUIterator_Fast kmax, k = def->fast_outs(kmax); k < kmax; k++) {
    consumer = def->fast_out(k);
    const bool acceptable = _sw->same_generation(def, consumer) && consumer->is_CMove();
    if (!acceptable) {
      return nullptr;
    }
  }
  return consumer;
}
// Returns the CMove reached through def -> Bool -> CMove when def is a Cmp
// without control input and with exactly one use, that use qualifies under
// is_Bool_candidate(), and all three nodes share a generation. Returns
// nullptr in every other case.
Node* CMoveKit::is_Cmp_candidate(Node* def) const {
  const bool eligible = def->is_Cmp() && def->in(0) == nullptr && def->outcnt() == 1;
  if (!eligible) {
    return nullptr;
  }

  Node* cmove = nullptr;
  for (DUIterator_Fast kmax, k = def->fast_outs(kmax); k < kmax; k++) {
    Node* bol = def->fast_out(k);
    if (!_sw->same_generation(def, bol)) {
      return nullptr;
    }
    // The Bool itself must lead to a single CMove.
    cmove = is_Bool_candidate(bol);
    if (cmove == nullptr) {
      return nullptr;
    }
    if (!_sw->same_generation(def, cmove)) {
      return nullptr;
    }
  }
  return cmove;
}
// Decide whether cmove_pk is a floating-point CMove pack whose Bool pack and
// Cmp pack can be folded into it for vectorization. Only the first element of
// each pack is inspected here, plus the pack sizes; the lane-by-lane input
// check is delegated to test_cmp_pack().
bool CMoveKit::can_merge_cmove_pack(Node_List* cmove_pk) {
  Node* cmove = cmove_pk->at(0);

  if (!SuperWord::is_cmove_fp_opcode(cmove->Opcode())) {
    return false;
  }
  if (pack(cmove) != nullptr) {
    // Already part of a merged cmove pack.
    return false;
  }

  if (cmove->in(0) != nullptr) {
    NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: CMove %d has control flow, escaping...", cmove->_idx); cmove->dump();})
    return false;
  }

  // The condition input must be a single-use, control-free Bool of the same
  // generation that already sits in a pack.
  Node* bol = cmove->as_CMove()->in(CMoveNode::Condition);
  const bool bol_fits = bol->is_Bool() &&
                        bol->outcnt() == 1 &&
                        _sw->same_generation(bol, cmove) &&
                        bol->in(0) == nullptr &&
                        _sw->my_pack(bol) != nullptr;
  if (!bol_fits) {
    NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: Bool %d does not fit CMove %d for building vector, escaping...", bol->_idx, cmove->_idx); bol->dump();})
    return false;
  }
  Node_List* bool_pk = _sw->my_pack(bol);
  if (bool_pk->size() != cmove_pk->size()) {
    return false;
  }

  // Same requirements for the Cmp feeding the Bool.
  Node* cmp = bol->in(1);
  const bool cmp_fits = cmp->is_Cmp() &&
                        cmp->outcnt() == 1 &&
                        _sw->same_generation(cmp, cmove) &&
                        cmp->in(0) == nullptr &&
                        _sw->my_pack(cmp) != nullptr;
  if (!cmp_fits) {
    NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: Cmp %d does not fit CMove %d for building vector, escaping...", cmp->_idx, cmove->_idx); cmp->dump();})
    return false;
  }
  Node_List* cmp_pk = _sw->my_pack(cmp);
  if (cmp_pk->size() != cmove_pk->size()) {
    return false;
  }

  if (test_cmp_pack(cmp_pk, cmove_pk)) {
    return true;
  }
  NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: cmp pack for Cmp %d failed vectorization test", cmp->_idx); cmp->dump();})
  return false;
}
// Build a replacement pack for cmove_pk. The new pack lists the CMove nodes
// only, while the dictionary maps every CMove, Bool and Cmp lane to it. The
// old cmove, bool and cmp packs are then dropped from the packset and the new
// pack is appended.
void CMoveKit::make_cmove_pack(Node_List* cmove_pk) {
  Node* first_cmove = cmove_pk->at(0);
  Node* first_bol = first_cmove->as_CMove()->in(CMoveNode::Condition);
  Node_List* bool_pk = _sw->my_pack(first_bol);
  Node* first_cmp = first_bol->in(1);
  Node_List* cmp_pk = _sw->my_pack(first_cmp);

  Node_List* merged_pk = new Node_List();

  const uint last_lane = cmove_pk->size() - 1;
  for (uint lane = 0; lane <= last_lane; ++lane) {
    Node* lane_cmove = cmove_pk->at(lane);
    Node* lane_bol = bool_pk->at(lane);
    Node* lane_cmp = cmp_pk->at(lane);

    merged_pk->insert(lane, lane_cmove);
    map(lane_cmove, merged_pk);
    map(lane_bol, merged_pk);
    map(lane_cmp, merged_pk);

    // Only the CMove is re-pointed; Bool and Cmp keep their old my_pack entry.
    _sw->set_my_pack(lane_cmove, merged_pk);
  }

  _sw->_packset.remove(cmove_pk);
  _sw->_packset.remove(bool_pk);
  _sw->_packset.remove(cmp_pk);
  _sw->_packset.append(merged_pk);

  NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmove_pack: added syntactic CMove pack"); _sw->print_pack(merged_pk);})
}
// Check that the Cmp pack can be subsumed by the vector built for the CMove
// pack. Requirements, in order:
//   1. each Cmp input is either packed with the same size as cmp_pk, or is
//      the identical node in every lane;
//   2. in lane 0 the two Cmp inputs are exactly the IfFalse/IfTrue inputs of
//      the CMove (in either order);
//   3. every other lane pairs Cmp and CMove inputs the same way as lane 0.
bool CMoveKit::test_cmp_pack(Node_List* cmp_pk, Node_List* cmove_pk) {
  Node* cmp0 = cmp_pk->at(0);
  assert(cmp0->is_Cmp(), "CMoveKit::test_cmp_pack: should be Cmp Node");
  assert(cmove_pk->at(0)->is_CMove(), "CMoveKit::test_cmp_pack: should be CMove");
  assert(cmp_pk->size() == cmove_pk->size(), "CMoveKit::test_cmp_pack: should be same size");

  Node* in1 = cmp0->in(1);
  Node* in2 = cmp0->in(2);
  Node_List* in1_pk = _sw->my_pack(in1);
  Node_List* in2_pk = _sw->my_pack(in2);

  // A packed input must have as many lanes as the Cmp pack.
  if (in1_pk != nullptr && in1_pk->size() != cmp_pk->size()) {
    return false;
  }
  if (in2_pk != nullptr && in2_pk->size() != cmp_pk->size()) {
    return false;
  }

  // An unpacked first input must be the very same node in all lanes.
  if (in1_pk == nullptr) {
    for (uint lane = 1; lane < cmp_pk->size(); lane++) {
      if (cmp_pk->at(lane)->in(1) != in1) {
        return false;
      }
    }
  }

  // Likewise for an unpacked second input.
  if (in2_pk == nullptr) {
    for (uint lane = 1; lane < cmp_pk->size(); lane++) {
      if (cmp_pk->at(lane)->in(2) != in2) {
        return false;
      }
    }
  }

  // Work out from lane 0 which CMove input each Cmp input corresponds to.
  int cmove_idx_for_in1;
  int cmove_idx_for_in2;
  if (cmp_pk->at(0)->in(1) == cmove_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) &&
      cmp_pk->at(0)->in(2) == cmove_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
    cmove_idx_for_in1 = CMoveNode::IfFalse;
    cmove_idx_for_in2 = CMoveNode::IfTrue;
  } else if (cmp_pk->at(0)->in(2) == cmove_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) &&
             cmp_pk->at(0)->in(1) == cmove_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
    cmove_idx_for_in2 = CMoveNode::IfFalse;
    cmove_idx_for_in1 = CMoveNode::IfTrue;
  } else {
    return false;
  }

  // The remaining lanes must follow the same pairing.
  for (uint lane = 1; lane < cmp_pk->size(); lane++) {
    const bool first_matches =
        cmp_pk->at(lane)->in(1) == cmove_pk->at(lane)->as_CMove()->in(cmove_idx_for_in1);
    if (!first_matches ||
        cmp_pk->at(lane)->in(2) != cmove_pk->at(lane)->as_CMove()->in(cmove_idx_for_in2)) {
      return false;
    }
  }

  NOT_PRODUCT(if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmp_pack: cmp pack for 1st Cmp %d is OK for vectorization: ", cmp0->_idx); cmp0->dump(); })
  return true;
}
//------------------------------implemented---------------------------
// Can code be generated for pack p?
bool SuperWord::implemented(Node_List* p) {
@@ -2283,9 +2060,9 @@ bool SuperWord::implemented(Node_List* p) {
// integer subword types with superword vectorization.
// See JDK-8294816 for miscompilation issues with shorts.
return false;
} else if (is_cmove_fp_opcode(opc)) {
retValue = is_cmov_pack(p) && VectorNode::implemented(opc, size, velt_basic_type(p0));
NOT_PRODUCT(if(retValue && is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmove pack"); print_pack(p);})
} else if (p0->is_Cmp()) {
// Cmp -> Bool -> Cmove
retValue = UseVectorCmov;
} else if (requires_long_to_int_conversion(opc)) {
// Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros
// returns int type, but Vector API for them returns long type. To unify
@@ -2308,10 +2085,6 @@ bool SuperWord::implemented(Node_List* p) {
return retValue;
}
// True when the first node of p is registered in the CMoveKit dictionary,
// i.e. p has been merged into a cmove pack.
bool SuperWord::is_cmov_pack(Node_List* p) {
  Node* first = p->at(0);
  Node_List* merged = _cmovev_kit.pack(first);
  return merged != nullptr;
}
bool SuperWord::requires_long_to_int_conversion(int opc) {
switch(opc) {
case Op_PopCountL:
@@ -2385,9 +2158,6 @@ bool SuperWord::profitable(Node_List* p) {
// just the ones outside the block.)
for (uint i = 0; i < p->size(); i++) {
Node* def = p->at(i);
if (is_cmov_pack_internal_node(p, def)) {
continue;
}
for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
Node* use = def->fast_out(j);
for (uint k = 0; k < use->req(); k++) {
@@ -2408,11 +2178,30 @@ bool SuperWord::profitable(Node_List* p) {
}
}
}
if (p0->is_Cmp()) {
// Verify that Cmp pack only has Bool pack uses
for (DUIterator_Fast jmax, j = p0->fast_outs(jmax); j < jmax; j++) {
Node* bol = p0->fast_out(j);
if (!bol->is_Bool() || bol->in(0) != nullptr || !is_vector_use(bol, 1)) {
return false;
}
}
}
if (p0->is_Bool()) {
// Verify that Bool pack only has CMove pack uses
for (DUIterator_Fast jmax, j = p0->fast_outs(jmax); j < jmax; j++) {
Node* cmove = p0->fast_out(j);
if (!cmove->is_CMove() || cmove->in(0) != nullptr || !is_vector_use(cmove, 1)) {
return false;
}
}
}
return true;
}
#ifdef ASSERT
void SuperWord::verify_packs() {
// Verify independence at pack level.
for (int i = 0; i < _packset.length(); i++) {
Node_List* p = _packset.at(i);
Node* dependence = find_dependence(p);
@@ -2431,6 +2220,27 @@ void SuperWord::verify_packs() {
}
assert(dependence == nullptr, "all nodes in pack must be mutually independent");
}
// Verify all nodes in packset have my_pack set correctly: each node must lie
// in the basic block, appear in exactly one pack, and have my_pack() point
// back at the pack that contains it.
Unique_Node_List processed;
for (int i = 0; i < _packset.length(); i++) {
  Node_List* p = _packset.at(i);
  for (uint k = 0; k < p->size(); k++) {
    Node* n = p->at(k);
    assert(in_bb(n), "only nodes in bb can be in packset");
    assert(!processed.member(n), "node should only occur once in packset");
    assert(my_pack(n) == p, "n has consistent packset info");
    // Remember the node so that duplicates across packs are detected.
    processed.push(n);
  }
}
// Check that no other node has my_pack set.
for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i);
if (!processed.member(n)) {
assert(my_pack(n) == nullptr, "should not have pack if not in packset");
}
}
}
#endif
@@ -2535,7 +2345,7 @@ public:
if (pid == 0) {
pid = new_pid();
set_pid(n, pid);
assert(_slp->my_pack(n) == nullptr || UseVectorCmov, "no packset");
assert(_slp->my_pack(n) == nullptr, "no packset");
}
}
@@ -2953,7 +2763,89 @@ bool SuperWord::output() {
Node* one = vector_opd(p, 3);
vn = VectorNode::make(opc, in, zero, one, vlen, velt_basic_type(n));
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
} else if (n->req() == 3 && !is_cmov_pack(p)) {
} else if (n->is_Cmp()) {
// Bool + Cmp + CMove -> VectorMaskCmp + VectorBlend
continue;
} else if (n->is_Bool()) {
// Bool + Cmp + CMove -> VectorMaskCmp + VectorBlend
continue;
} else if (n->is_CMove()) {
// Bool + Cmp + CMove -> VectorMaskCmp + VectorBlend
BoolNode* bol = n->in(1)->as_Bool();
assert(bol != nullptr, "must have Bool above CMove");
BoolTest::mask bol_test = bol->_test._test;
// Only the six plain comparison tests are expected on a vectorized CMove.
assert(bol_test == BoolTest::eq ||
       bol_test == BoolTest::ne ||
       bol_test == BoolTest::ge ||
       bol_test == BoolTest::gt ||
       bol_test == BoolTest::lt ||
       bol_test == BoolTest::le,
       "CMove bool should be one of: eq,ne,ge,gt,lt,le");
Node_List* p_bol = my_pack(bol);
assert(p_bol != nullptr, "CMove must have matching Bool pack");
CmpNode* cmp = bol->in(1)->as_Cmp();
assert(cmp != nullptr, "must have cmp above CMove");
Node_List* p_cmp = my_pack(cmp);
assert(p_cmp != nullptr, "Bool must have matching Cmp pack");
Node* cmp_in1 = vector_opd(p_cmp, 1);
Node* cmp_in2 = vector_opd(p_cmp, 2);
Node* blend_in1 = vector_opd(p, 2);
Node* blend_in2 = vector_opd(p, 3);
if (cmp->Opcode() == Op_CmpF || cmp->Opcode() == Op_CmpD) {
// If we have a Float or Double comparison, we must be careful with
// handling NaN's correctly. CmpF and CmpD have a return code, as
// they are based on the java bytecodes fcmpl/dcmpl:
// -1: cmp_in1 < cmp_in2, or at least one of the two is a NaN
// 0: cmp_in1 == cmp_in2 (no NaN)
// 1: cmp_in1 > cmp_in2 (no NaN)
//
// The "bol_test" selects which of the [-1, 0, 1] cases lead to "true".
//
// Note: ordered (O) comparison returns "false" if either input is NaN.
// unordered (U) comparison returns "true" if either input is NaN.
//
// The VectorMaskCmpNode does a comparison directly on in1 and in2, in the java
// standard way (all comparisons are ordered, except NEQ is unordered).
//
// In the following, "bol_test" already matches the cmp code for VectorMaskCmpNode:
// BoolTest::eq: Case 0 -> EQ_O
// BoolTest::ne: Case -1, 1 -> NEQ_U
// BoolTest::ge: Case 0, 1 -> GE_O
// BoolTest::gt: Case 1 -> GT_O
//
// But the lt and le comparisons must be converted from unordered to ordered:
// BoolTest::lt: Case -1 -> LT_U -> VectorMaskCmp would interpret lt as LT_O
// BoolTest::le: Case -1, 0 -> LE_U -> VectorMaskCmp would interpret le as LE_O
//
if (bol_test == BoolTest::lt || bol_test == BoolTest::le) {
// Negating the bol_test and swapping the blend-inputs leaves all non-NaN cases equal,
// but converts the unordered (U) to an ordered (O) comparison.
// VectorBlend(VectorMaskCmp(LT_U, in1_cmp, in2_cmp), in1_blend, in2_blend)
// <==> VectorBlend(VectorMaskCmp(GE_O, in1_cmp, in2_cmp), in2_blend, in1_blend)
// VectorBlend(VectorMaskCmp(LE_U, in1_cmp, in2_cmp), in1_blend, in2_blend)
// <==> VectorBlend(VectorMaskCmp(GT_O, in1_cmp, in2_cmp), in2_blend, in1_blend)
bol_test = bol->_test.negate();
swap(blend_in1, blend_in2);
}
}
// VectorMaskCmp
ConINode* bol_test_node = _igvn.intcon((int)bol_test);
BasicType bt = velt_basic_type(cmp);
const TypeVect* vt = TypeVect::make(bt, vlen);
VectorNode* mask = new VectorMaskCmpNode(bol_test, cmp_in1, cmp_in2, bol_test_node, vt);
_igvn.register_new_node_with_optimizer(mask);
_phase->set_ctrl(mask, _phase->get_ctrl(p->at(0)));
_igvn._worklist.push(mask);
// VectorBlend
vn = new VectorBlendNode(blend_in1, blend_in2, mask);
} else if (n->req() == 3) {
// Promote operands to vector
Node* in1 = nullptr;
bool node_isa_reduction = is_marked_reduction(n);
@@ -3037,85 +2929,6 @@ bool SuperWord::output() {
int vopc = VectorCastNode::opcode(opc, in->bottom_type()->is_vect()->element_basic_type());
vn = VectorCastNode::make(vopc, in, bt, vlen);
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
} else if (is_cmov_pack(p)) {
if (cl->is_rce_post_loop()) {
// do not refactor of flow in post loop context
return false;
}
if (!n->is_CMove()) {
continue;
}
// place here CMoveVDNode
NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);})
Node* bol = n->in(CMoveNode::Condition);
if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) {
NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();})
bol = bol->in(1); //may be ExtractNode
}
assert(bol->is_Bool(), "should be BoolNode - too late to bail out!");
if (!bol->is_Bool()) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();})
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
BoolTest boltest = bol->as_Bool()->_test;
BoolTest::mask cond = boltest._test;
Node* cmp = bol->in(1);
// When the src order of cmp node and cmove node are the same:
// cmp: CmpD src1 src2
// bool: Bool cmp mask
// cmove: CMoveD bool scr1 src2
// =====> vectorized, equivalent to
// cmovev: CMoveVD mask src_vector1 src_vector2
//
// When the src order of cmp node and cmove node are different:
// cmp: CmpD src2 src1
// bool: Bool cmp mask
// cmove: CMoveD bool scr1 src2
// =====> equivalent to
// cmp: CmpD src1 src2
// bool: Bool cmp negate(mask)
// cmove: CMoveD bool scr1 src2
// (Note: when mask is ne or eq, we don't need to negate it even after swapping.)
// =====> vectorized, equivalent to
// cmovev: CMoveVD negate(mask) src_vector1 src_vector2
if (cmp->in(2) == n->in(CMoveNode::IfFalse) && cond != BoolTest::ne && cond != BoolTest::eq) {
assert(cmp->in(1) == n->in(CMoveNode::IfTrue), "cmpnode and cmovenode don't share the same inputs.");
cond = boltest.negate();
}
Node* cc = _igvn.intcon((int)cond);
NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", cc->_idx); cc->dump();})
Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse
if (src1 == nullptr) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be null, exiting SuperWord");})
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue
if (src2 == nullptr) {
if (do_reserve_copy()) {
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be null, exiting SuperWord");})
return false; //and reverse to backup IG
}
ShouldNotReachHere();
}
BasicType bt = velt_basic_type(n);
const TypeVect* vt = TypeVect::make(bt, vlen);
assert(bt == T_FLOAT || bt == T_DOUBLE, "Only vectorization for FP cmovs is supported");
if (bt == T_FLOAT) {
vn = new CMoveVFNode(cc, src1, src2, vt);
} else {
assert(bt == T_DOUBLE, "Expected double");
vn = new CMoveVDNode(cc, src1, src2, vt);
}
NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();})
} else if (opc == Op_FmaD || opc == Op_FmaF) {
// Promote operands to vector
Node* in1 = vector_opd(p, 1);
@@ -3455,7 +3268,7 @@ void SuperWord::insert_extracts(Node_List* p) {
Node* n = use->in(k);
if (def == n) {
Node_List* u_pk = my_pack(use);
if ((u_pk == nullptr || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) {
if ((u_pk == nullptr || use->is_CMove()) && !is_vector_use(use, k)) {
_n_idx_list.push(use, k);
}
}
@@ -3886,6 +3699,18 @@ void SuperWord::compute_vector_element_type() {
}
}
}
for (int i = 0; i < _block.length(); i++) {
Node* n = _block.at(i);
Node* nn = n;
if (nn->is_Bool() && nn->in(0) == nullptr) {
nn = nn->in(1);
assert(nn->is_Cmp(), "always have Cmp above Bool");
}
if (nn->is_Cmp() && nn->in(0) == nullptr) {
nn = nn->in(1);
set_velt_type(n, velt_type(nn));
}
}
#ifndef PRODUCT
if (TraceSuperWord && Verbose) {
for (int i = 0; i < _block.length(); i++) {

View File

@@ -203,24 +203,6 @@ class SWNodeInfo {
};
class SuperWord;
// Helper owned by SuperWord for CMove vectorization. It keeps a dictionary
// from scalar nodes (CMove, Bool, Cmp) to the merged cmove pack they belong
// to, and offers the checks and the merge step built on that dictionary.
// Everything is private; SuperWord reaches it through friendship.
class CMoveKit {
  friend class SuperWord;
 private:
  SuperWord* _sw;
  Dict* _dict;

  CMoveKit(Arena* a, SuperWord* sw) : _sw(sw), _dict(new Dict(cmpkey, hashkey, a)) {}

  // Key conversion Node* -> void*; done in two casts to make gcc happy.
  void* _2p(Node* key) const {
    return (void*)(intptr_t)key;
  }

  Dict* dict() const {
    return _dict;
  }

  // Register key as a member of pack val; the key must not be mapped yet.
  void map(Node* key, Node_List* val) {
    assert((*_dict)[_2p(key)] == nullptr, "key existed");
    _dict->Insert(_2p(key), (void*)val);
  }

  // Drop the dictionary entry for key.
  void unmap(Node* key) {
    _dict->Delete(_2p(key));
  }

  // Merged pack registered for key, or null when there is none.
  Node_List* pack(Node* key) const {
    return (Node_List*)(*_dict)[_2p(key)];
  }

  // For a suitable Bool / Cmp node return the corresponding CMove,
  // otherwise return null.
  Node* is_Bool_candidate(Node* nd) const;
  Node* is_Cmp_candidate(Node* nd) const;

  // Determine if the current pack is a cmove candidate that can be vectorized.
  bool can_merge_cmove_pack(Node_List* cmove_pk);
  // Replace cmove_pk and its Bool/Cmp packs by one merged pack.
  void make_cmove_pack(Node_List* cmove_pk);
  // Check that the Cmp pack inputs line up with the CMove pack inputs.
  bool test_cmp_pack(Node_List* cmp_pk, Node_List* cmove_pk);
};//class CMoveKit
// JVMCI: OrderedPair is moved up to deal with compilation issues on Windows
//------------------------------OrderedPair---------------------------
@@ -309,7 +291,6 @@ class SuperWord : public ResourceObj {
GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
GrowableArray<SWNodeInfo> _node_info; // Info needed per node
CloneMap& _clone_map; // map of nodes created in cloning
CMoveKit _cmovev_kit; // support for vectorization of CMov
MemNode* _align_to_ref; // Memory reference that pre-loop will align to
GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs
@@ -458,9 +439,6 @@ class SuperWord : public ResourceObj {
private:
// Record p as the pack of node n, growing the per-node info array as needed.
void set_my_pack(Node* n, Node_List* p) {
  const int idx = bb_idx(n);
  grow_node_info(idx);
  _node_info.adr_at(idx)->_my_pack = p;
}
// is pack good for converting into one vector node replacing bunches of Cmp, Bool, CMov nodes.
bool is_cmov_pack(Node_List* p);
// True when p is a merged cmove pack and nd is not itself a CMove.
bool is_cmov_pack_internal_node(Node_List* p, Node* nd) {
  if (!is_cmov_pack(p)) {
    return false;
  }
  return !nd->is_CMove();
}
// True for the floating-point scalar conditional-move opcodes (CMoveF, CMoveD).
static bool is_cmove_fp_opcode(int opc) {
  if (opc == Op_CMoveF) {
    return true;
  }
  return opc == Op_CMoveD;
}
static bool requires_long_to_int_conversion(int opc);
// For pack p, are all idx operands the same?
bool same_inputs(Node_List* p, int idx);
@@ -595,9 +573,8 @@ private:
void construct_my_pack_map();
// Remove packs that are not implemented or not profitable.
void filter_packs();
// Merge CMove into new vector-nodes
void merge_packs_to_cmove();
// Verify that for every pack, all nodes are mutually independent
// Verify that for every pack, all nodes are mutually independent.
// Also verify that packset and my_pack are consistent.
DEBUG_ONLY(void verify_packs();)
// Adjust the memory graph for the packed operations
void schedule();

View File

@@ -82,9 +82,11 @@ int VectorNode::opcode(int sopc, BasicType bt) {
case Op_FmaF:
return (bt == T_FLOAT ? Op_FmaVF : 0);
case Op_CMoveF:
return (bt == T_FLOAT ? Op_CMoveVF : 0);
return (bt == T_FLOAT ? Op_VectorBlend : 0);
case Op_CMoveD:
return (bt == T_DOUBLE ? Op_CMoveVD : 0);
return (bt == T_DOUBLE ? Op_VectorBlend : 0);
case Op_Bool:
return Op_VectorMaskCmp;
case Op_DivF:
return (bt == T_FLOAT ? Op_DivVF : 0);
case Op_DivD:
@@ -683,10 +685,6 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) {
*start = 1;
*end = 3; // 2 vector operands
break;
case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD:
*start = 2;
*end = n->req();
break;
case Op_FmaD:
case Op_FmaF:
*start = 1;

View File

@@ -392,22 +392,6 @@ public:
virtual int Opcode() const;
};
//------------------------------CMoveVFNode--------------------------------------
// Conditional move operating on a vector of floats.
class CMoveVFNode : public VectorNode {
 public:
  // Passes the three inputs and the vector type straight to VectorNode.
  CMoveVFNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt)
    : VectorNode(in1, in2, in3, vt) {}

  // Declared here only; the definition is not part of this class body.
  virtual int Opcode() const;
};
//------------------------------CMoveVDNode--------------------------------------
// Conditional move operating on a vector of doubles.
class CMoveVDNode : public VectorNode {
 public:
  // Passes the three inputs and the vector type straight to VectorNode.
  CMoveVDNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt)
    : VectorNode(in1, in2, in3, vt) {}

  // Declared here only; the definition is not part of this class body.
  virtual int Opcode() const;
};
//------------------------------MulReductionVINode--------------------------------------
// Vector multiply byte, short and int as a reduction
class MulReductionVINode : public UnorderedReductionNode {

View File

@@ -1768,8 +1768,6 @@
declare_c2_type(NegVDNode, NegVNode) \
declare_c2_type(FmaVDNode, VectorNode) \
declare_c2_type(FmaVFNode, VectorNode) \
declare_c2_type(CMoveVFNode, VectorNode) \
declare_c2_type(CMoveVDNode, VectorNode) \
declare_c2_type(CompressVNode, VectorNode) \
declare_c2_type(CompressMNode, VectorNode) \
declare_c2_type(ExpandVNode, VectorNode) \

View File

@@ -30,30 +30,25 @@ import jdk.test.lib.Utils;
/*
* @test
* @bug 8289422
* @bug 8289422 8306088
* @key randomness
* @summary Auto-vectorization enhancement to support vector conditional move on AArch64
* @requires os.arch=="aarch64"
* @summary Auto-vectorization enhancement to support vector conditional move.
* @library /test/lib /
* @run driver compiler.c2.irTests.TestVectorConditionalMove
*/
public class TestVectorConditionalMove {
final private static int SIZE = 3000;
final private static int SIZE = 1024;
private static final Random RANDOM = Utils.getRandomInstance();
private static float[] floata = new float[SIZE];
private static float[] floatb = new float[SIZE];
private static float[] floatc = new float[SIZE];
private static double[] doublea = new double[SIZE];
private static double[] doubleb = new double[SIZE];
private static double[] doublec = new double[SIZE];
public static void main(String[] args) {
TestFramework.runWithFlags("-Xcomp", "-XX:-TieredCompilation", "-XX:+UseCMoveUnconditionally",
"-XX:+UseVectorCmov", "-XX:CompileCommand=exclude,*.cmove*");
TestFramework.runWithFlags("-XX:-TieredCompilation",
"-XX:+UseCMoveUnconditionally",
"-XX:+UseVectorCmov",
"-XX:CompileCommand=compileonly,*.TestVectorConditionalMove.test*");
}
// Compare 2 values, and pick one of them
// Scalar select: returns a when a > b, otherwise b.
private float cmoveFloatGT(float a, float b) {
    if (a > b) {
        return a;
    }
    return b;
}
@@ -94,8 +89,124 @@ public class TestVectorConditionalMove {
return (a != b) ? a : b;
}
// Extensions: compare 2 values, and pick from 2 consts
// Scalar reference for testCMoveFGTforFConst: 0.1f when a > b, else -0.1f.
private float cmoveFGTforFConst(float a, float b) {
    if (a > b) {
        return 0.1f;
    }
    return -0.1f;
}
// Scalar reference for testCMoveFGEforFConst: 0.1f when a >= b, else -0.1f.
private float cmoveFGEforFConst(float a, float b) {
    if (a >= b) {
        return 0.1f;
    }
    return -0.1f;
}
// Scalar reference for testCMoveFLTforFConst: 0.1f when a < b, else -0.1f.
private float cmoveFLTforFConst(float a, float b) {
    if (a < b) {
        return 0.1f;
    }
    return -0.1f;
}
// Scalar reference for testCMoveFLEforFConst: 0.1f when a <= b, else -0.1f.
private float cmoveFLEforFConst(float a, float b) {
    if (a <= b) {
        return 0.1f;
    }
    return -0.1f;
}
// Scalar reference for testCMoveFEQforFConst: 0.1f when a == b, else -0.1f.
private float cmoveFEQforFConst(float a, float b) {
    if (a == b) {
        return 0.1f;
    }
    return -0.1f;
}
// Scalar reference for testCMoveFNEQforFConst: 0.1f when a != b, else -0.1f.
private float cmoveFNEQforFConst(float a, float b) {
    if (a != b) {
        return 0.1f;
    }
    return -0.1f;
}
// Scalar reference for testCMoveDGTforDConst: 0.1 when a > b, else -0.1.
private double cmoveDGTforDConst(double a, double b) {
    if (a > b) {
        return 0.1;
    }
    return -0.1;
}
// Scalar reference for testCMoveDGEforDConst: 0.1 when a >= b, else -0.1.
private double cmoveDGEforDConst(double a, double b) {
    if (a >= b) {
        return 0.1;
    }
    return -0.1;
}
// Scalar reference for testCMoveDLTforDConst: 0.1 when a < b, else -0.1.
private double cmoveDLTforDConst(double a, double b) {
    if (a < b) {
        return 0.1;
    }
    return -0.1;
}
// Scalar reference for testCMoveDLEforDConst: 0.1 when a <= b, else -0.1.
private double cmoveDLEforDConst(double a, double b) {
    if (a <= b) {
        return 0.1;
    }
    return -0.1;
}
// Scalar reference for testCMoveDEQforDConst: 0.1 when a == b, else -0.1.
private double cmoveDEQforDConst(double a, double b) {
    if (a == b) {
        return 0.1;
    }
    return -0.1;
}
// Scalar reference for testCMoveDNEQforDConst: 0.1 when a != b, else -0.1.
private double cmoveDNEQforDConst(double a, double b) {
    if (a != b) {
        return 0.1;
    }
    return -0.1;
}
// Extension: Compare 2 ILFD values, and pick from 2 ILFD values
// Scalar reference for testCMoveIGTforI: int compare a > b picks c, else d.
private int cmoveIGTforI(int a, int b, int c, int d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveIGTforL: int compare a > b picks long c, else d.
private long cmoveIGTforL(int a, int b, long c, long d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveIGTforF: int compare a > b picks float c, else d.
private float cmoveIGTforF(int a, int b, float c, float d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveIGTforD: int compare a > b picks double c, else d.
private double cmoveIGTforD(int a, int b, double c, double d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveLGTforI: long compare a > b picks int c, else d.
private int cmoveLGTforI(long a, long b, int c, int d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveLGTforL: long compare a > b picks long c, else d.
private long cmoveLGTforL(long a, long b, long c, long d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveLGTforF: long compare a > b picks float c, else d.
private float cmoveLGTforF(long a, long b, float c, float d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveLGTforD: long compare a > b picks double c, else d.
private double cmoveLGTforD(long a, long b, double c, double d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveFGTforI: float compare a > b picks int c, else d.
private int cmoveFGTforI(float a, float b, int c, int d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveFGTforL: float compare a > b picks long c, else d.
private long cmoveFGTforL(float a, float b, long c, long d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveFGTforF: float compare a > b picks float c, else d.
private float cmoveFGTforF(float a, float b, float c, float d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveFGTforD: float compare a > b picks double c, else d.
private double cmoveFGTforD(float a, float b, double c, double d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveDGTforI: double compare a > b picks int c, else d.
private int cmoveDGTforI(double a, double b, int c, int d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveDGTforL: double compare a > b picks long c, else d.
private long cmoveDGTforL(double a, double b, long c, long d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveDGTforF: double compare a > b picks float c, else d.
private float cmoveDGTforF(double a, double b, float c, float d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Scalar reference for testCMoveDGTforD: double compare a > b picks double c, else d.
private double cmoveDGTforD(double a, double b, double c, double d) {
    if (a > b) {
        return c;
    }
    return d;
}
// Compare 2 values, and pick one of them
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFGT(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] > b[i]) ? a[i] : b[i];
@@ -103,7 +214,8 @@ public class TestVectorConditionalMove {
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFGTSwap(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] > a[i]) ? a[i] : b[i];
@@ -111,7 +223,8 @@ public class TestVectorConditionalMove {
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFLT(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] < b[i]) ? a[i] : b[i];
@@ -119,7 +232,8 @@ public class TestVectorConditionalMove {
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFLTSwap(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] < a[i]) ? a[i] : b[i];
@@ -127,7 +241,8 @@ public class TestVectorConditionalMove {
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVFEQ(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] == b[i]) ? a[i] : b[i];
@@ -135,7 +250,8 @@ public class TestVectorConditionalMove {
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDLE(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] <= b[i]) ? a[i] : b[i];
@@ -143,7 +259,8 @@ public class TestVectorConditionalMove {
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDLESwap(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] <= a[i]) ? a[i] : b[i];
@@ -151,7 +268,8 @@ public class TestVectorConditionalMove {
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDGE(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] >= b[i]) ? a[i] : b[i];
@@ -159,7 +277,8 @@ public class TestVectorConditionalMove {
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDGESwap(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (b[i] >= a[i]) ? a[i] : b[i];
@@ -167,31 +286,339 @@ public class TestVectorConditionalMove {
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveVDNE(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] != b[i]) ? a[i] : b[i];
}
}
// Extensions: compare 2 values, and pick from 2 consts
// IR test kernel: c[i] = 0.1f when a[i] > b[i], otherwise -0.1f.
// First rule: no CMOVE_VD node may appear.
// Second rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(failOn = {IRNode.CMOVE_VD})
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFGTforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] > b[i]) ? 0.1f : -0.1f;
}
}
// IR test kernel: c[i] = 0.1f when a[i] >= b[i], otherwise -0.1f.
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFGEforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] >= b[i]) ? 0.1f : -0.1f;
}
}
// IR test kernel: c[i] = 0.1f when a[i] < b[i], otherwise -0.1f.
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFLTforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] < b[i]) ? 0.1f : -0.1f;
}
}
// IR test kernel: c[i] = 0.1f when a[i] <= b[i], otherwise -0.1f.
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFLEforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] <= b[i]) ? 0.1f : -0.1f;
}
}
// IR test kernel: c[i] = 0.1f when a[i] == b[i], otherwise -0.1f.
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFEQforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] == b[i]) ? 0.1f : -0.1f;
}
}
// IR test kernel: c[i] = 0.1f when a[i] != b[i], otherwise -0.1f.
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFNEQforFConst(float[] a, float[] b, float[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] != b[i]) ? 0.1f : -0.1f;
}
}
// IR test kernel: c[i] = 0.1 when a[i] > b[i], otherwise -0.1 (double arrays).
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDGTforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] > b[i]) ? 0.1 : -0.1;
}
}
// IR test kernel: c[i] = 0.1 when a[i] >= b[i], otherwise -0.1 (double arrays).
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDGEforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] >= b[i]) ? 0.1 : -0.1;
}
}
// IR test kernel: c[i] = 0.1 when a[i] < b[i], otherwise -0.1 (double arrays).
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDLTforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] < b[i]) ? 0.1 : -0.1;
}
}
// IR test kernel: c[i] = 0.1 when a[i] <= b[i], otherwise -0.1 (double arrays).
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDLEforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] <= b[i]) ? 0.1 : -0.1;
}
}
// IR test kernel: c[i] = 0.1 when a[i] == b[i], otherwise -0.1 (double arrays).
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDEQforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] == b[i]) ? 0.1 : -0.1;
}
}
// IR test kernel: c[i] = 0.1 when a[i] != b[i], otherwise -0.1 (double arrays).
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDNEQforDConst(double[] a, double[] b, double[] c) {
for (int i = 0; i < a.length; i++) {
c[i] = (a[i] != b[i]) ? 0.1 : -0.1;
}
}
// Extension: Compare 2 ILFD values, and pick from 2 ILFD values
// Note:
// To guarantee that CMove is introduced, I need to perform the loads before the branch. To ensure they
// do not float down into the branches, I compute a value, and store it to r2 (same as r, except that the
// compilation does not know that).
// So far, vectorization only works for CMoveF/D, with same data-width comparison (F/I for F, D/L for D).
// IR test kernel: int compare (a[i] > b[i]) selecting between int values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveIGTforI(int[] a, int[] b, int[] c, int[] d, int[] r, int[] r2) {
for (int i = 0; i < a.length; i++) {
int cc = c[i];
int dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: int compare (a[i] > b[i]) selecting between long values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveIGTforL(int[] a, int[] b, long[] c, long[] d, long[] r, long[] r2) {
for (int i = 0; i < a.length; i++) {
long cc = c[i];
long dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: int compare (a[i] > b[i]) selecting between float values c[i] and d[i].
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveIGTforF(int[] a, int[] b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < a.length; i++) {
float cc = c[i];
float dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: int compare (a[i] > b[i]) selecting between double values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveIGTforD(int[] a, int[] b, double[] c, double[] d, double[] r, double[] r2) {
for (int i = 0; i < a.length; i++) {
double cc = c[i];
double dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: long compare (a[i] > b[i]) selecting between int values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveLGTforI(long[] a, long[] b, int[] c, int[] d, int[] r, int[] r2) {
for (int i = 0; i < a.length; i++) {
int cc = c[i];
int dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: long compare (a[i] > b[i]) selecting between long values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveLGTforL(long[] a, long[] b, long[] c, long[] d, long[] r, long[] r2) {
for (int i = 0; i < a.length; i++) {
long cc = c[i];
long dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: long compare (a[i] > b[i]) selecting between float values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveLGTforF(long[] a, long[] b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < a.length; i++) {
float cc = c[i];
float dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: long compare (a[i] > b[i]) selecting between double values c[i] and d[i].
// Rule (applied when the CPU reports avx2 or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
// Requires avx2, else L is restricted to 16 byte, and D has 32. That leads to a vector elements mismatch of 2 to 4.
private static void testCMoveLGTforD(long[] a, long[] b, double[] c, double[] d, double[] r, double[] r2) {
for (int i = 0; i < a.length; i++) {
double cc = c[i];
double dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: float compare (a[i] > b[i]) selecting between int values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveFGTforI(float[] a, float[] b, int[] c, int[] d, int[] r, int[] r2) {
for (int i = 0; i < a.length; i++) {
int cc = c[i];
int dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: float compare (a[i] > b[i]) selecting between long values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveFGTforL(float[] a, float[] b, long[] c, long[] d, long[] r, long[] r2) {
for (int i = 0; i < a.length; i++) {
long cc = c[i];
long dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: float compare (a[i] > b[i]) selecting between float values c[i] and d[i].
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveFGTforF(float[] a, float[] b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < a.length; i++) {
float cc = c[i];
float dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: float compare (a[i] > b[i]) selecting between double values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveFGTforD(float[] a, float[] b, double[] c, double[] d, double[] r, double[] r2) {
for (int i = 0; i < a.length; i++) {
double cc = c[i];
double dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: double compare (a[i] > b[i]) selecting between int values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveDGTforI(double[] a, double[] b, int[] c, int[] d, int[] r, int[] r2) {
for (int i = 0; i < a.length; i++) {
int cc = c[i];
int dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: double compare (a[i] > b[i]) selecting between long values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveDGTforL(double[] a, double[] b, long[] c, long[] d, long[] r, long[] r2) {
for (int i = 0; i < a.length; i++) {
long cc = c[i];
long dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: double compare (a[i] > b[i]) selecting between float values c[i] and d[i].
// Rule: neither VectorMaskCmp nor VectorBlend may appear for this type combination.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveDGTforF(double[] a, double[] b, float[] c, float[] d, float[] r, float[] r2) {
for (int i = 0; i < a.length; i++) {
float cc = c[i];
float dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel: double compare (a[i] > b[i]) selecting between double values c[i] and d[i].
// Rule (applied when the CPU reports avx or asimd): LoadVector, VectorMaskCmp,
// VectorBlend and StoreVector must each occur at least once.
@Test
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
private static void testCMoveDGTforD(double[] a, double[] b, double[] c, double[] d, double[] r, double[] r2) {
for (int i = 0; i < a.length; i++) {
double cc = c[i];
double dd = d[i];
// Using both loaded values here keeps the loads ahead of the select below.
r2[i] = cc + dd;
r[i] = (a[i] > b[i]) ? cc : dd;
}
}
// IR test kernel with no array inputs: fills a locally allocated double array with
// seed + i at even indices and seed - i at odd indices.
// Rule: neither VectorMaskCmp nor VectorBlend may appear in the compiled method.
@Test
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
private static void testCMoveVDUnsupported() {
double[] doublec = new double[SIZE];
int seed = 1001;
for (int i = 0; i < doublec.length; i++) {
// The condition is on the int index parity, not on a floating-point compare.
doublec[i] = (i % 2 == 0) ? seed + i : seed - i;
}
}
@Warmup(0)
@Run(test = {"testCMoveVFGT", "testCMoveVFLT","testCMoveVDLE", "testCMoveVDGE", "testCMoveVFEQ", "testCMoveVDNE",
"testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap"})
"testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap",
"testCMoveFGTforFConst", "testCMoveFGEforFConst", "testCMoveFLTforFConst",
"testCMoveFLEforFConst", "testCMoveFEQforFConst", "testCMoveFNEQforFConst",
"testCMoveDGTforDConst", "testCMoveDGEforDConst", "testCMoveDLTforDConst",
"testCMoveDLEforDConst", "testCMoveDEQforDConst", "testCMoveDNEQforDConst"})
private void testCMove_runner() {
for (int i = 0; i < SIZE; i++) {
floata[i] = RANDOM.nextFloat();
floatb[i] = RANDOM.nextFloat();
doublea[i] = RANDOM.nextDouble();
doubleb[i] = RANDOM.nextDouble();
}
float[] floata = new float[SIZE];
float[] floatb = new float[SIZE];
float[] floatc = new float[SIZE];
double[] doublea = new double[SIZE];
double[] doubleb = new double[SIZE];
double[] doublec = new double[SIZE];
init(floata);
init(floatb);
init(doublea);
init(doubleb);
testCMoveVFGT(floata, floatb, floatc);
testCMoveVDLE(doublea, doubleb, doublec);
@@ -207,6 +634,7 @@ public class TestVectorConditionalMove {
Asserts.assertEquals(doublec[i], cmoveDoubleGE(doublea[i], doubleb[i]));
}
// Ensure we frequently have equals
for (int i = 0; i < SIZE; i++) {
if (i % 3 == 0) {
floatb[i] = floata[i];
@@ -234,5 +662,215 @@ public class TestVectorConditionalMove {
Asserts.assertEquals(floatc[i], cmoveFloatLTSwap(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDoubleGESwap(doublea[i], doubleb[i]));
}
// Extensions: compare 2 values, and pick from 2 consts
testCMoveFGTforFConst(floata, floatb, floatc);
testCMoveDGTforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFGTforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDGTforDConst(doublea[i], doubleb[i]));
}
testCMoveFGEforFConst(floata, floatb, floatc);
testCMoveDGEforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFGEforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDGEforDConst(doublea[i], doubleb[i]));
}
testCMoveFLTforFConst(floata, floatb, floatc);
testCMoveDLTforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFLTforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDLTforDConst(doublea[i], doubleb[i]));
}
testCMoveFLEforFConst(floata, floatb, floatc);
testCMoveDLEforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFLEforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDLEforDConst(doublea[i], doubleb[i]));
}
testCMoveFEQforFConst(floata, floatb, floatc);
testCMoveDEQforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFEQforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDEQforDConst(doublea[i], doubleb[i]));
}
testCMoveFNEQforFConst(floata, floatb, floatc);
testCMoveDNEQforDConst(doublea, doubleb, doublec);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(floatc[i], cmoveFNEQforFConst(floata[i], floatb[i]));
Asserts.assertEquals(doublec[i], cmoveDNEQforDConst(doublea[i], doubleb[i]));
}
}
// Driver for the sixteen "compare type X, select type Y" kernels listed in @Run.
// It allocates SIZE-element input and result arrays for each of int/long/float/double,
// fills the inputs via init(...), then runs every kernel and checks each result element
// against the matching scalar cmove<X>GTfor<Y> helper.
// NOTE(review): @Warmup(0) presumably disables warm-up invocations — confirm in the IR framework docs.
@Warmup(0)
@Run(test = {"testCMoveIGTforI",
"testCMoveIGTforL",
"testCMoveIGTforF",
"testCMoveIGTforD",
"testCMoveLGTforI",
"testCMoveLGTforL",
"testCMoveLGTforF",
"testCMoveLGTforD",
"testCMoveFGTforI",
"testCMoveFGTforL",
"testCMoveFGTforF",
"testCMoveFGTforD",
"testCMoveDGTforI",
"testCMoveDGTforL",
"testCMoveDGTforF",
"testCMoveDGTforD"})
private void testCMove_runner_two() {
// a*/b* are the compared operands, c*/d* the candidate values, r* the result arrays.
int[] aI = new int[SIZE];
int[] bI = new int[SIZE];
int[] cI = new int[SIZE];
int[] dI = new int[SIZE];
int[] rI = new int[SIZE];
long[] aL = new long[SIZE];
long[] bL = new long[SIZE];
long[] cL = new long[SIZE];
long[] dL = new long[SIZE];
long[] rL = new long[SIZE];
float[] aF = new float[SIZE];
float[] bF = new float[SIZE];
float[] cF = new float[SIZE];
float[] dF = new float[SIZE];
float[] rF = new float[SIZE];
double[] aD = new double[SIZE];
double[] bD = new double[SIZE];
double[] cD = new double[SIZE];
double[] dD = new double[SIZE];
double[] rD = new double[SIZE];
// Only the inputs are randomized; the result arrays are overwritten by each kernel.
init(aI);
init(bI);
init(cI);
init(dI);
init(aL);
init(bL);
init(cL);
init(dL);
init(aF);
init(bF);
init(cF);
init(dF);
init(aD);
init(bD);
init(cD);
init(dD);
// Every kernel receives the same array for r and r2; within an iteration r2[i] is
// written first and r[i] second, so the array ends up holding the select result.
// int comparison
testCMoveIGTforI(aI, bI, cI, dI, rI, rI);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rI[i], cmoveIGTforI(aI[i], bI[i], cI[i], dI[i]));
}
testCMoveIGTforL(aI, bI, cL, dL, rL, rL);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rL[i], cmoveIGTforL(aI[i], bI[i], cL[i], dL[i]));
}
testCMoveIGTforF(aI, bI, cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveIGTforF(aI[i], bI[i], cF[i], dF[i]));
}
testCMoveIGTforD(aI, bI, cD, dD, rD, rD);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rD[i], cmoveIGTforD(aI[i], bI[i], cD[i], dD[i]));
}
// long comparison
testCMoveLGTforI(aL, bL, cI, dI, rI, rI);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rI[i], cmoveLGTforI(aL[i], bL[i], cI[i], dI[i]));
}
testCMoveLGTforL(aL, bL, cL, dL, rL, rL);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rL[i], cmoveLGTforL(aL[i], bL[i], cL[i], dL[i]));
}
testCMoveLGTforF(aL, bL, cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveLGTforF(aL[i], bL[i], cF[i], dF[i]));
}
testCMoveLGTforD(aL, bL, cD, dD, rD, rD);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rD[i], cmoveLGTforD(aL[i], bL[i], cD[i], dD[i]));
}
// float comparison
testCMoveFGTforI(aF, bF, cI, dI, rI, rI);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rI[i], cmoveFGTforI(aF[i], bF[i], cI[i], dI[i]));
}
testCMoveFGTforL(aF, bF, cL, dL, rL, rL);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rL[i], cmoveFGTforL(aF[i], bF[i], cL[i], dL[i]));
}
testCMoveFGTforF(aF, bF, cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveFGTforF(aF[i], bF[i], cF[i], dF[i]));
}
testCMoveFGTforD(aF, bF, cD, dD, rD, rD);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rD[i], cmoveFGTforD(aF[i], bF[i], cD[i], dD[i]));
}
// double comparison
testCMoveDGTforI(aD, bD, cI, dI, rI, rI);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rI[i], cmoveDGTforI(aD[i], bD[i], cI[i], dI[i]));
}
testCMoveDGTforL(aD, bD, cL, dL, rL, rL);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rL[i], cmoveDGTforL(aD[i], bD[i], cL[i], dL[i]));
}
testCMoveDGTforF(aD, bD, cF, dF, rF, rF);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rF[i], cmoveDGTforF(aD[i], bD[i], cF[i], dF[i]));
}
testCMoveDGTforD(aD, bD, cD, dD, rD, rD);
for (int i = 0; i < SIZE; i++) {
Asserts.assertEquals(rD[i], cmoveDGTforD(aD[i], bD[i], cD[i], dD[i]));
}
}
// Fills every element of a with a value from RANDOM.nextInt().
private static void init(int[] a) {
    // Bounded by the array's own length instead of SIZE, so an array of any
    // length is filled completely and never indexed out of range.
    for (int i = 0; i < a.length; i++) {
        a[i] = RANDOM.nextInt();
    }
}
// Fills every element of a with a value from RANDOM.nextLong().
private static void init(long[] a) {
    // Bounded by the array's own length instead of SIZE, so an array of any
    // length is filled completely and never indexed out of range.
    for (int i = 0; i < a.length; i++) {
        a[i] = RANDOM.nextLong();
    }
}
// Fills every element of a with RANDOM.nextFloat(), replacing roughly one value
// in twenty with NaN so that comparisons against NaN are exercised as well.
private static void init(float[] a) {
    // Bounded by the array's own length instead of SIZE, so an array of any
    // length is filled completely and never indexed out of range.
    for (int i = 0; i < a.length; i++) {
        // Draw order (nextFloat, then nextInt) is unchanged, so a given seed
        // still produces the same data for SIZE-length arrays.
        a[i] = RANDOM.nextFloat();
        if (RANDOM.nextInt() % 20 == 0) {
            a[i] = Float.NaN;
        }
    }
}
// Fills every element of a with RANDOM.nextDouble(), replacing roughly one value
// in twenty with NaN so that comparisons against NaN are exercised as well.
private static void init(double[] a) {
    // Bounded by the array's own length instead of SIZE, so an array of any
    // length is filled completely and never indexed out of range.
    for (int i = 0; i < a.length; i++) {
        // Draw order (nextDouble, then nextInt) is unchanged, so a given seed
        // still produces the same data for SIZE-length arrays.
        a[i] = RANDOM.nextDouble();
        if (RANDOM.nextInt() % 20 == 0) {
            a[i] = Double.NaN;
        }
    }
}
}

View File

@@ -304,16 +304,6 @@ public class IRNode {
beforeMatchingNameRegex(CMOVE_I, "CMoveI");
}
// Placeholder string for "CMoveVD" nodes in @IR rules, built from PREFIX and POSTFIX.
public static final String CMOVE_VD = PREFIX + "CMOVE_VD" + POSTFIX;
static {
// Registers the placeholder under the node name "CMoveVD".
// NOTE(review): what superWordNodes does with it is defined elsewhere — confirm.
superWordNodes(CMOVE_VD, "CMoveVD");
}
// Placeholder string for "CMoveVF" nodes in @IR rules, built from PREFIX and POSTFIX.
public static final String CMOVE_VF = PREFIX + "CMOVE_VF" + POSTFIX;
static {
// Registers the placeholder under the node name "CMoveVF".
// NOTE(review): what superWordNodes does with it is defined elsewhere — confirm.
superWordNodes(CMOVE_VF, "CMoveVF");
}
public static final String CMP_I = PREFIX + "CMP_I" + POSTFIX;
static {
beforeMatchingNameRegex(CMP_I, "CmpI");
@@ -1278,6 +1268,11 @@ public class IRNode {
beforeMatchingNameRegex(VECTOR_BLEND, "VectorBlend");
}
// Placeholder string for "VectorMaskCmp" nodes in @IR rules, built from PREFIX and POSTFIX.
public static final String VECTOR_MASK_CMP = PREFIX + "VECTOR_MASK_CMP" + POSTFIX;
static {
// Registers the placeholder under the node name "VectorMaskCmp", using the same
// helper as the VECTOR_BLEND and VECTOR_CAST_B2X entries next to it.
// NOTE(review): the exact matching semantics of beforeMatchingNameRegex are defined elsewhere — confirm.
beforeMatchingNameRegex(VECTOR_MASK_CMP, "VectorMaskCmp");
}
public static final String VECTOR_CAST_B2X = PREFIX + "VECTOR_CAST_B2X" + POSTFIX;
static {
beforeMatchingNameRegex(VECTOR_CAST_B2X, "VectorCastB2X");