mirror of
https://github.com/JetBrains/JetBrainsRuntime.git
synced 2025-12-06 09:29:38 +01:00
8306302: C2 Superword fix: use VectorMaskCmp and VectorBlend instead of CMoveVF/D
Reviewed-by: fgao, jbhateja
This commit is contained in:
@@ -5992,49 +5992,6 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------- Vector conditional move --------------------------
|
||||
|
||||
// Vector conditional move for floating-point vectors, matched on both
// CMoveVF and CMoveVD with the tree (Binary copnd cond) (Binary src1 src2).
// Selected when UseSVE == 0, or when use_neon_for_vector() accepts the vector
// length in bytes and the int constant read via n->in(1)->in(2) is not
// BoolTest::ne. $dst is declared TEMP_DEF.
instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
|
||||
predicate(UseSVE == 0 ||
|
||||
(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
|
||||
n->in(1)->in(2)->get_int() != BoolTest::ne));
|
||||
match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
|
||||
match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
|
||||
ins_encode %{
|
||||
// Convert the BoolTest mask constant carried by $cond into an assembler condition.
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
// Only 8-byte and 16-byte vectors are expected here.
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||
// Step 1: compare $src1 with $src2; the result is written into $dst.
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
|
||||
// Step 2: bsl on $dst with operands $src2 and $src1 (in that order).
// NOTE(review): presumably a bitwise select keyed on the mask left in $dst - confirm.
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Vector conditional move for floating-point vectors, matched on both
// CMoveVF and CMoveVD with the tree (Binary copnd cond) (Binary src1 src2).
// Selected when use_neon_for_vector() rejects the vector length in bytes, or
// when UseSVE > 0 and the int constant read via n->in(1)->in(2) equals
// BoolTest::ne. $pgtmp is a TEMP governing-predicate register.
instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{
|
||||
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) ||
|
||||
(UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne));
|
||||
match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
|
||||
match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
|
||||
effect(TEMP pgtmp);
|
||||
format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
// Convert the BoolTest mask constant carried by $cond into an assembler condition.
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// Note: length_in_bytes is computed but not referenced again in this encoding.
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
// Step 1: compare $src1 with $src2 under ptrue; the result goes to $pgtmp.
__ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, condition);
|
||||
// Step 2: sve_sel writes $dst from $src2 and $src1 (in that order) using $pgtmp.
__ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------------ Vector round ---------------------------------
|
||||
|
||||
// vector Math.round
|
||||
|
||||
@@ -4258,49 +4258,6 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------- Vector conditional move --------------------------
|
||||
|
||||
// Vector conditional move for floating-point vectors, matched on both
// CMoveVF and CMoveVD with the tree (Binary copnd cond) (Binary src1 src2).
// Selected when UseSVE == 0, or when use_neon_for_vector() accepts the vector
// length in bytes and the int constant read via n->in(1)->in(2) is not
// BoolTest::ne. $dst is declared TEMP_DEF.
instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{
|
||||
predicate(UseSVE == 0 ||
|
||||
(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) &&
|
||||
n->in(1)->in(2)->get_int() != BoolTest::ne));
|
||||
match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
|
||||
match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %}
|
||||
ins_encode %{
|
||||
// Convert the BoolTest mask constant carried by $cond into an assembler condition.
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
// Only 8-byte and 16-byte vectors are expected here.
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||
// Step 1: compare $src1 with $src2; the result is written into $dst.
__ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16);
|
||||
// Step 2: bsl on $dst with operands $src2 and $src1 (in that order).
// NOTE(review): presumably a bitwise select keyed on the mask left in $dst - confirm.
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Vector conditional move for floating-point vectors, matched on both
// CMoveVF and CMoveVD with the tree (Binary copnd cond) (Binary src1 src2).
// Selected when use_neon_for_vector() rejects the vector length in bytes, or
// when UseSVE > 0 and the int constant read via n->in(1)->in(2) equals
// BoolTest::ne. $pgtmp is a TEMP governing-predicate register.
instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{
|
||||
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) ||
|
||||
(UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne));
|
||||
match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2)));
|
||||
match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2)));
|
||||
effect(TEMP pgtmp);
|
||||
format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
// Convert the BoolTest mask constant carried by $cond into an assembler condition.
Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant);
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
// Note: length_in_bytes is computed but not referenced again in this encoding.
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
// Step 1: compare $src1 with $src2 under ptrue; the result goes to $pgtmp.
__ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, condition);
|
||||
// Step 2: sve_sel writes $dst from $src2 and $src1 (in that order) using $pgtmp.
__ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------------ Vector round ---------------------------------
|
||||
|
||||
// vector Math.round
|
||||
|
||||
@@ -1504,12 +1504,6 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_CMoveVF:
|
||||
case Op_CMoveVD:
|
||||
if (UseAVX < 1) { // enabled for AVX only
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_StrIndexOf:
|
||||
if (!UseSSE42Intrinsics) {
|
||||
return false;
|
||||
@@ -1740,11 +1734,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
return false; // 512bit vpmullq, vandpd and vxorpd are not available
|
||||
}
|
||||
break;
|
||||
case Op_CMoveVF:
|
||||
if (vlen != 8) {
|
||||
return false; // implementation limitation (only vcmov8F_reg is present)
|
||||
}
|
||||
break;
|
||||
case Op_RotateRightV:
|
||||
case Op_RotateLeftV:
|
||||
if (bt != T_INT && bt != T_LONG) {
|
||||
@@ -1772,11 +1761,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_CMoveVD:
|
||||
if (vlen != 4) {
|
||||
return false; // implementation limitation (only vcmov4D_reg is present)
|
||||
}
|
||||
break;
|
||||
case Op_MaxV:
|
||||
case Op_MinV:
|
||||
if (UseSSE < 4 && is_integral_type(bt)) {
|
||||
@@ -2947,29 +2931,6 @@ operand legVecZ() %{
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Comparison code operand for the FP vector conditional move.
// Matches a Bool node whose test is neither BoolTest::overflow nor
// BoolTest::no_overflow, and maps each condition to the encoding constant
// listed in the COND_INTER interface below.
operand cmpOp_vcmppd() %{
|
||||
match(Bool);
|
||||
|
||||
predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
|
||||
n->as_Bool()->_test._test != BoolTest::no_overflow);
|
||||
format %{ "" %}
|
||||
interface(COND_INTER) %{
|
||||
equal (0x0, "eq");
|
||||
less (0x1, "lt");
|
||||
less_equal (0x2, "le");
|
||||
not_equal (0xC, "ne");
|
||||
greater_equal(0xD, "ge");
|
||||
greater (0xE, "gt");
|
||||
//TODO: adlc fails to compile this operand without the next two lines, reporting:
|
||||
// x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
|
||||
// equal' for overflow.
|
||||
overflow (0x20, "o"); // not really supported by the instruction
|
||||
no_overflow (0x21, "no"); // not really supported by the instruction
|
||||
%}
|
||||
%}
|
||||
|
||||
|
||||
// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
|
||||
|
||||
// ============================================================================
|
||||
@@ -5983,42 +5944,6 @@ instruct vmulD_mem(vec dst, vec src, memory mem) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Float vector conditional move, matched on CMoveVF only when the vector
// length is exactly 8 elements. $dst is a TEMP; $src1 and $src2 are USE.
// Note: the format text prints "cmpps"/"blendvps", while the encoding below
// emits vcmpps and vblendvps.
instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
|
||||
predicate(Matcher::vector_length(n) == 8);
|
||||
match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
|
||||
effect(TEMP dst, USE src1, USE src2);
|
||||
format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
|
||||
"blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
|
||||
%}
|
||||
ins_encode %{
|
||||
assert(UseAVX > 0, "required");
|
||||
|
||||
// The encoding is fixed to 256-bit AVX.
int vlen_enc = Assembler::AVX_256bit;
|
||||
// The comparison predicate comes from the cmpcode of the $copnd operand.
int cond = (Assembler::Condition)($copnd$$cmpcode);
|
||||
// Step 1: compare $src1 with $src2; the result is written into $dst.
__ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc);
|
||||
// Step 2: blend $src1 and $src2 into $dst, passing $dst as the last register operand.
__ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Double vector conditional move, matched on CMoveVD only when the vector
// length is exactly 4 elements. $dst is a TEMP; $src1 and $src2 are USE.
// Note: the format text prints "cmppd", while the encoding below emits vcmppd.
instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
|
||||
predicate(Matcher::vector_length(n) == 4);
|
||||
match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
|
||||
effect(TEMP dst, USE src1, USE src2);
|
||||
format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
|
||||
"vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
|
||||
%}
|
||||
ins_encode %{
|
||||
assert(UseAVX > 0, "required");
|
||||
|
||||
// The encoding is fixed to 256-bit AVX.
int vlen_enc = Assembler::AVX_256bit;
|
||||
// The comparison predicate comes from the cmpcode of the $copnd operand.
int cond = (Assembler::Condition)($copnd$$cmpcode);
|
||||
// Step 1: compare $src1 with $src2; the result is written into $dst.
__ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc);
|
||||
// Step 2: blend $src1 and $src2 into $dst, passing $dst as the last register operand.
__ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// --------------------------------- DIV --------------------------------------
|
||||
|
||||
// Floats vector div
|
||||
|
||||
@@ -4203,7 +4203,6 @@ bool MatchRule::is_vector() const {
|
||||
"AddVB","AddVS","AddVI","AddVL","AddVF","AddVD",
|
||||
"SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
|
||||
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
|
||||
"CMoveVD", "CMoveVF",
|
||||
"DivVF","DivVD",
|
||||
"AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD",
|
||||
"NegVF","NegVD","NegVI","NegVL",
|
||||
|
||||
@@ -81,9 +81,7 @@ macro(CompressBitsV)
|
||||
macro(ExpandBitsV)
|
||||
macro(ConstraintCast)
|
||||
macro(CMoveD)
|
||||
macro(CMoveVD)
|
||||
macro(CMoveF)
|
||||
macro(CMoveVF)
|
||||
macro(CMoveI)
|
||||
macro(CMoveL)
|
||||
macro(CMoveP)
|
||||
|
||||
@@ -2385,20 +2385,6 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
|
||||
n->del_req(3);
|
||||
break;
|
||||
}
|
||||
case Op_CMoveVF:
|
||||
case Op_CMoveVD: {
|
||||
// Restructure into a binary tree for Matching:
|
||||
// CMoveVF (Binary bool mask) (Binary src1 src2)
|
||||
Node* in_cc = n->in(1);
|
||||
assert(in_cc->is_Con(), "The condition input of cmove vector node must be a constant.");
|
||||
Node* bol = new BoolNode(in_cc, (BoolTest::mask)in_cc->get_int());
|
||||
Node* pair1 = new BinaryNode(bol, in_cc);
|
||||
n->set_req(1, pair1);
|
||||
Node* pair2 = new BinaryNode(n->in(2), n->in(3));
|
||||
n->set_req(2, pair2);
|
||||
n->del_req(3);
|
||||
break;
|
||||
}
|
||||
case Op_MacroLogicV: {
|
||||
Node* pair1 = new BinaryNode(n->in(1), n->in(2));
|
||||
Node* pair2 = new BinaryNode(n->in(3), n->in(4));
|
||||
|
||||
@@ -60,7 +60,6 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
|
||||
_mem_slice_tail(arena(), 8, 0, nullptr), // memory slice tails
|
||||
_node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
|
||||
_clone_map(phase->C->clone_map()), // map of nodes created in cloning
|
||||
_cmovev_kit(_arena, this), // map to facilitate CMoveV creation
|
||||
_align_to_ref(nullptr), // memory reference to align vectors to
|
||||
_disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
|
||||
_dg(_arena), // dependence graph
|
||||
@@ -619,9 +618,6 @@ bool SuperWord::SLP_extract() {
|
||||
combine_packs();
|
||||
|
||||
construct_my_pack_map();
|
||||
if (UseVectorCmov) {
|
||||
merge_packs_to_cmove();
|
||||
}
|
||||
|
||||
filter_packs();
|
||||
|
||||
@@ -1580,18 +1576,6 @@ void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
|
||||
|
||||
//------------------------------data_size---------------------------
|
||||
int SuperWord::data_size(Node* s) {
|
||||
Node* use = nullptr; //test if the node is a candidate for CMoveV optimization, then return the size of CMov
|
||||
if (UseVectorCmov) {
|
||||
use = _cmovev_kit.is_Bool_candidate(s);
|
||||
if (use != nullptr) {
|
||||
return data_size(use);
|
||||
}
|
||||
use = _cmovev_kit.is_Cmp_candidate(s);
|
||||
if (use != nullptr) {
|
||||
return data_size(use);
|
||||
}
|
||||
}
|
||||
|
||||
int bsize = type2aelembytes(velt_basic_type(s));
|
||||
assert(bsize != 0, "valid size");
|
||||
return bsize;
|
||||
@@ -2052,213 +2036,6 @@ void SuperWord::filter_packs() {
|
||||
#endif
|
||||
}
|
||||
|
||||
//------------------------------merge_packs_to_cmove---------------------------
|
||||
// Merge qualified CMove into new vector-nodes
|
||||
// We want to catch this pattern and subsume Cmp and Bool into CMove
|
||||
//
|
||||
// Sub Con
|
||||
// / | /
|
||||
// / | / /
|
||||
// / | / /
|
||||
// / | / /
|
||||
// / / /
|
||||
// / / | /
|
||||
// v / | /
|
||||
// Cmp | /
|
||||
// | | /
|
||||
// v | /
|
||||
// Bool | /
|
||||
// \ | /
|
||||
// \ | /
|
||||
// \ | /
|
||||
// \ | /
|
||||
// \ v /
|
||||
// CMove
|
||||
//
|
||||
|
||||
void SuperWord::merge_packs_to_cmove() {
|
||||
for (int i = _packset.length() - 1; i >= 0; i--) {
|
||||
Node_List* pk = _packset.at(i);
|
||||
if (_cmovev_kit.can_merge_cmove_pack(pk)) {
|
||||
_cmovev_kit.make_cmove_pack(pk);
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord) {
|
||||
tty->print_cr("\nSuperWord::merge_packs_to_cmove(): After merge");
|
||||
print_packset();
|
||||
tty->cr();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns the CMove user of `def` when `def` is a Bool node without a control
// input and with exactly one output, and that output is a CMove of the same
// generation. Returns nullptr in every other case.
Node* CMoveKit::is_Bool_candidate(Node* def) const {
  if (!def->is_Bool()) {
    return nullptr;
  }
  if (def->in(0) != nullptr) {
    return nullptr;
  }
  if (def->outcnt() != 1) {
    return nullptr;
  }

  Node* user = nullptr;
  for (DUIterator_Fast imax, i = def->fast_outs(imax); i < imax; i++) {
    user = def->fast_out(i);
    const bool acceptable = _sw->same_generation(def, user) && user->is_CMove();
    if (!acceptable) {
      return nullptr;
    }
  }
  return user;
}
|
||||
|
||||
// Returns the CMove reached through Cmp -> Bool -> CMove when `def` is a Cmp
// node without a control input and with exactly one output, the output passes
// is_Bool_candidate(), and both the Bool and the CMove are of the same
// generation as `def`. Returns nullptr in every other case.
Node* CMoveKit::is_Cmp_candidate(Node* def) const {
  const bool eligible = def->is_Cmp() && def->in(0) == nullptr && def->outcnt() == 1;
  if (!eligible) {
    return nullptr;
  }

  Node* result = nullptr;
  for (DUIterator_Fast imax, i = def->fast_outs(imax); i < imax; i++) {
    result = def->fast_out(i);
    // The direct user must be of the same generation ...
    if (!_sw->same_generation(def, result)) {
      return nullptr;
    }
    // ... and must itself lead to a CMove ...
    result = is_Bool_candidate(result);
    if (result == nullptr) {
      return nullptr;
    }
    // ... which must also be of the same generation.
    if (!_sw->same_generation(def, result)) {
      return nullptr;
    }
  }
  return result;
}
|
||||
|
||||
// Determine if the current pack is an ideal cmove pack, and if its related packs,
|
||||
// i.e. bool node pack and cmp node pack, can be successfully merged for vectorization.
|
||||
bool CMoveKit::can_merge_cmove_pack(Node_List* cmove_pk) {
|
||||
Node* cmove = cmove_pk->at(0);
|
||||
|
||||
if (!SuperWord::is_cmove_fp_opcode(cmove->Opcode()) ||
|
||||
pack(cmove) != nullptr /* already in the cmove pack */) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (cmove->in(0) != nullptr) {
|
||||
NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: CMove %d has control flow, escaping...", cmove->_idx); cmove->dump();})
|
||||
return false;
|
||||
}
|
||||
|
||||
Node* bol = cmove->as_CMove()->in(CMoveNode::Condition);
|
||||
if (!bol->is_Bool() ||
|
||||
bol->outcnt() != 1 ||
|
||||
!_sw->same_generation(bol, cmove) ||
|
||||
bol->in(0) != nullptr || // Bool node has control flow!!
|
||||
_sw->my_pack(bol) == nullptr) {
|
||||
NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: Bool %d does not fit CMove %d for building vector, escaping...", bol->_idx, cmove->_idx); bol->dump();})
|
||||
return false;
|
||||
}
|
||||
Node_List* bool_pk = _sw->my_pack(bol);
|
||||
if (bool_pk->size() != cmove_pk->size() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Node* cmp = bol->in(1);
|
||||
if (!cmp->is_Cmp() ||
|
||||
cmp->outcnt() != 1 ||
|
||||
!_sw->same_generation(cmp, cmove) ||
|
||||
cmp->in(0) != nullptr || // Cmp node has control flow!!
|
||||
_sw->my_pack(cmp) == nullptr) {
|
||||
NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: Cmp %d does not fit CMove %d for building vector, escaping...", cmp->_idx, cmove->_idx); cmp->dump();})
|
||||
return false;
|
||||
}
|
||||
Node_List* cmp_pk = _sw->my_pack(cmp);
|
||||
if (cmp_pk->size() != cmove_pk->size() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!test_cmp_pack(cmp_pk, cmove_pk)) {
|
||||
NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: cmp pack for Cmp %d failed vectorization test", cmp->_idx); cmp->dump();})
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create a new cmove pack to substitute the old one, map all info to the
|
||||
// new pack and delete the old cmove pack and related packs from the packset.
|
||||
void CMoveKit::make_cmove_pack(Node_List* cmove_pk) {
|
||||
Node* cmove = cmove_pk->at(0);
|
||||
Node* bol = cmove->as_CMove()->in(CMoveNode::Condition);
|
||||
Node_List* bool_pk = _sw->my_pack(bol);
|
||||
Node* cmp = bol->in(1);
|
||||
Node_List* cmp_pk = _sw->my_pack(cmp);
|
||||
|
||||
Node_List* new_cmove_pk = new Node_List();
|
||||
uint sz = cmove_pk->size() - 1;
|
||||
for (uint i = 0; i <= sz; ++i) {
|
||||
Node* cmov = cmove_pk->at(i);
|
||||
Node* bol = bool_pk->at(i);
|
||||
Node* cmp = cmp_pk->at(i);
|
||||
|
||||
new_cmove_pk->insert(i, cmov);
|
||||
|
||||
map(cmov, new_cmove_pk);
|
||||
map(bol, new_cmove_pk);
|
||||
map(cmp, new_cmove_pk);
|
||||
|
||||
_sw->set_my_pack(cmov, new_cmove_pk); // and keep old packs for cmp and bool
|
||||
}
|
||||
_sw->_packset.remove(cmove_pk);
|
||||
_sw->_packset.remove(bool_pk);
|
||||
_sw->_packset.remove(cmp_pk);
|
||||
_sw->_packset.append(new_cmove_pk);
|
||||
NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmove_pack: added syntactic CMove pack"); _sw->print_pack(new_cmove_pk);})
|
||||
}
|
||||
|
||||
// Check that the Cmp pack can be subsumed by the vector built for the CMove
// pack:
//  - each Cmp input is either packed with the same size as cmp_pk, or is the
//    very same node for every Cmp in the pack;
//  - the two Cmp inputs are, lane by lane, the IfFalse/IfTrue inputs of the
//    corresponding CMove (in either order, fixed by lane 0).
bool CMoveKit::test_cmp_pack(Node_List* cmp_pk, Node_List* cmove_pk) {
  Node* first_cmp = cmp_pk->at(0);
  assert(first_cmp->is_Cmp(), "CMoveKit::test_cmp_pack: should be Cmp Node");
  assert(cmove_pk->at(0)->is_CMove(), "CMoveKit::test_cmp_pack: should be CMove");
  assert(cmp_pk->size() == cmove_pk->size(), "CMoveKit::test_cmp_pack: should be same size");

  Node* lhs = first_cmp->in(1);
  Node* rhs = first_cmp->in(2);
  Node_List* lhs_pk = _sw->my_pack(lhs);
  Node_List* rhs_pk = _sw->my_pack(rhs);

  // A packed input must have as many lanes as the Cmp pack.
  if (lhs_pk != nullptr && lhs_pk->size() != cmp_pk->size()) {
    return false;
  }
  if (rhs_pk != nullptr && rhs_pk->size() != cmp_pk->size()) {
    return false;
  }

  // An unpacked first input must be the same node for every Cmp.
  if (lhs_pk == nullptr) {
    for (uint lane = 1; lane < cmp_pk->size(); lane++) {
      if (cmp_pk->at(lane)->in(1) != lhs) {
        return false;
      }
    }
  }
  // An unpacked second input must be the same node for every Cmp.
  if (rhs_pk == nullptr) {
    for (uint lane = 1; lane < cmp_pk->size(); lane++) {
      if (cmp_pk->at(lane)->in(2) != rhs) {
        return false;
      }
    }
  }

  // Lane 0 fixes which CMove input each Cmp input corresponds to.
  Node* first_cmove = cmove_pk->at(0)->as_CMove();
  Node* if_false = first_cmove->in(CMoveNode::IfFalse);
  Node* if_true  = first_cmove->in(CMoveNode::IfTrue);
  int idx_for_lhs;
  int idx_for_rhs;
  if (lhs == if_false && rhs == if_true) {
    idx_for_lhs = CMoveNode::IfFalse;
    idx_for_rhs = CMoveNode::IfTrue;
  } else if (rhs == if_false && lhs == if_true) {
    idx_for_rhs = CMoveNode::IfFalse;
    idx_for_lhs = CMoveNode::IfTrue;
  } else {
    return false;
  }

  // All remaining lanes must follow the same correspondence.
  for (uint lane = 1; lane < cmp_pk->size(); lane++) {
    Node* lane_cmp   = cmp_pk->at(lane);
    Node* lane_cmove = cmove_pk->at(lane)->as_CMove();
    const bool lhs_matches = lane_cmp->in(1) == lane_cmove->in(idx_for_lhs);
    const bool rhs_matches = lane_cmp->in(2) == lane_cmove->in(idx_for_rhs);
    if (!lhs_matches || !rhs_matches) {
      return false;
    }
  }
  NOT_PRODUCT(if (_sw->is_trace_cmov()) {
    tty->print("CMoveKit::test_cmp_pack: cmp pack for 1st Cmp %d is OK for vectorization: ", first_cmp->_idx);
    first_cmp->dump();
  })
  return true;
}
|
||||
|
||||
//------------------------------implemented---------------------------
|
||||
// Can code be generated for pack p?
|
||||
bool SuperWord::implemented(Node_List* p) {
|
||||
@@ -2283,9 +2060,9 @@ bool SuperWord::implemented(Node_List* p) {
|
||||
// integer subword types with superword vectorization.
|
||||
// See JDK-8294816 for miscompilation issues with shorts.
|
||||
return false;
|
||||
} else if (is_cmove_fp_opcode(opc)) {
|
||||
retValue = is_cmov_pack(p) && VectorNode::implemented(opc, size, velt_basic_type(p0));
|
||||
NOT_PRODUCT(if(retValue && is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmove pack"); print_pack(p);})
|
||||
} else if (p0->is_Cmp()) {
|
||||
// Cmp -> Bool -> Cmove
|
||||
retValue = UseVectorCmov;
|
||||
} else if (requires_long_to_int_conversion(opc)) {
|
||||
// Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros
|
||||
// returns int type, but Vector API for them returns long type. To unify
|
||||
@@ -2308,10 +2085,6 @@ bool SuperWord::implemented(Node_List* p) {
|
||||
return retValue;
|
||||
}
|
||||
|
||||
// True when the first node of pack `p` has an entry in the CMove kit's map.
bool SuperWord::is_cmov_pack(Node_List* p) {
  Node* head = p->at(0);
  if (_cmovev_kit.pack(head) == nullptr) {
    return false;
  }
  return true;
}
|
||||
|
||||
bool SuperWord::requires_long_to_int_conversion(int opc) {
|
||||
switch(opc) {
|
||||
case Op_PopCountL:
|
||||
@@ -2385,9 +2158,6 @@ bool SuperWord::profitable(Node_List* p) {
|
||||
// just the ones outside the block.)
|
||||
for (uint i = 0; i < p->size(); i++) {
|
||||
Node* def = p->at(i);
|
||||
if (is_cmov_pack_internal_node(p, def)) {
|
||||
continue;
|
||||
}
|
||||
for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
|
||||
Node* use = def->fast_out(j);
|
||||
for (uint k = 0; k < use->req(); k++) {
|
||||
@@ -2408,11 +2178,30 @@ bool SuperWord::profitable(Node_List* p) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (p0->is_Cmp()) {
|
||||
// Verify that Cmp pack only has Bool pack uses
|
||||
for (DUIterator_Fast jmax, j = p0->fast_outs(jmax); j < jmax; j++) {
|
||||
Node* bol = p0->fast_out(j);
|
||||
if (!bol->is_Bool() || bol->in(0) != nullptr || !is_vector_use(bol, 1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (p0->is_Bool()) {
|
||||
// Verify that Bool pack only has CMove pack uses
|
||||
for (DUIterator_Fast jmax, j = p0->fast_outs(jmax); j < jmax; j++) {
|
||||
Node* cmove = p0->fast_out(j);
|
||||
if (!cmove->is_CMove() || cmove->in(0) != nullptr || !is_vector_use(cmove, 1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
void SuperWord::verify_packs() {
|
||||
// Verify independence at pack level.
|
||||
for (int i = 0; i < _packset.length(); i++) {
|
||||
Node_List* p = _packset.at(i);
|
||||
Node* dependence = find_dependence(p);
|
||||
@@ -2431,6 +2220,27 @@ void SuperWord::verify_packs() {
|
||||
}
|
||||
assert(dependence == nullptr, "all nodes in pack must be mutually independent");
|
||||
}
|
||||
|
||||
// Verify all nodes in packset have my_pack set correctly.
|
||||
Unique_Node_List processed;
|
||||
for (int i = 0; i < _packset.length(); i++) {
|
||||
Node_List* p = _packset.at(i);
|
||||
for (uint k = 0; k < p->size(); k++) {
|
||||
Node* n = p->at(k);
|
||||
assert(in_bb(n), "only nodes in bb can be in packset");
|
||||
assert(!processed.member(n), "node should only occur once in packset");
|
||||
assert(my_pack(n) == p, "n has consisten packset info");
|
||||
processed.push(n);
|
||||
}
|
||||
}
|
||||
|
||||
// Check that no other node has my_pack set.
|
||||
for (int i = 0; i < _block.length(); i++) {
|
||||
Node* n = _block.at(i);
|
||||
if (!processed.member(n)) {
|
||||
assert(my_pack(n) == nullptr, "should not have pack if not in packset");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2535,7 +2345,7 @@ public:
|
||||
if (pid == 0) {
|
||||
pid = new_pid();
|
||||
set_pid(n, pid);
|
||||
assert(_slp->my_pack(n) == nullptr || UseVectorCmov, "no packset");
|
||||
assert(_slp->my_pack(n) == nullptr, "no packset");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2953,7 +2763,89 @@ bool SuperWord::output() {
|
||||
Node* one = vector_opd(p, 3);
|
||||
vn = VectorNode::make(opc, in, zero, one, vlen, velt_basic_type(n));
|
||||
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||
} else if (n->req() == 3 && !is_cmov_pack(p)) {
|
||||
} else if (n->is_Cmp()) {
|
||||
// Bool + Cmp + CMove -> VectorMaskCmp + VectorBlend
|
||||
continue;
|
||||
} else if (n->is_Bool()) {
|
||||
// Bool + Cmp + CMove -> VectorMaskCmp + VectorBlend
|
||||
continue;
|
||||
} else if (n->is_CMove()) {
|
||||
// Bool + Cmp + CMove -> VectorMaskCmp + VectorBlend
|
||||
|
||||
BoolNode* bol = n->in(1)->as_Bool();
|
||||
assert(bol != nullptr, "must have Bool above CMove");
|
||||
BoolTest::mask bol_test = bol->_test._test;
|
||||
// The Bool test of a vectorized CMove must be one of the six plain
// comparisons. The message names each accepted test exactly once; it
// previously listed "ge" twice and omitted "gt".
assert(bol_test == BoolTest::eq ||
       bol_test == BoolTest::ne ||
       bol_test == BoolTest::ge ||
       bol_test == BoolTest::gt ||
       bol_test == BoolTest::lt ||
       bol_test == BoolTest::le,
       "CMove bool should be one of: eq,ne,ge,gt,lt,le");
|
||||
Node_List* p_bol = my_pack(bol);
|
||||
assert(p_bol != nullptr, "CMove must have matching Bool pack");
|
||||
|
||||
CmpNode* cmp = bol->in(1)->as_Cmp();
|
||||
assert(cmp != nullptr, "must have cmp above CMove");
|
||||
Node_List* p_cmp = my_pack(cmp);
|
||||
assert(p_cmp != nullptr, "Bool must have matching Cmp pack");
|
||||
|
||||
Node* cmp_in1 = vector_opd(p_cmp, 1);
|
||||
Node* cmp_in2 = vector_opd(p_cmp, 2);
|
||||
|
||||
Node* blend_in1 = vector_opd(p, 2);
|
||||
Node* blend_in2 = vector_opd(p, 3);
|
||||
|
||||
if (cmp->Opcode() == Op_CmpF || cmp->Opcode() == Op_CmpD) {
|
||||
// If we have a Float or Double comparison, we must be careful with
|
||||
// handling NaN's correctly. CmpF and CmpD have a return code, as
|
||||
// they are based on the java bytecodes fcmpl/dcmpl:
|
||||
// -1: cmp_in1 < cmp_in2, or at least one of the two is a NaN
|
||||
// 0: cmp_in1 == cmp_in2 (no NaN)
|
||||
// 1: cmp_in1 > cmp_in2 (no NaN)
|
||||
//
|
||||
// The "bol_test" selects which of the [-1, 0, 1] cases lead to "true".
|
||||
//
|
||||
// Note: ordered (O) comparison returns "false" if either input is NaN.
|
||||
// unordered (U) comparison returns "true" if either input is NaN.
|
||||
//
|
||||
// The VectorMaskCmpNode does a comparison directly on in1 and in2, in the java
|
||||
// standard way (all comparisons are ordered, except NEQ is unordered).
|
||||
//
|
||||
// In the following, "bol_test" already matches the cmp code for VectorMaskCmpNode:
|
||||
// BoolTest::eq: Case 0 -> EQ_O
|
||||
// BoolTest::ne: Case -1, 1 -> NEQ_U
|
||||
// BoolTest::ge: Case 0, 1 -> GE_O
|
||||
// BoolTest::gt: Case 1 -> GT_O
|
||||
//
|
||||
// But the lt and le comparisons must be converted from unordered to ordered:
|
||||
// BoolTest::lt: Case -1 -> LT_U -> VectorMaskCmp would interpret lt as LT_O
|
||||
// BoolTest::le: Case -1, 0 -> LE_U -> VectorMaskCmp would interpret le as LE_O
|
||||
//
|
||||
if (bol_test == BoolTest::lt || bol_test == BoolTest::le) {
|
||||
// Negating the bol_test and swapping the blend-inputs leaves all non-NaN cases equal,
|
||||
// but converts the unordered (U) to an ordered (O) comparison.
|
||||
// VectorBlend(VectorMaskCmp(LT_U, in1_cmp, in2_cmp), in1_blend, in2_blend)
|
||||
// <==> VectorBlend(VectorMaskCmp(GE_O, in1_cmp, in2_cmp), in2_blend, in1_blend)
|
||||
// VectorBlend(VectorMaskCmp(LE_U, in1_cmp, in2_cmp), in1_blend, in2_blend)
|
||||
// <==> VectorBlend(VectorMaskCmp(GT_O, in1_cmp, in2_cmp), in2_blend, in1_blend)
|
||||
bol_test = bol->_test.negate();
|
||||
swap(blend_in1, blend_in2);
|
||||
}
|
||||
}
|
||||
|
||||
// VectorMaskCmp
|
||||
ConINode* bol_test_node = _igvn.intcon((int)bol_test);
|
||||
BasicType bt = velt_basic_type(cmp);
|
||||
const TypeVect* vt = TypeVect::make(bt, vlen);
|
||||
VectorNode* mask = new VectorMaskCmpNode(bol_test, cmp_in1, cmp_in2, bol_test_node, vt);
|
||||
_igvn.register_new_node_with_optimizer(mask);
|
||||
_phase->set_ctrl(mask, _phase->get_ctrl(p->at(0)));
|
||||
_igvn._worklist.push(mask);
|
||||
|
||||
// VectorBlend
|
||||
vn = new VectorBlendNode(blend_in1, blend_in2, mask);
|
||||
} else if (n->req() == 3) {
|
||||
// Promote operands to vector
|
||||
Node* in1 = nullptr;
|
||||
bool node_isa_reduction = is_marked_reduction(n);
|
||||
@@ -3037,85 +2929,6 @@ bool SuperWord::output() {
|
||||
int vopc = VectorCastNode::opcode(opc, in->bottom_type()->is_vect()->element_basic_type());
|
||||
vn = VectorCastNode::make(vopc, in, bt, vlen);
|
||||
vlen_in_bytes = vn->as_Vector()->length_in_bytes();
|
||||
} else if (is_cmov_pack(p)) {
|
||||
if (cl->is_rce_post_loop()) {
|
||||
// do not refactor of flow in post loop context
|
||||
return false;
|
||||
}
|
||||
if (!n->is_CMove()) {
|
||||
continue;
|
||||
}
|
||||
// place here CMoveVDNode
|
||||
NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);})
|
||||
Node* bol = n->in(CMoveNode::Condition);
|
||||
if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) {
|
||||
NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();})
|
||||
bol = bol->in(1); //may be ExtractNode
|
||||
}
|
||||
|
||||
assert(bol->is_Bool(), "should be BoolNode - too late to bail out!");
|
||||
if (!bol->is_Bool()) {
|
||||
if (do_reserve_copy()) {
|
||||
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();})
|
||||
return false; //and reverse to backup IG
|
||||
}
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
BoolTest boltest = bol->as_Bool()->_test;
|
||||
BoolTest::mask cond = boltest._test;
|
||||
Node* cmp = bol->in(1);
|
||||
// When the src order of cmp node and cmove node are the same:
|
||||
// cmp: CmpD src1 src2
|
||||
// bool: Bool cmp mask
|
||||
// cmove: CMoveD bool scr1 src2
|
||||
// =====> vectorized, equivalent to
|
||||
// cmovev: CMoveVD mask src_vector1 src_vector2
|
||||
//
|
||||
// When the src order of cmp node and cmove node are different:
|
||||
// cmp: CmpD src2 src1
|
||||
// bool: Bool cmp mask
|
||||
// cmove: CMoveD bool scr1 src2
|
||||
// =====> equivalent to
|
||||
// cmp: CmpD src1 src2
|
||||
// bool: Bool cmp negate(mask)
|
||||
// cmove: CMoveD bool scr1 src2
|
||||
// (Note: when mask is ne or eq, we don't need to negate it even after swapping.)
|
||||
// =====> vectorized, equivalent to
|
||||
// cmovev: CMoveVD negate(mask) src_vector1 src_vector2
|
||||
if (cmp->in(2) == n->in(CMoveNode::IfFalse) && cond != BoolTest::ne && cond != BoolTest::eq) {
|
||||
assert(cmp->in(1) == n->in(CMoveNode::IfTrue), "cmpnode and cmovenode don't share the same inputs.");
|
||||
cond = boltest.negate();
|
||||
}
|
||||
Node* cc = _igvn.intcon((int)cond);
|
||||
NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", cc->_idx); cc->dump();})
|
||||
|
||||
Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse
|
||||
if (src1 == nullptr) {
|
||||
if (do_reserve_copy()) {
|
||||
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be null, exiting SuperWord");})
|
||||
return false; //and reverse to backup IG
|
||||
}
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue
|
||||
if (src2 == nullptr) {
|
||||
if (do_reserve_copy()) {
|
||||
NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be null, exiting SuperWord");})
|
||||
return false; //and reverse to backup IG
|
||||
}
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
BasicType bt = velt_basic_type(n);
|
||||
const TypeVect* vt = TypeVect::make(bt, vlen);
|
||||
assert(bt == T_FLOAT || bt == T_DOUBLE, "Only vectorization for FP cmovs is supported");
|
||||
if (bt == T_FLOAT) {
|
||||
vn = new CMoveVFNode(cc, src1, src2, vt);
|
||||
} else {
|
||||
assert(bt == T_DOUBLE, "Expected double");
|
||||
vn = new CMoveVDNode(cc, src1, src2, vt);
|
||||
}
|
||||
NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();})
|
||||
} else if (opc == Op_FmaD || opc == Op_FmaF) {
|
||||
// Promote operands to vector
|
||||
Node* in1 = vector_opd(p, 1);
|
||||
@@ -3455,7 +3268,7 @@ void SuperWord::insert_extracts(Node_List* p) {
|
||||
Node* n = use->in(k);
|
||||
if (def == n) {
|
||||
Node_List* u_pk = my_pack(use);
|
||||
if ((u_pk == nullptr || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) {
|
||||
if ((u_pk == nullptr || use->is_CMove()) && !is_vector_use(use, k)) {
|
||||
_n_idx_list.push(use, k);
|
||||
}
|
||||
}
|
||||
@@ -3886,6 +3699,18 @@ void SuperWord::compute_vector_element_type() {
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < _block.length(); i++) {
|
||||
Node* n = _block.at(i);
|
||||
Node* nn = n;
|
||||
if (nn->is_Bool() && nn->in(0) == nullptr) {
|
||||
nn = nn->in(1);
|
||||
assert(nn->is_Cmp(), "always have Cmp above Bool");
|
||||
}
|
||||
if (nn->is_Cmp() && nn->in(0) == nullptr) {
|
||||
nn = nn->in(1);
|
||||
set_velt_type(n, velt_type(nn));
|
||||
}
|
||||
}
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord && Verbose) {
|
||||
for (int i = 0; i < _block.length(); i++) {
|
||||
|
||||
@@ -203,24 +203,6 @@ class SWNodeInfo {
|
||||
};
|
||||
|
||||
class SuperWord;
|
||||
class CMoveKit {
|
||||
friend class SuperWord;
|
||||
private:
|
||||
SuperWord* _sw;
|
||||
Dict* _dict;
|
||||
CMoveKit(Arena* a, SuperWord* sw) : _sw(sw) {_dict = new Dict(cmpkey, hashkey, a);}
|
||||
void* _2p(Node* key) const { return (void*)(intptr_t)key; } // 2 conversion functions to make gcc happy
|
||||
Dict* dict() const { return _dict; }
|
||||
void map(Node* key, Node_List* val) { assert(_dict->operator[](_2p(key)) == nullptr, "key existed"); _dict->Insert(_2p(key), (void*)val); }
|
||||
void unmap(Node* key) { _dict->Delete(_2p(key)); }
|
||||
Node_List* pack(Node* key) const { return (Node_List*)_dict->operator[](_2p(key)); }
|
||||
Node* is_Bool_candidate(Node* nd) const; // if it is the right candidate return corresponding CMove* ,
|
||||
Node* is_Cmp_candidate(Node* nd) const; // otherwise return null
|
||||
// Determine if the current pack is a cmove candidate that can be vectorized.
|
||||
bool can_merge_cmove_pack(Node_List* cmove_pk);
|
||||
void make_cmove_pack(Node_List* cmove_pk);
|
||||
bool test_cmp_pack(Node_List* cmp_pk, Node_List* cmove_pk);
|
||||
};//class CMoveKit
|
||||
|
||||
// JVMCI: OrderedPair is moved up to deal with compilation issues on Windows
|
||||
//------------------------------OrderedPair---------------------------
|
||||
@@ -309,7 +291,6 @@ class SuperWord : public ResourceObj {
|
||||
GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
|
||||
GrowableArray<SWNodeInfo> _node_info; // Info needed per node
|
||||
CloneMap& _clone_map; // map of nodes created in cloning
|
||||
CMoveKit _cmovev_kit; // support for vectorization of CMov
|
||||
MemNode* _align_to_ref; // Memory reference that pre-loop will align to
|
||||
|
||||
GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs
|
||||
@@ -458,9 +439,6 @@ class SuperWord : public ResourceObj {
|
||||
private:
|
||||
void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
|
||||
// is pack good for converting into one vector node replacing bunches of Cmp, Bool, CMov nodes.
|
||||
bool is_cmov_pack(Node_List* p);
|
||||
bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); }
|
||||
static bool is_cmove_fp_opcode(int opc) { return (opc == Op_CMoveF || opc == Op_CMoveD); }
|
||||
static bool requires_long_to_int_conversion(int opc);
|
||||
// For pack p, are all idx operands the same?
|
||||
bool same_inputs(Node_List* p, int idx);
|
||||
@@ -595,9 +573,8 @@ private:
|
||||
void construct_my_pack_map();
|
||||
// Remove packs that are not implemented or not profitable.
|
||||
void filter_packs();
|
||||
// Merge CMove into new vector-nodes
|
||||
void merge_packs_to_cmove();
|
||||
// Verify that for every pack, all nodes are mutually independent
|
||||
// Verify that for every pack, all nodes are mutually independent.
|
||||
// Also verify that packset and my_pack are consistent.
|
||||
DEBUG_ONLY(void verify_packs();)
|
||||
// Adjust the memory graph for the packed operations
|
||||
void schedule();
|
||||
|
||||
@@ -82,9 +82,11 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
||||
case Op_FmaF:
|
||||
return (bt == T_FLOAT ? Op_FmaVF : 0);
|
||||
case Op_CMoveF:
|
||||
return (bt == T_FLOAT ? Op_CMoveVF : 0);
|
||||
return (bt == T_FLOAT ? Op_VectorBlend : 0);
|
||||
case Op_CMoveD:
|
||||
return (bt == T_DOUBLE ? Op_CMoveVD : 0);
|
||||
return (bt == T_DOUBLE ? Op_VectorBlend : 0);
|
||||
case Op_Bool:
|
||||
return Op_VectorMaskCmp;
|
||||
case Op_DivF:
|
||||
return (bt == T_FLOAT ? Op_DivVF : 0);
|
||||
case Op_DivD:
|
||||
@@ -683,10 +685,6 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) {
|
||||
*start = 1;
|
||||
*end = 3; // 2 vector operands
|
||||
break;
|
||||
case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD:
|
||||
*start = 2;
|
||||
*end = n->req();
|
||||
break;
|
||||
case Op_FmaD:
|
||||
case Op_FmaF:
|
||||
*start = 1;
|
||||
|
||||
@@ -392,22 +392,6 @@ public:
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------CMoveVFNode--------------------------------------
|
||||
// Vector float conditional move
|
||||
class CMoveVFNode : public VectorNode {
|
||||
public:
|
||||
CMoveVFNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------CMoveVDNode--------------------------------------
|
||||
// Vector double conditional move
|
||||
class CMoveVDNode : public VectorNode {
|
||||
public:
|
||||
CMoveVDNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------MulReductionVINode--------------------------------------
|
||||
// Vector multiply byte, short and int as a reduction
|
||||
class MulReductionVINode : public UnorderedReductionNode {
|
||||
|
||||
@@ -1768,8 +1768,6 @@
|
||||
declare_c2_type(NegVDNode, NegVNode) \
|
||||
declare_c2_type(FmaVDNode, VectorNode) \
|
||||
declare_c2_type(FmaVFNode, VectorNode) \
|
||||
declare_c2_type(CMoveVFNode, VectorNode) \
|
||||
declare_c2_type(CMoveVDNode, VectorNode) \
|
||||
declare_c2_type(CompressVNode, VectorNode) \
|
||||
declare_c2_type(CompressMNode, VectorNode) \
|
||||
declare_c2_type(ExpandVNode, VectorNode) \
|
||||
|
||||
@@ -30,30 +30,25 @@ import jdk.test.lib.Utils;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8289422
|
||||
* @bug 8289422 8306088
|
||||
* @key randomness
|
||||
* @summary Auto-vectorization enhancement to support vector conditional move on AArch64
|
||||
* @requires os.arch=="aarch64"
|
||||
* @summary Auto-vectorization enhancement to support vector conditional move.
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.irTests.TestVectorConditionalMove
|
||||
*/
|
||||
|
||||
public class TestVectorConditionalMove {
|
||||
final private static int SIZE = 3000;
|
||||
final private static int SIZE = 1024;
|
||||
private static final Random RANDOM = Utils.getRandomInstance();
|
||||
|
||||
private static float[] floata = new float[SIZE];
|
||||
private static float[] floatb = new float[SIZE];
|
||||
private static float[] floatc = new float[SIZE];
|
||||
private static double[] doublea = new double[SIZE];
|
||||
private static double[] doubleb = new double[SIZE];
|
||||
private static double[] doublec = new double[SIZE];
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework.runWithFlags("-Xcomp", "-XX:-TieredCompilation", "-XX:+UseCMoveUnconditionally",
|
||||
"-XX:+UseVectorCmov", "-XX:CompileCommand=exclude,*.cmove*");
|
||||
TestFramework.runWithFlags("-XX:-TieredCompilation",
|
||||
"-XX:+UseCMoveUnconditionally",
|
||||
"-XX:+UseVectorCmov",
|
||||
"-XX:CompileCommand=compileonly,*.TestVectorConditionalMove.test*");
|
||||
}
|
||||
|
||||
// Compare 2 values, and pick one of them
|
||||
private float cmoveFloatGT(float a, float b) {
|
||||
return (a > b) ? a : b;
|
||||
}
|
||||
@@ -94,8 +89,124 @@ public class TestVectorConditionalMove {
|
||||
return (a != b) ? a : b;
|
||||
}
|
||||
|
||||
// Extensions: compare 2 values, and pick from 2 consts
|
||||
private float cmoveFGTforFConst(float a, float b) {
|
||||
return (a > b) ? 0.1f : -0.1f;
|
||||
}
|
||||
|
||||
private float cmoveFGEforFConst(float a, float b) {
|
||||
return (a >= b) ? 0.1f : -0.1f;
|
||||
}
|
||||
|
||||
private float cmoveFLTforFConst(float a, float b) {
|
||||
return (a < b) ? 0.1f : -0.1f;
|
||||
}
|
||||
|
||||
private float cmoveFLEforFConst(float a, float b) {
|
||||
return (a <= b) ? 0.1f : -0.1f;
|
||||
}
|
||||
|
||||
private float cmoveFEQforFConst(float a, float b) {
|
||||
return (a == b) ? 0.1f : -0.1f;
|
||||
}
|
||||
|
||||
private float cmoveFNEQforFConst(float a, float b) {
|
||||
return (a != b) ? 0.1f : -0.1f;
|
||||
}
|
||||
|
||||
private double cmoveDGTforDConst(double a, double b) {
|
||||
return (a > b) ? 0.1 : -0.1;
|
||||
}
|
||||
|
||||
private double cmoveDGEforDConst(double a, double b) {
|
||||
return (a >= b) ? 0.1 : -0.1;
|
||||
}
|
||||
|
||||
private double cmoveDLTforDConst(double a, double b) {
|
||||
return (a < b) ? 0.1 : -0.1;
|
||||
}
|
||||
|
||||
private double cmoveDLEforDConst(double a, double b) {
|
||||
return (a <= b) ? 0.1 : -0.1;
|
||||
}
|
||||
|
||||
private double cmoveDEQforDConst(double a, double b) {
|
||||
return (a == b) ? 0.1 : -0.1;
|
||||
}
|
||||
|
||||
private double cmoveDNEQforDConst(double a, double b) {
|
||||
return (a != b) ? 0.1 : -0.1;
|
||||
}
|
||||
|
||||
// Extension: Compare 2 ILFD values, and pick from 2 ILFD values
|
||||
private int cmoveIGTforI(int a, int b, int c, int d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private long cmoveIGTforL(int a, int b, long c, long d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private float cmoveIGTforF(int a, int b, float c, float d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private double cmoveIGTforD(int a, int b, double c, double d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private int cmoveLGTforI(long a, long b, int c, int d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private long cmoveLGTforL(long a, long b, long c, long d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private float cmoveLGTforF(long a, long b, float c, float d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private double cmoveLGTforD(long a, long b, double c, double d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private int cmoveFGTforI(float a, float b, int c, int d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private long cmoveFGTforL(float a, float b, long c, long d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private float cmoveFGTforF(float a, float b, float c, float d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private double cmoveFGTforD(float a, float b, double c, double d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private int cmoveDGTforI(double a, double b, int c, int d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private long cmoveDGTforL(double a, double b, long c, long d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private float cmoveDGTforF(double a, double b, float c, float d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
private double cmoveDGTforD(double a, double b, double c, double d) {
|
||||
return (a > b) ? c : d;
|
||||
}
|
||||
|
||||
// Compare 2 values, and pick one of them
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVFGT(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] > b[i]) ? a[i] : b[i];
|
||||
@@ -103,7 +214,8 @@ public class TestVectorConditionalMove {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVFGTSwap(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (b[i] > a[i]) ? a[i] : b[i];
|
||||
@@ -111,7 +223,8 @@ public class TestVectorConditionalMove {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVFLT(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] < b[i]) ? a[i] : b[i];
|
||||
@@ -119,7 +232,8 @@ public class TestVectorConditionalMove {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVFLTSwap(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (b[i] < a[i]) ? a[i] : b[i];
|
||||
@@ -127,7 +241,8 @@ public class TestVectorConditionalMove {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVFEQ(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] == b[i]) ? a[i] : b[i];
|
||||
@@ -135,7 +250,8 @@ public class TestVectorConditionalMove {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVDLE(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] <= b[i]) ? a[i] : b[i];
|
||||
@@ -143,7 +259,8 @@ public class TestVectorConditionalMove {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVDLESwap(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (b[i] <= a[i]) ? a[i] : b[i];
|
||||
@@ -151,7 +268,8 @@ public class TestVectorConditionalMove {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVDGE(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] >= b[i]) ? a[i] : b[i];
|
||||
@@ -159,7 +277,8 @@ public class TestVectorConditionalMove {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVDGESwap(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (b[i] >= a[i]) ? a[i] : b[i];
|
||||
@@ -167,31 +286,339 @@ public class TestVectorConditionalMove {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveVDNE(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] != b[i]) ? a[i] : b[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Extensions: compare 2 values, and pick from 2 consts
|
||||
@Test
|
||||
@IR(failOn = {IRNode.CMOVE_VD})
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveFGTforFConst(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] > b[i]) ? 0.1f : -0.1f;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveFGEforFConst(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] >= b[i]) ? 0.1f : -0.1f;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveFLTforFConst(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] < b[i]) ? 0.1f : -0.1f;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveFLEforFConst(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] <= b[i]) ? 0.1f : -0.1f;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveFEQforFConst(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] == b[i]) ? 0.1f : -0.1f;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveFNEQforFConst(float[] a, float[] b, float[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] != b[i]) ? 0.1f : -0.1f;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveDGTforDConst(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] > b[i]) ? 0.1 : -0.1;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveDGEforDConst(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] >= b[i]) ? 0.1 : -0.1;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveDLTforDConst(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] < b[i]) ? 0.1 : -0.1;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveDLEforDConst(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] <= b[i]) ? 0.1 : -0.1;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveDEQforDConst(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] == b[i]) ? 0.1 : -0.1;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveDNEQforDConst(double[] a, double[] b, double[] c) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
c[i] = (a[i] != b[i]) ? 0.1 : -0.1;
|
||||
}
|
||||
}
|
||||
|
||||
// Extension: Compare 2 ILFD values, and pick from 2 ILFD values
|
||||
// Note:
|
||||
// To guarantee that CMove is introduced, I need to perform the loads before the branch. To ensure they
|
||||
// do not float down into the branches, I compute a value, and store it to r2 (same as r, except that the
|
||||
// compilation does not know that).
|
||||
// So far, vectorization only works for CMoveF/D, with same data-width comparison (F/I for F, D/L for D).
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveIGTforI(int[] a, int[] b, int[] c, int[] d, int[] r, int[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
int cc = c[i];
|
||||
int dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveIGTforL(int[] a, int[] b, long[] c, long[] d, long[] r, long[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
long cc = c[i];
|
||||
long dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveIGTforF(int[] a, int[] b, float[] c, float[] d, float[] r, float[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
float cc = c[i];
|
||||
float dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveIGTforD(int[] a, int[] b, double[] c, double[] d, double[] r, double[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
double cc = c[i];
|
||||
double dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveLGTforI(long[] a, long[] b, int[] c, int[] d, int[] r, int[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
int cc = c[i];
|
||||
int dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveLGTforL(long[] a, long[] b, long[] c, long[] d, long[] r, long[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
long cc = c[i];
|
||||
long dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveLGTforF(long[] a, long[] b, float[] c, float[] d, float[] r, float[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
float cc = c[i];
|
||||
float dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
|
||||
// Requires avx2, else L is restricted to 16 byte, and D has 32. That leads to a vector elements mismatch of 2 to 4.
|
||||
private static void testCMoveLGTforD(long[] a, long[] b, double[] c, double[] d, double[] r, double[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
double cc = c[i];
|
||||
double dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveFGTforI(float[] a, float[] b, int[] c, int[] d, int[] r, int[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
int cc = c[i];
|
||||
int dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveFGTforL(float[] a, float[] b, long[] c, long[] d, long[] r, long[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
long cc = c[i];
|
||||
long dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveFGTforF(float[] a, float[] b, float[] c, float[] d, float[] r, float[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
float cc = c[i];
|
||||
float dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveFGTforD(float[] a, float[] b, double[] c, double[] d, double[] r, double[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
double cc = c[i];
|
||||
double dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveDGTforI(double[] a, double[] b, int[] c, int[] d, int[] r, int[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
int cc = c[i];
|
||||
int dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveDGTforL(double[] a, double[] b, long[] c, long[] d, long[] r, long[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
long cc = c[i];
|
||||
long dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveDGTforF(double[] a, double[] b, float[] c, float[] d, float[] r, float[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
float cc = c[i];
|
||||
float dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"},
|
||||
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
|
||||
private static void testCMoveDGTforD(double[] a, double[] b, double[] c, double[] d, double[] r, double[] r2) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
double cc = c[i];
|
||||
double dd = d[i];
|
||||
r2[i] = cc + dd;
|
||||
r[i] = (a[i] > b[i]) ? cc : dd;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND})
|
||||
private static void testCMoveVDUnsupported() {
|
||||
double[] doublec = new double[SIZE];
|
||||
int seed = 1001;
|
||||
for (int i = 0; i < doublec.length; i++) {
|
||||
doublec[i] = (i % 2 == 0) ? seed + i : seed - i;
|
||||
}
|
||||
}
|
||||
|
||||
@Warmup(0)
|
||||
@Run(test = {"testCMoveVFGT", "testCMoveVFLT","testCMoveVDLE", "testCMoveVDGE", "testCMoveVFEQ", "testCMoveVDNE",
|
||||
"testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap"})
|
||||
"testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap",
|
||||
"testCMoveFGTforFConst", "testCMoveFGEforFConst", "testCMoveFLTforFConst",
|
||||
"testCMoveFLEforFConst", "testCMoveFEQforFConst", "testCMoveFNEQforFConst",
|
||||
"testCMoveDGTforDConst", "testCMoveDGEforDConst", "testCMoveDLTforDConst",
|
||||
"testCMoveDLEforDConst", "testCMoveDEQforDConst", "testCMoveDNEQforDConst"})
|
||||
private void testCMove_runner() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
floata[i] = RANDOM.nextFloat();
|
||||
floatb[i] = RANDOM.nextFloat();
|
||||
doublea[i] = RANDOM.nextDouble();
|
||||
doubleb[i] = RANDOM.nextDouble();
|
||||
}
|
||||
float[] floata = new float[SIZE];
|
||||
float[] floatb = new float[SIZE];
|
||||
float[] floatc = new float[SIZE];
|
||||
double[] doublea = new double[SIZE];
|
||||
double[] doubleb = new double[SIZE];
|
||||
double[] doublec = new double[SIZE];
|
||||
|
||||
init(floata);
|
||||
init(floatb);
|
||||
init(doublea);
|
||||
init(doubleb);
|
||||
|
||||
testCMoveVFGT(floata, floatb, floatc);
|
||||
testCMoveVDLE(doublea, doubleb, doublec);
|
||||
@@ -207,6 +634,7 @@ public class TestVectorConditionalMove {
|
||||
Asserts.assertEquals(doublec[i], cmoveDoubleGE(doublea[i], doubleb[i]));
|
||||
}
|
||||
|
||||
// Ensure we frequently have equals
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
if (i % 3 == 0) {
|
||||
floatb[i] = floata[i];
|
||||
@@ -234,5 +662,215 @@ public class TestVectorConditionalMove {
|
||||
Asserts.assertEquals(floatc[i], cmoveFloatLTSwap(floata[i], floatb[i]));
|
||||
Asserts.assertEquals(doublec[i], cmoveDoubleGESwap(doublea[i], doubleb[i]));
|
||||
}
|
||||
|
||||
// Extensions: compare 2 values, and pick from 2 consts
|
||||
testCMoveFGTforFConst(floata, floatb, floatc);
|
||||
testCMoveDGTforDConst(doublea, doubleb, doublec);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(floatc[i], cmoveFGTforFConst(floata[i], floatb[i]));
|
||||
Asserts.assertEquals(doublec[i], cmoveDGTforDConst(doublea[i], doubleb[i]));
|
||||
}
|
||||
|
||||
testCMoveFGEforFConst(floata, floatb, floatc);
|
||||
testCMoveDGEforDConst(doublea, doubleb, doublec);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(floatc[i], cmoveFGEforFConst(floata[i], floatb[i]));
|
||||
Asserts.assertEquals(doublec[i], cmoveDGEforDConst(doublea[i], doubleb[i]));
|
||||
}
|
||||
|
||||
testCMoveFLTforFConst(floata, floatb, floatc);
|
||||
testCMoveDLTforDConst(doublea, doubleb, doublec);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(floatc[i], cmoveFLTforFConst(floata[i], floatb[i]));
|
||||
Asserts.assertEquals(doublec[i], cmoveDLTforDConst(doublea[i], doubleb[i]));
|
||||
}
|
||||
|
||||
testCMoveFLEforFConst(floata, floatb, floatc);
|
||||
testCMoveDLEforDConst(doublea, doubleb, doublec);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(floatc[i], cmoveFLEforFConst(floata[i], floatb[i]));
|
||||
Asserts.assertEquals(doublec[i], cmoveDLEforDConst(doublea[i], doubleb[i]));
|
||||
}
|
||||
|
||||
testCMoveFEQforFConst(floata, floatb, floatc);
|
||||
testCMoveDEQforDConst(doublea, doubleb, doublec);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(floatc[i], cmoveFEQforFConst(floata[i], floatb[i]));
|
||||
Asserts.assertEquals(doublec[i], cmoveDEQforDConst(doublea[i], doubleb[i]));
|
||||
}
|
||||
|
||||
testCMoveFNEQforFConst(floata, floatb, floatc);
|
||||
testCMoveDNEQforDConst(doublea, doubleb, doublec);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(floatc[i], cmoveFNEQforFConst(floata[i], floatb[i]));
|
||||
Asserts.assertEquals(doublec[i], cmoveDNEQforDConst(doublea[i], doubleb[i]));
|
||||
}
|
||||
}
|
||||
|
||||
@Warmup(0)
|
||||
@Run(test = {"testCMoveIGTforI",
|
||||
"testCMoveIGTforL",
|
||||
"testCMoveIGTforF",
|
||||
"testCMoveIGTforD",
|
||||
"testCMoveLGTforI",
|
||||
"testCMoveLGTforL",
|
||||
"testCMoveLGTforF",
|
||||
"testCMoveLGTforD",
|
||||
"testCMoveFGTforI",
|
||||
"testCMoveFGTforL",
|
||||
"testCMoveFGTforF",
|
||||
"testCMoveFGTforD",
|
||||
"testCMoveDGTforI",
|
||||
"testCMoveDGTforL",
|
||||
"testCMoveDGTforF",
|
||||
"testCMoveDGTforD"})
|
||||
private void testCMove_runner_two() {
|
||||
int[] aI = new int[SIZE];
|
||||
int[] bI = new int[SIZE];
|
||||
int[] cI = new int[SIZE];
|
||||
int[] dI = new int[SIZE];
|
||||
int[] rI = new int[SIZE];
|
||||
long[] aL = new long[SIZE];
|
||||
long[] bL = new long[SIZE];
|
||||
long[] cL = new long[SIZE];
|
||||
long[] dL = new long[SIZE];
|
||||
long[] rL = new long[SIZE];
|
||||
float[] aF = new float[SIZE];
|
||||
float[] bF = new float[SIZE];
|
||||
float[] cF = new float[SIZE];
|
||||
float[] dF = new float[SIZE];
|
||||
float[] rF = new float[SIZE];
|
||||
double[] aD = new double[SIZE];
|
||||
double[] bD = new double[SIZE];
|
||||
double[] cD = new double[SIZE];
|
||||
double[] dD = new double[SIZE];
|
||||
double[] rD = new double[SIZE];
|
||||
|
||||
init(aI);
|
||||
init(bI);
|
||||
init(cI);
|
||||
init(dI);
|
||||
init(aL);
|
||||
init(bL);
|
||||
init(cL);
|
||||
init(dL);
|
||||
init(aF);
|
||||
init(bF);
|
||||
init(cF);
|
||||
init(dF);
|
||||
init(aD);
|
||||
init(bD);
|
||||
init(cD);
|
||||
init(dD);
|
||||
|
||||
testCMoveIGTforI(aI, bI, cI, dI, rI, rI);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rI[i], cmoveIGTforI(aI[i], bI[i], cI[i], dI[i]));
|
||||
}
|
||||
|
||||
testCMoveIGTforL(aI, bI, cL, dL, rL, rL);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rL[i], cmoveIGTforL(aI[i], bI[i], cL[i], dL[i]));
|
||||
}
|
||||
|
||||
testCMoveIGTforF(aI, bI, cF, dF, rF, rF);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rF[i], cmoveIGTforF(aI[i], bI[i], cF[i], dF[i]));
|
||||
}
|
||||
|
||||
testCMoveIGTforD(aI, bI, cD, dD, rD, rD);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rD[i], cmoveIGTforD(aI[i], bI[i], cD[i], dD[i]));
|
||||
}
|
||||
|
||||
testCMoveLGTforI(aL, bL, cI, dI, rI, rI);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rI[i], cmoveLGTforI(aL[i], bL[i], cI[i], dI[i]));
|
||||
}
|
||||
|
||||
testCMoveLGTforL(aL, bL, cL, dL, rL, rL);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rL[i], cmoveLGTforL(aL[i], bL[i], cL[i], dL[i]));
|
||||
}
|
||||
|
||||
testCMoveLGTforF(aL, bL, cF, dF, rF, rF);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rF[i], cmoveLGTforF(aL[i], bL[i], cF[i], dF[i]));
|
||||
}
|
||||
|
||||
testCMoveLGTforD(aL, bL, cD, dD, rD, rD);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rD[i], cmoveLGTforD(aL[i], bL[i], cD[i], dD[i]));
|
||||
}
|
||||
|
||||
testCMoveFGTforI(aF, bF, cI, dI, rI, rI);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rI[i], cmoveFGTforI(aF[i], bF[i], cI[i], dI[i]));
|
||||
}
|
||||
|
||||
testCMoveFGTforL(aF, bF, cL, dL, rL, rL);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rL[i], cmoveFGTforL(aF[i], bF[i], cL[i], dL[i]));
|
||||
}
|
||||
|
||||
testCMoveFGTforF(aF, bF, cF, dF, rF, rF);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rF[i], cmoveFGTforF(aF[i], bF[i], cF[i], dF[i]));
|
||||
}
|
||||
|
||||
testCMoveFGTforD(aF, bF, cD, dD, rD, rD);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rD[i], cmoveFGTforD(aF[i], bF[i], cD[i], dD[i]));
|
||||
}
|
||||
|
||||
testCMoveDGTforI(aD, bD, cI, dI, rI, rI);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rI[i], cmoveDGTforI(aD[i], bD[i], cI[i], dI[i]));
|
||||
}
|
||||
|
||||
testCMoveDGTforL(aD, bD, cL, dL, rL, rL);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rL[i], cmoveDGTforL(aD[i], bD[i], cL[i], dL[i]));
|
||||
}
|
||||
|
||||
testCMoveDGTforF(aD, bD, cF, dF, rF, rF);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rF[i], cmoveDGTforF(aD[i], bD[i], cF[i], dF[i]));
|
||||
}
|
||||
|
||||
testCMoveDGTforD(aD, bD, cD, dD, rD, rD);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
Asserts.assertEquals(rD[i], cmoveDGTforD(aD[i], bD[i], cD[i], dD[i]));
|
||||
}
|
||||
}
|
||||
|
||||
private static void init(int[] a) {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
a[i] = RANDOM.nextInt();
|
||||
}
|
||||
}
|
||||
|
||||
private static void init(long[] a) {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
a[i] = RANDOM.nextLong();
|
||||
}
|
||||
}
|
||||
|
||||
private static void init(float[] a) {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
a[i] = RANDOM.nextFloat();
|
||||
if (RANDOM.nextInt() % 20 == 0) {
|
||||
a[i] = Float.NaN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void init(double[] a) {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
a[i] = RANDOM.nextDouble();
|
||||
if (RANDOM.nextInt() % 20 == 0) {
|
||||
a[i] = Double.NaN;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -304,16 +304,6 @@ public class IRNode {
|
||||
beforeMatchingNameRegex(CMOVE_I, "CMoveI");
|
||||
}
|
||||
|
||||
// IR-node placeholder string for "CMOVE_VD", built as PREFIX + name + POSTFIX.
// The static initializer below registers it through superWordNodes with the
// node name "CMoveVD".
public static final String CMOVE_VD = PREFIX + "CMOVE_VD" + POSTFIX;
|
||||
static {
|
||||
superWordNodes(CMOVE_VD, "CMoveVD");
|
||||
}
|
||||
|
||||
// IR-node placeholder string for "CMOVE_VF", built as PREFIX + name + POSTFIX.
// The static initializer below registers it through superWordNodes with the
// node name "CMoveVF".
public static final String CMOVE_VF = PREFIX + "CMOVE_VF" + POSTFIX;
|
||||
static {
|
||||
superWordNodes(CMOVE_VF, "CMoveVF");
|
||||
}
|
||||
|
||||
public static final String CMP_I = PREFIX + "CMP_I" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(CMP_I, "CmpI");
|
||||
@@ -1278,6 +1268,11 @@ public class IRNode {
|
||||
beforeMatchingNameRegex(VECTOR_BLEND, "VectorBlend");
|
||||
}
|
||||
|
||||
// IR-node placeholder string for "VECTOR_MASK_CMP", built as PREFIX + name + POSTFIX.
// The static initializer below registers it through beforeMatchingNameRegex with
// the node name "VectorMaskCmp", the same way VECTOR_BLEND is registered with
// "VectorBlend".
public static final String VECTOR_MASK_CMP = PREFIX + "VECTOR_MASK_CMP" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(VECTOR_MASK_CMP, "VectorMaskCmp");
|
||||
}
|
||||
|
||||
public static final String VECTOR_CAST_B2X = PREFIX + "VECTOR_CAST_B2X" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(VECTOR_CAST_B2X, "VectorCastB2X");
|
||||
|
||||
Reference in New Issue
Block a user