8309502: RISC-V: String.indexOf intrinsic may produce misaligned memory loads
Reviewed-by: luhenry, fjiang, fyang
src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

@@ -491,7 +491,9 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
   }
   bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char
   add(result, haystack, isLL ? nlen_tmp : ch2);
-  ld(ch2, Address(result)); // load 8 bytes from source string
+  // load 8 bytes from source string
+  // if isLL is false then read granularity can be 2
+  load_long_misaligned(ch2, Address(result), ch1, isLL ? 1 : 2); // can use ch1 as temp register here as it will be trashed by next mv anyway
   mv(ch1, tmp6);
   if (isLL) {
     j(BMLOOPSTR1_AFTER_LOAD);
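The fix routes the read through load_long_misaligned, which assembles the value from naturally aligned pieces instead of issuing one possibly misaligned ld. As a plain-C++ model of what the emitted lbu/slli/add chain computes for granularity 1 (the function name and harness are our illustration, not HotSpot code):

#include <cstdint>
#include <cstdio>

// Sketch: build a 64-bit little-endian value from eight single-byte loads so
// that no individual hardware access is misaligned.
static uint64_t load_u64_bytewise(const uint8_t* p) {
  uint64_t v = 0;
  for (int i = 0; i < 8; i++) {
    v |= (uint64_t)p[i] << (8 * i);  // one lbu + slli + add per byte
  }
  return v;
}

int main() {
  uint8_t buf[16];
  for (int i = 0; i < 16; i++) buf[i] = (uint8_t)(0x10 + i);
  printf("%016llx\n", (unsigned long long)load_u64_bytewise(buf + 3));  // odd offset is fine
  return 0;
}

A UTF-16 haystack is at least 2-byte aligned, which is why the hunk passes granularity isLL ? 1 : 2: the helper can then use halfword loads and halve the instruction count.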
@@ -679,10 +681,30 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
   slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp
   add(haystack, haystack, tmp3);
   neg(hlen_neg, tmp3);
+  if (AvoidUnalignedAccesses) {
+    // preload first value, then we will read by 1 character per loop, instead of four
+    // just shifting previous ch2 right by size of character in bits
+    add(tmp3, haystack, hlen_neg);
+    (this->*load_4chr)(ch2, Address(tmp3), noreg);
+    if (isLL) {
+      // need to erase 1 most significant byte in 32-bit value of ch2
+      slli(ch2, ch2, 40);
+      srli(ch2, ch2, 32);
+    } else {
+      slli(ch2, ch2, 16); // 2 most significant bytes will be erased by this operation
+    }
+  }

   bind(CH1_LOOP);
-  add(ch2, haystack, hlen_neg);
-  (this->*load_4chr)(ch2, Address(ch2), noreg);
+  add(tmp3, haystack, hlen_neg);
+  if (AvoidUnalignedAccesses) {
+    srli(ch2, ch2, isLL ? 8 : 16);
+    (this->*haystack_load_1chr)(tmp3, Address(tmp3, isLL ? 3 : 6), noreg);
+    slli(tmp3, tmp3, isLL ? 24 : 48);
+    add(ch2, ch2, tmp3);
+  } else {
+    (this->*load_4chr)(ch2, Address(tmp3), noreg);
+  }
   beq(ch1, ch2, MATCH);
   add(hlen_neg, hlen_neg, haystack_chr_size);
   blez(hlen_neg, CH1_LOOP);
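The AvoidUnalignedAccesses path turns the four-character loop into a rolling window: the first window is preloaded, then each iteration shifts the previous value right by one character and inserts a single freshly loaded character at the top, so the loop performs only naturally aligned one-character loads. A plain-C++ model of the Latin-1 case (our naming and bounds handling; assumes a little-endian host, as RISC-V is):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Sketch of the 4-character rolling-window loop for Latin-1 strings.
static int indexof_4ch_latin1(const uint8_t* hs, int hlen, const uint8_t needle[4]) {
  if (hlen < 4) return -1;
  uint32_t pattern;
  memcpy(&pattern, needle, 4);  // little-endian packing of the four needle chars
  // Preload: park hs[0..2] one byte lane high (models the slli 40 / srli 32 trim).
  uint32_t w = ((uint32_t)hs[0] << 8) | ((uint32_t)hs[1] << 16) | ((uint32_t)hs[2] << 24);
  for (int i = 0; i + 4 <= hlen; i++) {
    w = (w >> 8) | ((uint32_t)hs[i + 3] << 24);  // srli + one-byte load + slli + add
    if (w == pattern) return i;
  }
  return -1;
}

int main() {
  const char* hay = "misaligned loads";
  const uint8_t needle[4] = { 'l', 'o', 'a', 'd' };
  printf("%d\n", indexof_4ch_latin1((const uint8_t*)hay, (int)strlen(hay), needle));  // prints 11
  return 0;
}

The UTF-16 case is the same picture with 16-bit lanes: shift right by 16, load the character at byte offset 6, shift it up by 48.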
@@ -700,10 +722,23 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
   slli(tmp3, result_tmp, haystack_chr_shift);
   add(haystack, haystack, tmp3);
   neg(hlen_neg, tmp3);

+  if (AvoidUnalignedAccesses) {
+    // preload first value, then we will read by 1 character per loop, instead of two
+    // just shifting previous ch2 right by size of character in bits
+    add(tmp3, haystack, hlen_neg);
+    (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg);
+    slli(ch2, ch2, isLL ? 8 : 16);
+  }
   bind(CH1_LOOP);
   add(tmp3, haystack, hlen_neg);
-  (this->*load_2chr)(ch2, Address(tmp3), noreg);
+  if (AvoidUnalignedAccesses) {
+    srli(ch2, ch2, isLL ? 8 : 16);
+    (this->*haystack_load_1chr)(tmp3, Address(tmp3, isLL ? 1 : 2), noreg);
+    slli(tmp3, tmp3, isLL ? 8 : 16);
+    add(ch2, ch2, tmp3);
+  } else {
+    (this->*load_2chr)(ch2, Address(tmp3), noreg);
+  }
   beq(ch1, ch2, MATCH);
   add(hlen_neg, hlen_neg, haystack_chr_size);
   blez(hlen_neg, CH1_LOOP);
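The two-character loop gets the same rolling-window treatment. A sketch of the UTF-16 flavor, where each character is 16 bits and the haystack guarantees 2-byte alignment (function and harness are ours):

#include <cstdint>
#include <cstdio>

// Sketch of the 2-character rolling window for UTF-16: hold both chars in a
// 32-bit value, shift out one char (16 bits), insert the next at the top.
static int indexof_2ch_utf16(const uint16_t* hs, int hlen, uint16_t c0, uint16_t c1) {
  if (hlen < 2) return -1;
  uint32_t pattern = (uint32_t)c0 | ((uint32_t)c1 << 16);
  uint32_t w = (uint32_t)hs[0] << 16;  // preload: first char parked high
  for (int i = 0; i + 2 <= hlen; i++) {
    w = (w >> 16) | ((uint32_t)hs[i + 1] << 16);  // srli + 1-char load + slli + add
    if (w == pattern) return i;
  }
  return -1;
}

int main() {
  const uint16_t hay[] = { 'c', 'a', 'b' };
  printf("%d\n", indexof_2ch_utf16(hay, 3, 'a', 'b'));  // prints 1
  return 0;
}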
@@ -727,7 +762,14 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne

   bind(FIRST_LOOP);
   add(ch2, haystack, hlen_neg);
-  (this->*load_2chr)(ch2, Address(ch2), noreg);
+  if (AvoidUnalignedAccesses) {
+    (this->*haystack_load_1chr)(tmp2, Address(ch2, isLL ? 1 : 2), noreg); // we need a temp register, we can safely use hlen_tmp here, which is a synonym for tmp2
+    (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
+    slli(tmp2, tmp2, isLL ? 8 : 16);
+    add(ch2, ch2, tmp2);
+  } else {
+    (this->*load_2chr)(ch2, Address(ch2), noreg);
+  }
   beq(first, ch2, STR1_LOOP);

   bind(STR2_NEXT);
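In FIRST_LOOP there is no rolling window, because ch2 serves as the address register and is then overwritten with the loaded value. Instead, the two-character value is fused from two aligned one-character loads, fetching the high character first while the address is still intact. A model of the Latin-1 case (our naming):

#include <cstdint>

// Sketch of the FIRST_LOOP fix for Latin-1: build the two-character value from
// two aligned one-character loads. The high character goes into a scratch slot
// first because, in the emitted code, ch2 holds the address and must survive
// until the second load.
static uint32_t load_2ch_latin1(const uint8_t* addr) {
  uint32_t hi = addr[1];   // (this->*haystack_load_1chr)(tmp2, Address(ch2, 1), noreg)
  uint32_t lo = addr[0];   // (this->*haystack_load_1chr)(ch2, Address(ch2), noreg)
  return lo + (hi << 8);   // slli(tmp2, tmp2, 8); add(ch2, ch2, tmp2)
}

For UTF-16 the offset and shift simply double (offset 2, shift 16).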
@@ -751,10 +793,7 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
   bind(DO1);
   (this->*needle_load_1chr)(ch1, Address(needle), noreg);
   sub(result_tmp, haystack_len, 1);
-  mv(tmp3, result_tmp);
-  if (haystack_chr_shift) {
-    slli(tmp3, result_tmp, haystack_chr_shift);
-  }
+  slli(tmp3, result_tmp, haystack_chr_shift);
   add(haystack, haystack, tmp3);
   neg(hlen_neg, tmp3);

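The DO1 change is a simplification rather than an alignment fix: slli with a shift amount of 0 already acts as a register copy, so one unconditional shift replaces the old mv plus guarded slli. In C++ terms:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t result_tmp = 42;
  const int shifts[2] = {0, 1};  // haystack_chr_shift: 0 for Latin-1, 1 for UTF-16
  for (int haystack_chr_shift : shifts) {
    // A shift by 0 is a plain copy, so the unconditional slli covers both cases.
    uint64_t tmp3 = result_tmp << haystack_chr_shift;
    printf("shift=%d tmp3=%llu\n", haystack_chr_shift, (unsigned long long)tmp3);
  }
  return 0;
}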
@@ -1932,4 +1971,4 @@ void C2_MacroAssembler::extract_fp_v(FloatRegister dst, VectorRegister src, Basi
     vslidedown_vx(tmp, src, t0);
     vfmv_f_s(dst, tmp);
   }
-}
\ No newline at end of file
+}
src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

@@ -1700,12 +1700,29 @@ void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in
   }
 }

-// granularity is 1, 2 bytes per load
+// granularity is 1 OR 2 bytes per load. dst and src.base() allowed to be the same register
 void MacroAssembler::load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
+  if (granularity != 1 && granularity != 2) {
+    ShouldNotReachHere();
+  }
   if (AvoidUnalignedAccesses && (granularity != 2)) {
     assert_different_registers(dst, tmp);
     assert_different_registers(tmp, src.base());
     is_signed ? lb(tmp, Address(src.base(), src.offset() + 1)) : lbu(tmp, Address(src.base(), src.offset() + 1));
     slli(tmp, tmp, 8);
     lbu(dst, src);
     add(dst, dst, tmp);
   } else {
     is_signed ? lh(dst, src) : lhu(dst, src);
   }
 }

+// granularity is 1, 2 OR 4 bytes per load, if granularity 2 or 4 then dst and src.base() allowed to be the same register
 void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity) {
   if (AvoidUnalignedAccesses && (granularity != 4)) {
-    assert_different_registers(dst, tmp, src.base());
     switch(granularity) {
       case 1:
+        assert_different_registers(dst, tmp, src.base());
         lbu(dst, src);
         lbu(tmp, Address(src.base(), src.offset() + 1));
         slli(tmp, tmp, 8);
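A C++ model of the short loader's unaligned path, showing the property the new comment documents: the destination is written last, so it may share a register with the address base (sketch; names are ours):

#include <cstdint>

// Sketch of load_short_misaligned with granularity 1: read the signed/unsigned
// high byte into tmp first, while the base is still live, then the low byte
// into dst. Because dst is written last, dst may alias the base register.
static int32_t load_short_bytewise(const uint8_t* base, int offset, bool is_signed) {
  int32_t tmp = is_signed ? (int32_t)(int8_t)base[offset + 1]  // lb  tmp, +1
                          : (int32_t)base[offset + 1];         // lbu tmp, +1
  tmp <<= 8;                                                   // slli tmp, tmp, 8
  int32_t dst = base[offset];                                  // lbu dst, +0 (last)
  return dst + tmp;                                            // add dst, dst, tmp
}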
@@ -1718,9 +1735,11 @@ void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp
         add(dst, dst, tmp);
         break;
       case 2:
-        lhu(dst, src);
+        assert_different_registers(dst, tmp);
+        assert_different_registers(tmp, src.base());
         is_signed ? lh(tmp, Address(src.base(), src.offset() + 2)) : lhu(tmp, Address(src.base(), src.offset() + 2));
         slli(tmp, tmp, 16);
+        lhu(dst, src);
         add(dst, dst, tmp);
         break;
       default:
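The same ordering fix at halfword granularity: the old code filled dst first, which corrupted the address when dst and src.base() were the same register; now the upper half goes through tmp first and dst is loaded last. A sketch (our naming):

#include <cstdint>

// Sketch of the reordered case 2 of load_int_misaligned: with granularity 2 the
// address is 2-byte aligned, so two halfword loads are safe; loading dst last
// lets dst reuse the base register.
static int64_t load_int_halfwords(const uint16_t* p, bool is_signed) {
  int64_t tmp = is_signed ? (int64_t)(int16_t)p[1]  // lh  tmp, +2
                          : (int64_t)p[1];          // lhu tmp, +2
  tmp <<= 16;                                       // slli tmp, tmp, 16
  int64_t dst = p[0];                               // lhu dst, +0 (last)
  return dst + tmp;                                 // add dst, dst, tmp
}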
@@ -1731,12 +1750,12 @@ void MacroAssembler::load_int_misaligned(Register dst, Address src, Register tmp
   }
 }

-// granularity is 1, 2 or 4 bytes per load
+// granularity is 1, 2, 4 or 8 bytes per load, if granularity 4 or 8 then dst and src.base() allowed to be same register
 void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tmp, int granularity) {
   if (AvoidUnalignedAccesses && (granularity != 8)) {
-    assert_different_registers(dst, tmp, src.base());
     switch(granularity){
       case 1:
+        assert_different_registers(dst, tmp, src.base());
         lbu(dst, src);
         lbu(tmp, Address(src.base(), src.offset() + 1));
         slli(tmp, tmp, 8);
@@ -1761,6 +1780,7 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
         add(dst, dst, tmp);
         break;
       case 2:
+        assert_different_registers(dst, tmp, src.base());
         lhu(dst, src);
         lhu(tmp, Address(src.base(), src.offset() + 2));
         slli(tmp, tmp, 16);
@@ -1773,9 +1793,11 @@ void MacroAssembler::load_long_misaligned(Register dst, Address src, Register tm
         add(dst, dst, tmp);
         break;
       case 4:
-        lwu(dst, src);
+        assert_different_registers(dst, tmp);
+        assert_different_registers(tmp, src.base());
         lwu(tmp, Address(src.base(), src.offset() + 4));
         slli(tmp, tmp, 32);
+        lwu(dst, src);
         add(dst, dst, tmp);
         break;
       default:
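And the widest case: with granularity 4 the 8-byte value comes from two aligned word loads, again writing dst last so it may alias the base (sketch, our naming):

#include <cstdint>

// Sketch of case 4 of load_long_misaligned: an 8-byte value from two aligned
// 4-byte loads; tmp is filled through the base first and dst is written last.
static uint64_t load_long_words(const uint32_t* p) {
  uint64_t tmp = (uint64_t)p[1] << 32;  // lwu tmp, +4; slli tmp, tmp, 32
  uint64_t dst = p[0];                  // lwu dst, +0 (last)
  return dst + tmp;                     // add dst, dst, tmp
}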
src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

@@ -431,6 +431,7 @@ class MacroAssembler: public Assembler {
   void store_sized_value(Address dst, Register src, size_t size_in_bytes);

+  // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
   void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
   void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
   void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);

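Taken together, the declarations promise: on cores where unaligned access is acceptable (AvoidUnalignedAccesses false) the helpers emit one wide load; otherwise they assemble the value from granularity-sized aligned pieces. A compact model of that contract (hypothetical helper, assuming a little-endian host):

#include <cstdint>
#include <cstring>

// Model of the declared contract for the 8-byte loader: a single wide load on
// tolerant hardware, otherwise an assembly from 'granularity'-sized pieces
// (1, 2 or 4 bytes), each of which is naturally aligned.
static uint64_t load_long_model(const uint8_t* p, bool avoid_unaligned, int granularity) {
  uint64_t v = 0;
  if (!avoid_unaligned || granularity == 8) {
    memcpy(&v, p, 8);                    // models the plain ld
    return v;
  }
  for (int i = 0; i < 8; i += granularity) {
    uint64_t piece = 0;
    memcpy(&piece, p + i, granularity);  // one aligned piece per iteration
    v |= piece << (8 * i);
  }
  return v;
}

Callers pick the largest granularity the data guarantees, which is exactly why the indexOf code above passes isLL ? 1 : 2: Latin-1 bytes guarantee only 1-byte alignment, while UTF-16 characters guarantee 2.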