aarch64: Add SVE2.1 Contiguous load/store instructions.
Hi, This patch adds support for the SVE2.1 instructions ld1q, ld2q, ld3q, ld4q, st1q, st2q, st3q and st4q. Regression tested for the aarch64-none-elf target and found no regressions. Ok for binutils-master? Regards, Srinath.
This commit is contained in:
parent
b34104edab
commit
b33f1bcd15
@ -6749,6 +6749,9 @@ parse_operands (char *str, const aarch64_opcode *opcode)
|
|||||||
case AARCH64_OPND_SVE_ZtxN:
|
case AARCH64_OPND_SVE_ZtxN:
|
||||||
case AARCH64_OPND_SME_Zdnx2:
|
case AARCH64_OPND_SME_Zdnx2:
|
||||||
case AARCH64_OPND_SME_Zdnx4:
|
case AARCH64_OPND_SME_Zdnx4:
|
||||||
|
case AARCH64_OPND_SME_Zt2:
|
||||||
|
case AARCH64_OPND_SME_Zt3:
|
||||||
|
case AARCH64_OPND_SME_Zt4:
|
||||||
case AARCH64_OPND_SME_Zmx2:
|
case AARCH64_OPND_SME_Zmx2:
|
||||||
case AARCH64_OPND_SME_Zmx4:
|
case AARCH64_OPND_SME_Zmx4:
|
||||||
case AARCH64_OPND_SME_Znx2:
|
case AARCH64_OPND_SME_Znx2:
|
||||||
|
@ -80,3 +80,17 @@
|
|||||||
.*: Error: selected processor does not support `fminqv v4.2d,p3,z2.d'
|
.*: Error: selected processor does not support `fminqv v4.2d,p3,z2.d'
|
||||||
.*: Error: selected processor does not support `fminqv v8.2d,p4,z1.d'
|
.*: Error: selected processor does not support `fminqv v8.2d,p4,z1.d'
|
||||||
.*: Error: selected processor does not support `fminqv v16.4s,p7,z0.s'
|
.*: Error: selected processor does not support `fminqv v16.4s,p7,z0.s'
|
||||||
|
.*: Error: selected processor does not support `ld1q Z0.Q,p4/Z,\[Z16.D,x0\]'
|
||||||
|
.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,#-4,MUL VL\]'
|
||||||
|
.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,#-4,MUL VL\]'
|
||||||
|
.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,#-4,MUL VL\]'
|
||||||
|
.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,x2,lsl#4\]'
|
||||||
|
.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,x4,lsl#4\]'
|
||||||
|
.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,x6,lsl#4\]'
|
||||||
|
.*: Error: selected processor does not support `st1q Z0.Q,p4,\[Z16.D,x0\]'
|
||||||
|
.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,#-4,MUL VL\]'
|
||||||
|
.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,#-4,MUL VL\]'
|
||||||
|
.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,#-4,MUL VL\]'
|
||||||
|
.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,x2,lsl#4\]'
|
||||||
|
.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,x4,lsl#4\]'
|
||||||
|
.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,x6,lsl#4\]'
|
||||||
|
@ -89,3 +89,17 @@
|
|||||||
.*: 64d7ac44 fminqv v4.2d, p3, z2.d
|
.*: 64d7ac44 fminqv v4.2d, p3, z2.d
|
||||||
.*: 64d7b028 fminqv v8.2d, p4, z1.d
|
.*: 64d7b028 fminqv v8.2d, p4, z1.d
|
||||||
.*: 6497bc10 fminqv v16.4s, p7, z0.s
|
.*: 6497bc10 fminqv v16.4s, p7, z0.s
|
||||||
|
.*: c400b200 ld1q z0.q, p4/z, \[z16.d, x0\]
|
||||||
|
.*: a49ef000 ld2q {z0.q, z1.q}, p4/z, \[x0, #-4, mul vl\]
|
||||||
|
.*: a51ef000 ld3q {z0.q, z1.q, z2.q}, p4/z, \[x0, #-4, mul vl\]
|
||||||
|
.*: a59ef000 ld4q {z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, #-4, mul vl\]
|
||||||
|
.*: a4a29000 ld2q {z0.q, z1.q}, p4/z, \[x0, x2, lsl #4\]
|
||||||
|
.*: a5249000 ld3q {z0.q, z1.q, z2.q}, p4/z, \[x0, x4, lsl #4\]
|
||||||
|
.*: a5a69000 ld4q {z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, x6, lsl #4\]
|
||||||
|
.*: e4203200 st1q z0.q, p4, \[z16.d, x0\]
|
||||||
|
.*: e44e1000 st2q {z0.q, z1.q}, p4, \[x0, #-4, mul vl\]
|
||||||
|
.*: e48e1000 st3q {z0.q, z1.q, z2.q}, p4, \[x0, #-4, mul vl\]
|
||||||
|
.*: e4ce1000 st4q {z0.q, z1.q, z2.q, z3.q}, p4, \[x0, #-4, mul vl\]
|
||||||
|
.*: e4621000 st2q {z0.q, z1.q}, p4, \[x0, x2, lsl #4\]
|
||||||
|
.*: e4a41000 st3q {z0.q, z1.q, z2.q}, p4, \[x0, x4, lsl #4\]
|
||||||
|
.*: e4e61000 st4q {z0.q, z1.q, z2.q, z3.q}, p4, \[x0, x6, lsl #4\]
|
||||||
|
@ -90,3 +90,18 @@ fminqv v2.4s, p2, z4.s
|
|||||||
fminqv v4.2d, p3, z2.d
|
fminqv v4.2d, p3, z2.d
|
||||||
fminqv v8.2d, p4, z1.d
|
fminqv v8.2d, p4, z1.d
|
||||||
fminqv v16.4s, p7, z0.s
|
fminqv v16.4s, p7, z0.s
|
||||||
|
ld1q Z0.Q, p4/Z, [Z16.D, x0]
|
||||||
|
ld2q {Z0.Q, Z1.Q}, p4/Z, [x0, #-4, MUL VL]
|
||||||
|
ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0, #-4, MUL VL]
|
||||||
|
ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0, #-4, MUL VL]
|
||||||
|
ld2q {Z0.Q, Z1.Q}, p4/Z, [x0, x2, lsl #4]
|
||||||
|
ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0, x4, lsl #4]
|
||||||
|
ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0, x6, lsl #4]
|
||||||
|
|
||||||
|
st1q Z0.Q, p4, [Z16.D, x0]
|
||||||
|
st2q {Z0.Q, Z1.Q}, p4, [x0, #-4, MUL VL]
|
||||||
|
st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0, #-4, MUL VL]
|
||||||
|
st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0, #-4, MUL VL]
|
||||||
|
st2q {Z0.Q, Z1.Q}, p4, [x0, x2, lsl #4]
|
||||||
|
st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0, x4, lsl #4]
|
||||||
|
st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0, x6, lsl #4]
|
||||||
|
@ -797,6 +797,9 @@ enum aarch64_opnd
|
|||||||
AARCH64_OPND_MOPS_WB_Rn, /* Rn!, in bits [5, 9]. */
|
AARCH64_OPND_MOPS_WB_Rn, /* Rn!, in bits [5, 9]. */
|
||||||
AARCH64_OPND_CSSC_SIMM8, /* CSSC signed 8-bit immediate. */
|
AARCH64_OPND_CSSC_SIMM8, /* CSSC signed 8-bit immediate. */
|
||||||
AARCH64_OPND_CSSC_UIMM8, /* CSSC unsigned 8-bit immediate. */
|
AARCH64_OPND_CSSC_UIMM8, /* CSSC unsigned 8-bit immediate. */
|
||||||
|
AARCH64_OPND_SME_Zt2, /* Double SVE vector register list. */
|
||||||
|
AARCH64_OPND_SME_Zt3, /* Triple SVE vector register list. */
|
||||||
|
AARCH64_OPND_SME_Zt4, /* Quad SVE vector register list. */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Qualifier constrains an operand. It either specifies a variant of an
|
/* Qualifier constrains an operand. It either specifies a variant of an
|
||||||
|
@ -2160,6 +2160,21 @@ aarch64_ext_sve_reglist (const aarch64_operand *self,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Decode an SVE register list {Zt.<T>, ...}. fields[0] of the operand
|
||||||
|
holds the number of the first register and the operand-specific data
|
||||||
|
gives the number of registers; the stride is set to 1. Always returns true. */
|
||||||
|
bool
|
||||||
|
aarch64_ext_sve_reglist_zt (const aarch64_operand *self,
|
||||||
|
aarch64_opnd_info *info, aarch64_insn code,
|
||||||
|
const aarch64_inst *inst ATTRIBUTE_UNUSED,
|
||||||
|
aarch64_operand_error *errors ATTRIBUTE_UNUSED)
|
||||||
|
{
|
||||||
|
info->reglist.first_regno = extract_field (self->fields[0], code, 0);
|
||||||
|
info->reglist.num_regs = get_operand_specific_data (self);
|
||||||
|
info->reglist.stride = 1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/* Decode a strided register list. The first field holds the top bit
|
/* Decode a strided register list. The first field holds the top bit
|
||||||
(0 or 16) and the second field holds the lower bits. The stride is
|
(0 or 16) and the second field holds the lower bits. The stride is
|
||||||
16 divided by the list length. */
|
16 divided by the list length. */
|
||||||
|
@ -139,6 +139,7 @@ AARCH64_DECL_OPD_EXTRACTOR (ext_imm_rotate2);
|
|||||||
AARCH64_DECL_OPD_EXTRACTOR (ext_x0_to_x30);
|
AARCH64_DECL_OPD_EXTRACTOR (ext_x0_to_x30);
|
||||||
AARCH64_DECL_OPD_EXTRACTOR (ext_simple_index);
|
AARCH64_DECL_OPD_EXTRACTOR (ext_simple_index);
|
||||||
AARCH64_DECL_OPD_EXTRACTOR (ext_plain_shrimm);
|
AARCH64_DECL_OPD_EXTRACTOR (ext_plain_shrimm);
|
||||||
|
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_reglist_zt);
|
||||||
|
|
||||||
#undef AARCH64_DECL_OPD_EXTRACTOR
|
#undef AARCH64_DECL_OPD_EXTRACTOR
|
||||||
|
|
||||||
|
@ -1870,6 +1870,9 @@ operand_general_constraint_met_p (const aarch64_opnd_info *opnds, int idx,
|
|||||||
case AARCH64_OPND_SME_Zmx4:
|
case AARCH64_OPND_SME_Zmx4:
|
||||||
case AARCH64_OPND_SME_Znx2:
|
case AARCH64_OPND_SME_Znx2:
|
||||||
case AARCH64_OPND_SME_Znx4:
|
case AARCH64_OPND_SME_Znx4:
|
||||||
|
case AARCH64_OPND_SME_Zt2:
|
||||||
|
case AARCH64_OPND_SME_Zt3:
|
||||||
|
case AARCH64_OPND_SME_Zt4:
|
||||||
num = get_operand_specific_data (&aarch64_operands[type]);
|
num = get_operand_specific_data (&aarch64_operands[type]);
|
||||||
if (!check_reglist (opnd, mismatch_detail, idx, num, 1))
|
if (!check_reglist (opnd, mismatch_detail, idx, num, 1))
|
||||||
return 0;
|
return 0;
|
||||||
@ -3626,7 +3629,10 @@ print_register_list (char *buf, size_t size, const aarch64_opnd_info *opnd,
|
|||||||
/* The hyphenated form is preferred for disassembly if there are
|
/* The hyphenated form is preferred for disassembly if there are
|
||||||
more than two registers in the list, and the register numbers
|
more than two registers in the list, and the register numbers
|
||||||
are monotonically increasing in increments of one. */
|
are monotonically increasing in increments of one. */
|
||||||
if (stride == 1 && num_regs > 1)
|
if (stride == 1 && num_regs > 1
|
||||||
|
&& ((opnd->type != AARCH64_OPND_SME_Zt2)
|
||||||
|
&& (opnd->type != AARCH64_OPND_SME_Zt3)
|
||||||
|
&& (opnd->type != AARCH64_OPND_SME_Zt4)))
|
||||||
snprintf (buf, size, "{%s-%s}%s",
|
snprintf (buf, size, "{%s-%s}%s",
|
||||||
style_reg (styler, "%s%d.%s", prefix, first_reg, qlf_name),
|
style_reg (styler, "%s%d.%s", prefix, first_reg, qlf_name),
|
||||||
style_reg (styler, "%s%d.%s", prefix, last_reg, qlf_name), tb);
|
style_reg (styler, "%s%d.%s", prefix, last_reg, qlf_name), tb);
|
||||||
@ -4071,6 +4077,9 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
|
|||||||
case AARCH64_OPND_SME_Znx4:
|
case AARCH64_OPND_SME_Znx4:
|
||||||
case AARCH64_OPND_SME_Ztx2_STRIDED:
|
case AARCH64_OPND_SME_Ztx2_STRIDED:
|
||||||
case AARCH64_OPND_SME_Ztx4_STRIDED:
|
case AARCH64_OPND_SME_Ztx4_STRIDED:
|
||||||
|
case AARCH64_OPND_SME_Zt2:
|
||||||
|
case AARCH64_OPND_SME_Zt3:
|
||||||
|
case AARCH64_OPND_SME_Zt4:
|
||||||
print_register_list (buf, size, opnd, "z", styler);
|
print_register_list (buf, size, opnd, "z", styler);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1781,6 +1781,14 @@
|
|||||||
{ \
|
{ \
|
||||||
QLF3(S_S,P_Z,S_S), \
|
QLF3(S_S,P_Z,S_S), \
|
||||||
}
|
}
|
||||||
|
/* Qualifier list (S_Q, P_Z, S_D); used by ld1q, whose operands are written Zt.Q, Pg/Z, [Zn.D, Xm]. */
#define OP_SVE_SZS_QD \
|
||||||
|
{ \
|
||||||
|
QLF3(S_Q,P_Z,S_D), \
|
||||||
|
}
|
||||||
|
/* Qualifier list (S_Q, NIL, S_D); used by st1q, whose operands are written Zt.Q, Pg, [Zn.D, Xm]. */
#define OP_SVE_SUS_QD \
|
||||||
|
{ \
|
||||||
|
QLF3(S_Q,NIL,S_D), \
|
||||||
|
}
|
||||||
#define OP_SVE_SBB \
|
#define OP_SVE_SBB \
|
||||||
{ \
|
{ \
|
||||||
QLF3(S_S,S_B,S_B), \
|
QLF3(S_S,S_B,S_B), \
|
||||||
@ -6353,6 +6361,21 @@ const struct aarch64_opcode aarch64_opcode_table[] =
|
|||||||
|
|
||||||
SVE2p1_INSN("dupq",0x05202400, 0xffe0fc00, sve_index1, 0, OP2 (SVE_Zd, SVE_Zn_5_INDEX), OP_SVE_VV_BHSD, 0, 0),
|
SVE2p1_INSN("dupq",0x05202400, 0xffe0fc00, sve_index1, 0, OP2 (SVE_Zd, SVE_Zn_5_INDEX), OP_SVE_VV_BHSD, 0, 0),
|
||||||
SVE2p1_INSN("extq",0x05602400, 0xfff0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zd, SVE_Zm_imm4), OP_SVE_BBB, 0, 0),
|
SVE2p1_INSN("extq",0x05602400, 0xfff0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zd, SVE_Zm_imm4), OP_SVE_BBB, 0, 0),
|
||||||
|
SVE2p1_INSNC("ld1q",0xc400a000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SZS_QD, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("ld2q",0xa490e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("ld3q",0xa510e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("ld4q",0xa590e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
/* LD2Q (scalar plus scalar): bits [15:13] are 0b100, as for the ld3q/ld4q scalar-plus-scalar entries; 0xa4a0e000 would encode LD2H (scalar plus immediate) instead. */
SVE2p1_INSNC("ld2q",0xa4a08000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("ld3q",0xa5208000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("ld4q",0xa5a08000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
|
||||||
|
SVE2p1_INSNC("st1q",0xe4202000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SUS_QD, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("st2q",0xe4400000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("st3q",0xe4800000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("st4q",0xe4c00000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("st2q",0xe4600000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("st3q",0xe4a00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
SVE2p1_INSNC("st4q",0xe4e00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
|
||||||
|
|
||||||
{0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
|
{0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
|
||||||
};
|
};
|
||||||
@ -6989,4 +7012,13 @@ const struct aarch64_opcode aarch64_opcode_table[] =
|
|||||||
Y(IMMEDIATE, imm, "CSSC_SIMM8", OPD_F_SEXT, F(FLD_CSSC_imm8), \
|
Y(IMMEDIATE, imm, "CSSC_SIMM8", OPD_F_SEXT, F(FLD_CSSC_imm8), \
|
||||||
"an 8-bit signed immediate") \
|
"an 8-bit signed immediate") \
|
||||||
Y(IMMEDIATE, imm, "CSSC_UIMM8", 0, F(FLD_CSSC_imm8), \
|
Y(IMMEDIATE, imm, "CSSC_UIMM8", 0, F(FLD_CSSC_imm8), \
|
||||||
"an 8-bit unsigned immediate")
|
"an 8-bit unsigned immediate") \
|
||||||
|
X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt2", \
|
||||||
|
2 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
|
||||||
|
"a list of 2 SVE vector registers") \
|
||||||
|
X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt3", \
|
||||||
|
3 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
|
||||||
|
"a list of 3 SVE vector registers") \
|
||||||
|
X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt4", \
|
||||||
|
4 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
|
||||||
|
"a list of 4 SVE vector registers")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user