aarch64: Add SVE2.1 Contiguous load/store instructions.

Hi,

This patch adds support for the SVE2.1 instructions ld1q,
ld2q, ld3q, ld4q, st1q, st2q, st3q and st4q.

Regression tested for the aarch64-none-elf target; no regressions were found.

Ok for binutils-master?

Regards,
Srinath.
This commit is contained in:
Srinath Parvathaneni 2024-01-15 09:40:11 +00:00 committed by Nick Clifton
parent b34104edab
commit b33f1bcd15
9 changed files with 108 additions and 2 deletions

View File

@ -6749,6 +6749,9 @@ parse_operands (char *str, const aarch64_opcode *opcode)
case AARCH64_OPND_SVE_ZtxN:
case AARCH64_OPND_SME_Zdnx2:
case AARCH64_OPND_SME_Zdnx4:
case AARCH64_OPND_SME_Zt2:
case AARCH64_OPND_SME_Zt3:
case AARCH64_OPND_SME_Zt4:
case AARCH64_OPND_SME_Zmx2:
case AARCH64_OPND_SME_Zmx4:
case AARCH64_OPND_SME_Znx2:

View File

@ -80,3 +80,17 @@
.*: Error: selected processor does not support `fminqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `fminqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `fminqv v16.4s,p7,z0.s'
.*: Error: selected processor does not support `ld1q Z0.Q,p4/Z,\[Z16.D,x0\]'
.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,x2,lsl#4\]'
.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,x4,lsl#4\]'
.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,x6,lsl#4\]'
.*: Error: selected processor does not support `st1q Z0.Q,p4,\[Z16.D,x0\]'
.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,x2,lsl#4\]'
.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,x4,lsl#4\]'
.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,x6,lsl#4\]'

View File

@ -89,3 +89,17 @@
.*: 64d7ac44 fminqv v4.2d, p3, z2.d
.*: 64d7b028 fminqv v8.2d, p4, z1.d
.*: 6497bc10 fminqv v16.4s, p7, z0.s
.*: c400b200 ld1q z0.q, p4/z, \[z16.d, x0\]
.*: a49ef000 ld2q {z0.q, z1.q}, p4/z, \[x0, #-4, mul vl\]
.*: a51ef000 ld3q {z0.q, z1.q, z2.q}, p4/z, \[x0, #-4, mul vl\]
.*: a59ef000 ld4q {z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, #-4, mul vl\]
.*: a4a29000 ld2q {z0.q, z1.q}, p4/z, \[x0, x2, lsl #4\]
.*: a5249000 ld3q {z0.q, z1.q, z2.q}, p4/z, \[x0, x4, lsl #4\]
.*: a5a69000 ld4q {z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, x6, lsl #4\]
.*: e4203200 st1q z0.q, p4, \[z16.d, x0\]
.*: e44e1000 st2q {z0.q, z1.q}, p4, \[x0, #-4, mul vl\]
.*: e48e1000 st3q {z0.q, z1.q, z2.q}, p4, \[x0, #-4, mul vl\]
.*: e4ce1000 st4q {z0.q, z1.q, z2.q, z3.q}, p4, \[x0, #-4, mul vl\]
.*: e4621000 st2q {z0.q, z1.q}, p4, \[x0, x2, lsl #4\]
.*: e4a41000 st3q {z0.q, z1.q, z2.q}, p4, \[x0, x4, lsl #4\]
.*: e4e61000 st4q {z0.q, z1.q, z2.q, z3.q}, p4, \[x0, x6, lsl #4\]

View File

@ -90,3 +90,18 @@ fminqv v2.4s, p2, z4.s
fminqv v4.2d, p3, z2.d
fminqv v8.2d, p4, z1.d
fminqv v16.4s, p7, z0.s
ld1q Z0.Q, p4/Z, [Z16.D, x0]
ld2q {Z0.Q, Z1.Q}, p4/Z, [x0, #-4, MUL VL]
ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0, #-4, MUL VL]
ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0, #-4, MUL VL]
ld2q {Z0.Q, Z1.Q}, p4/Z, [x0, x2, lsl #4]
ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0, x4, lsl #4]
ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0, x6, lsl #4]
st1q Z0.Q, p4, [Z16.D, x0]
st2q {Z0.Q, Z1.Q}, p4, [x0, #-4, MUL VL]
st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0, #-4, MUL VL]
st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0, #-4, MUL VL]
st2q {Z0.Q, Z1.Q}, p4, [x0, x2, lsl #4]
st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0, x4, lsl #4]
st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0, x6, lsl #4]

View File

@ -797,6 +797,9 @@ enum aarch64_opnd
AARCH64_OPND_MOPS_WB_Rn, /* Rn!, in bits [5, 9]. */
AARCH64_OPND_CSSC_SIMM8, /* CSSC signed 8-bit immediate. */
AARCH64_OPND_CSSC_UIMM8, /* CSSC unsigned 8-bit immediate. */
AARCH64_OPND_SME_Zt2, /* Double SVE vector register list. */
AARCH64_OPND_SME_Zt3, /* Triple SVE vector register list. */
AARCH64_OPND_SME_Zt4, /* Quad SVE vector register list. */
};
/* Qualifier constrains an operand. It either specifies a variant of an

View File

@ -2160,6 +2160,21 @@ aarch64_ext_sve_reglist (const aarch64_operand *self,
return true;
}
/* Decode a register list of the form {Zt.<T>, ...}.  SELF->fields[0]
   names the instruction field holding the first register number, and
   the operand-specific data of SELF gives the number of registers in
   the list.  The list is recorded with a stride of one.  Always
   returns true.  */
bool
aarch64_ext_sve_reglist_zt (const aarch64_operand *self,
			    aarch64_opnd_info *info, aarch64_insn code,
			    const aarch64_inst *inst ATTRIBUTE_UNUSED,
			    aarch64_operand_error *errors ATTRIBUTE_UNUSED)
{
  /* Pull the first register number out of the instruction word.  */
  const aarch64_insn first_reg = extract_field (self->fields[0], code, 0);

  info->reglist.stride = 1;
  info->reglist.num_regs = get_operand_specific_data (self);
  info->reglist.first_regno = first_reg;

  return true;
}
/* Decode a strided register list. The first field holds the top bit
(0 or 16) and the second field holds the lower bits. The stride is
16 divided by the list length. */

View File

@ -139,6 +139,7 @@ AARCH64_DECL_OPD_EXTRACTOR (ext_imm_rotate2);
AARCH64_DECL_OPD_EXTRACTOR (ext_x0_to_x30);
AARCH64_DECL_OPD_EXTRACTOR (ext_simple_index);
AARCH64_DECL_OPD_EXTRACTOR (ext_plain_shrimm);
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_reglist_zt);
#undef AARCH64_DECL_OPD_EXTRACTOR

View File

@ -1870,6 +1870,9 @@ operand_general_constraint_met_p (const aarch64_opnd_info *opnds, int idx,
case AARCH64_OPND_SME_Zmx4:
case AARCH64_OPND_SME_Znx2:
case AARCH64_OPND_SME_Znx4:
case AARCH64_OPND_SME_Zt2:
case AARCH64_OPND_SME_Zt3:
case AARCH64_OPND_SME_Zt4:
num = get_operand_specific_data (&aarch64_operands[type]);
if (!check_reglist (opnd, mismatch_detail, idx, num, 1))
return 0;
@ -3626,7 +3629,10 @@ print_register_list (char *buf, size_t size, const aarch64_opnd_info *opnd,
/* The hyphenated form is preferred for disassembly if there are
more than two registers in the list, and the register numbers
are monotonically increasing in increments of one. */
if (stride == 1 && num_regs > 1)
if (stride == 1 && num_regs > 1
&& ((opnd->type != AARCH64_OPND_SME_Zt2)
&& (opnd->type != AARCH64_OPND_SME_Zt3)
&& (opnd->type != AARCH64_OPND_SME_Zt4)))
snprintf (buf, size, "{%s-%s}%s",
style_reg (styler, "%s%d.%s", prefix, first_reg, qlf_name),
style_reg (styler, "%s%d.%s", prefix, last_reg, qlf_name), tb);
@ -4071,6 +4077,9 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
case AARCH64_OPND_SME_Znx4:
case AARCH64_OPND_SME_Ztx2_STRIDED:
case AARCH64_OPND_SME_Ztx4_STRIDED:
case AARCH64_OPND_SME_Zt2:
case AARCH64_OPND_SME_Zt3:
case AARCH64_OPND_SME_Zt4:
print_register_list (buf, size, opnd, "z", styler);
break;

View File

@ -1781,6 +1781,14 @@
{ \
QLF3(S_S,P_Z,S_S), \
}
/* Qualifier sequence list holding a single three-operand sequence:
   S_Q, P_Z, S_D.  Referenced by the "ld1q" opcode entry, whose
   operands are written as Zt.Q, Pg/Z, [Zn.D, Xm] in the tests.
   NOTE(review): presumably S_Q/S_D select .Q/.D element sizes and P_Z a
   zeroing predicate -- confirm against the qualifier definitions.  */
#define OP_SVE_SZS_QD \
{ \
QLF3(S_Q,P_Z,S_D), \
}
/* Qualifier sequence list holding a single three-operand sequence:
   S_Q, NIL, S_D.  Referenced by the "st1q" opcode entry, whose
   operands are written as Zt.Q, Pg, [Zn.D, Xm] in the tests (the
   predicate carries no /Z or /M suffix there).
   NOTE(review): presumably NIL means "no qualifier" for the predicate
   operand -- confirm against the qualifier definitions.  */
#define OP_SVE_SUS_QD \
{ \
QLF3(S_Q,NIL,S_D), \
}
#define OP_SVE_SBB \
{ \
QLF3(S_S,S_B,S_B), \
@ -6353,6 +6361,21 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SVE2p1_INSN("dupq",0x05202400, 0xffe0fc00, sve_index1, 0, OP2 (SVE_Zd, SVE_Zn_5_INDEX), OP_SVE_VV_BHSD, 0, 0),
SVE2p1_INSN("extq",0x05602400, 0xfff0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zd, SVE_Zm_imm4), OP_SVE_BBB, 0, 0),
SVE2p1_INSNC("ld1q",0xc400a000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SZS_QD, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld2q",0xa490e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld3q",0xa510e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld4q",0xa590e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
/* ld2q, scalar-plus-scalar form: bits [15:13] are 0b100, matching the ld3q/ld4q scalar-plus-scalar entries below.  With 0b111 here the pattern overlapped the ld2h scalar-plus-immediate encoding, so the assembled word disassembled as ld2h.  */
SVE2p1_INSNC("ld2q",0xa4a08000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld3q",0xa5208000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld4q",0xa5a08000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st1q",0xe4202000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SUS_QD, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st2q",0xe4400000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st3q",0xe4800000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st4q",0xe4c00000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st2q",0xe4600000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st3q",0xe4a00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st4q",0xe4e00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
{0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
};
@ -6989,4 +7012,13 @@ const struct aarch64_opcode aarch64_opcode_table[] =
Y(IMMEDIATE, imm, "CSSC_SIMM8", OPD_F_SEXT, F(FLD_CSSC_imm8), \
"an 8-bit signed immediate") \
Y(IMMEDIATE, imm, "CSSC_UIMM8", 0, F(FLD_CSSC_imm8), \
"an 8-bit unsigned immediate")
"an 8-bit unsigned immediate") \
X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt2", \
2 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
"a list of 2 SVE vector registers") \
X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt3", \
3 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
"a list of 3 SVE vector registers") \
X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt4", \
4 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
"a list of 4 SVE vector registers")