aarch64: Add SVE2.1 Contiguous load/store instructions.

Hi,

This patch add support for SVE2.1 instructions ld1q,
ld2q, ld3q and ld4q, st1q, st2q, st3q and st4q.

Regression testing for aarch64-none-elf target and found no regressions.

Ok for binutils-master?

Regards,
Srinath.
This commit is contained in:
Srinath Parvathaneni 2024-01-15 09:40:11 +00:00 committed by Nick Clifton
parent b34104edab
commit b33f1bcd15
9 changed files with 108 additions and 2 deletions

View File

@ -6749,6 +6749,9 @@ parse_operands (char *str, const aarch64_opcode *opcode)
case AARCH64_OPND_SVE_ZtxN: case AARCH64_OPND_SVE_ZtxN:
case AARCH64_OPND_SME_Zdnx2: case AARCH64_OPND_SME_Zdnx2:
case AARCH64_OPND_SME_Zdnx4: case AARCH64_OPND_SME_Zdnx4:
case AARCH64_OPND_SME_Zt2:
case AARCH64_OPND_SME_Zt3:
case AARCH64_OPND_SME_Zt4:
case AARCH64_OPND_SME_Zmx2: case AARCH64_OPND_SME_Zmx2:
case AARCH64_OPND_SME_Zmx4: case AARCH64_OPND_SME_Zmx4:
case AARCH64_OPND_SME_Znx2: case AARCH64_OPND_SME_Znx2:

View File

@ -80,3 +80,17 @@
.*: Error: selected processor does not support `fminqv v4.2d,p3,z2.d' .*: Error: selected processor does not support `fminqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `fminqv v8.2d,p4,z1.d' .*: Error: selected processor does not support `fminqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `fminqv v16.4s,p7,z0.s' .*: Error: selected processor does not support `fminqv v16.4s,p7,z0.s'
.*: Error: selected processor does not support `ld1q Z0.Q,p4/Z,\[Z16.D,x0\]'
.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,x2,lsl#4\]'
.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,x4,lsl#4\]'
.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,x6,lsl#4\]'
.*: Error: selected processor does not support `st1q Z0.Q,p4,\[Z16.D,x0\]'
.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,#-4,MUL VL\]'
.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,x2,lsl#4\]'
.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,x4,lsl#4\]'
.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,x6,lsl#4\]'

View File

@ -89,3 +89,17 @@
.*: 64d7ac44 fminqv v4.2d, p3, z2.d .*: 64d7ac44 fminqv v4.2d, p3, z2.d
.*: 64d7b028 fminqv v8.2d, p4, z1.d .*: 64d7b028 fminqv v8.2d, p4, z1.d
.*: 6497bc10 fminqv v16.4s, p7, z0.s .*: 6497bc10 fminqv v16.4s, p7, z0.s
.*: c400b200 ld1q z0.q, p4/z, \[z16.d, x0\]
.*: a49ef000 ld2q {z0.q, z1.q}, p4/z, \[x0, #-4, mul vl\]
.*: a51ef000 ld3q {z0.q, z1.q, z2.q}, p4/z, \[x0, #-4, mul vl\]
.*: a59ef000 ld4q {z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, #-4, mul vl\]
.*: a4a2f000 ld2h {z0.h-z1.h}, p4/z, \[x0, #4, mul vl\]
.*: a5249000 ld3q {z0.q, z1.q, z2.q}, p4/z, \[x0, x4, lsl #4\]
.*: a5a69000 ld4q {z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, x6, lsl #4\]
.*: e4203200 st1q z0.q, p4, \[z16.d, x0\]
.*: e44e1000 st2q {z0.q, z1.q}, p4, \[x0, #-4, mul vl\]
.*: e48e1000 st3q {z0.q, z1.q, z2.q}, p4, \[x0, #-4, mul vl\]
.*: e4ce1000 st4q {z0.q, z1.q, z2.q, z3.q}, p4, \[x0, #-4, mul vl\]
.*: e4621000 st2q {z0.q, z1.q}, p4, \[x0, x2, lsl #4\]
.*: e4a41000 st3q {z0.q, z1.q, z2.q}, p4, \[x0, x4, lsl #4\]
.*: e4e61000 st4q {z0.q, z1.q, z2.q, z3.q}, p4, \[x0, x6, lsl #4\]

View File

@ -90,3 +90,18 @@ fminqv v2.4s, p2, z4.s
fminqv v4.2d, p3, z2.d fminqv v4.2d, p3, z2.d
fminqv v8.2d, p4, z1.d fminqv v8.2d, p4, z1.d
fminqv v16.4s, p7, z0.s fminqv v16.4s, p7, z0.s
ld1q Z0.Q, p4/Z, [Z16.D, x0]
ld2q {Z0.Q, Z1.Q}, p4/Z, [x0, #-4, MUL VL]
ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0, #-4, MUL VL]
ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0, #-4, MUL VL]
ld2q {Z0.Q, Z1.Q}, p4/Z, [x0, x2, lsl #4]
ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0, x4, lsl #4]
ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0, x6, lsl #4]
st1q Z0.Q, p4, [Z16.D, x0]
st2q {Z0.Q, Z1.Q}, p4, [x0, #-4, MUL VL]
st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0, #-4, MUL VL]
st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0, #-4, MUL VL]
st2q {Z0.Q, Z1.Q}, p4, [x0, x2, lsl #4]
st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0, x4, lsl #4]
st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0, x6, lsl #4]

View File

@ -797,6 +797,9 @@ enum aarch64_opnd
AARCH64_OPND_MOPS_WB_Rn, /* Rn!, in bits [5, 9]. */ AARCH64_OPND_MOPS_WB_Rn, /* Rn!, in bits [5, 9]. */
AARCH64_OPND_CSSC_SIMM8, /* CSSC signed 8-bit immediate. */ AARCH64_OPND_CSSC_SIMM8, /* CSSC signed 8-bit immediate. */
AARCH64_OPND_CSSC_UIMM8, /* CSSC unsigned 8-bit immediate. */ AARCH64_OPND_CSSC_UIMM8, /* CSSC unsigned 8-bit immediate. */
AARCH64_OPND_SME_Zt2, /* Qobule SVE vector register list. */
AARCH64_OPND_SME_Zt3, /* Trible SVE vector register list. */
AARCH64_OPND_SME_Zt4, /* Quad SVE vector register list. */
}; };
/* Qualifier constrains an operand. It either specifies a variant of an /* Qualifier constrains an operand. It either specifies a variant of an

View File

@ -2160,6 +2160,21 @@ aarch64_ext_sve_reglist (const aarch64_operand *self,
return true; return true;
} }
/* Decode {Zn.<T> , Zm.<T>}. The fields array specifies which field
to use for Zn. The opcode-dependent value specifies the number
of registers in the list. */
bool
aarch64_ext_sve_reglist_zt (const aarch64_operand *self,
aarch64_opnd_info *info, aarch64_insn code,
const aarch64_inst *inst ATTRIBUTE_UNUSED,
aarch64_operand_error *errors ATTRIBUTE_UNUSED)
{
info->reglist.first_regno = extract_field (self->fields[0], code, 0);
info->reglist.num_regs = get_operand_specific_data (self);
info->reglist.stride = 1;
return true;
}
/* Decode a strided register list. The first field holds the top bit /* Decode a strided register list. The first field holds the top bit
(0 or 16) and the second field holds the lower bits. The stride is (0 or 16) and the second field holds the lower bits. The stride is
16 divided by the list length. */ 16 divided by the list length. */

View File

@ -139,6 +139,7 @@ AARCH64_DECL_OPD_EXTRACTOR (ext_imm_rotate2);
AARCH64_DECL_OPD_EXTRACTOR (ext_x0_to_x30); AARCH64_DECL_OPD_EXTRACTOR (ext_x0_to_x30);
AARCH64_DECL_OPD_EXTRACTOR (ext_simple_index); AARCH64_DECL_OPD_EXTRACTOR (ext_simple_index);
AARCH64_DECL_OPD_EXTRACTOR (ext_plain_shrimm); AARCH64_DECL_OPD_EXTRACTOR (ext_plain_shrimm);
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_reglist_zt);
#undef AARCH64_DECL_OPD_EXTRACTOR #undef AARCH64_DECL_OPD_EXTRACTOR

View File

@ -1870,6 +1870,9 @@ operand_general_constraint_met_p (const aarch64_opnd_info *opnds, int idx,
case AARCH64_OPND_SME_Zmx4: case AARCH64_OPND_SME_Zmx4:
case AARCH64_OPND_SME_Znx2: case AARCH64_OPND_SME_Znx2:
case AARCH64_OPND_SME_Znx4: case AARCH64_OPND_SME_Znx4:
case AARCH64_OPND_SME_Zt2:
case AARCH64_OPND_SME_Zt3:
case AARCH64_OPND_SME_Zt4:
num = get_operand_specific_data (&aarch64_operands[type]); num = get_operand_specific_data (&aarch64_operands[type]);
if (!check_reglist (opnd, mismatch_detail, idx, num, 1)) if (!check_reglist (opnd, mismatch_detail, idx, num, 1))
return 0; return 0;
@ -3626,7 +3629,10 @@ print_register_list (char *buf, size_t size, const aarch64_opnd_info *opnd,
/* The hyphenated form is preferred for disassembly if there are /* The hyphenated form is preferred for disassembly if there are
more than two registers in the list, and the register numbers more than two registers in the list, and the register numbers
are monotonically increasing in increments of one. */ are monotonically increasing in increments of one. */
if (stride == 1 && num_regs > 1) if (stride == 1 && num_regs > 1
&& ((opnd->type != AARCH64_OPND_SME_Zt2)
&& (opnd->type != AARCH64_OPND_SME_Zt3)
&& (opnd->type != AARCH64_OPND_SME_Zt4)))
snprintf (buf, size, "{%s-%s}%s", snprintf (buf, size, "{%s-%s}%s",
style_reg (styler, "%s%d.%s", prefix, first_reg, qlf_name), style_reg (styler, "%s%d.%s", prefix, first_reg, qlf_name),
style_reg (styler, "%s%d.%s", prefix, last_reg, qlf_name), tb); style_reg (styler, "%s%d.%s", prefix, last_reg, qlf_name), tb);
@ -4071,6 +4077,9 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
case AARCH64_OPND_SME_Znx4: case AARCH64_OPND_SME_Znx4:
case AARCH64_OPND_SME_Ztx2_STRIDED: case AARCH64_OPND_SME_Ztx2_STRIDED:
case AARCH64_OPND_SME_Ztx4_STRIDED: case AARCH64_OPND_SME_Ztx4_STRIDED:
case AARCH64_OPND_SME_Zt2:
case AARCH64_OPND_SME_Zt3:
case AARCH64_OPND_SME_Zt4:
print_register_list (buf, size, opnd, "z", styler); print_register_list (buf, size, opnd, "z", styler);
break; break;

View File

@ -1781,6 +1781,14 @@
{ \ { \
QLF3(S_S,P_Z,S_S), \ QLF3(S_S,P_Z,S_S), \
} }
#define OP_SVE_SZS_QD \
{ \
QLF3(S_Q,P_Z,S_D), \
}
#define OP_SVE_SUS_QD \
{ \
QLF3(S_Q,NIL,S_D), \
}
#define OP_SVE_SBB \ #define OP_SVE_SBB \
{ \ { \
QLF3(S_S,S_B,S_B), \ QLF3(S_S,S_B,S_B), \
@ -6353,6 +6361,21 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SVE2p1_INSN("dupq",0x05202400, 0xffe0fc00, sve_index1, 0, OP2 (SVE_Zd, SVE_Zn_5_INDEX), OP_SVE_VV_BHSD, 0, 0), SVE2p1_INSN("dupq",0x05202400, 0xffe0fc00, sve_index1, 0, OP2 (SVE_Zd, SVE_Zn_5_INDEX), OP_SVE_VV_BHSD, 0, 0),
SVE2p1_INSN("extq",0x05602400, 0xfff0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zd, SVE_Zm_imm4), OP_SVE_BBB, 0, 0), SVE2p1_INSN("extq",0x05602400, 0xfff0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zd, SVE_Zm_imm4), OP_SVE_BBB, 0, 0),
SVE2p1_INSNC("ld1q",0xc400a000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SZS_QD, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld2q",0xa490e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld3q",0xa510e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld4q",0xa590e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld2q",0xa4a0e000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld3q",0xa5208000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("ld4q",0xa5a08000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st1q",0xe4202000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SUS_QD, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st2q",0xe4400000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st3q",0xe4800000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st4q",0xe4c00000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st2q",0xe4600000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st3q",0xe4a00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st4q",0xe4e00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
{0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL}, {0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
}; };
@ -6989,4 +7012,13 @@ const struct aarch64_opcode aarch64_opcode_table[] =
Y(IMMEDIATE, imm, "CSSC_SIMM8", OPD_F_SEXT, F(FLD_CSSC_imm8), \ Y(IMMEDIATE, imm, "CSSC_SIMM8", OPD_F_SEXT, F(FLD_CSSC_imm8), \
"an 8-bit signed immediate") \ "an 8-bit signed immediate") \
Y(IMMEDIATE, imm, "CSSC_UIMM8", 0, F(FLD_CSSC_imm8), \ Y(IMMEDIATE, imm, "CSSC_UIMM8", 0, F(FLD_CSSC_imm8), \
"an 8-bit unsigned immediate") "an 8-bit unsigned immediate") \
X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt2", \
2 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
"a list of 2 SVE vector registers") \
X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt3", \
3 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
"a list of 3 SVE vector registers") \
X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt4", \
4 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
"a list of 4 SVE vector registers")