aarch64: Add SVE2.1 dupq, eorqv and extq instructions.

Hi,

This patch adds support for the SVE2.1 instructions dupq, eorqv and extq.

Regression tested for the aarch64-none-elf target; no regressions were found.

Ok for binutils-master?

Regards,
Srinath.
This commit is contained in:
Srinath Parvathaneni 2024-01-15 09:37:32 +00:00 committed by Nick Clifton
parent 88601c2d94
commit 39092c7a1f
11 changed files with 141 additions and 1 deletions

View File

@ -6698,6 +6698,8 @@ parse_operands (char *str, const aarch64_opcode *opcode)
case AARCH64_OPND_SVE_Zm4_11_INDEX: case AARCH64_OPND_SVE_Zm4_11_INDEX:
case AARCH64_OPND_SVE_Zm4_INDEX: case AARCH64_OPND_SVE_Zm4_INDEX:
case AARCH64_OPND_SVE_Zn_INDEX: case AARCH64_OPND_SVE_Zn_INDEX:
case AARCH64_OPND_SVE_Zm_imm4:
case AARCH64_OPND_SVE_Zn_5_INDEX:
case AARCH64_OPND_SME_Zm_INDEX1: case AARCH64_OPND_SME_Zm_INDEX1:
case AARCH64_OPND_SME_Zm_INDEX2: case AARCH64_OPND_SME_Zm_INDEX2:
case AARCH64_OPND_SME_Zm_INDEX3_1: case AARCH64_OPND_SME_Zm_INDEX3_1:

View File

@ -35,3 +35,23 @@
.*: Error: selected processor does not support `uminqv v4.2d,p3,z2.d' .*: Error: selected processor does not support `uminqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `uminqv v8.2d,p4,z1.d' .*: Error: selected processor does not support `uminqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `uminqv v16.4s,p7,z0.s' .*: Error: selected processor does not support `uminqv v16.4s,p7,z0.s'
.*: Error: selected processor does not support `dupq z10.b,z20.b\[0\]'
.*: Error: selected processor does not support `dupq z10.b,z20.b\[15\]'
.*: Error: selected processor does not support `dupq z10.h,z20.h\[0\]'
.*: Error: selected processor does not support `dupq z10.h,z20.h\[7\]'
.*: Error: selected processor does not support `dupq z10.s,z20.s\[0\]'
.*: Error: selected processor does not support `dupq z10.s,z20.s\[3\]'
.*: Error: selected processor does not support `dupq z10.d,z20.d\[0\]'
.*: Error: selected processor does not support `dupq z10.d,z20.d\[1\]'
.*: Error: selected processor does not support `eorqv v0.16b,p0,z16.b'
.*: Error: selected processor does not support `eorqv v1.8h,p1,z8.h'
.*: Error: selected processor does not support `eorqv v2.4s,p2,z4.s'
.*: Error: selected processor does not support `eorqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `eorqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `eorqv v16.4s,p7,z0.s'
.*: Error: selected processor does not support `extq z0.b,z0.b,z10.b\[15\]'
.*: Error: selected processor does not support `extq z1.b,z1.b,z15.b\[7\]'
.*: Error: selected processor does not support `extq z2.b,z2.b,z5.b\[3\]'
.*: Error: selected processor does not support `extq z4.b,z4.b,z12.b\[1\]'
.*: Error: selected processor does not support `extq z8.b,z8.b,z7.b\[4\]'
.*: Error: selected processor does not support `extq z16.b,z16.b,z1.b\[8\]'

View File

@ -44,3 +44,23 @@
.*: 04cf2c44 uminqv v4.2d, p3, z2.d .*: 04cf2c44 uminqv v4.2d, p3, z2.d
.*: 04cf3028 uminqv v8.2d, p4, z1.d .*: 04cf3028 uminqv v8.2d, p4, z1.d
.*: 048f3c10 uminqv v16.4s, p7, z0.s .*: 048f3c10 uminqv v16.4s, p7, z0.s
.*: 0530268a dupq z10.b, z20.b\[0\]
.*: 053f268a dupq z10.b, z20.b\[15\]
.*: 0521268a dupq z10.h, z20.h\[0\]
.*: 052f268a dupq z10.h, z20.h\[7\]
.*: 0522268a dupq z10.s, z20.s\[0\]
.*: 052e268a dupq z10.s, z20.s\[3\]
.*: 0524268a dupq z10.d, z20.d\[0\]
.*: 052c268a dupq z10.d, z20.d\[1\]
.*: 041d2200 eorqv v0.16b, p0, z16.b
.*: 045d2501 eorqv v1.8h, p1, z8.h
.*: 049d2882 eorqv v2.4s, p2, z4.s
.*: 04dd2c44 eorqv v4.2d, p3, z2.d
.*: 04dd3028 eorqv v8.2d, p4, z1.d
.*: 049d3c10 eorqv v16.4s, p7, z0.s
.*: 056a27c0 extq z0.b, z0.b, z10.b\[15\]
.*: 056f25c1 extq z1.b, z1.b, z15.b\[7\]
.*: 056524c2 extq z2.b, z2.b, z5.b\[3\]
.*: 056c2444 extq z4.b, z4.b, z12.b\[1\]
.*: 05672508 extq z8.b, z8.b, z7.b\[4\]
.*: 05612610 extq z16.b, z16.b, z1.b\[8\]

View File

@ -39,3 +39,25 @@ uminqv v2.4s, p2, z4.s
uminqv v4.2d, p3, z2.d uminqv v4.2d, p3, z2.d
uminqv v8.2d, p4, z1.d uminqv v8.2d, p4, z1.d
uminqv v16.4s, p7, z0.s uminqv v16.4s, p7, z0.s
dupq z10.b, z20.b[0]
dupq z10.b, z20.b[15]
dupq z10.h, z20.h[0]
dupq z10.h, z20.h[7]
dupq z10.s, z20.s[0]
dupq z10.s, z20.s[3]
dupq z10.d, z20.d[0]
dupq z10.d, z20.d[1]
eorqv v0.16b, p0, z16.b
eorqv v1.8h, p1, z8.h
eorqv v2.4s, p2, z4.s
eorqv v4.2d, p3, z2.d
eorqv v8.2d, p4, z1.d
eorqv v16.4s, p7, z0.s
extq z0.b, z0.b, z10.b[15]
extq z1.b, z1.b, z15.b[7]
extq z2.b, z2.b, z5.b[3]
extq z4.b, z4.b, z12.b[1]
extq z8.b, z8.b, z7.b[4]
extq z16.b, z16.b, z1.b[8]

View File

@ -727,8 +727,10 @@ enum aarch64_opnd
AARCH64_OPND_SVE_Zm3_19_INDEX, /* z0-z7[0-3] in Zm3_INDEX plus bit 19. */ AARCH64_OPND_SVE_Zm3_19_INDEX, /* z0-z7[0-3] in Zm3_INDEX plus bit 19. */
AARCH64_OPND_SVE_Zm3_22_INDEX, /* z0-z7[0-7] in Zm3_INDEX plus bit 22. */ AARCH64_OPND_SVE_Zm3_22_INDEX, /* z0-z7[0-7] in Zm3_INDEX plus bit 22. */
AARCH64_OPND_SVE_Zm4_11_INDEX, /* z0-z15[0-3] in Zm plus bit 11. */ AARCH64_OPND_SVE_Zm4_11_INDEX, /* z0-z15[0-3] in Zm plus bit 11. */
AARCH64_OPND_SVE_Zm_imm4, /* SVE vector register with 4bit index. */
AARCH64_OPND_SVE_Zm4_INDEX, /* z0-z15[0-1] in Zm, bits [20,16]. */ AARCH64_OPND_SVE_Zm4_INDEX, /* z0-z15[0-1] in Zm, bits [20,16]. */
AARCH64_OPND_SVE_Zn, /* SVE vector register in Zn. */ AARCH64_OPND_SVE_Zn, /* SVE vector register in Zn. */
AARCH64_OPND_SVE_Zn_5_INDEX, /* Indexed SVE vector register, for DUPQ. */
AARCH64_OPND_SVE_Zn_INDEX, /* Indexed SVE vector register, for DUP. */ AARCH64_OPND_SVE_Zn_INDEX, /* Indexed SVE vector register, for DUP. */
AARCH64_OPND_SVE_ZnxN, /* SVE vector register list in Zn. */ AARCH64_OPND_SVE_ZnxN, /* SVE vector register list in Zn. */
AARCH64_OPND_SVE_Zt, /* SVE vector register in Zt. */ AARCH64_OPND_SVE_Zt, /* SVE vector register in Zt. */
@ -1002,7 +1004,8 @@ enum aarch64_insn_class
cssc, cssc,
gcs, gcs,
the, the,
sve2_urqvs sve2_urqvs,
sve_index1,
}; };
/* Opcode enumerators. */ /* Opcode enumerators. */

View File

@ -1220,6 +1220,21 @@ aarch64_ins_sve_index (const aarch64_operand *self,
return true; return true;
} }
/* Encode Zn.<T>[<imm>], where <imm> ranges from 0 to one less than the
   number of elements in 128 bits, which is encoded as il:tsz.

   The register number is written to SELF->fields[0].  The lane index and
   the element size in bytes are merged into the single value
   (<imm> * 2 + 1) * esize, which is then inserted across SELF->fields[1]
   and SELF->fields[2].  Always returns true.  */
bool
aarch64_ins_sve_index_imm (const aarch64_operand *self,
			   const aarch64_opnd_info *info, aarch64_insn *code,
			   const aarch64_inst *inst ATTRIBUTE_UNUSED,
			   aarch64_operand_error *errors ATTRIBUTE_UNUSED)
{
  /* Element size in bytes for the operand's qualifier.  */
  unsigned int elt_bytes = aarch64_get_qualifier_esize (info->qualifier);

  /* Register number first.  */
  insert_field (self->fields[0], code, info->reglane.regno, 0);

  /* Then the combined index/element-size value.  */
  insert_fields (code, (info->reglane.index * 2 + 1) * elt_bytes, 0,
		 2, self->fields[1], self->fields[2]);

  return true;
}
/* Encode a logical/bitmask immediate for the MOV alias of SVE DUPM. */ /* Encode a logical/bitmask immediate for the MOV alias of SVE DUPM. */
bool bool
aarch64_ins_sve_limm_mov (const aarch64_operand *self, aarch64_ins_sve_limm_mov (const aarch64_operand *self,
@ -2079,6 +2094,7 @@ aarch64_encode_variant_using_iclass (struct aarch64_inst *inst)
case sme_shift: case sme_shift:
case sve_index: case sve_index:
case sve_index1:
case sve_shift_pred: case sve_shift_pred:
case sve_shift_unpred: case sve_shift_unpred:
case sve_shift_tsz_hsd: case sve_shift_tsz_hsd:

View File

@ -93,6 +93,7 @@ AARCH64_DECL_OPD_INSERTER (ins_sve_float_half_one);
AARCH64_DECL_OPD_INSERTER (ins_sve_float_half_two); AARCH64_DECL_OPD_INSERTER (ins_sve_float_half_two);
AARCH64_DECL_OPD_INSERTER (ins_sve_float_zero_one); AARCH64_DECL_OPD_INSERTER (ins_sve_float_zero_one);
AARCH64_DECL_OPD_INSERTER (ins_sve_index); AARCH64_DECL_OPD_INSERTER (ins_sve_index);
AARCH64_DECL_OPD_INSERTER (ins_sve_index_imm);
AARCH64_DECL_OPD_INSERTER (ins_sve_limm_mov); AARCH64_DECL_OPD_INSERTER (ins_sve_limm_mov);
AARCH64_DECL_OPD_INSERTER (ins_sve_quad_index); AARCH64_DECL_OPD_INSERTER (ins_sve_quad_index);
AARCH64_DECL_OPD_INSERTER (ins_sve_reglist); AARCH64_DECL_OPD_INSERTER (ins_sve_reglist);

View File

@ -2097,6 +2097,26 @@ aarch64_ext_sve_index (const aarch64_operand *self,
return true; return true;
} }
/* Decode Zn.<T>[<imm>], where <imm> ranges from 0 to one less than the
   number of elements in 128 bits, which is encoded as il:tsz.

   The register number is read from SELF->fields[0] and the combined
   index/element-size value from SELF->fields[2] and SELF->fields[1].
   Decoding fails (returns false) when the low four bits of that value
   are all zero.  Otherwise the trailing zero bits are discarded and the
   lane index is the remaining value halved.  */
bool
aarch64_ext_sve_index_imm (const aarch64_operand *self,
			   aarch64_opnd_info *info, aarch64_insn code,
			   const aarch64_inst *inst ATTRIBUTE_UNUSED,
			   aarch64_operand_error *errors ATTRIBUTE_UNUSED)
{
  int packed;

  info->reglane.regno = extract_field (self->fields[0], code, 0);
  packed = extract_fields (code, 0, 2, self->fields[2], self->fields[1]);

  /* No set bit in the low four bits: reject the encoding.  */
  if (!(packed & 0xf))
    return false;

  /* Drop the trailing zero bits; the guard above guarantees that a set
     bit is reached, so the loop terminates.  */
  for (; (packed & 1) == 0; packed /= 2)
    ;

  /* The lowest set bit is the marker; what remains above it is the index.  */
  info->reglane.index = packed / 2;
  return true;
}
/* Decode a logical immediate for the MOV alias of SVE DUPM. */ /* Decode a logical immediate for the MOV alias of SVE DUPM. */
bool bool
aarch64_ext_sve_limm_mov (const aarch64_operand *self, aarch64_ext_sve_limm_mov (const aarch64_operand *self,
@ -3231,6 +3251,17 @@ aarch64_decode_variant_using_iclass (aarch64_inst *inst)
} }
break; break;
case sve_index1:
i = extract_fields (inst->value, 0, 2, FLD_SVE_tsz, FLD_SVE_i2h);
if ((i & 15) == 0)
return false;
while ((i & 1) == 0)
{
i >>= 1;
variant += 1;
}
break;
case sve_limm: case sve_limm:
/* Pick the smallest applicable element size. */ /* Pick the smallest applicable element size. */
if ((inst->value & 0x20600) == 0x600) if ((inst->value & 0x20600) == 0x600)

View File

@ -117,6 +117,7 @@ AARCH64_DECL_OPD_EXTRACTOR (ext_sve_float_half_one);
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_float_half_two); AARCH64_DECL_OPD_EXTRACTOR (ext_sve_float_half_two);
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_float_zero_one); AARCH64_DECL_OPD_EXTRACTOR (ext_sve_float_zero_one);
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_index); AARCH64_DECL_OPD_EXTRACTOR (ext_sve_index);
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_index_imm);
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_limm_mov); AARCH64_DECL_OPD_EXTRACTOR (ext_sve_limm_mov);
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_quad_index); AARCH64_DECL_OPD_EXTRACTOR (ext_sve_quad_index);
AARCH64_DECL_OPD_EXTRACTOR (ext_sve_reglist); AARCH64_DECL_OPD_EXTRACTOR (ext_sve_reglist);

View File

@ -1794,6 +1794,18 @@ operand_general_constraint_met_p (const aarch64_opnd_info *opnds, int idx,
return 0; return 0;
break; break;
case AARCH64_OPND_SVE_Zm_imm4:
if (!check_reglane (opnd, mismatch_detail, idx, "z", 0, 31, 0, 15))
return 0;
break;
case AARCH64_OPND_SVE_Zn_5_INDEX:
size = aarch64_get_qualifier_esize (opnd->qualifier);
if (!check_reglane (opnd, mismatch_detail, idx, "z", 0, 31,
0, 16 / size - 1))
return 0;
break;
case AARCH64_OPND_SME_PNn3_INDEX1: case AARCH64_OPND_SME_PNn3_INDEX1:
case AARCH64_OPND_SME_PNn3_INDEX2: case AARCH64_OPND_SME_PNn3_INDEX2:
size = get_operand_field_width (get_operand_from_code (type), 1); size = get_operand_field_width (get_operand_from_code (type), 1);
@ -4074,6 +4086,7 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
case AARCH64_OPND_SME_Zm_INDEX3_1: case AARCH64_OPND_SME_Zm_INDEX3_1:
case AARCH64_OPND_SME_Zm_INDEX3_2: case AARCH64_OPND_SME_Zm_INDEX3_2:
case AARCH64_OPND_SME_Zm_INDEX3_10: case AARCH64_OPND_SME_Zm_INDEX3_10:
case AARCH64_OPND_SVE_Zn_5_INDEX:
case AARCH64_OPND_SME_Zm_INDEX4_1: case AARCH64_OPND_SME_Zm_INDEX4_1:
case AARCH64_OPND_SME_Zm_INDEX4_10: case AARCH64_OPND_SME_Zm_INDEX4_10:
case AARCH64_OPND_SME_Zn_INDEX1_16: case AARCH64_OPND_SME_Zn_INDEX1_16:
@ -4082,6 +4095,7 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
case AARCH64_OPND_SME_Zn_INDEX3_14: case AARCH64_OPND_SME_Zn_INDEX3_14:
case AARCH64_OPND_SME_Zn_INDEX3_15: case AARCH64_OPND_SME_Zn_INDEX3_15:
case AARCH64_OPND_SME_Zn_INDEX4_14: case AARCH64_OPND_SME_Zn_INDEX4_14:
case AARCH64_OPND_SVE_Zm_imm4:
snprintf (buf, size, "%s[%s]", snprintf (buf, size, "%s[%s]",
(opnd->qualifier == AARCH64_OPND_QLF_NIL (opnd->qualifier == AARCH64_OPND_QLF_NIL
? style_reg (styler, "z%d", opnd->reglane.regno) ? style_reg (styler, "z%d", opnd->reglane.regno)

View File

@ -6337,6 +6337,10 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SVE2p1_INSNC("sminqv",0x040e2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0), SVE2p1_INSNC("sminqv",0x040e2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("umaxqv",0x040d2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0), SVE2p1_INSNC("umaxqv",0x040d2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("uminqv",0x040f2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0), SVE2p1_INSNC("uminqv",0x040f2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("eorqv",0x041d2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
SVE2p1_INSN("dupq",0x05202400, 0xffe0fc00, sve_index1, 0, OP2 (SVE_Zd, SVE_Zn_5_INDEX), OP_SVE_VV_BHSD, 0, 0),
SVE2p1_INSN("extq",0x05602400, 0xfff0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zd, SVE_Zm_imm4), OP_SVE_BBB, 0, 0),
{0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL}, {0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
}; };
@ -6816,11 +6820,17 @@ const struct aarch64_opcode aarch64_opcode_table[] =
Y(SVE_REG, sve_quad_index, "SVE_Zm4_11_INDEX", \ Y(SVE_REG, sve_quad_index, "SVE_Zm4_11_INDEX", \
4 << OPD_F_OD_LSB, F(FLD_SVE_i2h, FLD_SVE_i3l, FLD_SVE_imm4), \ 4 << OPD_F_OD_LSB, F(FLD_SVE_i2h, FLD_SVE_i3l, FLD_SVE_imm4), \
"an indexed SVE vector register") \ "an indexed SVE vector register") \
Y(SVE_REG, sve_quad_index, "SVE_Zm_imm4", \
5 << OPD_F_OD_LSB, F(FLD_SVE_Zm_5, FLD_SVE_imm4), \
"an 4bit indexed SVE vector register") \
Y(SVE_REG, sve_quad_index, "SVE_Zm4_INDEX", \ Y(SVE_REG, sve_quad_index, "SVE_Zm4_INDEX", \
4 << OPD_F_OD_LSB, F(FLD_SVE_Zm_16), \ 4 << OPD_F_OD_LSB, F(FLD_SVE_Zm_16), \
"an indexed SVE vector register") \ "an indexed SVE vector register") \
Y(SVE_REG, regno, "SVE_Zn", 0, F(FLD_SVE_Zn), \ Y(SVE_REG, regno, "SVE_Zn", 0, F(FLD_SVE_Zn), \
"an SVE vector register") \ "an SVE vector register") \
Y(SVE_REG, sve_index_imm, "SVE_Zn_5_INDEX", 0, \
F(FLD_SVE_Zn, FLD_SVE_i2h, FLD_SVE_tsz), \
"a 5 bit idexed SVE vector register") \
Y(SVE_REG, sve_index, "SVE_Zn_INDEX", 0, F(FLD_SVE_Zn), \ Y(SVE_REG, sve_index, "SVE_Zn_INDEX", 0, F(FLD_SVE_Zn), \
"an indexed SVE vector register") \ "an indexed SVE vector register") \
Y(SVE_REGLIST, sve_reglist, "SVE_ZnxN", 0, F(FLD_SVE_Zn), \ Y(SVE_REGLIST, sve_reglist, "SVE_ZnxN", 0, F(FLD_SVE_Zn), \