aarch64: Add support for FEAT_SVE2p1.

Hi,

This patch adds support for the FEAT_SVE2p1 (SVE2.1 Extension) feature
along with the optional +sve2p1 flag to enable this feature.

Support is also added for the following SVE2p1 instructions:
addqv, andqv, smaxqv, sminqv, umaxqv and uminqv.

Regression tested for the aarch64-none-elf target and found no regressions.

Ok for binutils-master?

Regards,
Srinath.
This commit is contained in:
Srinath Parvathaneni 2024-01-15 09:35:55 +00:00 committed by Nick Clifton
parent 89e06ec152
commit 88601c2d94
11 changed files with 195 additions and 2 deletions

View File

@ -3,6 +3,9 @@
hand-written asm using the new command line option --scfi=experimental on
x86-64. Only System V AMD64 ABI is supported.
* Add support for the Arm Scalable Vector Extension version 2.1 (SVE2.1)
instructions.
* Add support for the AArch64 Scalable Matrix Extension version 2.1 (SME2.1)
instructions.

View File

@ -10354,6 +10354,7 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = {
AARCH64_FEATURE (LSE128)},
{"b16b16", AARCH64_FEATURE (B16B16), AARCH64_FEATURE (SVE2)},
{"sme2p1", AARCH64_FEATURE (SME2p1), AARCH64_FEATURE (SME2)},
{"sve2p1", AARCH64_FEATURE (SVE2p1), AARCH64_FEATURE (SVE2)},
{NULL, AARCH64_NO_FEATURES, AARCH64_NO_FEATURES},
};

View File

@ -278,6 +278,8 @@ automatically cause those extensions to be disabled.
@tab Enable the 128-bit Page Descriptor Extension. This implies @code{lse128}.
@item @code{sme2p1} @tab N/A @tab No
@tab Enable the SME2.1 Extension.
@item @code{sve2p1} @tab N/A @tab No
@tab Enable the SVE2.1 Extension.
@end multitable
@node AArch64 Syntax

View File

@ -0,0 +1,4 @@
#name: Illegal test of SVE2.1 min max instructions.
#as: -march=armv9.4-a
#source: sve2p1-1.s
#error_output: sve2p1-1-bad.l

View File

@ -0,0 +1,37 @@
.*: Assembler messages:
.*: Error: selected processor does not support `addqv v0.16b,p0,z16.b'
.*: Error: selected processor does not support `addqv v1.8h,p1,z8.h'
.*: Error: selected processor does not support `addqv v2.4s,p2,z4.s'
.*: Error: selected processor does not support `addqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `addqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `addqv v16.4s,p7,z0.s'
.*: Error: selected processor does not support `andqv v0.16b,p0,z16.b'
.*: Error: selected processor does not support `andqv v1.8h,p1,z8.h'
.*: Error: selected processor does not support `andqv v2.4s,p2,z4.s'
.*: Error: selected processor does not support `andqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `andqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `andqv v16.4s,p7,z0.s'
.*: Error: selected processor does not support `smaxqv v0.16b,p0,z16.b'
.*: Error: selected processor does not support `smaxqv v1.8h,p1,z8.h'
.*: Error: selected processor does not support `smaxqv v2.4s,p2,z4.s'
.*: Error: selected processor does not support `smaxqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `smaxqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `smaxqv v16.4s,p7,z0.s'
.*: Error: selected processor does not support `umaxqv v0.16b,p0,z16.b'
.*: Error: selected processor does not support `umaxqv v1.8h,p1,z8.h'
.*: Error: selected processor does not support `umaxqv v2.4s,p2,z4.s'
.*: Error: selected processor does not support `umaxqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `umaxqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `umaxqv v16.4s,p7,z0.s'
.*: Error: selected processor does not support `sminqv v0.16b,p0,z16.b'
.*: Error: selected processor does not support `sminqv v1.8h,p1,z8.h'
.*: Error: selected processor does not support `sminqv v2.4s,p2,z4.s'
.*: Error: selected processor does not support `sminqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `sminqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `sminqv v16.4s,p7,z0.s'
.*: Error: selected processor does not support `uminqv v0.16b,p0,z16.b'
.*: Error: selected processor does not support `uminqv v1.8h,p1,z8.h'
.*: Error: selected processor does not support `uminqv v2.4s,p2,z4.s'
.*: Error: selected processor does not support `uminqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `uminqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `uminqv v16.4s,p7,z0.s'

View File

@ -0,0 +1,46 @@
#name: Test of SVE2.1 min max instructions.
#as: -march=armv9.4-a+sve2p1
#objdump: -dr
[^:]+: file format .*
[^:]+:
[^:]+:
.*: 04052200 addqv v0.16b, p0, z16.b
.*: 04452501 addqv v1.8h, p1, z8.h
.*: 04852882 addqv v2.4s, p2, z4.s
.*: 04c52c44 addqv v4.2d, p3, z2.d
.*: 04c53028 addqv v8.2d, p4, z1.d
.*: 04853c10 addqv v16.4s, p7, z0.s
.*: 041e2200 andqv v0.16b, p0, z16.b
.*: 045e2501 andqv v1.8h, p1, z8.h
.*: 049e2882 andqv v2.4s, p2, z4.s
.*: 04de2c44 andqv v4.2d, p3, z2.d
.*: 04de3028 andqv v8.2d, p4, z1.d
.*: 049e3c10 andqv v16.4s, p7, z0.s
.*: 040c2200 smaxqv v0.16b, p0, z16.b
.*: 044c2501 smaxqv v1.8h, p1, z8.h
.*: 048c2882 smaxqv v2.4s, p2, z4.s
.*: 04cc2c44 smaxqv v4.2d, p3, z2.d
.*: 04cc3028 smaxqv v8.2d, p4, z1.d
.*: 048c3c10 smaxqv v16.4s, p7, z0.s
.*: 040d2200 umaxqv v0.16b, p0, z16.b
.*: 044d2501 umaxqv v1.8h, p1, z8.h
.*: 048d2882 umaxqv v2.4s, p2, z4.s
.*: 04cd2c44 umaxqv v4.2d, p3, z2.d
.*: 04cd3028 umaxqv v8.2d, p4, z1.d
.*: 048d3c10 umaxqv v16.4s, p7, z0.s
.*: 040e2200 sminqv v0.16b, p0, z16.b
.*: 044e2501 sminqv v1.8h, p1, z8.h
.*: 048e2882 sminqv v2.4s, p2, z4.s
.*: 04ce2c44 sminqv v4.2d, p3, z2.d
.*: 04ce3028 sminqv v8.2d, p4, z1.d
.*: 048e3c10 sminqv v16.4s, p7, z0.s
.*: 040f2200 uminqv v0.16b, p0, z16.b
.*: 044f2501 uminqv v1.8h, p1, z8.h
.*: 048f2882 uminqv v2.4s, p2, z4.s
.*: 04cf2c44 uminqv v4.2d, p3, z2.d
.*: 04cf3028 uminqv v8.2d, p4, z1.d
.*: 048f3c10 uminqv v16.4s, p7, z0.s

View File

@ -0,0 +1,41 @@
addqv v0.16b, p0, z16.b
addqv v1.8h, p1, z8.h
addqv v2.4s, p2, z4.s
addqv v4.2d, p3, z2.d
addqv v8.2d, p4, z1.d
addqv v16.4s, p7, z0.s
andqv v0.16b, p0, z16.b
andqv v1.8h, p1, z8.h
andqv v2.4s, p2, z4.s
andqv v4.2d, p3, z2.d
andqv v8.2d, p4, z1.d
andqv v16.4s, p7, z0.s
smaxqv v0.16b, p0, z16.b
smaxqv v1.8h, p1, z8.h
smaxqv v2.4s, p2, z4.s
smaxqv v4.2d, p3, z2.d
smaxqv v8.2d, p4, z1.d
smaxqv v16.4s, p7, z0.s
umaxqv v0.16b, p0, z16.b
umaxqv v1.8h, p1, z8.h
umaxqv v2.4s, p2, z4.s
umaxqv v4.2d, p3, z2.d
umaxqv v8.2d, p4, z1.d
umaxqv v16.4s, p7, z0.s
sminqv v0.16b, p0, z16.b
sminqv v1.8h, p1, z8.h
sminqv v2.4s, p2, z4.s
sminqv v4.2d, p3, z2.d
sminqv v8.2d, p4, z1.d
sminqv v16.4s, p7, z0.s
uminqv v0.16b, p0, z16.b
uminqv v1.8h, p1, z8.h
uminqv v2.4s, p2, z4.s
uminqv v4.2d, p3, z2.d
uminqv v8.2d, p4, z1.d
uminqv v16.4s, p7, z0.s

View File

@ -226,6 +226,8 @@ enum aarch64_feature_bit {
AARCH64_FEATURE_B16B16,
/* SME2.1 instructions. */
AARCH64_FEATURE_SME2p1,
/* SVE2.1 instructions. */
AARCH64_FEATURE_SVE2p1,
AARCH64_NUM_FEATURES
};
@ -1000,6 +1002,7 @@ enum aarch64_insn_class
cssc,
gcs,
the,
sve2_urqvs
};
/* Opcode enumerators. */
@ -1272,7 +1275,9 @@ extern const aarch64_opcode aarch64_opcode_table[];
allow. This impacts the constraints on assembly but yields no
impact on disassembly. */
#define F_OPD_NARROW (1ULL << 33)
/* Next bit is 34. */
/* For the instruction with size[22:23] field. */
#define F_OPD_SIZE (1ULL << 34)
/* Next bit is 35. */
/* Instruction constraints. */
/* This instruction has a predication constraint on the instruction at PC+4. */
@ -1339,7 +1344,8 @@ static inline bool
opcode_has_special_coder (const aarch64_opcode *opcode)
{
return (opcode->flags & (F_SF | F_LSE_SZ | F_SIZEQ | F_FPTYPE | F_SSIZE | F_T
| F_GPRSIZE_IN_Q | F_LDS_SIZE | F_MISC | F_N | F_COND)) != 0;
| F_GPRSIZE_IN_Q | F_LDS_SIZE | F_MISC | F_N | F_COND
| F_OPD_SIZE)) != 0;
}
struct aarch64_name_value_pair

View File

@ -1981,6 +1981,20 @@ do_special_encoding (struct aarch64_inst *inst)
gen_sub_field (FLD_imm5, 0, num + 1, &field);
insert_field_2 (&field, &inst->value, 1 << num, inst->opcode->mask);
}
if ((inst->opcode->flags & F_OPD_SIZE) && inst->opcode->iclass == sve2_urqvs)
{
  /* Encode the size field for the sve2_urqvs class from the operand
     qualifiers.  Operand 0 holds the vector-register qualifier and
     operand 2 the element-size qualifier; the assert checks that the two
     agree before the element size is written into FLD_size.
     The array needs two slots: it was previously declared with one, so the
     accesses to qualifier[1] were out of bounds.  */
  enum aarch64_opnd_qualifier qualifier[2];
  aarch64_insn value1 = 0;
  idx = 0;
  qualifier[0] = inst->operands[idx].qualifier;
  qualifier[1] = inst->operands[idx + 2].qualifier;
  value = aarch64_get_qualifier_standard_value (qualifier[0]);
  value1 = aarch64_get_qualifier_standard_value (qualifier[1]);
  /* The vector qualifier's standard value, with its low bit dropped, must
     match the element-size value.  */
  assert ((value >> 1) == value1);
  insert_field (FLD_size, &inst->value, value1, inst->opcode->mask);
}
if (inst->opcode->flags & F_GPRSIZE_IN_Q)
{
/* Use Rt to encode in the case of e.g.

View File

@ -2609,6 +2609,16 @@ do_special_decoding (aarch64_inst *inst)
get_vreg_qualifier_from_value ((num << 1) | Q);
}
if ((inst->opcode->flags & F_OPD_SIZE) != 0
    && inst->opcode->iclass == sve2_urqvs)
{
  /* Recover both register qualifiers of an sve2_urqvs instruction from
     its size field: operand 0 gets the vector-register qualifier and
     operand 2 the matching element-size qualifier.  */
  const unsigned esize = (unsigned) extract_field (FLD_size, inst->value,
                                                   inst->opcode->mask);

  /* The vector qualifier index is the size value shifted up by one with
     the low bit set.  */
  inst->operands[0].qualifier
    = get_vreg_qualifier_from_value ((esize << 1) + 1);
  inst->operands[2].qualifier = get_sreg_qualifier_from_value (esize);
}
if (inst->opcode->flags & F_GPRSIZE_IN_Q)
{
/* Use Rt to encode in the case of e.g.

View File

@ -1487,6 +1487,10 @@
- P: the operand has a /[ZM] suffix and the choice of suffix is not
the same for all variants.
- v: the operand has a V_[16B|8H|4S|2D] qualifier and the choice of
qualifier suffix is not the same for all variants. This is used for
the same kinds of operands as [BHSD] above.
The _<sizes>, if present, give the subset of [BHSD] that are accepted
by the V entries in <operands>. */
#define OP_SVE_B \
@ -1911,6 +1915,13 @@
QLF3(S_S,S_H,NIL), \
QLF3(S_D,S_S,NIL), \
}
/* Qualifier sequences for three-operand instructions whose first operand
   takes a V_[16B|8H|4S|2D] qualifier, whose second operand has no
   qualifier (NIL), and whose third operand takes the S_[BHSD] qualifier of
   the same element size.  One row per element size.  */
#define OP_SVE_vUS_BHSD_BHSD \
{ \
QLF3(V_16B,NIL,S_B), \
QLF3(V_8H,NIL,S_H), \
QLF3(V_4S,NIL,S_S), \
QLF3(V_2D,NIL,S_D), \
}
#define OP_SVE_VMV_SD \
{ \
QLF3(S_S,P_M,S_S), \
@ -2620,6 +2631,8 @@ static const aarch64_feature_set aarch64_feature_b16b16 =
AARCH64_FEATURE (B16B16);
static const aarch64_feature_set aarch64_feature_sme2p1 =
AARCH64_FEATURE (SME2p1);
static const aarch64_feature_set aarch64_feature_sve2p1 =
AARCH64_FEATURE (SVE2p1);
#define CORE &aarch64_feature_v8
#define FP &aarch64_feature_fp
@ -2684,6 +2697,7 @@ static const aarch64_feature_set aarch64_feature_sme2p1 =
#define D128_THE &aarch64_feature_d128_the
#define B16B16 &aarch64_feature_b16b16
#define SME2p1 &aarch64_feature_sme2p1
#define SVE2p1 &aarch64_feature_sve2p1
#define CORE_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, OP, CORE, OPS, QUALS, FLAGS, 0, 0, NULL }
@ -2762,6 +2776,12 @@ static const aarch64_feature_set aarch64_feature_sme2p1 =
#define B16B16_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
{ NAME, OPCODE, MASK, CLASS, OP, B16B16, OPS, QUALS, \
FLAGS | F_STRICT, CONSTRAINTS, TIED, NULL }
/* Build an opcode table entry gated on the SVE2p1 feature set.  Both
   variants OR F_STRICT into the caller's FLAGS.  SVE2p1_INSN puts 0 in the
   constraints slot; SVE2p1_INSNC passes CONSTRAINTS through.  */
#define SVE2p1_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
{ NAME, OPCODE, MASK, CLASS, OP, SVE2p1, OPS, QUALS, \
FLAGS | F_STRICT, 0, TIED, NULL }
#define SVE2p1_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
{ NAME, OPCODE, MASK, CLASS, OP, SVE2p1, OPS, QUALS, \
FLAGS | F_STRICT, CONSTRAINTS, TIED, NULL }
#define SVE2AES_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
{ NAME, OPCODE, MASK, CLASS, OP, SVE2_AES, OPS, QUALS, \
FLAGS | F_STRICT, 0, TIED, NULL }
@ -6309,6 +6329,15 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SME2p1_INSN ("movaz", 0xc0460200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrsh_1), OP_SVE_HH, 0, 0),
SME2p1_INSN ("movaz", 0xc0860200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrss_1), OP_SVE_SS, 0, 0),
SME2p1_INSN ("movaz", 0xc0c60200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrsd_1), OP_SVE_DD, 0, 0),
/* SVE2p1 Instructions. */
SVE2p1_INSNC("addqv",0x04052000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("andqv",0x041e2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("smaxqv",0x040c2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("sminqv",0x040e2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("umaxqv",0x040d2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("uminqv",0x040f2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
{0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
};