aarch64: Add support for FEAT_SVE2p1.

Hi, This patch adds support for the FEAT_SVE2p1 (SVE2.1 Extension) feature, along with the optional +sve2p1 flag to enable it. It also adds support for the following SVE2p1 instructions: addqv, andqv, smaxqv, sminqv, umaxqv and uminqv. Regression tested for the aarch64-none-elf target and found no regressions. Ok for binutils-master? Regards, Srinath.
2024-01-15 09:35:55 +00:00 · 2024-01-15 09:35:55 +00:00 · 88601c2d94
commit 88601c2d94
parent 89e06ec152
11 changed files with 195 additions and 2 deletions
--- a/gas/NEWS
+++ b/gas/NEWS
@ -3,6 +3,9 @@
  hand-written asm using the new command line option --scfi=experimental on
  x86-64.  Only System V AMD64 ABI is supported.

+* Add support for the Arm Scalable Vector Extension version 2.1 (SVE2.1)
+  instructions.
+
 * Add support for the AArch64 Scalable Matrix Extension version 2.1 (SME2.1)
  instructions.

--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@ -10354,6 +10354,7 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = {
 			AARCH64_FEATURE (LSE128)},
  {"b16b16",		AARCH64_FEATURE (B16B16), AARCH64_FEATURE (SVE2)},
  {"sme2p1",		AARCH64_FEATURE (SME2p1), AARCH64_FEATURE (SME2)},
+  {"sve2p1",		AARCH64_FEATURE (SVE2p1), AARCH64_FEATURE (SVE2)},
  {NULL,		AARCH64_NO_FEATURES, AARCH64_NO_FEATURES},
 };

--- a/gas/doc/c-aarch64.texi
+++ b/gas/doc/c-aarch64.texi
@ -278,6 +278,8 @@ automatically cause those extensions to be disabled.
 @tab Enable the 128-bit Page Descriptor Extension.  This implies @code{lse128}.
@item @code{sme2p1} @tab N/A @tab No
 @tab Enable the SME2.1 Extension.
+@item @code{sve2p1} @tab N/A @tab No
+ @tab Enable the SVE2.1 Extension.
@end multitable

@node AArch64 Syntax
--- a/gas/testsuite/gas/aarch64/sve2p1-1-bad.d
+++ b/gas/testsuite/gas/aarch64/sve2p1-1-bad.d
@ -0,0 +1,4 @@
+#name: Illegal test of SVE2.1 min max instructions.
+#as: -march=armv9.4-a
+#source: sve2p1-1.s
+#error_output: sve2p1-1-bad.l
--- a/gas/testsuite/gas/aarch64/sve2p1-1-bad.l
+++ b/gas/testsuite/gas/aarch64/sve2p1-1-bad.l
@ -0,0 +1,37 @@
+.*: Assembler messages:
+.*: Error: selected processor does not support `addqv v0.16b,p0,z16.b'
+.*: Error: selected processor does not support `addqv v1.8h,p1,z8.h'
+.*: Error: selected processor does not support `addqv v2.4s,p2,z4.s'
+.*: Error: selected processor does not support `addqv v4.2d,p3,z2.d'
+.*: Error: selected processor does not support `addqv v8.2d,p4,z1.d'
+.*: Error: selected processor does not support `addqv v16.4s,p7,z0.s'
+.*: Error: selected processor does not support `andqv v0.16b,p0,z16.b'
+.*: Error: selected processor does not support `andqv v1.8h,p1,z8.h'
+.*: Error: selected processor does not support `andqv v2.4s,p2,z4.s'
+.*: Error: selected processor does not support `andqv v4.2d,p3,z2.d'
+.*: Error: selected processor does not support `andqv v8.2d,p4,z1.d'
+.*: Error: selected processor does not support `andqv v16.4s,p7,z0.s'
+.*: Error: selected processor does not support `smaxqv v0.16b,p0,z16.b'
+.*: Error: selected processor does not support `smaxqv v1.8h,p1,z8.h'
+.*: Error: selected processor does not support `smaxqv v2.4s,p2,z4.s'
+.*: Error: selected processor does not support `smaxqv v4.2d,p3,z2.d'
+.*: Error: selected processor does not support `smaxqv v8.2d,p4,z1.d'
+.*: Error: selected processor does not support `smaxqv v16.4s,p7,z0.s'
+.*: Error: selected processor does not support `umaxqv v0.16b,p0,z16.b'
+.*: Error: selected processor does not support `umaxqv v1.8h,p1,z8.h'
+.*: Error: selected processor does not support `umaxqv v2.4s,p2,z4.s'
+.*: Error: selected processor does not support `umaxqv v4.2d,p3,z2.d'
+.*: Error: selected processor does not support `umaxqv v8.2d,p4,z1.d'
+.*: Error: selected processor does not support `umaxqv v16.4s,p7,z0.s'
+.*: Error: selected processor does not support `sminqv v0.16b,p0,z16.b'
+.*: Error: selected processor does not support `sminqv v1.8h,p1,z8.h'
+.*: Error: selected processor does not support `sminqv v2.4s,p2,z4.s'
+.*: Error: selected processor does not support `sminqv v4.2d,p3,z2.d'
+.*: Error: selected processor does not support `sminqv v8.2d,p4,z1.d'
+.*: Error: selected processor does not support `sminqv v16.4s,p7,z0.s'
+.*: Error: selected processor does not support `uminqv v0.16b,p0,z16.b'
+.*: Error: selected processor does not support `uminqv v1.8h,p1,z8.h'
+.*: Error: selected processor does not support `uminqv v2.4s,p2,z4.s'
+.*: Error: selected processor does not support `uminqv v4.2d,p3,z2.d'
+.*: Error: selected processor does not support `uminqv v8.2d,p4,z1.d'
+.*: Error: selected processor does not support `uminqv v16.4s,p7,z0.s'
--- a/gas/testsuite/gas/aarch64/sve2p1-1.d
+++ b/gas/testsuite/gas/aarch64/sve2p1-1.d
@ -0,0 +1,46 @@
+#name: Test of SVE2.1 min max instructions.
+#as: -march=armv9.4-a+sve2p1
+#objdump: -dr
+
+[^:]+:     file format .*
+
+
+[^:]+:
+
+[^:]+:
+.*:	04052200 	addqv	v0.16b, p0, z16.b
+.*:	04452501 	addqv	v1.8h, p1, z8.h
+.*:	04852882 	addqv	v2.4s, p2, z4.s
+.*:	04c52c44 	addqv	v4.2d, p3, z2.d
+.*:	04c53028 	addqv	v8.2d, p4, z1.d
+.*:	04853c10 	addqv	v16.4s, p7, z0.s
+.*:	041e2200 	andqv	v0.16b, p0, z16.b
+.*:	045e2501 	andqv	v1.8h, p1, z8.h
+.*:	049e2882 	andqv	v2.4s, p2, z4.s
+.*:	04de2c44 	andqv	v4.2d, p3, z2.d
+.*:	04de3028 	andqv	v8.2d, p4, z1.d
+.*:	049e3c10 	andqv	v16.4s, p7, z0.s
+.*:	040c2200 	smaxqv	v0.16b, p0, z16.b
+.*:	044c2501 	smaxqv	v1.8h, p1, z8.h
+.*:	048c2882 	smaxqv	v2.4s, p2, z4.s
+.*:	04cc2c44 	smaxqv	v4.2d, p3, z2.d
+.*:	04cc3028 	smaxqv	v8.2d, p4, z1.d
+.*:	048c3c10 	smaxqv	v16.4s, p7, z0.s
+.*:	040d2200 	umaxqv	v0.16b, p0, z16.b
+.*:	044d2501 	umaxqv	v1.8h, p1, z8.h
+.*:	048d2882 	umaxqv	v2.4s, p2, z4.s
+.*:	04cd2c44 	umaxqv	v4.2d, p3, z2.d
+.*:	04cd3028 	umaxqv	v8.2d, p4, z1.d
+.*:	048d3c10 	umaxqv	v16.4s, p7, z0.s
+.*:	040e2200 	sminqv	v0.16b, p0, z16.b
+.*:	044e2501 	sminqv	v1.8h, p1, z8.h
+.*:	048e2882 	sminqv	v2.4s, p2, z4.s
+.*:	04ce2c44 	sminqv	v4.2d, p3, z2.d
+.*:	04ce3028 	sminqv	v8.2d, p4, z1.d
+.*:	048e3c10 	sminqv	v16.4s, p7, z0.s
+.*:	040f2200 	uminqv	v0.16b, p0, z16.b
+.*:	044f2501 	uminqv	v1.8h, p1, z8.h
+.*:	048f2882 	uminqv	v2.4s, p2, z4.s
+.*:	04cf2c44 	uminqv	v4.2d, p3, z2.d
+.*:	04cf3028 	uminqv	v8.2d, p4, z1.d
+.*:	048f3c10 	uminqv	v16.4s, p7, z0.s
--- a/gas/testsuite/gas/aarch64/sve2p1-1.s
+++ b/gas/testsuite/gas/aarch64/sve2p1-1.s
@ -0,0 +1,41 @@
+addqv v0.16b, p0, z16.b
+addqv v1.8h, p1, z8.h
+addqv v2.4s, p2, z4.s
+addqv v4.2d, p3, z2.d
+addqv v8.2d, p4, z1.d
+addqv v16.4s, p7, z0.s
+
+andqv v0.16b, p0, z16.b
+andqv v1.8h, p1, z8.h
+andqv v2.4s, p2, z4.s
+andqv v4.2d, p3, z2.d
+andqv v8.2d, p4, z1.d
+andqv v16.4s, p7, z0.s
+
+smaxqv v0.16b, p0, z16.b
+smaxqv v1.8h, p1, z8.h
+smaxqv v2.4s, p2, z4.s
+smaxqv v4.2d, p3, z2.d
+smaxqv v8.2d, p4, z1.d
+smaxqv v16.4s, p7, z0.s
+
+umaxqv v0.16b, p0, z16.b
+umaxqv v1.8h, p1, z8.h
+umaxqv v2.4s, p2, z4.s
+umaxqv v4.2d, p3, z2.d
+umaxqv v8.2d, p4, z1.d
+umaxqv v16.4s, p7, z0.s
+
+sminqv v0.16b, p0, z16.b
+sminqv v1.8h, p1, z8.h
+sminqv v2.4s, p2, z4.s
+sminqv v4.2d, p3, z2.d
+sminqv v8.2d, p4, z1.d
+sminqv v16.4s, p7, z0.s
+
+uminqv v0.16b, p0, z16.b
+uminqv v1.8h, p1, z8.h
+uminqv v2.4s, p2, z4.s
+uminqv v4.2d, p3, z2.d
+uminqv v8.2d, p4, z1.d
+uminqv v16.4s, p7, z0.s
--- a/include/opcode/aarch64.h
+++ b/include/opcode/aarch64.h
@ -226,6 +226,8 @@ enum aarch64_feature_bit {
  AARCH64_FEATURE_B16B16,
  /* SME2.1 instructions.  */
  AARCH64_FEATURE_SME2p1,
+  /* SVE2.1 instructions.  */
+  AARCH64_FEATURE_SVE2p1,
  AARCH64_NUM_FEATURES
 };

@ -1000,6 +1002,7 @@ enum aarch64_insn_class
  cssc,
  gcs,
  the,
+  sve2_urqvs
 };

 /* Opcode enumerators.  */
@ -1272,7 +1275,9 @@ extern const aarch64_opcode aarch64_opcode_table[];
   allow.  This impacts the constraintts on assembly but yelds no
   impact on disassembly.  */
 #define F_OPD_NARROW (1ULL << 33)
-/* Next bit is 34.  */
+/* The instruction encodes its operand size in the size field, bits [23:22].  */
+#define F_OPD_SIZE (1ULL << 34)
+/* Next bit is 35.  */

 /* Instruction constraints.  */
 /* This instruction has a predication constraint on the instruction at PC+4.  */
@ -1339,7 +1344,8 @@ static inline bool
 opcode_has_special_coder (const aarch64_opcode *opcode)
 {
  return (opcode->flags & (F_SF | F_LSE_SZ | F_SIZEQ | F_FPTYPE | F_SSIZE | F_T
-	  | F_GPRSIZE_IN_Q | F_LDS_SIZE | F_MISC | F_N | F_COND)) != 0;
+	  | F_GPRSIZE_IN_Q | F_LDS_SIZE | F_MISC | F_N | F_COND
+	  | F_OPD_SIZE)) != 0;
 }

 struct aarch64_name_value_pair
--- a/opcodes/aarch64-asm.c
+++ b/opcodes/aarch64-asm.c
@ -1981,6 +1981,20 @@ do_special_encoding (struct aarch64_inst *inst)
      gen_sub_field (FLD_imm5, 0, num + 1, &field);
      insert_field_2 (&field, &inst->value, 1 << num, inst->opcode->mask);
    }
+
+  if ((inst->opcode->flags & F_OPD_SIZE) && inst->opcode->iclass == sve2_urqvs)
+    {
+      /* Operand 0 carries the vector-arrangement qualifier and operand 2
+         the element-size qualifier.  Both are read, so the array needs
+         two slots.  The size field is encoded from operand 2; the assert
+         checks that operand 0's standard value is consistent with it.  */
+      enum aarch64_opnd_qualifier qualifier[2];
+      aarch64_insn value1 = 0;
+      idx = 0;
+      qualifier[0] = inst->operands[idx].qualifier;
+      qualifier[1] = inst->operands[idx + 2].qualifier;
+      value = aarch64_get_qualifier_standard_value (qualifier[0]);
+      value1 = aarch64_get_qualifier_standard_value (qualifier[1]);
+      assert ((value >> 1) == value1);
+      insert_field (FLD_size, &inst->value, value1, inst->opcode->mask);
+    }
+
  if (inst->opcode->flags & F_GPRSIZE_IN_Q)
    {
      /* Use Rt to encode in the case of e.g.
--- a/opcodes/aarch64-dis.c
+++ b/opcodes/aarch64-dis.c
@ -2609,6 +2609,16 @@ do_special_decoding (aarch64_inst *inst)
 	get_vreg_qualifier_from_value ((num << 1) | Q);
    }

+  if ((inst->opcode->flags & F_OPD_SIZE) && inst->opcode->iclass == sve2_urqvs)
+    {
+      unsigned size;
+      size = (unsigned) extract_field (FLD_size, inst->value,
+				       inst->opcode->mask);
+      inst->operands[0].qualifier
+	= get_vreg_qualifier_from_value (1 + (size << 1));
+      inst->operands[2].qualifier = get_sreg_qualifier_from_value (size);
+    }
+
  if (inst->opcode->flags & F_GPRSIZE_IN_Q)
    {
      /* Use Rt to encode in the case of e.g.
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@ -1487,6 +1487,10 @@
   - P: the operand has a /[ZM] suffix and the choice of suffix is not
     the same for all variants.

+   - v: the operand has a V_[16B|8H|4S|2D] qualifier and the choice of
+     qualifier suffix is not the same for all variants.  This is used for
+     the same kinds of operands as [BHSD] above.
+
   The _<sizes>, if present, give the subset of [BHSD] that are accepted
   by the V entries in <operands>.  */
 #define OP_SVE_B                                        \
@ -1911,6 +1915,13 @@
  QLF3(S_S,S_H,NIL),                                    \
  QLF3(S_D,S_S,NIL),                                    \
 }
+#define OP_SVE_vUS_BHSD_BHSD				\
+{							\
+  QLF3(V_16B,NIL,S_B),					\
+  QLF3(V_8H,NIL,S_H),					\
+  QLF3(V_4S,NIL,S_S),					\
+  QLF3(V_2D,NIL,S_D),					\
+}
 #define OP_SVE_VMV_SD                                   \
 {                                                       \
  QLF3(S_S,P_M,S_S),                                    \
@ -2620,6 +2631,8 @@ static const aarch64_feature_set aarch64_feature_b16b16 =
  AARCH64_FEATURE (B16B16);
 static const aarch64_feature_set aarch64_feature_sme2p1 =
  AARCH64_FEATURE (SME2p1);
+static const aarch64_feature_set aarch64_feature_sve2p1 =
+  AARCH64_FEATURE (SVE2p1);

 #define CORE		&aarch64_feature_v8
 #define FP		&aarch64_feature_fp
@ -2684,6 +2697,7 @@ static const aarch64_feature_set aarch64_feature_sme2p1 =
 #define D128_THE  &aarch64_feature_d128_the
 #define B16B16  &aarch64_feature_b16b16
 #define SME2p1  &aarch64_feature_sme2p1
+#define SVE2p1  &aarch64_feature_sve2p1

 #define CORE_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS) \
  { NAME, OPCODE, MASK, CLASS, OP, CORE, OPS, QUALS, FLAGS, 0, 0, NULL }
@ -2762,6 +2776,12 @@ static const aarch64_feature_set aarch64_feature_sme2p1 =
 #define B16B16_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
  { NAME, OPCODE, MASK, CLASS, OP, B16B16, OPS, QUALS, \
    FLAGS | F_STRICT, CONSTRAINTS, TIED, NULL }
+#define SVE2p1_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
+  { NAME, OPCODE, MASK, CLASS, OP, SVE2p1, OPS, QUALS, \
+    FLAGS | F_STRICT, 0, TIED, NULL }
+#define SVE2p1_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
+  { NAME, OPCODE, MASK, CLASS, OP, SVE2p1, OPS, QUALS, \
+    FLAGS | F_STRICT, CONSTRAINTS, TIED, NULL }
 #define SVE2AES_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
  { NAME, OPCODE, MASK, CLASS, OP, SVE2_AES, OPS, QUALS, \
    FLAGS | F_STRICT, 0, TIED, NULL }
@ -6309,6 +6329,15 @@ const struct aarch64_opcode aarch64_opcode_table[] =
  SME2p1_INSN ("movaz", 0xc0460200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrsh_1), OP_SVE_HH, 0, 0),
  SME2p1_INSN ("movaz", 0xc0860200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrss_1), OP_SVE_SS, 0, 0),
  SME2p1_INSN ("movaz", 0xc0c60200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrsd_1), OP_SVE_DD, 0, 0),
+
+/* SVE2p1 Instructions.  */
+  SVE2p1_INSNC("addqv",0x04052000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("andqv",0x041e2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("smaxqv",0x040c2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("sminqv",0x040e2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("umaxqv",0x040d2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("uminqv",0x040f2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, C_SCAN_MOVPRFX, 0),
+
  {0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
 };