aarch64: [SME] SVE2 instructions added to support SME

This patch adds the new SVE2 instructions introduced to support the SME extension.
The following SVE2 instructions are added by the SME architecture:
* PSEL,
* REVD, SCLAMP and UCLAMP.

gas/ChangeLog:

	* config/tc-aarch64.c (parse_sme_pred_reg_with_index):
	New parser.
	(parse_operands): Handle AARCH64_OPND_SME_PnT_Wm_imm.
	* testsuite/gas/aarch64/sme-9-illegal.d: New test.
	* testsuite/gas/aarch64/sme-9-illegal.l: New test.
	* testsuite/gas/aarch64/sme-9-illegal.s: New test.
	* testsuite/gas/aarch64/sme-9.d: New test.
	* testsuite/gas/aarch64/sme-9.s: New test.

include/ChangeLog:

	* opcode/aarch64.h (enum aarch64_opnd): New operand
	AARCH64_OPND_SME_PnT_Wm_imm.

opcodes/ChangeLog:

	* aarch64-asm.c (aarch64_ins_sme_pred_reg_with_index):
	New inserter.
	* aarch64-dis.c (aarch64_ext_sme_pred_reg_with_index):
	New extractor.
	* aarch64-opc.c (aarch64_print_operand): Printout of
	OPND_SME_PnT_Wm_imm.
	* aarch64-opc.h (enum aarch64_field_kind): New bitfields
	FLD_SME_Rm, FLD_SME_i1, FLD_SME_tszh, FLD_SME_tszl.
	* aarch64-tbl.h (OP_SVE_NN_BHSD): New qualifier.
	(OP_SVE_QMQ): New qualifier.
	(struct aarch64_opcode): New instructions PSEL, REVD,
	SCLAMP and UCLAMP.
	* aarch64-asm-2.c: Regenerate.
	* aarch64-dis-2.c: Regenerate.
	* aarch64-opc-2.c: Regenerate.
This commit is contained in:
Przemyslaw Wirkus
2021-11-17 20:26:53 +00:00
parent 8f1bfdb448
commit d3de086010
17 changed files with 736 additions and 191 deletions
+78
View File
@@ -4655,6 +4655,65 @@ parse_sme_sm_za (char **str)
return TOLOWER (p[0]);
}
/* Parse a source scalable predicate register that carries an element size
   suffix and a bracketed index made of an index base register (W12-W15)
   and an element index:

     <Pn>.<T>[<Wv>, <imm>]
     <Pn>.<T>[<Wv>, #<imm>]

   <T> must be b, h, s or d; the largest element index accepted is 15, 7, 3
   or 1 respectively.  The bracketed part is parsed by
   parse_sme_za_hv_tiles_operand_index.

   On success <T> is stored in *QUALIFIER, <Wv> in *INDEX_BASE_REG and <imm>
   in *IMM, and the number of <Pn> is returned.  Otherwise PARSE_FAIL is
   returned; a syntax error is recorded here when the element size is not
   one of the four above or when the element index is out of range.  */
static int
parse_sme_pred_reg_with_index (char **str,
                               int *index_base_reg,
                               int *imm,
                               aarch64_opnd_qualifier_t *qualifier)
{
  const reg_entry *pred = parse_reg_with_qual (str, REG_TYPE_PN, qualifier);
  int64_t max_elem_index;
  int64_t elem_index;
  int pn;

  if (pred == NULL)
    return PARSE_FAIL;
  pn = pred->number;

  /* The wider the element, the fewer of them there are to index.  */
  if (*qualifier == AARCH64_OPND_QLF_S_B)
    max_elem_index = 15;
  else if (*qualifier == AARCH64_OPND_QLF_S_H)
    max_elem_index = 7;
  else if (*qualifier == AARCH64_OPND_QLF_S_S)
    max_elem_index = 3;
  else if (*qualifier == AARCH64_OPND_QLF_S_D)
    max_elem_index = 1;
  else
    {
      set_syntax_error (_("wrong predicate register element size, allowed b, h, s and d"));
      return PARSE_FAIL;
    }

  if (!parse_sme_za_hv_tiles_operand_index (str, index_base_reg, &elem_index))
    return PARSE_FAIL;

  if (elem_index < 0 || max_elem_index < elem_index)
    {
      set_syntax_error (_("element index out of range for given variant"));
      return PARSE_FAIL;
    }

  *imm = elem_index;
  return pn;
}
/* Parse a system register or a PSTATE field name for an MSR/MRS instruction.
Returns the encoding for the option, or PARSE_FAIL.
@@ -7068,6 +7127,25 @@ parse_operands (char *str, const aarch64_opcode *opcode)
info->reg.regno = val;
break;
case AARCH64_OPND_SME_PnT_Wm_imm:
/* <Pn>.<T>[<Wm>, #<imm>] */
{
int index_base_reg;
int imm;
val = parse_sme_pred_reg_with_index (&str,
&index_base_reg,
&imm,
&qualifier);
if (val == PARSE_FAIL)
goto failure;
info->za_tile_vector.regno = val;
info->za_tile_vector.index.regno = index_base_reg;
info->za_tile_vector.index.imm = imm;
info->qualifier = qualifier;
break;
}
case AARCH64_OPND_SVE_ADDR_RI_S4x16:
case AARCH64_OPND_SVE_ADDR_RI_S4x32:
case AARCH64_OPND_SVE_ADDR_RI_S4xVL:
@@ -0,0 +1,3 @@
#as: -march=armv8-a+sme
#source: sme-9-illegal.s
#error_output: sme-9-illegal.l
+83
View File
@@ -0,0 +1,83 @@
[^:]*: Assembler messages:
[^:]*:[0-9]+: Error: wrong predicate register element size, allowed b, h, s and d at operand 3 -- `psel p1,p15,p3.q\[w15\]'
[^:]*:[0-9]+: Error: expected vector select register W12-W15 at operand 3 -- `psel p1,p15,p3.b\[w11\]'
[^:]*:[0-9]+: Error: expected vector select register W12-W15 at operand 3 -- `psel p8,p11,p15.h\[w16\]'
[^:]*:[0-9]+: Error: expected vector select register W12-W15 at operand 3 -- `psel p2,p7,p15.s\[w3\]'
[^:]*:[0-9]+: Error: expected vector select register W12-W15 at operand 3 -- `psel p13,p3,p1.d\[w17\]'
[^:]*:[0-9]+: Error: element index out of range for given variant at operand 3 -- `psel p5,p12,p9.b\[w15,#16\]'
[^:]*:[0-9]+: Error: element index out of range for given variant at operand 3 -- `psel p1,p8,p6.h\[w14,#8\]'
[^:]*:[0-9]+: Error: element index out of range for given variant at operand 3 -- `psel p8,p4,p15.s\[w13,#4\]'
[^:]*:[0-9]+: Error: element index out of range for given variant at operand 3 -- `psel p1,p1,p1.d\[w12,#2\]'
[^:]*:[0-9]+: Error: operand mismatch -- `revd z0.q,p0/m,z0.b'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: revd z0.q, p0/m, z0.q
[^:]*:[0-9]+: Error: operand mismatch -- `sclamp z8.b,z1.b,z31.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: sclamp z8.b, z1.b, z31.b
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: sclamp z8.h, z1.h, z31.h
[^:]*:[0-9]+: Info: sclamp z8.s, z1.s, z31.s
[^:]*:[0-9]+: Info: sclamp z8.d, z1.d, z31.d
[^:]*:[0-9]+: Error: operand mismatch -- `sclamp z31.h,z0.h,z17.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: sclamp z31.h, z0.h, z17.h
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: sclamp z31.b, z0.b, z17.b
[^:]*:[0-9]+: Info: sclamp z31.s, z0.s, z17.s
[^:]*:[0-9]+: Info: sclamp z31.d, z0.d, z17.d
[^:]*:[0-9]+: Error: operand mismatch -- `sclamp z0.s,z31.s,z17.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: sclamp z0.s, z31.s, z17.s
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: sclamp z0.b, z31.b, z17.b
[^:]*:[0-9]+: Info: sclamp z0.h, z31.h, z17.h
[^:]*:[0-9]+: Info: sclamp z0.d, z31.d, z17.d
[^:]*:[0-9]+: Error: operand mismatch -- `sclamp z31.d,z0.d,z17.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: sclamp z31.d, z0.d, z17.d
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: sclamp z31.b, z0.b, z17.b
[^:]*:[0-9]+: Info: sclamp z31.h, z0.h, z17.h
[^:]*:[0-9]+: Info: sclamp z31.s, z0.s, z17.s
[^:]*:[0-9]+: Error: operand mismatch -- `sclamp z31.q,z0.d,z17.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: sclamp z31.d, z0.d, z17.d
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: sclamp z31.b, z0.b, z17.b
[^:]*:[0-9]+: Info: sclamp z31.h, z0.h, z17.h
[^:]*:[0-9]+: Info: sclamp z31.s, z0.s, z17.s
[^:]*:[0-9]+: Error: operand mismatch -- `uclamp z8.b,z1.b,z31.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: uclamp z8.b, z1.b, z31.b
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: uclamp z8.h, z1.h, z31.h
[^:]*:[0-9]+: Info: uclamp z8.s, z1.s, z31.s
[^:]*:[0-9]+: Info: uclamp z8.d, z1.d, z31.d
[^:]*:[0-9]+: Error: operand mismatch -- `uclamp z31.h,z0.h,z17.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: uclamp z31.h, z0.h, z17.h
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: uclamp z31.b, z0.b, z17.b
[^:]*:[0-9]+: Info: uclamp z31.s, z0.s, z17.s
[^:]*:[0-9]+: Info: uclamp z31.d, z0.d, z17.d
[^:]*:[0-9]+: Error: operand mismatch -- `uclamp z0.s,z31.s,z17.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: uclamp z0.s, z31.s, z17.s
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: uclamp z0.b, z31.b, z17.b
[^:]*:[0-9]+: Info: uclamp z0.h, z31.h, z17.h
[^:]*:[0-9]+: Info: uclamp z0.d, z31.d, z17.d
[^:]*:[0-9]+: Error: operand mismatch -- `uclamp z31.d,z0.d,z17.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: uclamp z31.d, z0.d, z17.d
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: uclamp z31.b, z0.b, z17.b
[^:]*:[0-9]+: Info: uclamp z31.h, z0.h, z17.h
[^:]*:[0-9]+: Info: uclamp z31.s, z0.s, z17.s
[^:]*:[0-9]+: Error: operand mismatch -- `uclamp z31.q,z0.d,z17.q'
[^:]*:[0-9]+: Info: did you mean this\?
[^:]*:[0-9]+: Info: uclamp z31.d, z0.d, z17.d
[^:]*:[0-9]+: Info: other valid variant\(s\):
[^:]*:[0-9]+: Info: uclamp z31.b, z0.b, z17.b
[^:]*:[0-9]+: Info: uclamp z31.h, z0.h, z17.h
[^:]*:[0-9]+: Info: uclamp z31.s, z0.s, z17.s
+25
View File
@@ -0,0 +1,25 @@
/* Scalable Matrix Extension (SME). */
psel p1, p15, p3.q[w15]
psel p1, p15, p3.b[w11]
psel p8, p11, p15.h[w16]
psel p2, p7, p15.s[w3]
psel p13, p3, p1.d[w17]
psel p5, p12, p9.b[w15, #16]
psel p1, p8, p6.h[w14, #8]
psel p8, p4, p15.s[w13, #4]
psel p1, p1, p1.d[w12, #2]
revd z0.q, p0/m, z0.b
sclamp z8.b, z1.b, z31.q
sclamp z31.h, z0.h, z17.q
sclamp z0.s, z31.s, z17.q
sclamp z31.d, z0.d, z17.q
sclamp z31.q, z0.d, z17.q
uclamp z8.b, z1.b, z31.q
uclamp z31.h, z0.h, z17.q
uclamp z0.s, z31.s, z17.q
uclamp z31.d, z0.d, z17.q
uclamp z31.q, z0.d, z17.q
+73
View File
@@ -0,0 +1,73 @@
#name: SVE2 instructions added to support SME
#as: -march=armv8-a+sme
#objdump: -dr
.*: file format .*
Disassembly of section \.text:
0+ <.*>:
0: 25277c61 psel p1, p15, p3.b\[w15, 0\]
4: 252778a2 psel p2, p14, p5.b\[w15, 0\]
8: 257f74e3 psel p3, p13, p7.b\[w15, 7\]
c: 25ff7125 psel p5, p12, p9.b\[w15, 15\]
10: 252a6de8 psel p8, p11, p15.h\[w14, 0\]
14: 252a682d psel p13, p10, p1.h\[w14, 0\]
18: 257a640f psel p15, p9, p0.h\[w14, 3\]
1c: 25fa60c1 psel p1, p8, p6.h\[w14, 7\]
20: 25315de2 psel p2, p7, p15.s\[w13, 0\]
24: 253159e3 psel p3, p6, p15.s\[w13, 0\]
28: 257155e5 psel p5, p5, p15.s\[w13, 1\]
2c: 25f151e8 psel p8, p4, p15.s\[w13, 3\]
30: 25604c2d psel p13, p3, p1.d\[w12, 0\]
34: 2560482f psel p15, p2, p1.d\[w12, 0\]
38: 25e04421 psel p1, p1, p1.d\[w12, 1\]
3c: 052e8000 revd z0.q, p0/m, z0.q
40: 052e9c00 revd z0.q, p7/m, z0.q
44: 052e83e0 revd z0.q, p0/m, z31.q
48: 052e9c1f revd z31.q, p7/m, z0.q
4c: 4411c3e0 sclamp z0.b, z31.b, z17.b
50: 4411c01f sclamp z31.b, z0.b, z17.b
54: 441fc028 sclamp z8.b, z1.b, z31.b
58: 4451c01f sclamp z31.h, z0.h, z17.h
5c: 445fc028 sclamp z8.h, z1.h, z31.h
60: 4491c3e0 sclamp z0.s, z31.s, z17.s
64: 4491c01f sclamp z31.s, z0.s, z17.s
68: 449fc028 sclamp z8.s, z1.s, z31.s
6c: 44d1c3e0 sclamp z0.d, z31.d, z17.d
70: 44d1c01f sclamp z31.d, z0.d, z17.d
74: 44dfc028 sclamp z8.d, z1.d, z31.d
78: 4411c7e0 uclamp z0.b, z31.b, z17.b
7c: 4411c41f uclamp z31.b, z0.b, z17.b
80: 441fc428 uclamp z8.b, z1.b, z31.b
84: 4451c7e0 uclamp z0.h, z31.h, z17.h
88: 4451c41f uclamp z31.h, z0.h, z17.h
8c: 445fc428 uclamp z8.h, z1.h, z31.h
90: 4491c7e0 uclamp z0.s, z31.s, z17.s
94: 4491c41f uclamp z31.s, z0.s, z17.s
98: 449fc428 uclamp z8.s, z1.s, z31.s
9c: 44d1c7e0 uclamp z0.d, z31.d, z17.d
a0: 44d1c41f uclamp z31.d, z0.d, z17.d
a4: 44dfc428 uclamp z8.d, z1.d, z31.d
a8: 0420bca3 movprfx z3, z5
ac: 052e84a3 revd z3.q, p1/m, z5.q
b0: 0420bc81 movprfx z1, z4
b4: 052e84a1 revd z1.q, p1/m, z5.q
b8: 0420bc81 movprfx z1, z4
bc: 440bc141 sclamp z1.b, z10.b, z11.b
c0: 0420bc82 movprfx z2, z4
c4: 444bc142 sclamp z2.h, z10.h, z11.h
c8: 0420bc83 movprfx z3, z4
cc: 448bc143 sclamp z3.s, z10.s, z11.s
d0: 0420bca4 movprfx z4, z5
d4: 44cbc144 sclamp z4.d, z10.d, z11.d
d8: 0420bc81 movprfx z1, z4
dc: 440bc541 uclamp z1.b, z10.b, z11.b
e0: 0420bc82 movprfx z2, z4
e4: 444bc542 uclamp z2.h, z10.h, z11.h
e8: 0420bc83 movprfx z3, z4
ec: 448bc543 uclamp z3.s, z10.s, z11.s
f0: 0420bca4 movprfx z4, z5
f4: 44cbc544 uclamp z4.d, z10.d, z11.d
f8: 25277c61 psel p1, p15, p3.b\[w15, 0\]
fc: 252778a2 psel p2, p14, p5.b\[w15, 0\]
+86
View File
@@ -0,0 +1,86 @@
/* SVE2 instructions added to support SME. */
psel p1, p15, p3.b[w15, 0]
psel p2, p14, p5.b[w15, 0]
psel p3, p13, p7.b[w15, 7]
psel p5, p12, p9.b[w15, 15]
psel p8, p11, p15.h[w14, 0]
psel p13, p10, p1.h[w14, 0]
psel p15, p9, p0.h[w14, 3]
psel p1, p8, p6.h[w14, 7]
psel p2, p7, p15.s[w13, 0]
psel p3, p6, p15.s[w13, 0]
psel p5, p5, p15.s[w13, 1]
psel p8, p4, p15.s[w13, 3]
psel p13, p3, p1.d[w12, 0]
psel p15, p2, p1.d[w12, 0]
psel p1, p1, p1.d[w12, 1]
revd z0.q, p0/m, z0.q
revd z0.q, p7/m, z0.q
revd z0.q, p0/m, z31.q
revd z31.q, p7/m, z0.q
sclamp z0.b, z31.b, z17.b
sclamp z31.b, z0.b, z17.b
sclamp z8.b, z1.b, z31.b
sclamp z31.h, z0.h, z17.h
sclamp z8.h, z1.h, z31.h
sclamp z0.s, z31.s, z17.s
sclamp z31.s, z0.s, z17.s
sclamp z8.s, z1.s, z31.s
sclamp z0.d, z31.d, z17.d
sclamp z31.d, z0.d, z17.d
sclamp z8.d, z1.d, z31.d
uclamp z0.b, z31.b, z17.b
uclamp z31.b, z0.b, z17.b
uclamp z8.b, z1.b, z31.b
uclamp z0.h, z31.h, z17.h
uclamp z31.h, z0.h, z17.h
uclamp z8.h, z1.h, z31.h
uclamp z0.s, z31.s, z17.s
uclamp z31.s, z0.s, z17.s
uclamp z8.s, z1.s, z31.s
uclamp z0.d, z31.d, z17.d
uclamp z31.d, z0.d, z17.d
uclamp z8.d, z1.d, z31.d
/* The unpredicated MOVPRFX instruction. */
movprfx z3, z5
revd z3.q, p1/m, z5.q
movprfx z1, z4
revd z1.q, p1/m, z5.q
movprfx z1, z4
sclamp z1.b, z10.b, z11.b
movprfx z2, z4
sclamp z2.h, z10.h, z11.h
movprfx z3, z4
sclamp z3.s, z10.s, z11.s
movprfx z4, z5
sclamp z4.d, z10.d, z11.d
movprfx z1, z4
uclamp z1.b, z10.b, z11.b
movprfx z2, z4
uclamp z2.h, z10.h, z11.h
movprfx z3, z4
uclamp z3.s, z10.s, z11.s
movprfx z4, z5
uclamp z4.d, z10.d, z11.d
foo .req p1
bar .req w15
psel foo, p15, p3.b[w15, 0]
psel p2, p14, p5.b[bar, 0]
+1
View File
@@ -455,6 +455,7 @@ enum aarch64_opnd
AARCH64_OPND_SME_ZA_array, /* SME ZA[<Wv>{, #<imm>}]. */
AARCH64_OPND_SME_ADDR_RI_U4xVL, /* SME [<Xn|SP>{, #<imm>, MUL VL}]. */
AARCH64_OPND_SME_SM_ZA, /* SME {SM | ZA}. */
AARCH64_OPND_SME_PnT_Wm_imm, /* SME <Pn>.<T>[<Wm>, #<imm>]. */
AARCH64_OPND_TME_UIMM16, /* TME unsigned 16-bit immediate. */
AARCH64_OPND_SM3_IMM2, /* SM3 encodes lane in bits [13, 14]. */
};
+4 -2
View File
@@ -675,7 +675,7 @@ aarch64_insert_operand (const aarch64_operand *self,
case 33:
case 34:
case 35:
case 221:
case 222:
return aarch64_ins_reglane (self, info, code, inst, errors);
case 36:
return aarch64_ins_reglist (self, info, code, inst, errors);
@@ -721,7 +721,7 @@ aarch64_insert_operand (const aarch64_operand *self,
case 189:
case 190:
case 215:
case 220:
case 221:
return aarch64_ins_imm (self, info, code, inst, errors);
case 44:
case 45:
@@ -891,6 +891,8 @@ aarch64_insert_operand (const aarch64_operand *self,
return aarch64_ins_sme_addr_ri_u4xvl (self, info, code, inst, errors);
case 219:
return aarch64_ins_sme_sm_za (self, info, code, inst, errors);
case 220:
return aarch64_ins_sme_pred_reg_with_index (self, info, code, inst, errors);
default: assert (0); abort ();
}
}
+67
View File
@@ -1452,6 +1452,73 @@ aarch64_ins_sme_sm_za (const aarch64_operand *self,
return true;
}
/* Encode the indexed source scalable predicate operand
   <Pn>.<T>[<Wm>, <imm>].

   SELF->fields[0] receives the index base register, stored as its number
   minus 12 (the register is one of W12-W15), and SELF->fields[1] receives
   Pn.  The element size <T> and the element index <imm> share the
   remaining three fields, SELF->fields[2..4], i.e. "i1:tszh:tszl": the
   position of the lowest set bit of tszh:tszl selects the element size and
   the bits above it, together with i1, hold the index.  The index ranges
   from 0 to one less than the number of elements in a 128-bit vector.

   Returns true on success and false when INFO->qualifier is not one of the
   B, H, S or D element sizes.  */
bool
aarch64_ins_sme_pred_reg_with_index (const aarch64_operand *self,
                                     const aarch64_opnd_info *info,
                                     aarch64_insn *code,
                                     const aarch64_inst *inst ATTRIBUTE_UNUSED,
                                     aarch64_operand_error *errors ATTRIBUTE_UNUSED)
{
  int fld_pn = info->za_tile_vector.regno;
  int fld_rm = info->za_tile_vector.index.regno - 12;
  int imm = info->za_tile_vector.index.imm;
  int fld_i1, fld_tszh, fld_tszl;

  insert_field (self->fields[0], code, fld_rm, 0);
  insert_field (self->fields[1], code, fld_pn, 0);

  /* Optional element index, defaulting to 0, in the range 0 to one less than
     the number of vector elements in a 128-bit vector register, encoded in
     "i1:tszh:tszl".

     i1  tszh  tszl  <T>
     0   0     000   RESERVED
     x   x     xx1   B
     x   x     x10   H
     x   x     100   S
     x   1     000   D
  */
  switch (info->qualifier)
    {
    case AARCH64_OPND_QLF_S_B:
      /* <imm> is 4 bit value.  */
      fld_i1 = (imm >> 3) & 0x1;
      fld_tszh = (imm >> 2) & 0x1;
      fld_tszl = ((imm << 1) | 0x1) & 0x7;
      break;
    case AARCH64_OPND_QLF_S_H:
      /* <imm> is 3 bit value.  */
      fld_i1 = (imm >> 2) & 0x1;
      fld_tszh = (imm >> 1) & 0x1;
      fld_tszl = ((imm << 2) | 0x2) & 0x7;
      break;
    case AARCH64_OPND_QLF_S_S:
      /* <imm> is 2 bit value.  */
      fld_i1 = (imm >> 1) & 0x1;
      fld_tszh = imm & 0x1;
      fld_tszl = 0x4;
      break;
    case AARCH64_OPND_QLF_S_D:
      /* <imm> is 1 bit value.  */
      fld_i1 = imm & 0x1;
      fld_tszh = 0x1;
      fld_tszl = 0x0;
      break;
    default:
      /* Not an element size this operand can encode.  Still trap when
         assertions are enabled, but when assert is compiled out report
         failure rather than inserting uninitialised field values.  */
      assert (0);
      return false;
    }

  insert_field (self->fields[2], code, fld_i1, 0);
  insert_field (self->fields[3], code, fld_tszh, 0);
  insert_field (self->fields[4], code, fld_tszl, 0);
  return true;
}
/* Miscellaneous encoding functions. */
/* Encode size[0], i.e. bit 22, for
+1
View File
@@ -103,6 +103,7 @@ AARCH64_DECL_OPD_INSERTER (ins_sme_za_list);
AARCH64_DECL_OPD_INSERTER (ins_sme_za_array);
AARCH64_DECL_OPD_INSERTER (ins_sme_addr_ri_u4xvl);
AARCH64_DECL_OPD_INSERTER (ins_sme_sm_za);
AARCH64_DECL_OPD_INSERTER (ins_sme_pred_reg_with_index);
AARCH64_DECL_OPD_INSERTER (ins_imm_rotate1);
AARCH64_DECL_OPD_INSERTER (ins_imm_rotate2);
+235 -189
View File
File diff suppressed because it is too large Load Diff
+43
View File
@@ -1890,6 +1890,49 @@ aarch64_ext_sme_sm_za (const aarch64_operand *self,
return true;
}
/* Decode the indexed source scalable predicate operand
   <Pn>.<T>[<Wm>, <imm>].

   SELF->fields[0] holds the index base register as an offset from W12 and
   SELF->fields[1] holds Pn.  SELF->fields[2..4] are i1, tszh and tszl:

     tszl = xx1             B, index = i1:tszh:tszl<2:1>
     tszl = x10             H, index = i1:tszh:tszl<2>
     tszl = 100             S, index = i1:tszh
     tszl = 000, tszh = 1   D, index = i1

   Any other combination is not decoded and false is returned; the register
   numbers have already been written to INFO by then, but the qualifier and
   the index have not.  Returns true otherwise.  */
bool
aarch64_ext_sme_pred_reg_with_index (const aarch64_operand *self,
                                     aarch64_opnd_info *info, aarch64_insn code,
                                     const aarch64_inst *inst ATTRIBUTE_UNUSED,
                                     aarch64_operand_error *errors ATTRIBUTE_UNUSED)
{
  aarch64_insn rm = extract_field (self->fields[0], code, 0);
  aarch64_insn pn = extract_field (self->fields[1], code, 0);
  aarch64_insn i1 = extract_field (self->fields[2], code, 0);
  aarch64_insn tszh = extract_field (self->fields[3], code, 0);
  aarch64_insn tszl = extract_field (self->fields[4], code, 0);
  /* The two bits that take part in the index for B, H and S alike.  */
  aarch64_insn i1_tszh = (i1 << 1) | tszh;
  int elem_index;

  info->za_tile_vector.regno = pn;
  info->za_tile_vector.index.regno = rm + 12;

  /* The lowest set bit of tszh:tszl gives the element size; whatever lies
     above it belongs to the index.  */
  if (tszl & 0x1)
    {
      info->qualifier = AARCH64_OPND_QLF_S_B;
      elem_index = (i1_tszh << 2) | (tszl >> 1);
    }
  else if ((tszl & 0x3) == 0x2)
    {
      info->qualifier = AARCH64_OPND_QLF_S_H;
      elem_index = (i1_tszh << 1) | (tszl >> 2);
    }
  else if (tszl == 0x4)
    {
      info->qualifier = AARCH64_OPND_QLF_S_S;
      elem_index = i1_tszh;
    }
  else if (tszl == 0x0 && tszh == 0x1)
    {
      info->qualifier = AARCH64_OPND_QLF_S_D;
      elem_index = i1;
    }
  else
    return false;

  info->za_tile_vector.index.imm = elem_index;
  return true;
}
/* Decode Zn[MM], where MM has a 7-bit triangular encoding. The fields
array specifies which field to use for Zn. MM is encoded in the
concatenation of imm5 and SVE_tszh, with imm5 being the less
+1
View File
@@ -127,6 +127,7 @@ AARCH64_DECL_OPD_EXTRACTOR (ext_sme_za_list);
AARCH64_DECL_OPD_EXTRACTOR (ext_sme_za_array);
AARCH64_DECL_OPD_EXTRACTOR (ext_sme_addr_ri_u4xvl);
AARCH64_DECL_OPD_EXTRACTOR (ext_sme_sm_za);
AARCH64_DECL_OPD_EXTRACTOR (ext_sme_pred_reg_with_index);
AARCH64_DECL_OPD_EXTRACTOR (ext_imm_rotate1);
AARCH64_DECL_OPD_EXTRACTOR (ext_imm_rotate2);
+1
View File
@@ -244,6 +244,7 @@ const struct aarch64_operand aarch64_operands[] =
{AARCH64_OPND_CLASS_SVE_REG, "SME_ZA_array", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_SME_Rv,FLD_imm4_2}, "ZA array"},
{AARCH64_OPND_CLASS_ADDRESS, "SME_ADDR_RI_U4xVL", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn,FLD_imm4_2}, "memory offset"},
{AARCH64_OPND_CLASS_ADDRESS, "SME_SM_ZA", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_CRm}, "streaming mode"},
{AARCH64_OPND_CLASS_SVE_REG, "SME_PnT_Wm_imm", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_SME_Rm,FLD_SVE_Pn,FLD_SME_i1,FLD_SME_tszh,FLD_SME_tszl}, "Source scalable predicate register with index "},
{AARCH64_OPND_CLASS_IMMEDIATE, "TME_UIMM16", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_imm16}, "a 16-bit unsigned immediate for TME tcancel"},
{AARCH64_OPND_CLASS_SIMD_ELEMENT, "SM3_IMM2", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_SM3_imm2}, "an indexed SM3 vector immediate"},
{AARCH64_OPND_CLASS_NIL, "", 0, {0}, "DUMMY"},
+12
View File
@@ -332,6 +332,10 @@ const aarch64_field fields[] =
{ 13, 2 }, /* SME_Rv: vector select register W12-W15, bits [14:13]. */
{ 13, 3 }, /* SME Pm second source scalable predicate register P0-P7. */
{ 0, 8 }, /* SME_zero_mask: list of up to 8 tile names separated by commas [7:0]. */
{ 16, 2 }, /* SME_Rm: index base register W12-W15, bits [17:16]. */
{ 23, 1 }, /* SME_i1: immediate field, bit 23. */
{ 22, 1 }, /* SME_tszh: immediate and qualifier field, bit 22. */
{ 18, 3 }, /* SME_tszl: immediate and qualifier field, bits [20:18]. */
{ 11, 2 }, /* rotate1: FCMLA immediate rotate. */
{ 13, 2 }, /* rotate2: Indexed element FCMLA immediate rotate. */
{ 12, 1 }, /* rotate3: FCADD immediate rotate. */
@@ -3473,6 +3477,14 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
snprintf (buf, size, "%s", opnd->reg.regno == 's' ? "sm" : "za");
break;
case AARCH64_OPND_SME_PnT_Wm_imm:
snprintf (buf, size, "p%d.%s[w%d, %d]",
opnd->za_tile_vector.regno,
aarch64_get_qualifier_name (opnd->qualifier),
opnd->za_tile_vector.index.regno,
opnd->za_tile_vector.index.imm);
break;
case AARCH64_OPND_CRn:
case AARCH64_OPND_CRm:
snprintf (buf, size, "C%" PRIi64, opnd->imm.value);
+4
View File
@@ -159,6 +159,10 @@ enum aarch64_field_kind
FLD_SME_Rv,
FLD_SME_Pm,
FLD_SME_zero_mask,
FLD_SME_Rm,
FLD_SME_i1,
FLD_SME_tszh,
FLD_SME_tszl,
FLD_rotate1,
FLD_rotate2,
FLD_rotate3,
+19
View File
@@ -1509,6 +1509,13 @@
{ \
QLF3(S_B,P_Z,S_B), \
}
/* Qualifier sequences for a three-operand form in which the first two
   operands are given NIL and the third is one of S_B, S_H, S_S or S_D.
   The PSEL entry in aarch64_opcode_table uses this list; there only the
   indexed predicate operand, the third one, carries an element size.  */
#define OP_SVE_NN_BHSD \
{ \
QLF3(NIL,NIL,S_B), \
QLF3(NIL,NIL,S_H), \
QLF3(NIL,NIL,S_S), \
QLF3(NIL,NIL,S_D) \
}
#define OP_SVE_BZBB \
{ \
QLF4(S_B,P_Z,S_B,S_B), \
@@ -1537,6 +1544,10 @@
{ \
QLF3(S_D,P_M,S_D), \
}
/* The single qualifier sequence S_Q, P_M, S_Q.  The REVD entry in
   aarch64_opcode_table uses it for its Zd, Pg, Zn operands.  */
#define OP_SVE_QMQ \
{ \
QLF3(S_Q,P_M,S_Q), \
}
#define OP_SVE_DMH \
{ \
QLF3(S_D,P_M,S_H), \
@@ -5178,6 +5189,11 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SME_INSN ("ldr", 0xe1000000, 0xffff9c10, sme_ldr, 0, OP2 (SME_ZA_array, SME_ADDR_RI_U4xVL), {}, 0, 1),
SME_INSN ("str", 0xe1200000, 0xffff9c10, sme_str, 0, OP2 (SME_ZA_array, SME_ADDR_RI_U4xVL), {}, 0, 1),
SME_INSNC ("revd", 0x52e8000, 0xffffe000, sme_misc, 0, OP3 (SVE_Zd, SVE_Pg3, SVE_Zn), OP_SVE_QMQ, 0, C_SCAN_MOVPRFX, 0),
SME_INSNC ("sclamp", 0x4400c000, 0xff20fc00, sve_size_bhsd, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_BHSD, 0, C_SCAN_MOVPRFX, 0),
SME_INSNC ("uclamp", 0x4400c400, 0xff20fc00, sve_size_bhsd, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_BHSD, 0, C_SCAN_MOVPRFX, 0),
SME_INSN ("psel", 0x25204000, 0xff20c000, sme_misc, 0, OP3 (SVE_Pd, SVE_Pg4_10, SME_PnT_Wm_imm), OP_SVE_NN_BHSD, 0, 0),
/* SIMD Dot Product (optional in v8.2-A). */
DOT_INSN ("udot", 0x2e009400, 0xbf20fc00, dotproduct, OP3 (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
DOT_INSN ("sdot", 0xe009400, 0xbf20fc00, dotproduct, OP3 (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
@@ -5773,6 +5789,9 @@ const struct aarch64_opcode aarch64_opcode_table[] =
Y(ADDRESS, sme_sm_za, "SME_SM_ZA", 0, \
F(FLD_CRm), \
"streaming mode") \
Y(SVE_REG, sme_pred_reg_with_index, "SME_PnT_Wm_imm", 0, \
F(FLD_SME_Rm,FLD_SVE_Pn,FLD_SME_i1,FLD_SME_tszh,FLD_SME_tszl), \
"Source scalable predicate register with index ") \
Y(IMMEDIATE, imm, "TME_UIMM16", 0, F(FLD_imm16), \
"a 16-bit unsigned immediate for TME tcancel") \
Y(SIMD_ELEMENT, reglane, "SM3_IMM2", 0, F(FLD_SM3_imm2), \