arc: Add ARCHS release 310a tune variant.

Add mtune and mcpu options for the ARCHS release 310a type CPU. The
mtune=release31a option is designed to be used as an alternative to
the mcpu=hs4x_rel31 option.
ARCHS4x release 310a uses DSP instructions which are implemented a bit
differently from mpy9. Hence, use the safer mpy2 option.

gcc/
	* config/arc/arc-arch.h (arc_tune_attr): Add
	ARC_TUNE_ARCHS4X_REL31A variant.
	* config/arc/arc.cc (arc_override_options): Tune options for
	release 310a.
	(arc_sched_issue_rate): Use correct enum.
	(arc600_corereg_hazard): Textual change.
	(arc_hazard): Add release 310a tuning.
	* config/arc/arc.md (tune): Update and take into consideration new
	tune option.
	(tune_dspmpy): Likewise.
	(tune_store): New attribute.
	* config/arc/arc.opt (mtune): New tune option.
	* config/arc/arcHS4x.md (hs4x_brcc0, hs4x_brcc1): New cpu units.
	(hs4x_brcc_op): New instruction reservation.
	(hs4x_data_store_1_op): Likewise.
	* config/arc/arc-cpus.def (hs4x_rel31): New cpu variant.
	* config/arc/arc-tables.opt: Regenerate.
	* config/arc/t-multilib: Likewise.
	* doc/invoke.texi (ARC): Update mcpu and tune sections.

Signed-off-by: Claudiu Zissulescu <claziss@gmail.com>
This commit is contained in:
Claudiu Zissulescu
2022-07-18 15:07:00 +03:00
parent 87f46a16ec
commit 7501eec65c
9 changed files with 181 additions and 90 deletions
+2 -1
View File
@@ -77,7 +77,8 @@ enum arc_tune_attr
ARC_TUNE_CORE_3,
ARC_TUNE_ARCHS4X,
ARC_TUNE_ARCHS4XD,
ARC_TUNE_ARCHS4XD_SLOW
ARC_TUNE_ARCHS4XD_SLOW,
ARC_TUNE_ARCHS4X_REL31A
};
/* Extra options for a processor template to hold any CPU specific
+1
View File
@@ -64,6 +64,7 @@ ARC_CPU (hs38, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, NONE)
ARC_CPU (hs38_linux, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64|FL_FPU_FPUD_ALL, NONE, NONE)
ARC_CPU (hs4x, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4X)
ARC_CPU (hs4xd, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4XD)
ARC_CPU (hs4x_rel31, hs, FL_MPYOPT_2|FL_DIVREM|FL_LL64, NONE, ARCHS4X_REL31A)
ARC_CPU (arc600, 6xx, FL_BS, NONE, ARC600)
ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, NONE, ARC600)
+3
View File
@@ -69,6 +69,9 @@ Enum(processor_type) String(hs4x) Value(PROCESSOR_hs4x)
EnumValue
Enum(processor_type) String(hs4xd) Value(PROCESSOR_hs4xd)
EnumValue
Enum(processor_type) String(hs4x_rel31) Value(PROCESSOR_hs4x_rel31)
EnumValue
Enum(processor_type) String(arc600) Value(PROCESSOR_arc600)
+119 -73
View File
@@ -646,8 +646,8 @@ arc_sched_issue_rate (void)
{
switch (arc_tune)
{
case TUNE_ARCHS4X:
case TUNE_ARCHS4XD:
case ARC_TUNE_ARCHS4X:
case ARC_TUNE_ARCHS4XD:
return 3;
default:
break;
@@ -1458,6 +1458,12 @@ arc_override_options (void)
if (!OPTION_SET_P (unaligned_access) && TARGET_HS)
unaligned_access = 1;
if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A))
{
TARGET_CODE_DENSITY_FRAME = 0;
flag_delayed_branch = 0;
}
/* These need to be done at start up. It's convenient to do them here. */
arc_init ();
}
@@ -7817,6 +7823,115 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
return arc_store_addr_hazard_internal_p (producer, consumer);
}
/* Return the length adjustment needed between PRED and SUCC: 4 when a
nop has to be accounted for, 0 otherwise.
For ARC600:
A write to a core reg greater or equal to 32 must not be immediately
followed by a use. Anticipate the length requirement to insert a nop
between PRED and SUCC to prevent a hazard.
Always returns 0 when not compiling for ARC600. */
static int
arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
{
if (!TARGET_ARC600)
return 0;
/* When either pattern is a SEQUENCE, examine element 1 of PRED and
element 0 of SUCC instead of the SEQUENCE itself. */
if (GET_CODE (PATTERN (pred)) == SEQUENCE)
pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
if (GET_CODE (PATTERN (succ)) == SEQUENCE)
succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
/* The ARC600 multiply / multiply-accumulate patterns listed below are
exempt from the check. */
if (recog_memoized (pred) == CODE_FOR_mulsi_600
|| recog_memoized (pred) == CODE_FOR_umul_600
|| recog_memoized (pred) == CODE_FOR_mac_600
|| recog_memoized (pred) == CODE_FOR_mul64_600
|| recog_memoized (pred) == CODE_FOR_mac64_600
|| recog_memoized (pred) == CODE_FOR_umul64_600
|| recog_memoized (pred) == CODE_FOR_umac64_600)
return 0;
/* Walk every sub-rtx of PRED looking for something that writes a
register: a SET or an auto-increment/decrement. */
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
{
const_rtx x = *iter;
switch (GET_CODE (x))
{
case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
break;
default:
/* This is also fine for PRE/POST_MODIFY, because they
contain a SET. */
continue;
}
rtx dest = XEXP (x, 0);
/* Check if this sets an extension register. N.B. we use 61 for the
condition codes, which is definitely not an extension register. */
if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
/* Check if the same register is used by the PAT. The range covers
every 4-byte register occupied by DEST's mode. */
&& (refers_to_regno_p
(REGNO (dest),
REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
PATTERN (succ), 0)))
return 4;
}
return 0;
}
/* Report whether a nop has to be accounted for between PRED and SUCC.
   Only ARC600 is affected: there, a write to a core reg greater or
   equal to 32 must not be immediately followed by a use, and the
   actual check is delegated to arc600_corereg_hazard.  Returns 0 when
   either argument is missing or is not an insn, and for every other
   target.  */
int
arc_hazard (rtx_insn *pred, rtx_insn *succ)
{
  int have_insn_pair = pred && INSN_P (pred) && succ && INSN_P (succ);

  if (have_insn_pair && TARGET_ARC600)
    return arc600_corereg_hazard (pred, succ);

  return 0;
}
/* When tuning for ARC HS release 310a, return 1 if a nop has to be
   placed between PRED and SUCC, 0 otherwise.  A nop is requested when
   a store is directly followed by a compare-and-branch (brcc, with or
   without delay slot) or a loop end, and also when one of those is
   directly followed by a store.  Insns that are neither jumps nor
   single sets are never considered.  */
static int
arc_check_release31a (rtx_insn *pred, rtx_insn *succ)
{
  if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
    return 0;

  if (!JUMP_P (pred) && !single_set (pred))
    return 0;

  if (!JUMP_P (succ) && !single_set (succ))
    return 0;

  /* The workaround only applies to the release 310a tuning.  */
  if (!TARGET_HS || arc_tune != ARC_TUNE_ARCHS4X_REL31A)
    return 0;

  int pred_type = get_attr_type (pred);

  if (pred_type == TYPE_STORE)
    {
      /* Store followed by a conditional branch or loop end.  */
      int succ_type = get_attr_type (succ);
      return (succ_type == TYPE_BRCC
	      || succ_type == TYPE_BRCC_NO_DELAY_SLOT
	      || succ_type == TYPE_LOOP_END);
    }

  /* Conditional branch or loop end followed by a store.  */
  if (pred_type == TYPE_BRCC
      || pred_type == TYPE_BRCC_NO_DELAY_SLOT
      || pred_type == TYPE_LOOP_END)
    return get_attr_type (succ) == TYPE_STORE;

  return 0;
}
/* The same functionality as arc_hazard. It is called in machine
reorg before any other optimization. Hence, the NOP size is taken
into account when doing branch shortening. */
@@ -7830,10 +7945,8 @@ workaround_arc_anomaly (void)
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
succ0 = next_real_insn (insn);
if (arc_hazard (insn, succ0))
{
emit_insn_before (gen_nopv (), succ0);
}
if (arc_hazard (insn, succ0) || arc_check_release31a (insn, succ0))
emit_insn_before (gen_nopv (), succ0);
}
if (!TARGET_ARC700)
@@ -9324,56 +9437,6 @@ disi_highpart (rtx in)
return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
}
/* Return length adjustment for INSN.
For ARC600:
A write to a core reg greater or equal to 32 must not be immediately
followed by a use. Anticipate the length requirement to insert a nop
between PRED and SUCC to prevent a hazard. */
static int
arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
{
if (!TARGET_ARC600)
return 0;
if (GET_CODE (PATTERN (pred)) == SEQUENCE)
pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
if (GET_CODE (PATTERN (succ)) == SEQUENCE)
succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
if (recog_memoized (pred) == CODE_FOR_mulsi_600
|| recog_memoized (pred) == CODE_FOR_umul_600
|| recog_memoized (pred) == CODE_FOR_mac_600
|| recog_memoized (pred) == CODE_FOR_mul64_600
|| recog_memoized (pred) == CODE_FOR_mac64_600
|| recog_memoized (pred) == CODE_FOR_umul64_600
|| recog_memoized (pred) == CODE_FOR_umac64_600)
return 0;
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
{
const_rtx x = *iter;
switch (GET_CODE (x))
{
case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
break;
default:
/* This is also fine for PRE/POST_MODIFY, because they
contain a SET. */
continue;
}
rtx dest = XEXP (x, 0);
/* Check if this sets an extension register. N.B. we use 61 for the
condition codes, which is definitely not an extension register. */
if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
/* Check if the same register is used by the PAT. */
&& (refers_to_regno_p
(REGNO (dest),
REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
PATTERN (succ), 0)))
return 4;
}
return 0;
}
/* Given a rtx, check if it is an assembly instruction or not. */
static int
@@ -9408,23 +9471,6 @@ arc_asm_insn_p (rtx x)
return 0;
}
/* For ARC600:
A write to a core reg greater or equal to 32 must not be immediately
followed by a use. Anticipate the length requirement to insert a nop
between PRED and SUCC to prevent a hazard. */
int
arc_hazard (rtx_insn *pred, rtx_insn *succ)
{
if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
return 0;
if (TARGET_ARC600)
return arc600_corereg_hazard (pred, succ);
return 0;
}
/* Return length adjustment for INSN. */
int
+20 -12
View File
@@ -645,22 +645,21 @@
;; is made that makes conditional execution required.
(define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \
core_3, archs4x, archs4xd, archs4xd_slow"
archs4x, archs4xd"
(const
(cond [(symbol_ref "arc_tune == TUNE_ARC600")
(cond [(symbol_ref "arc_tune == ARC_TUNE_ARC600")
(const_string "arc600")
(symbol_ref "arc_tune == ARC_TUNE_ARC7XX")
(const_string "arc7xx")
(symbol_ref "arc_tune == TUNE_ARC700_4_2_STD")
(symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_STD")
(const_string "arc700_4_2_std")
(symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
(symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_XMAC")
(const_string "arc700_4_2_xmac")
(symbol_ref "arc_tune == ARC_TUNE_CORE_3")
(const_string "core_3")
(symbol_ref "arc_tune == TUNE_ARCHS4X")
(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X")
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A"))
(const_string "archs4x")
(ior (symbol_ref "arc_tune == TUNE_ARCHS4XD")
(symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW"))
(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD")
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW"))
(const_string "archs4xd")]
(const_string "none"))))
@@ -671,13 +670,22 @@ core_3, archs4x, archs4xd, archs4xd_slow"
(define_attr "tune_dspmpy" "none, slow, fast"
(const
(cond [(ior (symbol_ref "arc_tune == TUNE_ARCHS4X")
(symbol_ref "arc_tune == TUNE_ARCHS4XD"))
(cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X")
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD"))
(const_string "fast")
(symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW")
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW")
(const_string "slow")]
(const_string "none"))))
;; Store scheduling variant, derived from arc_tune: "normal" for the
;; ARCHS4X and ARCHS4XD tunings, "rel31a" for ARCHS4X_REL31A, and
;; "none" for every other tuning.
(define_attr "tune_store" "none, normal, rel31a"
(const
(cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X")
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD"))
(const_string "normal")
(symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A")
(const_string "rel31a")]
(const_string "none"))))
;; Move instructions.
(define_expand "movqi"
[(set (match_operand:QI 0 "move_dest_operand" "")
+3
View File
@@ -276,6 +276,9 @@ Enum(arc_tune_attr) String(arc750d) Value(ARC_TUNE_ARC700_4_2_XMAC)
EnumValue
Enum(arc_tune_attr) String(core3) Value(ARC_TUNE_CORE_3)
EnumValue
Enum(arc_tune_attr) String(release31a) Value(ARC_TUNE_ARCHS4X_REL31A)
mindexed-loads
Target Var(TARGET_INDEXED_LOADS) Init(TARGET_INDEXED_LOADS_DEFAULT)
Enable the use of indexed loads.
+15 -2
View File
@@ -27,14 +27,21 @@
(define_cpu_unit "hs4x_mult" "ARCHS4x")
(define_cpu_unit "hs4x_x1, hs4x_x2" "ARCHS4x")
(define_cpu_unit "hs4x_y1, hs4x_y2" "ARCHS4x")
(define_cpu_unit "hs4x_brcc0, hs4x_brcc1" "ARCHS4x")
(define_insn_reservation "hs4x_brj_op" 1
(and (match_test "TARGET_HS")
(eq_attr "tune" "archs4x, archs4xd")
(eq_attr "type" "call, call_no_delay_slot, uncond_branch, jump, \
branch, brcc,brcc_no_delay_slot, sfunc"))
branch, sfunc"))
"hs4x_issue0")
;; Compare-and-branch (brcc, with or without delay slot) and loop-end
;; insns: latency 1; they occupy hs4x_issue0 together with both
;; hs4x_brcc0 and hs4x_brcc1 in the same cycle.
(define_insn_reservation "hs4x_brcc_op" 1
(and (match_test "TARGET_HS")
(eq_attr "tune" "archs4x, archs4xd")
(eq_attr "type" "brcc,brcc_no_delay_slot,loop_end"))
"hs4x_issue0 + hs4x_brcc0 + hs4x_brcc1")
(define_insn_reservation "hs4x_data_load_op" 4
(and (match_test "TARGET_HS")
(eq_attr "tune" "archs4x, archs4xd")
@@ -43,10 +50,16 @@ branch, brcc,brcc_no_delay_slot, sfunc"))
(define_insn_reservation "hs4x_data_store_op" 1
(and (match_test "TARGET_HS")
(eq_attr "tune" "archs4x, archs4xd")
(eq_attr "tune_store" "normal")
(eq_attr "type" "store"))
"hs4x_issue1 + hs4x_ld_st")
;; Stores when tune_store is "rel31a": latency 2.  The first cycle
;; occupies hs4x_issue1, hs4x_ld_st and hs4x_brcc0; the following cycle
;; occupies hs4x_brcc1.
(define_insn_reservation "hs4x_data_store_1_op" 2
(and (match_test "TARGET_HS")
(eq_attr "tune_store" "rel31a")
(eq_attr "type" "store"))
"hs4x_issue1 + hs4x_ld_st + hs4x_brcc0, hs4x_brcc1")
;; Advanced ALU
(define_insn_reservation "hs4x_adv_alue_op" 4
(and (match_test "TARGET_HS")
+2 -2
View File
@@ -21,9 +21,9 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400
MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=hs4x_rel31/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400
MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400
MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd hs4x_rel31 arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400
# Aliases:
MULTILIB_MATCHES = mcpu?arc600=mcpu?ARC600
+16
View File
@@ -20053,6 +20053,15 @@ Compile for ARC HS38 CPU.
@item hs38_linux
Compile for ARC HS38 CPU with all hardware extensions on.
@item hs4x
Compile for ARC HS4x CPU.
@item hs4xd
Compile for ARC HS4xD CPU.
@item hs4x_rel31
Compile for ARC HS4x CPU release 3.10a.
@item arc600_norm
Compile for ARC 600 CPU with @code{norm} instructions enabled.
@@ -20662,6 +20671,13 @@ Tune for ARC725D CPU.
@item ARC750D
Tune for ARC750D CPU.
@item core3
Tune for ARCv2 core3 type CPU. This option enables the use of the
@code{dbnz} instruction.
@item release31a
Tune for ARC HS4x release 3.10a CPU.
@end table
@item -mmultcost=@var{num}