s390.c (Z10_PREDICT_DISTANCE): New macro.
2009-08-20  Andreas Krebbel  <krebbel1@de.ibm.com>

	* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
	(s390_z10_fix_long_loop_prediction): New function.
	(s390_z10_optimize_cmp): INSN walk moved to callee - s390_reorg.
	(s390_reorg): Walk over the INSNs and invoke
	s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.

From-SVN: r150955
parent f114923514
commit b0f86a7e6b
gcc/ChangeLog
@@ -1,3 +1,11 @@
+2009-08-20  Andreas Krebbel  <krebbel1@de.ibm.com>
+
+	* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
+	(s390_z10_fix_long_loop_prediction): New function.
+	(s390_z10_optimize_cmp): INSN walk moved to callee - s390_reorg.
+	(s390_reorg): Walk over the INSNs and invoke
+	s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.
+
 2009-08-20  Andreas Krebbel  <krebbel1@de.ibm.com>
 
 	* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",
gcc/config/s390/s390.c
@@ -345,6 +345,10 @@ struct GTY(()) machine_function
 #define REGNO_PAIR_OK(REGNO, MODE) \
   (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
 
+/* That's the read ahead of the dynamic branch prediction unit in
+   bytes on a z10 CPU. */
+#define Z10_PREDICT_DISTANCE 384
+
 static enum machine_mode
 s390_libgcc_cmp_return_mode (void)
 {
@@ -9661,6 +9665,66 @@ s390_optimize_prologue (void)
     }
 }
 
+/* On z10 the dynamic branch prediction must see the backward jump in
+   a window of 384 bytes. If not it falls back to the static
+   prediction. This function rearranges the loop backward branch in a
+   way which makes the static prediction always correct. The function
+   returns true if it added an instruction. */
+static bool
+s390_z10_fix_long_loop_prediction (rtx insn)
+{
+  rtx set = single_set (insn);
+  rtx code_label, label_ref, new_label;
+  rtx uncond_jump;
+  rtx cur_insn;
+  rtx tmp;
+  int distance;
+
+  /* This will exclude branch on count and branch on index patterns
+     since these are correctly statically predicted. */
+  if (!set
+      || SET_DEST (set) != pc_rtx
+      || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
+    return false;
+
+  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
+               XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
+
+  gcc_assert (GET_CODE (label_ref) == LABEL_REF);
+
+  code_label = XEXP (label_ref, 0);
+
+  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
+      || INSN_ADDRESSES (INSN_UID (insn)) == -1
+      || (INSN_ADDRESSES (INSN_UID (insn))
+          - INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
+    return false;
+
+  for (distance = 0, cur_insn = PREV_INSN (insn);
+       distance < Z10_PREDICT_DISTANCE - 6;
+       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
+    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
+      return false;
+
+  new_label = gen_label_rtx ();
+  uncond_jump = emit_jump_insn_after (
+                  gen_rtx_SET (VOIDmode, pc_rtx,
+                               gen_rtx_LABEL_REF (VOIDmode, code_label)),
+                  insn);
+  emit_label_after (new_label, uncond_jump);
+
+  tmp = XEXP (SET_SRC (set), 1);
+  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
+  XEXP (SET_SRC (set), 2) = tmp;
+  INSN_CODE (insn) = -1;
+
+  XEXP (label_ref, 0) = new_label;
+  JUMP_LABEL (insn) = new_label;
+  JUMP_LABEL (uncond_jump) = code_label;
+
+  return true;
+}
+
 /* Returns 1 if INSN reads the value of REG for purposes not related
    to addressing of memory, and 0 otherwise. */
 static int
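Editorial illustration (not part of the patch; registers, labels and the loop body are invented): the rearrangement replaces one long backward conditional branch by a short forward branch over a new unconditional backward jump, so the static predictor is right on every iteration but the last.

    before:
        .L10:   ...                     # more than 384 bytes of loop body
                crjne  %r1,%r2,.L10     # backward conditional jump; its target
                                        # lies outside the prediction window

    after s390_z10_fix_long_loop_prediction:
        .L10:   ...
                crje   %r1,%r2,.L11     # inverted condition: short forward jump,
                                        # only taken when the loop exits
                j      .L10             # unconditional backward jump keeps looping
        .L11:   ...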
@@ -9743,97 +9807,87 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
    if that register's value is delivered via a bypass, then the
    pipeline recycles, thereby causing significant performance decline.
    This function locates such situations and exchanges the two
-   operands of the compare. */
-static void
-s390_z10_optimize_cmp (void)
+   operands of the compare. The function return true whenever it
+   added an insn. */
+static bool
+s390_z10_optimize_cmp (rtx insn)
 {
-  rtx insn, prev_insn, next_insn;
-  int added_NOPs = 0;
+  rtx prev_insn, next_insn;
+  bool insn_added_p = false;
+  rtx cond, *op0, *op1;
 
-  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+  if (GET_CODE (PATTERN (insn)) == PARALLEL)
     {
-      rtx cond, *op0, *op1;
+      /* Handle compare and branch and branch on count
+         instructions. */
+      rtx pattern = single_set (insn);
 
-      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
-        continue;
+      if (!pattern
+          || SET_DEST (pattern) != pc_rtx
+          || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
+        return false;
 
-      if (GET_CODE (PATTERN (insn)) == PARALLEL)
+      cond = XEXP (SET_SRC (pattern), 0);
+      op0 = &XEXP (cond, 0);
+      op1 = &XEXP (cond, 1);
+    }
+  else if (GET_CODE (PATTERN (insn)) == SET)
+    {
+      rtx src, dest;
+
+      /* Handle normal compare instructions. */
+      src = SET_SRC (PATTERN (insn));
+      dest = SET_DEST (PATTERN (insn));
+
+      if (!REG_P (dest)
+          || !CC_REGNO_P (REGNO (dest))
+          || GET_CODE (src) != COMPARE)
+        return false;
+
+      /* s390_swap_cmp will try to find the conditional
+         jump when passing NULL_RTX as condition. */
+      cond = NULL_RTX;
+      op0 = &XEXP (src, 0);
+      op1 = &XEXP (src, 1);
+    }
+  else
+    return false;
+
+  if (!REG_P (*op0) || !REG_P (*op1))
+    return false;
+
+  /* Swap the COMPARE arguments and its mask if there is a
+     conflicting access in the previous insn. */
+  prev_insn = PREV_INSN (insn);
+  if (prev_insn != NULL_RTX && INSN_P (prev_insn)
+      && reg_referenced_p (*op1, PATTERN (prev_insn)))
+    s390_swap_cmp (cond, op0, op1, insn);
+
+  /* Check if there is a conflict with the next insn. If there
+     was no conflict with the previous insn, then swap the
+     COMPARE arguments and its mask. If we already swapped
+     the operands, or if swapping them would cause a conflict
+     with the previous insn, issue a NOP after the COMPARE in
+     order to separate the two instuctions. */
+  next_insn = NEXT_INSN (insn);
+  if (next_insn != NULL_RTX && INSN_P (next_insn)
+      && s390_non_addr_reg_read_p (*op1, next_insn))
+    {
+      if (prev_insn != NULL_RTX && INSN_P (prev_insn)
+          && s390_non_addr_reg_read_p (*op0, prev_insn))
         {
-          /* Handle compare and branch and branch on count
-             instructions. */
-          rtx pattern = single_set (insn);
-
-          if (!pattern
-              || SET_DEST (pattern) != pc_rtx
-              || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
-            continue;
-
-          cond = XEXP (SET_SRC (pattern), 0);
-          op0 = &XEXP (cond, 0);
-          op1 = &XEXP (cond, 1);
-        }
-      else if (GET_CODE (PATTERN (insn)) == SET)
-        {
-          rtx src, dest;
-
-          /* Handle normal compare instructions. */
-          src = SET_SRC (PATTERN (insn));
-          dest = SET_DEST (PATTERN (insn));
-
-          if (!REG_P (dest)
-              || !CC_REGNO_P (REGNO (dest))
-              || GET_CODE (src) != COMPARE)
-            continue;
-
-          /* s390_swap_cmp will try to find the conditional
-             jump when passing NULL_RTX as condition. */
-          cond = NULL_RTX;
-          op0 = &XEXP (src, 0);
-          op1 = &XEXP (src, 1);
+          if (REGNO (*op1) == 0)
+            emit_insn_after (gen_nop1 (), insn);
+          else
+            emit_insn_after (gen_nop (), insn);
+          insn_added_p = true;
         }
-      else
-        continue;
-
-      if (!REG_P (*op0) || !REG_P (*op1))
-        continue;
-
-      /* Swap the COMPARE arguments and its mask if there is a
-         conflicting access in the previous insn. */
-      prev_insn = PREV_INSN (insn);
-      if (prev_insn != NULL_RTX && INSN_P (prev_insn)
-          && reg_referenced_p (*op1, PATTERN (prev_insn)))
-        s390_swap_cmp (cond, op0, op1, insn);
-
-      /* Check if there is a conflict with the next insn. If there
-         was no conflict with the previous insn, then swap the
-         COMPARE arguments and its mask. If we already swapped
-         the operands, or if swapping them would cause a conflict
-         with the previous insn, issue a NOP after the COMPARE in
-         order to separate the two instuctions. */
-      next_insn = NEXT_INSN (insn);
-      if (next_insn != NULL_RTX && INSN_P (next_insn)
-          && s390_non_addr_reg_read_p (*op1, next_insn))
-        {
-          if (prev_insn != NULL_RTX && INSN_P (prev_insn)
-              && s390_non_addr_reg_read_p (*op0, prev_insn))
-            {
-              if (REGNO (*op1) == 0)
-                emit_insn_after (gen_nop1 (), insn);
-              else
-                emit_insn_after (gen_nop (), insn);
-              added_NOPs = 1;
-            }
-          else
-            s390_swap_cmp (cond, op0, op1, insn);
-        }
-    }
+      else
+        s390_swap_cmp (cond, op0, op1, insn);
+    }
 
-  /* Adjust branches if we added new instructions. */
-  if (added_NOPs)
-    shorten_branches (get_insns ());
+  return insn_added_p;
 }
 
 
 /* Perform machine-dependent processing. */
 
 static void
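Editorial illustration (not part of the patch; registers, condition and labels are invented): if the register feeding the second compare operand is written by the directly preceding insn, s390_z10_optimize_cmp lets s390_swap_cmp exchange the operands and reverse the branch condition; if swapping would only move the conflict to the other neighbour, a NOP is emitted after the compare instead.

        lr     %r3,%r5          # %r3 is written here ...
        cr     %r2,%r3          # ... and read as the second compare operand:
        jh     .Ltarget         # bypass-induced pipeline recycle on z10

        lr     %r3,%r5
        cr     %r3,%r2          # operands exchanged by s390_swap_cmp ...
        jl     .Ltarget         # ... and the CC mask reversed accordingly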
@@ -9944,10 +9998,33 @@ s390_reorg (void)
   /* Try to optimize prologue and epilogue further. */
   s390_optimize_prologue ();
 
-  /* Eliminate z10-specific pipeline recycles related to some compare
-     instructions. */
+  /* Walk over the insns and do some z10 specific changes. */
   if (s390_tune == PROCESSOR_2097_Z10)
-    s390_z10_optimize_cmp ();
+    {
+      rtx insn;
+      bool insn_added_p = false;
+
+      /* The insn lengths and addresses have to be up to date for the
+         following manipulations. */
+      shorten_branches (get_insns ());
+
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+        {
+          if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
+            continue;
+
+          if (JUMP_P (insn))
+            insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
+
+          if (GET_CODE (PATTERN (insn)) == PARALLEL
+              || GET_CODE (PATTERN (insn)) == SET)
+            insn_added_p |= s390_z10_optimize_cmp (insn);
+        }
+
+      /* Adjust branches if we added new instructions. */
+      if (insn_added_p)
+        shorten_branches (get_insns ());
+    }
 }
 
 
gcc/config/s390/s390.md
@@ -1046,6 +1046,64 @@
                     (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
                                                      ; 10 byte for clgr/jg
 
+; And now the same two patterns as above but with a negated CC mask.
+
+; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
+; The following instructions do a complementary access of their second
+; operand (z01 only): crj_c, cgrjc, cr, cgr
+(define_insn "*icmp_and_br_signed_<mode>"
+  [(set (pc)
+        (if_then_else (match_operator 0 "s390_signed_integer_comparison"
+                                      [(match_operand:GPR 1 "register_operand"  "d,d")
+                                       (match_operand:GPR 2 "nonmemory_operand" "d,C")])
+                      (pc)
+                      (label_ref (match_operand 3 "" ""))))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_Z10"
+{
+  if (get_attr_length (insn) == 6)
+    return which_alternative ?
+      "c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3";
+  else
+    return which_alternative ?
+      "c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3";
+}
+  [(set_attr "op_type" "RIE")
+   (set_attr "type"    "branch")
+   (set_attr "z10prop" "z10_super_c,z10_super")
+   (set (attr "length")
+      (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+                    (const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
+                                                     ; 10 byte for cgr/jg
+
+; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): clrj, clgrj, clr, clgr
+(define_insn "*icmp_and_br_unsigned_<mode>"
+  [(set (pc)
+        (if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
+                                      [(match_operand:GPR 1 "register_operand"  "d,d")
+                                       (match_operand:GPR 2 "nonmemory_operand" "d,I")])
+                      (pc)
+                      (label_ref (match_operand 3 "" ""))))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_Z10"
+{
+  if (get_attr_length (insn) == 6)
+    return which_alternative ?
+      "cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3";
+  else
+    return which_alternative ?
+      "cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3";
+}
+  [(set_attr "op_type" "RIE")
+   (set_attr "type"    "branch")
+   (set_attr "z10prop" "z10_super_c,z10_super")
+   (set (attr "length")
+      (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+                    (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
+                                                     ; 10 byte for clgr/jg
+
 ;;
 ;;- Move instructions.
 ;;
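Editorial illustration (not part of the patch; registers and labels are invented, assuming the register alternative of the signed pattern with an EQ comparison): while the branch target is within the conservative 60000-byte distance checked by the length attribute, the 6-byte fused compare-and-branch is emitted; otherwise the pattern falls back to a separate compare plus a 6-byte long relative branch.

        crjne  %r1,%r2,.Lnear   # length 6: fused compare and branch
        ...
        cr     %r1,%r2          # target out of range: separate compare ...
        jgne   .Lfar            # ... plus long relative branch (length 12 assumed)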