s390.c (Z10_PREDICT_DISTANCE): New macro.

2009-08-20  Andreas Krebbel  <krebbel1@de.ibm.com>

	* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
	(s390_z10_fix_long_loop_prediction): New function.
	(s390_z10_optimize_cmp): INSN walk moved to caller - s390_reorg.
	(s390_reorg): Walk over the INSNs and invoke
	s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.

From-SVN: r150955
This commit is contained in:
Andreas Krebbel 2009-08-20 09:21:13 +00:00 committed by Andreas Krebbel
parent f114923514
commit b0f86a7e6b
3 changed files with 225 additions and 82 deletions

View File

@ -1,3 +1,11 @@
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
(s390_z10_fix_long_loop_prediction): New function.
(s390_z10_optimize_cmp): INSN walk moved to caller - s390_reorg.
(s390_reorg): Walk over the INSNs and invoke
s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",

View File

@ -345,6 +345,10 @@ struct GTY(()) machine_function
#define REGNO_PAIR_OK(REGNO, MODE) \
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
/* Read-ahead window, in bytes, of the dynamic branch prediction unit
   on a z10 CPU.  s390_z10_fix_long_loop_prediction compares the
   distance between a conditional branch and its target label against
   this value.  */
#define Z10_PREDICT_DISTANCE 384
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{
@ -9661,6 +9665,66 @@ s390_optimize_prologue (void)
}
}
/* The dynamic branch prediction of a z10 only sees a backward jump
   whose target lies within a window of Z10_PREDICT_DISTANCE (384)
   bytes; otherwise the CPU falls back to static prediction.  For a
   conditional backward branch INSN that is too long, invert the
   branch so that it jumps over a freshly emitted unconditional jump
   back to the original target, which makes the static prediction
   always correct.  Return true if an instruction was added, false if
   INSN was left untouched.  */
static bool
s390_z10_fix_long_loop_prediction (rtx insn)
{
  rtx branch_set = single_set (insn);
  rtx branch_src;
  rtx target_ref, loop_head;
  rtx skip_label, back_jump;
  rtx scan, saved_arm;
  int bytes_seen;

  /* Only handle a conditional jump of the form
     (set (pc) (if_then_else ...)).  This is meant to exclude branch on
     count and branch on index patterns since these are correctly
     statically predicted.
     NOTE(review): single_set may also succeed for a PARALLEL whose
     remaining sets are dead -- confirm that such branch on count
     insns cannot get past this test.  */
  if (!branch_set)
    return false;
  if (SET_DEST (branch_set) != pc_rtx)
    return false;
  branch_src = SET_SRC (branch_set);
  if (GET_CODE (branch_src) != IF_THEN_ELSE)
    return false;

  /* The branch target is whichever arm of the IF_THEN_ELSE holds the
     LABEL_REF.  */
  if (GET_CODE (XEXP (branch_src, 1)) == LABEL_REF)
    target_ref = XEXP (branch_src, 1);
  else
    target_ref = XEXP (branch_src, 2);
  gcc_assert (GET_CODE (target_ref) == LABEL_REF);
  loop_head = XEXP (target_ref, 0);

  /* Give up if either address is unknown.  Also give up if the branch
     address minus the label address is below the prediction window;
     this covers short backward branches as well as forward branches,
     whose difference is negative.  */
  if (INSN_ADDRESSES (INSN_UID (loop_head)) == -1)
    return false;
  if (INSN_ADDRESSES (INSN_UID (insn)) == -1)
    return false;
  if (INSN_ADDRESSES (INSN_UID (insn))
      - INSN_ADDRESSES (INSN_UID (loop_head)) < Z10_PREDICT_DISTANCE)
    return false;

  /* Walk backwards from the branch, summing insn lengths until
     Z10_PREDICT_DISTANCE - 6 bytes are covered.  Do nothing if the
     insn chain ends or another jump or a label shows up within that
     range.  */
  bytes_seen = 0;
  scan = PREV_INSN (insn);
  while (bytes_seen < Z10_PREDICT_DISTANCE - 6)
    {
      if (!scan || JUMP_P (scan) || LABEL_P (scan))
        return false;
      bytes_seen += get_attr_length (scan);
      scan = PREV_INSN (scan);
    }

  /* Emit "jump loop_head" right behind the conditional branch and put
     a new label after that jump.  */
  skip_label = gen_label_rtx ();
  back_jump
    = emit_jump_insn_after (gen_rtx_SET (VOIDmode, pc_rtx,
                                         gen_rtx_LABEL_REF (VOIDmode,
                                                            loop_head)),
                            insn);
  emit_label_after (skip_label, back_jump);

  /* Invert the conditional branch by exchanging the two arms of the
     IF_THEN_ELSE.  The insn code is reset since the pattern changed
     (presumably to force the insn to be recognized again).  */
  saved_arm = XEXP (branch_src, 1);
  XEXP (branch_src, 1) = XEXP (branch_src, 2);
  XEXP (branch_src, 2) = saved_arm;
  INSN_CODE (insn) = -1;

  /* The inverted branch now targets the new label behind the
     unconditional jump; the unconditional jump takes over the
     original target.  */
  XEXP (target_ref, 0) = skip_label;
  JUMP_LABEL (insn) = skip_label;
  JUMP_LABEL (back_jump) = loop_head;

  return true;
}
/* Returns 1 if INSN reads the value of REG for purposes not related
to addressing of memory, and 0 otherwise. */
static int
@ -9743,97 +9807,87 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
if that register's value is delivered via a bypass, then the
pipeline recycles, thereby causing significant performance decline.
This function locates such situations and exchanges the two
operands of the compare. */
static void
s390_z10_optimize_cmp (void)
operands of the compare. The function returns true whenever it
added an insn. */
static bool
s390_z10_optimize_cmp (rtx insn)
{
rtx insn, prev_insn, next_insn;
int added_NOPs = 0;
rtx prev_insn, next_insn;
bool insn_added_p = false;
rtx cond, *op0, *op1;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
if (GET_CODE (PATTERN (insn)) == PARALLEL)
{
rtx cond, *op0, *op1;
/* Handle compare and branch and branch on count
instructions. */
rtx pattern = single_set (insn);
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
continue;
if (!pattern
|| SET_DEST (pattern) != pc_rtx
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
return false;
if (GET_CODE (PATTERN (insn)) == PARALLEL)
cond = XEXP (SET_SRC (pattern), 0);
op0 = &XEXP (cond, 0);
op1 = &XEXP (cond, 1);
}
else if (GET_CODE (PATTERN (insn)) == SET)
{
rtx src, dest;
/* Handle normal compare instructions. */
src = SET_SRC (PATTERN (insn));
dest = SET_DEST (PATTERN (insn));
if (!REG_P (dest)
|| !CC_REGNO_P (REGNO (dest))
|| GET_CODE (src) != COMPARE)
return false;
/* s390_swap_cmp will try to find the conditional
jump when passing NULL_RTX as condition. */
cond = NULL_RTX;
op0 = &XEXP (src, 0);
op1 = &XEXP (src, 1);
}
else
return false;
if (!REG_P (*op0) || !REG_P (*op1))
return false;
/* Swap the COMPARE arguments and its mask if there is a
conflicting access in the previous insn. */
prev_insn = PREV_INSN (insn);
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
&& reg_referenced_p (*op1, PATTERN (prev_insn)))
s390_swap_cmp (cond, op0, op1, insn);
/* Check if there is a conflict with the next insn. If there
was no conflict with the previous insn, then swap the
COMPARE arguments and its mask. If we already swapped
the operands, or if swapping them would cause a conflict
with the previous insn, issue a NOP after the COMPARE in
order to separate the two instructions. */
next_insn = NEXT_INSN (insn);
if (next_insn != NULL_RTX && INSN_P (next_insn)
&& s390_non_addr_reg_read_p (*op1, next_insn))
{
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
&& s390_non_addr_reg_read_p (*op0, prev_insn))
{
/* Handle compare and branch and branch on count
instructions. */
rtx pattern = single_set (insn);
if (!pattern
|| SET_DEST (pattern) != pc_rtx
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
continue;
cond = XEXP (SET_SRC (pattern), 0);
op0 = &XEXP (cond, 0);
op1 = &XEXP (cond, 1);
}
else if (GET_CODE (PATTERN (insn)) == SET)
{
rtx src, dest;
/* Handle normal compare instructions. */
src = SET_SRC (PATTERN (insn));
dest = SET_DEST (PATTERN (insn));
if (!REG_P (dest)
|| !CC_REGNO_P (REGNO (dest))
|| GET_CODE (src) != COMPARE)
continue;
/* s390_swap_cmp will try to find the conditional
jump when passing NULL_RTX as condition. */
cond = NULL_RTX;
op0 = &XEXP (src, 0);
op1 = &XEXP (src, 1);
if (REGNO (*op1) == 0)
emit_insn_after (gen_nop1 (), insn);
else
emit_insn_after (gen_nop (), insn);
insn_added_p = true;
}
else
continue;
if (!REG_P (*op0) || !REG_P (*op1))
continue;
/* Swap the COMPARE arguments and its mask if there is a
conflicting access in the previous insn. */
prev_insn = PREV_INSN (insn);
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
&& reg_referenced_p (*op1, PATTERN (prev_insn)))
s390_swap_cmp (cond, op0, op1, insn);
/* Check if there is a conflict with the next insn. If there
was no conflict with the previous insn, then swap the
COMPARE arguments and its mask. If we already swapped
the operands, or if swapping them would cause a conflict
with the previous insn, issue a NOP after the COMPARE in
order to separate the two instructions. */
next_insn = NEXT_INSN (insn);
if (next_insn != NULL_RTX && INSN_P (next_insn)
&& s390_non_addr_reg_read_p (*op1, next_insn))
{
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
&& s390_non_addr_reg_read_p (*op0, prev_insn))
{
if (REGNO (*op1) == 0)
emit_insn_after (gen_nop1 (), insn);
else
emit_insn_after (gen_nop (), insn);
added_NOPs = 1;
}
else
s390_swap_cmp (cond, op0, op1, insn);
}
}
/* Adjust branches if we added new instructions. */
if (added_NOPs)
shorten_branches (get_insns ());
return insn_added_p;
}
/* Perform machine-dependent processing. */
static void
@ -9944,10 +9998,33 @@ s390_reorg (void)
/* Try to optimize prologue and epilogue further. */
s390_optimize_prologue ();
/* Eliminate z10-specific pipeline recycles related to some compare
instructions. */
/* Walk over the insns and do some z10 specific changes. */
if (s390_tune == PROCESSOR_2097_Z10)
s390_z10_optimize_cmp ();
{
rtx insn;
bool insn_added_p = false;
/* The insn lengths and addresses have to be up to date for the
following manipulations. */
shorten_branches (get_insns ());
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
continue;
if (JUMP_P (insn))
insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
if (GET_CODE (PATTERN (insn)) == PARALLEL
|| GET_CODE (PATTERN (insn)) == SET)
insn_added_p |= s390_z10_optimize_cmp (insn);
}
/* Adjust branches if we added new instructions. */
if (insn_added_p)
shorten_branches (get_insns ());
}
}

View File

@ -1046,6 +1046,64 @@
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
; 10 byte for clgr/jg
; And now the same two patterns as above but with a negated CC mask.
; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
; The following instructions do a complementary access of their second
; operand (z10 only): crj_c, cgrjc, cr, cgr
;
; Signed integer compare and branch with the two arms of the
; if_then_else exchanged: (pc) is the "then" arm and the label
; (operand 3) is the "else" arm.  Operand 0 is the comparison operator
; applied to operands 1 and 2.  The pattern clobbers the CC register
; and is only enabled for TARGET_Z10.
;
; Output: if the length attribute is 6 a single compare-and-branch
; instruction is emitted (c<g>rj for alternative 0, c<g>ij for
; alternative 1); otherwise a separate compare (c<g>r or c<g>fi)
; followed by jg is emitted.  The branch condition is printed with %D0
; (presumably the inverted condition, matching the exchanged arms --
; confirm against print_operand).
;
; Length: 6 bytes while the distance between pc and the label is below
; 60000, otherwise 12 bytes.
(define_insn "*icmp_and_br_signed_<mode>"
[(set (pc)
(if_then_else (match_operator 0 "s390_signed_integer_comparison"
[(match_operand:GPR 1 "register_operand" "d,d")
(match_operand:GPR 2 "nonmemory_operand" "d,C")])
(pc)
(label_ref (match_operand 3 "" ""))))
(clobber (reg:CC CC_REGNUM))]
"TARGET_Z10"
{
if (get_attr_length (insn) == 6)
return which_alternative ?
"c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3";
else
return which_alternative ?
"c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3";
}
[(set_attr "op_type" "RIE")
(set_attr "type" "branch")
(set_attr "z10prop" "z10_super_c,z10_super")
(set (attr "length")
(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
(const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
; 10 byte for cgr/jg
; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
; The following instructions do a complementary access of their second
; operand (z10 only): clrj, clgrj, clr, clgr
;
; Unsigned integer compare and branch with the two arms of the
; if_then_else exchanged: (pc) is the "then" arm and the label
; (operand 3) is the "else" arm.  Operand 0 is the comparison operator
; applied to operands 1 and 2.  The pattern clobbers the CC register
; and is only enabled for TARGET_Z10.
;
; Output: if the length attribute is 6 a single compare-and-branch
; instruction is emitted (cl<g>rj for alternative 0, cl<g>ij for
; alternative 1); otherwise a separate compare (cl<g>r or cl<g>fi)
; followed by jg is emitted.  The branch condition is printed with %D0
; (presumably the inverted condition, matching the exchanged arms --
; confirm against print_operand).
;
; Length: 6 bytes while the distance between pc and the label is below
; 60000, otherwise 12 bytes.
(define_insn "*icmp_and_br_unsigned_<mode>"
[(set (pc)
(if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
[(match_operand:GPR 1 "register_operand" "d,d")
(match_operand:GPR 2 "nonmemory_operand" "d,I")])
(pc)
(label_ref (match_operand 3 "" ""))))
(clobber (reg:CC CC_REGNUM))]
"TARGET_Z10"
{
if (get_attr_length (insn) == 6)
return which_alternative ?
"cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3";
else
return which_alternative ?
"cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3";
}
[(set_attr "op_type" "RIE")
(set_attr "type" "branch")
(set_attr "z10prop" "z10_super_c,z10_super")
(set (attr "length")
(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
; 10 byte for clgr/jg
;;
;;- Move instructions.
;;