vect: support vectorization of early break forced live IVs as scalar

author Tamar Christina <tamar.christina@arm.com>

Sun, 30 Nov 2025 07:29:50 +0000 (07:29 +0000)

committer Tamar Christina <tamar.christina@arm.com>

Sun, 30 Nov 2025 07:32:30 +0000 (07:32 +0000)
author Tamar Christina <tamar.christina@arm.com>
Sun, 30 Nov 2025 07:29:50 +0000 (07:29 +0000)
committer Tamar Christina <tamar.christina@arm.com>
Sun, 30 Nov 2025 07:32:30 +0000 (07:32 +0000)
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_139.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_139.c

new file mode 100644 (file)

index 0000000..9599493
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_139.c
@@ -0,0 +1,37 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__((noipa))
+unsigned loop9(unsigned char *a, unsigned n, unsigned c)
+{
+  for (unsigned j = 0;;)
+    {
+      if (c <= j)
+        __builtin_abort();
+
+      unsigned char *slot = (unsigned char *)a + j;
+
+      *slot = (char)j;
+
+      unsigned d = j + 1;
+      if (d < n)
+        j = d;
+      else
+        return d;
+    }
+}
+
+int main ()
+{
+  check_vect ();
+
+  unsigned char buff[16] = {0};
+  unsigned res = loop9 (buff, 16, 20);
+  if (res != 16)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c

index b3f40b8c9ba49e41bd283e46a462238c3b5825ef..bc862ad20e68db8f3c0ba6facf47e13a56a7cd6d 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c
@@ -23,5 +23,6 @@ unsigned test4(unsigned x, unsigned n)
   return ret;
  }
  
-/* cannot safely vectorize this due due to the group misalignment.  */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */
+/* AArch64 will scalarize the load and is able to vectorize it.  */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target aarch64*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" { target { ! aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c

index b7a7bc5cb0cfdfdb74adb120c54ba15019832cf1..43abd01c078da7d3f80045ecbd37b72ac918f678 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c
@@ -20,5 +20,4 @@ foo (int start)
  }
  
  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
  /* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c

index feb7ee7d61c92145e8defc095f2ad096b1e3f777..37806adea7b9788d3122fa32148a8709d5cf57be 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c
@@ -15,6 +15,5 @@ foo (int *a) {
  }
  
  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
  /* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
  /* { dg-final { scan-assembler {\tnot\tp[0-7]\.b, p[0-7]/z, p.*\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c

index 260482a94df750b7886d72eed1964e70288c0886..e3ed63afb05cbef15d3c58a18acb0f3650161223 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c
@@ -15,7 +15,6 @@ foo (int *restrict a, int * restrict b) {
  }
  
  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
  /* { dg-final { scan-tree-dump "Both peeling and versioning will be applied" "vect" } } */
  /* { dg-final { scan-assembler {\tnot\tp[0-7]\.b, p[0-7]/z, p.*\n} } } */
  /* { dg-final { scan-assembler {\teor\t.*\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_5.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_5.c

index a03bb1dec21ef75aa0cbfb22c8bb02b99644239e..1977bf3af2db247825900c4200676f4dc2ca4f9a 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_5.c
@@ -20,5 +20,4 @@ foo (void)
  }
  
  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
  /* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_6.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_6.c

index 9bfd1a65c4feb0c140d4abf98508fc8af08042ba..0b40d26ae2a3f3c882a7e571140f9efabcf9c41a 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_6.c
@@ -20,5 +20,4 @@ foo (int start)
  }
  
  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
  /* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_7.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_7.c

index 0182e131a173b7b05e88c3393ba854b2da25c6b2..7a24d689e95a65aa65e1ec6558d117d19407a2c6 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_7.c
@@ -20,5 +20,4 @@ foo (void)
  }
  
  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
  /* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_9.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_9.c

index cc904e88170f072e1d3c6be86643d99a7cd5cb12..136d18c2ea89f5a93a1edfc24fe8b7f97bae82d8 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_9.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_9.c
@@ -20,6 +20,6 @@ foo (void)
  }
  
  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* Peels using a scalar loop.  */
-/* { dg-final { scan-tree-dump-not "pfa_iv_offset" "vect" } } */
+/* Peels using fully masked loop.  */
+/* { dg-final { scan-tree-dump "misalignment for fully-masked loop" "vect" } } */
  /* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c

index 1ebc735a82f4a59d8eccff39346e46a449b4729a..1aca6c7de1d4196fb12bf3202258229a6ec3995d 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -33,6 +33,5 @@ foo (void)
  }
  
  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
  /* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
  
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc

index 9ddf9acf2f190958cf2594385f42fa6d58a61bc3..43847c4c3fbdbe7b8364d30e0b614b39cbabf367 100644 (file)
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2161,6 +2161,16 @@ vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
        return false;
      }
  
+  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+      && induction_type == vect_step_op_mul)
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "Peeling for is not supported for nonlinear mult"
+                        " induction using partial vectorization.\n");
+      return false;
+    }
+
    /* Avoid compile time hog on vect_peel_nonlinear_iv_init.  */
    if (induction_type == vect_step_op_mul)
      {
@@ -2315,6 +2325,9 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
                    The phi args associated with the edge UPDATE_E in the bb
                    UPDATE_E->dest are updated accordingly.
  
+     - EARLY_EXIT_P - Indicates whether the exit is an early exit rather than
+                     the main latch exit.
+
       Assumption 1: Like the rest of the vectorizer, this function assumes
       a single loop exit that has a single predecessor.
  
@@ -2333,7 +2346,8 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
  
  static void
  vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
-                                 tree niters, edge update_e)
+                                 tree niters, edge update_e,
+                                 bool early_exit_p)
  {
    gphi_iterator gsi, gsi1;
    class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -2400,15 +2414,16 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
        else
         ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
                                           niters, step_expr,
-                                         induction_type);
+                                         induction_type, early_exit_p);
  
        var = create_tmp_var (type, "tmp");
  
        gimple_seq new_stmts = NULL;
        ni_name = force_gimple_operand (ni, &new_stmts, false, var);
  
-      /* Exit_bb shouldn't be empty.  */
-      if (!gsi_end_p (last_gsi))
+      /* Exit_bb shouldn't be empty, but we also can't insert after a ctrl
+        statement.  */
+      if (!gsi_end_p (last_gsi) && !is_ctrl_stmt (gsi_stmt (last_gsi)))
         {
           gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
           gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
@@ -2419,8 +2434,15 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
           gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
         }
  
-      /* Fix phi expressions in the successor bb.  */
-      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
+      /* Fix phi expressions in all out of loop bb.  */
+      imm_use_iterator imm_iter;
+      gimple *use_stmt;
+      use_operand_p use_p;
+      tree ic_var = PHI_ARG_DEF_FROM_EDGE (phi1, update_e);
+      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, ic_var)
+       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+         FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
+           SET_USE (use_p, ni_name);
      }
  }
  
@@ -3562,14 +3584,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
        if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
         update_e = single_succ_edge (LOOP_VINFO_IV_EXIT (loop_vinfo)->dest);
  
-      /* If we have a peeled vector iteration, all exits are the same, leave it
-        and so the main exit needs to be treated the same as the alternative
-        exits in that we leave their updates to vectorizable_live_operations.
-        */
-      if (!LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
-       vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
-                                         update_e);
-
        /* If we have a peeled vector iteration we will never skip the epilog loop
          and we can simplify the cfg a lot by not doing the edge split.  */
        if (skip_epilog
@@ -3625,6 +3639,41 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
           scale_loop_profile (epilog, prob_epilog, -1);
         }
  
+      /* For early breaks, first update the IVs on the early exit merge block
+        using a fresh SSA name (LOOP_VINFO_EARLY_BRK_NITERS_VAR) whose
+        definition is generated later.  If a vector iteration was peeled the
+        main exit uses that name too instead of NITERS_VECTOR_MULT_VF.  */
+      tree vector_iters_vf = niters_vector_mult_vf;
+      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+       {
+         tree scal_iv_ty = signed_type_for (TREE_TYPE (vector_iters_vf));
+         tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
+         basic_block exit_bb = NULL;
+         edge update_e = NULL;
+
+         /* Identify the early exit merge block.  I wish we had stored this.  */
+         for (auto e : get_loop_exit_edges (loop))
+           if (e != LOOP_VINFO_IV_EXIT (loop_vinfo))
+             {
+               exit_bb = e->dest;
+               update_e = single_succ_edge (exit_bb);
+               break;
+             }
+         vect_update_ivs_after_vectorizer (loop_vinfo, tmp_niters_vf,
+                                           update_e, true);
+
+         if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
+           vector_iters_vf = tmp_niters_vf;
+
+         LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
+       }
+
+       bool recalculate_peel_niters_init
+         = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
+       vect_update_ivs_after_vectorizer (loop_vinfo, vector_iters_vf,
+                                         update_e,
+                                         recalculate_peel_niters_init);
+
        /* Recalculate the dominators after adding the guard edge.  */
        if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
         iterate_fix_dominators (CDI_DOMINATORS, doms, false);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc

index ab6c0f084703d38d8f531fbb1c12ac16a4f6cdbf..3ac264f0ce32de5d307c4c4b623d3490ee9c41d2 100644 (file)
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -8951,14 +8951,25 @@ vect_create_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
  tree
  vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
                              tree skip_niters, tree step_expr,
-                            enum vect_induction_op_type induction_type)
+                            enum vect_induction_op_type induction_type,
+                            bool early_exit_p)
  {
-  gcc_assert (TREE_CODE (skip_niters) == INTEGER_CST);
+  gcc_assert (TREE_CODE (skip_niters) == INTEGER_CST || early_exit_p);
    tree type = TREE_TYPE (init_expr);
    unsigned prec = TYPE_PRECISION (type);
    switch (induction_type)
      {
+    /* neg inductions are typically not used for loop termination conditions but
+       are typically implemented as b = -b.  That is every scalar iteration b is
+       negated.  That means that for the initial value of b we will have to
+       determine whether the number of skipped iterations is a multiple of 2
+       because every 2 scalar iterations we are back at "b".  */
      case vect_step_op_neg:
+      /* For early exits the neg induction will always be the same value at the
+        start of the iteration.  */
+      if (early_exit_p)
+       break;
+
        if (TREE_INT_CST_LOW (skip_niters) % 2)
         init_expr = gimple_build (stmts, NEGATE_EXPR, type, init_expr);
        /* else no change.  */
@@ -8966,13 +8977,15 @@ vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
  
      case vect_step_op_shr:
      case vect_step_op_shl:
-      skip_niters = gimple_convert (stmts, type, skip_niters);
-      step_expr = gimple_build (stmts, MULT_EXPR, type, step_expr, skip_niters);
+      skip_niters = fold_build1 (NOP_EXPR, type, skip_niters);
+      step_expr = fold_build1 (NOP_EXPR, type, step_expr);
+      step_expr = fold_build2 (MULT_EXPR, type, step_expr, skip_niters);
        /* When shift mount >= precision, need to avoid UD.
          In the original loop, there's no UD, and according to semantic,
          init_expr should be 0 for lshr, ashl, and >>= (prec - 1) for ashr.  */
-      if (!tree_fits_uhwi_p (step_expr)
+      if ((!tree_fits_uhwi_p (step_expr)
           || tree_to_uhwi (step_expr) >= prec)
+         && !early_exit_p)
         {
           if (induction_type == vect_step_op_shl
               || TYPE_UNSIGNED (type))
@@ -8983,13 +8996,19 @@ vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
                                       wide_int_to_tree (type, prec - 1));
         }
        else
-       init_expr = gimple_build (stmts, (induction_type == vect_step_op_shr
+       {
+         init_expr = fold_build2 ((induction_type == vect_step_op_shr
                                           ? RSHIFT_EXPR : LSHIFT_EXPR),
-                                 type, init_expr, step_expr);
+                                   type, init_expr, step_expr);
+         init_expr = force_gimple_operand (init_expr, stmts, false, NULL);
+       }
        break;
  
      case vect_step_op_mul:
        {
+       /* Due to UB we can't support vect_step_op_mul with early break for now,
+          so assert and block.  */
+       gcc_assert (TREE_CODE (skip_niters) == INTEGER_CST);
         tree utype = unsigned_type_for (type);
         init_expr = gimple_convert (stmts, utype, init_expr);
         wide_int skipn = wi::to_wide (skip_niters);
@@ -9073,9 +9092,7 @@ vect_update_nonlinear_iv (gimple_seq* stmts, tree vectype,
      case vect_step_op_mul:
        {
         /* Use unsigned mult to avoid UD integer overflow.  */
-       tree uvectype
-         = build_vector_type (unsigned_type_for (TREE_TYPE (vectype)),
-                              TYPE_VECTOR_SUBPARTS (vectype));
+       tree uvectype = unsigned_type_for (vectype);
         vec_def = gimple_convert (stmts, uvectype, vec_def);
         vec_step = gimple_convert (stmts, uvectype, vec_step);
         vec_def = gimple_build (stmts, MULT_EXPR, uvectype,
@@ -9322,7 +9339,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
       to adjust the start value here.  */
    if (niters_skip != NULL_TREE)
      init_expr = vect_peel_nonlinear_iv_init (&stmts, init_expr, niters_skip,
-                                            step_expr, induction_type);
+                                            step_expr, induction_type, false);
  
    vec_init = vect_create_nonlinear_iv_init (&stmts, init_expr,
                                             step_expr, nunits, vectype,
@@ -9703,53 +9720,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
                                    LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
        peel_mul = gimple_build_vector_from_val (&init_stmts,
                                                step_vectype, peel_mul);
-
-      /* If early break then we have to create a new PHI which we can use as
-        an offset to adjust the induction reduction in early exits.
-
-        This is because when peeling for alignment using masking, the first
-        few elements of the vector can be inactive.  As such if we find the
-        entry in the first iteration we have adjust the starting point of
-        the scalar code.
-
-        We do this by creating a new scalar PHI that keeps track of whether
-        we are the first iteration of the loop (with the additional masking)
-        or whether we have taken a loop iteration already.
-
-        The generated sequence:
-
-        pre-header:
-          bb1:
-            i_1 = <number of leading inactive elements>
-
-          header:
-          bb2:
-            i_2 = PHI <i_1(bb1), 0(latch)>
-            …
-
-          early-exit:
-          bb3:
-            i_3 = iv_step * i_2 + PHI<vector-iv>
-
-        The first part of the adjustment to create i_1 and i_2 are done here
-        and the last part creating i_3 is done in
-        vectorizable_live_operations when the induction extraction is
-        materialized.  */
-      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
-         && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo))
-       {
-         auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
-         tree ty_skip_niters = TREE_TYPE (skip_niters);
-         tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters,
-                                                     vect_scalar_var,
-                                                     "pfa_iv_offset");
-         gphi *nphi = create_phi_node (break_lhs_phi, bb);
-         add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION);
-         add_phi_arg (nphi, build_zero_cst (ty_skip_niters),
-                      loop_latch_edge (iv_loop), UNKNOWN_LOCATION);
-
-         LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo) = PHI_RESULT (nphi);
-       }
      }
    tree step_mul = NULL_TREE;
    unsigned ivn;
@@ -10325,8 +10295,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
                  to the latch then we're restarting the iteration in the
                  scalar loop.  So get the first live value.  */
               bool early_break_first_element_p
-               = (all_exits_as_early_p || !main_exit_edge)
-                  && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def;
+               = all_exits_as_early_p || !main_exit_edge;
               if (early_break_first_element_p)
                 {
                   tmp_vec_lhs = vec_lhs0;
@@ -10335,52 +10304,13 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
  
               gimple_stmt_iterator exit_gsi;
               tree new_tree
-               = vectorizable_live_operation_1 (loop_vinfo,
-                                                e->dest, vectype,
-                                                slp_node, bitsize,
-                                                tmp_bitstart, tmp_vec_lhs,
-                                                lhs_type, &exit_gsi);
+                 = vectorizable_live_operation_1 (loop_vinfo,
+                                                  e->dest, vectype,
+                                                  slp_node, bitsize,
+                                                  tmp_bitstart, tmp_vec_lhs,
+                                                  lhs_type, &exit_gsi);
  
               auto gsi = gsi_for_stmt (use_stmt);
-             if (early_break_first_element_p
-                 && LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo))
-               {
-                 tree step_expr
-                   = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
-                 tree break_lhs_phi
-                   = LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo);
-                 tree ty_skip_niters = TREE_TYPE (break_lhs_phi);
-                 gimple_seq iv_stmts = NULL;
-
-                 /* Now create the PHI for the outside loop usage to
-                    retrieve the value for the offset counter.  */
-                 tree rphi_step
-                   = gimple_convert (&iv_stmts, ty_skip_niters, step_expr);
-                 tree tmp2
-                   = gimple_build (&iv_stmts, MULT_EXPR,
-                                   ty_skip_niters, rphi_step,
-                                   break_lhs_phi);
-
-                 if (POINTER_TYPE_P (TREE_TYPE (new_tree)))
-                   {
-                     tmp2 = gimple_convert (&iv_stmts, sizetype, tmp2);
-                     tmp2 = gimple_build (&iv_stmts, POINTER_PLUS_EXPR,
-                                          TREE_TYPE (new_tree), new_tree,
-                                          tmp2);
-                   }
-                 else
-                   {
-                     tmp2 = gimple_convert (&iv_stmts, TREE_TYPE (new_tree),
-                                            tmp2);
-                     tmp2 = gimple_build (&iv_stmts, PLUS_EXPR,
-                                          TREE_TYPE (new_tree), new_tree,
-                                          tmp2);
-                   }
-
-                 new_tree = tmp2;
-                 gsi_insert_seq_before (&exit_gsi, iv_stmts, GSI_SAME_STMT);
-               }
-
               tree lhs_phi = gimple_phi_result (use_stmt);
               remove_phi_node (&gsi, false);
               gimple *copy = gimple_build_assign (lhs_phi, new_tree);
@@ -11021,6 +10951,101 @@ move_early_exit_stmts (loop_vec_info loop_vinfo)
         SET_PHI_ARG_DEF_ON_EDGE (phi, e, last_seen_vuse);
  }
  
+/* Generate adjustment code for early break scalar IVs filling in the value
+   we created earlier on for LOOP_VINFO_EARLY_BRK_NITERS_VAR.  */
+
+static void
+vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
+{
+  DUMP_VECT_SCOPE ("vect_update_ivs_after_vectorizer_for_early_breaks");
+
+  if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+    return;
+
+  gcc_assert (LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo));
+
+  tree phi_var = LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo);
+  tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  tree ty_var = TREE_TYPE (phi_var);
+  auto loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree induc_var = niters_skip ? copy_ssa_name (phi_var) : phi_var;
+
+  auto induction_phi = create_phi_node (induc_var, loop->header);
+  tree induc_def = PHI_RESULT (induction_phi);
+
+  /* Create the iv update inside the loop.  */
+  gimple_seq init_stmts = NULL;
+  gimple_seq stmts = NULL;
+  gimple_seq iv_stmts = NULL;
+  tree tree_vf = build_int_cst (ty_var, vf);
+
+  /* For loop len targets we have to use .SELECT_VL (ivtmp_33, VF); instead of
+     just += VF as the VF can change in between two loop iterations.  */
+  if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+    {
+      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+      tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
+                                  NULL_TREE, 0, 0);
+    }
+
+  tree iter_var;
+  if (POINTER_TYPE_P (ty_var))
+    {
+      tree offset = gimple_convert (&stmts, sizetype, tree_vf);
+      iter_var = gimple_build (&stmts, POINTER_PLUS_EXPR, ty_var, induc_def,
+                              gimple_convert (&stmts, sizetype, offset));
+    }
+  else
+    {
+      tree offset = gimple_convert (&stmts, ty_var, tree_vf);
+      iter_var = gimple_build (&stmts, PLUS_EXPR, ty_var, induc_def, offset);
+    }
+
+  tree init_var = build_zero_cst (ty_var);
+  if (niters_skip)
+    init_var = gimple_build (&init_stmts, MINUS_EXPR, ty_var, init_var,
+                            gimple_convert (&init_stmts, ty_var, niters_skip));
+
+  add_phi_arg (induction_phi, iter_var,
+              loop_latch_edge (loop), UNKNOWN_LOCATION);
+  add_phi_arg (induction_phi, init_var,
+              loop_preheader_edge (loop), UNKNOWN_LOCATION);
+
+  /* Preheader edge; INIT_STMTS go at the end of its source block.  */
+  auto pe = loop_preheader_edge (loop);
+
+  /* If we've done any peeling, calculate the peeling adjustment needed to the
+     final IV.  */
+  if (niters_skip)
+    {
+      induc_def = gimple_build (&iv_stmts, MAX_EXPR, TREE_TYPE (induc_def),
+                               induc_def,
+                               build_zero_cst (TREE_TYPE (induc_def)));
+      auto stmt = gimple_build_assign (phi_var, induc_def);
+      gimple_seq_add_stmt_without_update (&iv_stmts, stmt);
+      basic_block exit_bb = NULL;
+      /* Identify the early exit merge block.  I wish we had stored this.  */
+      for (auto e : get_loop_exit_edges (loop))
+       if (e != LOOP_VINFO_IV_EXIT (loop_vinfo))
+         {
+           exit_bb = e->dest;
+           break;
+         }
+
+      gcc_assert (exit_bb);
+      auto exit_gsi = gsi_after_labels (exit_bb);
+      gsi_insert_seq_before (&exit_gsi, iv_stmts, GSI_SAME_STMT);
+  }
+  /* Write the init_stmts in the loop-preheader block.  */
+  auto psi = gsi_last_nondebug_bb (pe->src);
+  gsi_insert_seq_after (&psi, init_stmts, GSI_LAST_NEW_STMT);
+  /* Write the adjustments in the header block.  */
+  basic_block bb = loop->header;
+  auto si = gsi_after_labels (bb);
+  gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+}
+
  /* Function vect_transform_loop.
  
     The analysis phase has determined that the loop is vectorizable.
@@ -11165,7 +11190,10 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
    /* Handle any code motion that we need to for early-break vectorization after
       we've done peeling but just before we start vectorizing.  */
    if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
-    move_early_exit_stmts (loop_vinfo);
+    {
+      vect_update_ivs_after_vectorizer_for_early_breaks (loop_vinfo);
+      move_early_exit_stmts (loop_vinfo);
+    }
  
    /* Remove existing clobber stmts and prefetches.  */
    for (i = 0; i < nbbs; i++)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc

index 5b0de9291cb0fb85e3cc4575dd6731b26d03ecd7..658ad6dc25798da2464bd91ac6194d4b711e6612 100644 (file)
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5885,48 +5885,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
                                              "SLP build failed.\n");
             }
         }
-
-       /* Find and create slp instances for inductions that have been forced
-          live due to early break.  */
-       edge latch_e = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo));
-       for (auto stmt_info : LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo))
-         {
-           vec<stmt_vec_info> stmts;
-           vec<stmt_vec_info> roots = vNULL;
-           vec<tree> remain = vNULL;
-           gphi *phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info));
-           tree def = gimple_phi_arg_def_from_edge (phi, latch_e);
-           stmt_vec_info lc_info = loop_vinfo->lookup_def (def);
-           if (lc_info)
-             {
-               stmts.create (1);
-               stmts.quick_push (vect_stmt_to_vectorize (lc_info));
-               if (! vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
-                                              stmts, roots, remain,
-                                              max_tree_size, &limit,
-                                              bst_map, force_single_lane))
-                 return opt_result::failure_at (vect_location,
-                                                "SLP build failed.\n");
-             }
-           /* When the latch def is from a different cycle this can only
-              be a induction.  Build a simple instance for this.
-              ???  We should be able to start discovery from the PHI
-              for all inductions, but then there will be stray
-              non-SLP stmts we choke on as needing non-SLP handling.  */
-           auto_vec<stmt_vec_info, 1> tem;
-           tem.quick_push (stmt_info);
-           if (!bst_map->get (tem))
-             {
-               stmts.create (1);
-               stmts.quick_push (stmt_info);
-               if (! vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
-                                              stmts, roots, remain,
-                                              max_tree_size, &limit,
-                                              bst_map, force_single_lane))
-                 return opt_result::failure_at (vect_location,
-                                                "SLP build failed.\n");
-             }
-         }
      }
  
    hash_set<slp_tree> visited_patterns;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc

index de28316ddc660020be4bc462f26316f59402d911..1d7e50afcde1096d5598b43ab8d49454eb68385b 100644 (file)
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -356,7 +356,6 @@ is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
     - it has uses outside the loop.
     - it has vdefs (it alters memory).
     - control stmts in the loop (except for the exit condition).
-   - it is an induction and we have multiple exits.
  
     CHECKME: what other side effects would the vectorizer allow?  */
  
@@ -418,26 +417,6 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
         }
      }
  
-  /* Check if it's a not live PHI and multiple exits.  In this case
-     there will be a usage later on after peeling which is needed for the
-     alternate exit.
-     ???  Unless the PHI was marked live because of early
-     break, which also needs the latch def live and vectorized.  */
-  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
-      && is_a <gphi *> (stmt)
-      && gimple_bb (stmt) == LOOP_VINFO_LOOP (loop_vinfo)->header
-      && ((! VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))
-         && ! *live_p)
-         || STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
-    {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_NOTE, vect_location,
-                        "vec_stmt_relevant_p: PHI forced live for "
-                        "early break.\n");
-      LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info);
-      *live_p = true;
-    }
-
    if (*live_p && *relevant == vect_unused_in_scope
        && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
      {
@@ -12985,17 +12964,12 @@ can_vectorize_live_stmts (vec_info *vinfo,
                           bool vec_stmt_p,
                           stmt_vector_for_cost *cost_vec)
  {
-  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    stmt_vec_info slp_stmt_info;
    unsigned int i;
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
      {
        if (slp_stmt_info
-         && (STMT_VINFO_LIVE_P (slp_stmt_info)
-             || (loop_vinfo
-                 && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
-                 && STMT_VINFO_DEF_TYPE (slp_stmt_info)
-                 == vect_induction_def))
+         && STMT_VINFO_LIVE_P (slp_stmt_info)
           && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
                                            slp_node_instance, i,
                                            vec_stmt_p, cost_vec))
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h

index 5d125afa6bc5166c04dbc68e968ca7207685abb8..0356b129e36f825c6504fca99b0cf65b9c09e325 100644 (file)
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1241,6 +1241,10 @@ public:
       happen.  */
    auto_vec<gimple*> early_break_vuses;
  
+  /* The IV adjustment value for inductions that needs to be materialized
+     inside the relevant exit blocks in order to adjust for early break.  */
+  tree early_break_niters_var;
+
    /* Record statements that are needed to be live for early break vectorization
       but may not have an LC PHI node materialized yet in the exits.  */
    auto_vec<stmt_vec_info> early_break_live_ivs;
@@ -1308,6 +1312,7 @@ public:
    (L)->early_break_live_ivs
  #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
  #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
+#define LOOP_VINFO_EARLY_BRK_NITERS_VAR(L) (L)->early_break_niters_var
  #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
  #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
  #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
@@ -2716,7 +2721,8 @@ extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
  
  /* Nonlinear induction.  */
  extern tree vect_peel_nonlinear_iv_init (gimple_seq*, tree, tree,
-                                        tree, enum vect_induction_op_type);
+                                        tree, enum vect_induction_op_type,
+                                        bool);
  
  /* In tree-vect-slp.cc.  */
  extern void vect_slp_init (void);
author	Tamar Christina <tamar.christina@arm.com>
	Sun, 30 Nov 2025 07:29:50 +0000 (07:29 +0000)
committer	Tamar Christina <tamar.christina@arm.com>
	Sun, 30 Nov 2025 07:32:30 +0000 (07:32 +0000)
gcc/testsuite/gcc.dg/vect/vect-early-break_139.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-early-break_39.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/peel_ind_5.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/peel_ind_6.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/peel_ind_7.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/peel_ind_9.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/pr119351.c		patch \| blob \| history
gcc/tree-vect-loop-manip.cc		patch \| blob \| history
gcc/tree-vect-loop.cc		patch \| blob \| history
gcc/tree-vect-slp.cc		patch \| blob \| history
gcc/tree-vect-stmts.cc		patch \| blob \| history
gcc/tree-vectorizer.h		patch \| blob \| history