[PATCH] sccvn: Handle non-byte aligned offset or size for memset (, 123, ) [PR93945]


Jakub Jelinek
Hi!

The following is the last spot in vn_reference_lookup_3 that didn't allow
non-byte aligned offsets or sizes.  To be precise, it did allow a size that
wasn't a multiple of the byte size, which caused a wrong-code issue on
big-endian, as the pr93945.c testcase shows; for GCC 9 we should instead just
add a && multiple_p (ref->size, BITS_PER_UNIT) check.
For the memset with an SSA_NAME middle argument, a byte-aligned offset is
still required, as we'd otherwise need to rotate the value at runtime.
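
For reference, the constants expected by the new pr93582-9.c test below can
be reproduced with plain shifts and masks.  This is only a sketch of the
arithmetic, not part of the patch, and it assumes the usual bit-field
allocation (LSB-first on little-endian, MSB-first on big-endian, as on x86
resp. powerpc):

/* Sketch only: with every byte set to 0xf4 by the memset, the bit-fields of
   struct S { unsigned int b : 8, c : 13, d : 11; } read back as follows.  */
int
main (void)
{
  unsigned int w = 0xf4f4f4f4u;
  /* Little-endian layout: b = bits 0-7, c = bits 8-20, d = bits 21-31.  */
  if (((w >> 8) & 0x1fff) != 5364 || ((w >> 21) & 0x7ff) != 1959)
    __builtin_abort ();
  /* Big-endian layout: b = bits 31-24, c = bits 23-11, d = bits 10-0.  */
  if (((w >> 11) & 0x1fff) != 7838 || (w & 0x7ff) != 1268)
    __builtin_abort ();
  return 0;
}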

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-02-27  Jakub Jelinek  <[hidden email]>

        PR tree-optimization/93582
        PR tree-optimization/93945
        * tree-ssa-sccvn.c (vn_reference_lookup_3): Handle memset with
        non-zero INTEGER_CST second argument and ref->offset or ref->size
        not a multiple of BITS_PER_UNIT.

        * gcc.dg/tree-ssa/pr93582-9.c: New test.
        * gcc.c-torture/execute/pr93945.c: New test.

--- gcc/tree-ssa-sccvn.c.jj 2020-02-24 12:55:32.619143689 +0100
+++ gcc/tree-ssa-sccvn.c 2020-02-26 13:38:01.899937521 +0100
@@ -2386,7 +2386,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
   vn_reference_t vr = data->vr;
   gimple *def_stmt = SSA_NAME_DEF_STMT (vuse);
   tree base = ao_ref_base (ref);
-  HOST_WIDE_INT offseti, maxsizei;
+  HOST_WIDE_INT offseti = 0, maxsizei, sizei = 0;
   static vec<vn_reference_op_s> lhs_ops;
   ao_ref lhs_ref;
   bool lhs_ref_ok = false;
@@ -2541,9 +2541,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree
       && (integer_zerop (gimple_call_arg (def_stmt, 1))
   || ((TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST
        || (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8)))
-      && CHAR_BIT == 8 && BITS_PER_UNIT == 8
+      && CHAR_BIT == 8
+      && BITS_PER_UNIT == 8
+      && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
       && offset.is_constant (&offseti)
-      && offseti % BITS_PER_UNIT == 0))
+      && ref->size.is_constant (&sizei)
+      && (offseti % BITS_PER_UNIT == 0
+  || TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST)))
       && poly_int_tree_p (gimple_call_arg (def_stmt, 2))
       && (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR
   || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME))
@@ -2604,7 +2608,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
       else
  return (void *)-1;
       tree len = gimple_call_arg (def_stmt, 2);
-      HOST_WIDE_INT leni, offset2i, offseti;
+      HOST_WIDE_INT leni, offset2i;
       /* Sometimes the above trickery is smarter than alias analysis.  Take
          advantage of that.  */
       if (!ranges_maybe_overlap_p (offset, maxsize, offset2,
@@ -2618,7 +2622,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree
   tree val;
   if (integer_zerop (gimple_call_arg (def_stmt, 1)))
     val = build_zero_cst (vr->type);
-  else if (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8))
+  else if (INTEGRAL_TYPE_P (vr->type)
+   && known_eq (ref->size, 8)
+   && offseti % BITS_PER_UNIT == 0)
     {
       gimple_match_op res_op (gimple_match_cond::UNCOND, NOP_EXPR,
       vr->type, gimple_call_arg (def_stmt, 1));
@@ -2630,10 +2636,34 @@ vn_reference_lookup_3 (ao_ref *ref, tree
     }
   else
     {
-      unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type));
+      unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type)) + 1;
+      if (INTEGRAL_TYPE_P (vr->type))
+ buflen = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (vr->type)) + 1;
       unsigned char *buf = XALLOCAVEC (unsigned char, buflen);
       memset (buf, TREE_INT_CST_LOW (gimple_call_arg (def_stmt, 1)),
       buflen);
+      if (BYTES_BIG_ENDIAN)
+ {
+  unsigned int amnt
+    = (((unsigned HOST_WIDE_INT) offseti + sizei)
+       % BITS_PER_UNIT);
+  if (amnt)
+    {
+      shift_bytes_in_array_right (buf, buflen,
+  BITS_PER_UNIT - amnt);
+      buf++;
+      buflen--;
+    }
+ }
+      else if (offseti % BITS_PER_UNIT != 0)
+ {
+  unsigned int amnt
+    = BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) offseti
+       % BITS_PER_UNIT);
+  shift_bytes_in_array_left (buf, buflen, amnt);
+  buf++;
+  buflen--;
+ }
       val = native_interpret_expr (vr->type, buf, buflen);
       if (!val)
  return (void *)-1;
--- gcc/testsuite/gcc.dg/tree-ssa/pr93582-9.c.jj 2020-02-26 13:47:42.246393489 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-9.c 2020-02-26 13:48:42.786502204 +0100
@@ -0,0 +1,23 @@
+/* PR tree-optimization/93582 */
+/* { dg-do compile { target int32 } } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+/* { dg-final { scan-tree-dump "return 5364;" "fre1" { target le } } } */
+/* { dg-final { scan-tree-dump "return 7838;" "fre1" { target be } } } */
+/* { dg-final { scan-tree-dump "return 1959;" "fre1" { target le } } } */
+/* { dg-final { scan-tree-dump "return 1268;" "fre1" { target be } } } */
+
+union U { char a[8]; struct S { unsigned int b : 8, c : 13, d : 11; } e; } u;
+
+__attribute__((noipa)) int
+foo (void)
+{
+  __builtin_memset (&u.a, 0xf4, sizeof (u.a));
+  return u.e.c;
+}
+
+__attribute__((noipa)) int
+baz (void)
+{
+  __builtin_memset (&u.a, 0xf4, sizeof (u.a));
+  return u.e.d;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr93945.c.jj 2020-02-26 13:49:47.165554390 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr93945.c 2020-02-26 12:56:13.659868782 +0100
@@ -0,0 +1,45 @@
+/* PR tree-optimization/93945 */
+
+union U { char a[8]; struct S { unsigned int b : 8, c : 13, d : 11; } e; } u;
+
+__attribute__((noipa)) int
+foo (void)
+{
+  __builtin_memset (&u.a, 0xf4, sizeof (u.a));
+  return u.e.c;
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  asm volatile ("" : : "g" (&u) : "memory");
+  return u.e.c;
+}
+
+__attribute__((noipa)) int
+baz (void)
+{
+  __builtin_memset (&u.a, 0xf4, sizeof (u.a));
+  return u.e.d;
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  asm volatile ("" : : "g" (&u) : "memory");
+  return u.e.d;
+}
+
+int
+main ()
+{
+  int a = foo ();
+  int b = bar ();
+  if (a != b)
+    __builtin_abort ();
+  a = baz ();
+  b = qux ();
+  if (a != b)
+    __builtin_abort ();
+  return 0;
+}

        Jakub


Re: [PATCH] sccvn: Handle non-byte aligned offset or size for memset (, 123, ) [PR93945]

Richard Biener
On Thu, 27 Feb 2020, Jakub Jelinek wrote:

> Hi!
>
> The following is the last spot in vn_reference_lookup_3 that didn't allow
> non-byte aligned offsets or sizes.  To be precise, it did allow a size that
> wasn't a multiple of the byte size, which caused a wrong-code issue on
> big-endian, as the pr93945.c testcase shows; for GCC 9 we should instead just
> add a && multiple_p (ref->size, BITS_PER_UNIT) check.
> For the memset with an SSA_NAME middle argument, a byte-aligned offset is
> still required, as we'd otherwise need to rotate the value at runtime.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.

Thanks,
Richard.

--
Richard Biener <[hidden email]>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

[PATCH] sccvn: Punt on ref->size not multiple of 8 for memset (, 123, ) in 9.x [PR93945]

Jakub Jelinek
Hi!

And here is the corresponding 9.x change, where the patch just punts if
ref->size is not a whole number of bytes, like we already punt if offseti is
not byte aligned.
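
To illustrate the distinction, here is a minimal sketch (not from the patch
or the testsuite; the function names are made up) of what the 9.x branch
still folds after a memset with a non-zero constant versus what it now
leaves alone:

/* Sketch only, not part of the patch; assumes -O2 so FRE runs.  */
union U { char a[8]; struct S { unsigned int b : 8, c : 13, d : 11; } e; } u;

int
whole_byte (void)
{
  __builtin_memset (&u.a, 0xf4, sizeof (u.a));
  return u.a[3];   /* ref->size == 8, byte aligned: still foldable.  */
}

int
sub_byte (void)
{
  __builtin_memset (&u.a, 0xf4, sizeof (u.a));
  return u.e.c;    /* ref->size == 13: 9.x now punts and keeps the load.  */
}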

Tested on x86_64-linux and powerpc64-linux, ok for 9.3?

2020-02-27  Jakub Jelinek  <[hidden email]>

        PR tree-optimization/93945
        * tree-ssa-sccvn.c (vn_reference_lookup_3): For memset with non-zero
        second operand, require ref->size to be a multiple of BITS_PER_UNIT.

        * gcc.c-torture/execute/pr93945.c: New test.

--- gcc/tree-ssa-sccvn.c.jj 2020-01-12 12:17:01.031158921 +0100
+++ gcc/tree-ssa-sccvn.c 2020-02-27 10:55:16.226236453 +0100
@@ -2113,7 +2113,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
        || (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8)))
       && CHAR_BIT == 8 && BITS_PER_UNIT == 8
       && offset.is_constant (&offseti)
-      && offseti % BITS_PER_UNIT == 0))
+      && offseti % BITS_PER_UNIT == 0
+      && multiple_p (ref->size, BITS_PER_UNIT)))
       && poly_int_tree_p (gimple_call_arg (def_stmt, 2))
       && (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR
   || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME))
--- gcc/testsuite/gcc.c-torture/execute/pr93945.c.jj 2020-02-27 10:54:21.234060635 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr93945.c 2020-02-27 10:54:21.234060635 +0100
@@ -0,0 +1,45 @@
+/* PR tree-optimization/93945 */
+
+union U { char a[8]; struct S { unsigned int b : 8, c : 13, d : 11; } e; } u;
+
+__attribute__((noipa)) int
+foo (void)
+{
+  __builtin_memset (&u.a, 0xf4, sizeof (u.a));
+  return u.e.c;
+}
+
+__attribute__((noipa)) int
+bar (void)
+{
+  asm volatile ("" : : "g" (&u) : "memory");
+  return u.e.c;
+}
+
+__attribute__((noipa)) int
+baz (void)
+{
+  __builtin_memset (&u.a, 0xf4, sizeof (u.a));
+  return u.e.d;
+}
+
+__attribute__((noipa)) int
+qux (void)
+{
+  asm volatile ("" : : "g" (&u) : "memory");
+  return u.e.d;
+}
+
+int
+main ()
+{
+  int a = foo ();
+  int b = bar ();
+  if (a != b)
+    __builtin_abort ();
+  a = baz ();
+  b = qux ();
+  if (a != b)
+    __builtin_abort ();
+  return 0;
+}

        Jakub


Re: [PATCH] sccvn: Punt on ref->size not multiple of 8 for memset (, 123, ) in 9.x [PR93945]

Richard Biener
On Thu, 27 Feb 2020, Jakub Jelinek wrote:

> Hi!
>
> And here is the corresponding 9.x change, where the patch just punts if
> ref->size is not a whole number of bytes, like we already punt if offseti is
> not byte aligned.
>
> Tested on x86_64-linux and powerpc64-linux, ok for 9.3?

OK.

Thanks,
Richard.

--
Richard Biener <[hidden email]>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)